X86MCInstLower.cpp source code [llvm_projects/llvm/lib/Target/X86/X86MCInstLower.cpp]

1	//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains code to lower X86 MachineInstrs to their corresponding
10	// MCInst records.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "MCTargetDesc/X86ATTInstPrinter.h"
15	#include "MCTargetDesc/X86BaseInfo.h"
16	#include "MCTargetDesc/X86EncodingOptimization.h"
17	#include "MCTargetDesc/X86InstComments.h"
18	#include "MCTargetDesc/X86MCAsmInfo.h"
19	#include "MCTargetDesc/X86ShuffleDecode.h"
20	#include "MCTargetDesc/X86TargetStreamer.h"
21	#include "X86AsmPrinter.h"
22	#include "X86MachineFunctionInfo.h"
23	#include "X86RegisterInfo.h"
24	#include "X86ShuffleDecodeConstantPool.h"
25	#include "X86Subtarget.h"
26	#include "llvm/ADT/STLExtras.h"
27	#include "llvm/ADT/SmallString.h"
28	#include "llvm/ADT/StringExtras.h"
29	#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
30	#include "llvm/CodeGen/MachineConstantPool.h"
31	#include "llvm/CodeGen/MachineFunction.h"
32	#include "llvm/CodeGen/MachineModuleInfoImpls.h"
33	#include "llvm/CodeGen/MachineOperand.h"
34	#include "llvm/CodeGen/StackMaps.h"
35	#include "llvm/IR/DataLayout.h"
36	#include "llvm/IR/GlobalValue.h"
37	#include "llvm/IR/Mangler.h"
38	#include "llvm/MC/MCAsmInfo.h"
39	#include "llvm/MC/MCCodeEmitter.h"
40	#include "llvm/MC/MCContext.h"
41	#include "llvm/MC/MCExpr.h"
42	#include "llvm/MC/MCFixup.h"
43	#include "llvm/MC/MCInst.h"
44	#include "llvm/MC/MCInstBuilder.h"
45	#include "llvm/MC/MCSection.h"
46	#include "llvm/MC/MCStreamer.h"
47	#include "llvm/MC/MCSymbol.h"
48	#include "llvm/MC/TargetRegistry.h"
49	#include "llvm/Target/TargetLoweringObjectFile.h"
50	#include "llvm/Target/TargetMachine.h"
51	#include "llvm/Transforms/CFGuard.h"
52	#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
53	#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
54	#include <string>
55
56	using namespace llvm;
57
58	static cl::opt<bool> EnableBranchHint("enable-branch-hint",
59	cl::desc ("Enable branch hint."),
60	cl::init(Val: false), cl::Hidden);
61	static cl::opt<unsigned> BranchHintProbabilityThreshold(
62	"branch-hint-probability-threshold",
63	cl::desc ("The probability threshold of enabling branch hint."),
64	cl::init(Val: `50`), cl::Hidden);
65
66	namespace {
67
68	/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
69	class X86MCInstLower {
70	MCContext &Ctx;
71	const MachineFunction &MF;
72	const TargetMachine &TM;
73	const MCAsmInfo &MAI;
74	X86AsmPrinter &AsmPrinter;
75
76	public:
77	X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
78
79	MCOperand LowerMachineOperand(const MachineInstr *MI,
80	const MachineOperand &MO) const;
81	void Lower(const MachineInstr MI, MCInst &OutMI) const*;
82
83	MCSymbol GetSymbolFromOperand(const* MachineOperand &MO) const;
84	MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol Sym) const*;
85
86	private:
87	MachineModuleInfoMachO &getMachOMMI() const;
88	};
89
90	} // end anonymous namespace
91
92	/// A RAII helper which defines a region of instructions which can't have
93	/// padding added between them for correctness.
94	struct NoAutoPaddingScope {
95	MCStreamer &OS;
96	const bool OldAllowAutoPadding;
97	NoAutoPaddingScope(MCStreamer &OS)
98	: OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
99	changeAndComment(b: false);
100	}
101	~NoAutoPaddingScope() { changeAndComment(b: OldAllowAutoPadding); }
102	void changeAndComment(bool b) {
103	if (b == OS.getAllowAutoPadding())
104	return;
105	OS.setAllowAutoPadding(b);
106	if (b)
107	OS.emitRawComment(T: "autopadding");
108	else
109	OS.emitRawComment(T: "noautopadding");
110	}
111	};
112
// Emit a minimal sequence of nops spanning NumBytes bytes.
// Forward declaration only: StackMapShadowTracker::emitShadowPadding below
// calls this before the definition appears later in the file.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);
116
117	void X86AsmPrinter::StackMapShadowTracker::count(const MCInst &Inst,
118	const MCSubtargetInfo &STI,
119	MCCodeEmitter *CodeEmitter) {
120	if (InShadow) {
121	SmallString<`256`> Code;
122	SmallVector<MCFixup, `4`> Fixups;
123	CodeEmitter->encodeInstruction(Inst, CB&: Code, Fixups, STI);
124	CurrentShadowSize += Code.size();
125	if (CurrentShadowSize >= RequiredShadowSize)
126	InShadow = false; // The shadow is big enough. Stop counting.
127	}
128	}
129
130	void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
131	MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
132	if (InShadow && CurrentShadowSize < RequiredShadowSize) {
133	InShadow = false;
134	emitX86Nops(OS&: OutStreamer, NumBytes: RequiredShadowSize - CurrentShadowSize,
135	Subtarget: &MF->getSubtarget<X86Subtarget>());
136	}
137	}
138
139	void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
140	OutStreamer ->emitInstruction(Inst, STI: getSubtargetInfo());
141	SMShadowTracker.count(Inst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get());
142	}
143
144	X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
145	X86AsmPrinter &asmprinter)
146	: Ctx(asmprinter.OutContext), MF(mf), TM(mf.getTarget()),
147	MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
148
149	MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
150	return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
151	}
152
153	/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
154	/// operand to an MCSymbol.
155	MCSymbol X86MCInstLower::GetSymbolFromOperand(const* MachineOperand &MO) const {
156	const Triple &TT = TM.getTargetTriple();
157	if (MO.isGlobal() && TT.isOSBinFormatELF())
158	return AsmPrinter.getSymbolPreferLocal(GV: *MO.getGlobal());
159
160	const DataLayout &DL = MF.getDataLayout();
161	assert((MO.isGlobal() \|\| MO.isSymbol() \|\| MO.isMBB()) &&
162	"Isn't a symbol reference");
163
164	MCSymbol Sym = nullptr*;
165	SmallString<`128`> Name;
166	StringRef Suffix;
167
168	switch (MO.getTargetFlags()) {
169	case X86II::MO_DLLIMPORT:
170	// Handle dllimport linkage.
171	Name += "__imp_";
172	break;
173	case X86II::MO_COFFSTUB:
174	Name += ".refptr.";
175	break;
176	case X86II::MO_DARWIN_NONLAZY:
177	case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
178	Suffix = "$non_lazy_ptr";
179	break;
180	}
181
182	if (!Suffix.empty())
183	Name += DL.getPrivateGlobalPrefix();
184
185	if (MO.isGlobal()) {
186	const GlobalValue *GV = MO.getGlobal();
187	AsmPrinter.getNameWithPrefix(Name, GV);
188	} else if (MO.isSymbol()) {
189	Mangler::getNameWithPrefix(OutName&: Name, GVName: MO.getSymbolName(), DL);
190	} else if (MO.isMBB()) {
191	assert(Suffix.empty());
192	Sym = MO.getMBB()->getSymbol();
193	}
194
195	Name += Suffix;
196	if (!Sym)
197	Sym = Ctx.getOrCreateSymbol(Name);
198
199	// If the target flags on the operand changes the name of the symbol, do that
200	// before we return the symbol.
201	switch (MO.getTargetFlags()) {
202	default:
203	break;
204	case X86II::MO_COFFSTUB: {
205	MachineModuleInfoCOFF &MMICOFF =
206	AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
207	MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
208	if (!StubSym.getPointer()) {
209	assert(MO.isGlobal() && "Extern symbol not handled yet");
210	StubSym = MachineModuleInfoImpl::StubValueTy (
211	AsmPrinter.getSymbol(GV: MO.getGlobal()), true);
212	}
213	break;
214	}
215	case X86II::MO_DARWIN_NONLAZY:
216	case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
217	MachineModuleInfoImpl::StubValueTy &StubSym =
218	getMachOMMI().getGVStubEntry(Sym);
219	if (!StubSym.getPointer()) {
220	assert(MO.isGlobal() && "Extern symbol not handled yet");
221	StubSym = MachineModuleInfoImpl::StubValueTy (
222	AsmPrinter.getSymbol(GV: MO.getGlobal()),
223	!MO.getGlobal()->hasInternalLinkage());
224	}
225	break;
226	}
227	}
228
229	return Sym;
230	}
231
232	MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
233	MCSymbol Sym) const* {
234	// FIXME: We would like an efficient form for this, so we don't have to do a
235	// lot of extra uniquing.
236	const MCExpr Expr = nullptr*;
237	uint16_t Specifier = X86::S_None;
238
239	switch (MO.getTargetFlags()) {
240	default:
241	llvm_unreachable("Unknown target flag on GV operand");
242	case X86II::MO_NO_FLAG: // No flag.
243	// These affect the name of the symbol, not any suffix.
244	case X86II::MO_DARWIN_NONLAZY:
245	case X86II::MO_DLLIMPORT:
246	case X86II::MO_COFFSTUB:
247	break;
248
249	case X86II::MO_TLVP:
250	Specifier = X86::S_TLVP;
251	break;
252	case X86II::MO_TLVP_PIC_BASE:
253	Expr = MCSymbolRefExpr::create(Symbol: Sym, specifier: X86::S_TLVP, Ctx);
254	// Subtract the pic base.
255	Expr = MCBinaryExpr::createSub(
256	LHS: Expr, RHS: MCSymbolRefExpr::create(Symbol: MF.getPICBaseSymbol(), Ctx), Ctx);
257	break;
258	case X86II::MO_SECREL:
259	Specifier = uint16_t(X86::S_COFF_SECREL);
260	break;
261	case X86II::MO_TLSGD:
262	Specifier = X86::S_TLSGD;
263	break;
264	case X86II::MO_TLSLD:
265	Specifier = X86::S_TLSLD;
266	break;
267	case X86II::MO_TLSLDM:
268	Specifier = X86::S_TLSLDM;
269	break;
270	case X86II::MO_GOTTPOFF:
271	Specifier = X86::S_GOTTPOFF;
272	break;
273	case X86II::MO_INDNTPOFF:
274	Specifier = X86::S_INDNTPOFF;
275	break;
276	case X86II::MO_TPOFF:
277	Specifier = X86::S_TPOFF;
278	break;
279	case X86II::MO_DTPOFF:
280	Specifier = X86::S_DTPOFF;
281	break;
282	case X86II::MO_NTPOFF:
283	Specifier = X86::S_NTPOFF;
284	break;
285	case X86II::MO_GOTNTPOFF:
286	Specifier = X86::S_GOTNTPOFF;
287	break;
288	case X86II::MO_GOTPCREL:
289	Specifier = X86::S_GOTPCREL;
290	break;
291	case X86II::MO_GOTPCREL_NORELAX:
292	Specifier = X86::S_GOTPCREL_NORELAX;
293	break;
294	case X86II::MO_GOT:
295	Specifier = X86::S_GOT;
296	break;
297	case X86II::MO_GOTOFF:
298	Specifier = X86::S_GOTOFF;
299	break;
300	case X86II::MO_PLT:
301	Specifier = X86::S_PLT;
302	break;
303	case X86II::MO_ABS8:
304	Specifier = X86::S_ABS8;
305	break;
306	case X86II::MO_PIC_BASE_OFFSET:
307	case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
308	Expr = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
309	// Subtract the pic base.
310	Expr = MCBinaryExpr::createSub(
311	LHS: Expr, RHS: MCSymbolRefExpr::create(Symbol: MF.getPICBaseSymbol(), Ctx), Ctx);
312	if (MO.isJTI()) {
313	assert(MAI.doesSetDirectiveSuppressReloc());
314	// If .set directive is supported, use it to reduce the number of
315	// relocations the assembler will generate for differences between
316	// local labels. This is only safe when the symbols are in the same
317	// section so we are restricting it to jumptable references.
318	MCSymbol *Label = Ctx.createTempSymbol();
319	AsmPrinter.OutStreamer ->emitAssignment(Symbol: Label, Value: Expr);
320	Expr = MCSymbolRefExpr::create(Symbol: Label, Ctx);
321	}
322	break;
323	}
324
325	if (!Expr)
326	Expr = MCSymbolRefExpr::create(Symbol: Sym, specifier: Specifier, Ctx);
327
328	if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
329	Expr = MCBinaryExpr::createAdd(
330	LHS: Expr, RHS: MCConstantExpr::create(Value: MO.getOffset(), Ctx), Ctx);
331	return MCOperand::createExpr(Val: Expr);
332	}
333
334	static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
335	return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
336	}
337
338	MCOperand X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
339	const MachineOperand &MO) const {
340	switch (MO.getType()) {
341	default:
342	MI->print(OS&: errs());
343	llvm_unreachable("unknown operand type");
344	case MachineOperand::MO_Register:
345	// Ignore all implicit register operands.
346	if (MO.isImplicit())
347	return MCOperand ();
348	return MCOperand::createReg(Reg: MO.getReg());
349	case MachineOperand::MO_Immediate:
350	return MCOperand::createImm(Val: MO.getImm());
351	case MachineOperand::MO_MachineBasicBlock:
352	case MachineOperand::MO_GlobalAddress:
353	case MachineOperand::MO_ExternalSymbol:
354	return LowerSymbolOperand(MO, Sym: GetSymbolFromOperand(MO));
355	case MachineOperand::MO_MCSymbol:
356	return LowerSymbolOperand(MO, Sym: MO.getMCSymbol());
357	case MachineOperand::MO_JumpTableIndex:
358	return LowerSymbolOperand(MO, Sym: AsmPrinter.GetJTISymbol(JTID: MO.getIndex()));
359	case MachineOperand::MO_ConstantPoolIndex:
360	return LowerSymbolOperand(MO, Sym: AsmPrinter.GetCPISymbol(CPID: MO.getIndex()));
361	case MachineOperand::MO_BlockAddress:
362	return LowerSymbolOperand(
363	MO, Sym: AsmPrinter.GetBlockAddressSymbol(BA: MO.getBlockAddress()));
364	case MachineOperand::MO_RegisterMask:
365	// Ignore call clobbers.
366	return MCOperand ();
367	}
368	}
369
370	// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
371	// information.
372	static unsigned convertTailJumpOpcode(unsigned Opcode) {
373	switch (Opcode) {
374	case X86::TAILJMPr:
375	Opcode = X86::JMP32r;
376	break;
377	case X86::TAILJMPm:
378	Opcode = X86::JMP32m;
379	break;
380	case X86::TAILJMPr64:
381	Opcode = X86::JMP64r;
382	break;
383	case X86::TAILJMPm64:
384	Opcode = X86::JMP64m;
385	break;
386	case X86::TAILJMPr64_REX:
387	Opcode = X86::JMP64r_REX;
388	break;
389	case X86::TAILJMPm64_REX:
390	Opcode = X86::JMP64m_REX;
391	break;
392	case X86::TAILJMPd:
393	case X86::TAILJMPd64:
394	Opcode = X86::JMP_1;
395	break;
396	case X86::TAILJMPd_CC:
397	case X86::TAILJMPd64_CC:
398	Opcode = X86::JCC_1;
399	break;
400	}
401
402	return Opcode;
403	}
404
405	void X86MCInstLower::Lower(const MachineInstr MI, MCInst &OutMI) const* {
406	OutMI.setOpcode(MI->getOpcode());
407
408	for (const MachineOperand &MO : MI->operands())
409	if (auto Op = LowerMachineOperand(MI, MO); Op.isValid())
410	OutMI.addOperand(Op);
411
412	bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
413	if (X86::optimizeInstFromVEX3ToVEX2(MI&: OutMI, Desc: MI->getDesc()) \|\|
414	X86::optimizeShiftRotateWithImmediateOne(MI&: OutMI) \|\|
415	X86::optimizeVPCMPWithImmediateOneOrSix(MI&: OutMI) \|\|
416	X86::optimizeMOVSX(MI&: OutMI) \|\| X86::optimizeINCDEC(MI&: OutMI, In64BitMode) \|\|
417	X86::optimizeMOV(MI&: OutMI, In64BitMode) \|\|
418	X86::optimizeToFixedRegisterOrShortImmediateForm(MI&: OutMI))
419	return;
420
421	// Handle a few special cases to eliminate operand modifiers.
422	switch (OutMI.getOpcode()) {
423	case X86::LEA64_32r:
424	case X86::LEA64r:
425	case X86::LEA16r:
426	case X86::LEA32r:
427	// LEA should have a segment register, but it must be empty.
428	assert(OutMI.getNumOperands() == `1` + X86::AddrNumOperands &&
429	"Unexpected # of LEA operands");
430	assert(OutMI.getOperand(`1` + X86::AddrSegmentReg).getReg() == `0` &&
431	"LEA has segment specified!");
432	break;
433	case X86::MULX32Hrr:
434	case X86::MULX32Hrm:
435	case X86::MULX64Hrr:
436	case X86::MULX64Hrm: {
437	// Turn into regular MULX by duplicating the destination.
438	unsigned NewOpc;
439	switch (OutMI.getOpcode()) {
440	default: llvm_unreachable("Invalid opcode");
441	case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
442	case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
443	case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
444	case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
445	}
446	OutMI.setOpcode(NewOpc);
447	// Duplicate the destination.
448	MCRegister DestReg = OutMI.getOperand(i: `0`).getReg();
449	OutMI.insert(I: OutMI.begin(), Op: MCOperand::createReg(Reg: DestReg));
450	break;
451	}
452	// CALL64r, CALL64pcrel32 - These instructions used to have
453	// register inputs modeled as normal uses instead of implicit uses. As such,
454	// they we used to truncate off all but the first operand (the callee). This
455	// issue seems to have been fixed at some point. This assert verifies that.
456	case X86::CALL64r:
457	case X86::CALL64pcrel32:
458	assert(OutMI.getNumOperands() == `1` && "Unexpected number of operands!");
459	break;
460	case X86::EH_RETURN:
461	case X86::EH_RETURN64: {
462	OutMI = MCInst ();
463	OutMI.setOpcode(getRetOpcode(Subtarget: AsmPrinter.getSubtarget()));
464	break;
465	}
466	case X86::CLEANUPRET: {
467	// Replace CLEANUPRET with the appropriate RET.
468	OutMI = MCInst ();
469	OutMI.setOpcode(getRetOpcode(Subtarget: AsmPrinter.getSubtarget()));
470	break;
471	}
472	case X86::CATCHRET: {
473	// Replace CATCHRET with the appropriate RET.
474	const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
475	unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
476	OutMI = MCInst ();
477	OutMI.setOpcode(getRetOpcode(Subtarget));
478	OutMI.addOperand(Op: MCOperand::createReg(Reg: ReturnReg));
479	break;
480	}
481	// TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
482	// instruction.
483	case X86::TAILJMPr:
484	case X86::TAILJMPr64:
485	case X86::TAILJMPr64_REX:
486	case X86::TAILJMPd:
487	case X86::TAILJMPd64:
488	assert(OutMI.getNumOperands() == `1` && "Unexpected number of operands!");
489	OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode()));
490	break;
491	case X86::TAILJMPd_CC:
492	case X86::TAILJMPd64_CC:
493	assert(OutMI.getNumOperands() == `2` && "Unexpected number of operands!");
494	OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode()));
495	break;
496	case X86::TAILJMPm:
497	case X86::TAILJMPm64:
498	case X86::TAILJMPm64_REX:
499	assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
500	"Unexpected number of operands!");
501	OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode()));
502	break;
503	case X86::MASKMOVDQU:
504	case X86::VMASKMOVDQU:
505	if (In64BitMode)
506	OutMI.setFlags(X86::IP_HAS_AD_SIZE);
507	break;
508	case X86::BSF16rm:
509	case X86::BSF16rr:
510	case X86::BSF32rm:
511	case X86::BSF32rr:
512	case X86::BSF64rm:
513	case X86::BSF64rr: {
514	// Add an REP prefix to BSF instructions so that new processors can
515	// recognize as TZCNT, which has better performance than BSF.
516	// BSF and TZCNT have different interpretations on ZF bit. So make sure
517	// it won't be used later.
518	const MachineOperand *FlagDef =
519	MI->findRegisterDefOperand(Reg: X86::EFLAGS, /TRI=/nullptr);
520	if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
521	OutMI.setFlags(X86::IP_HAS_REPEAT);
522	break;
523	}
524	default:
525	break;
526	}
527	}
528
529	void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
530	const MachineInstr &MI) {
531	NoAutoPaddingScope NoPadScope(*OutStreamer);
532	bool Is64Bits = getSubtarget().is64Bit();
533	bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64();
534	MCContext &Ctx = OutStreamer ->getContext();
535
536	X86::Specifier Specifier;
537	switch (MI.getOpcode()) {
538	case X86::TLS_addr32:
539	case X86::TLS_addr64:
540	case X86::TLS_addrX32:
541	Specifier = X86::S_TLSGD;
542	break;
543	case X86::TLS_base_addr32:
544	Specifier = X86::S_TLSLDM;
545	break;
546	case X86::TLS_base_addr64:
547	case X86::TLS_base_addrX32:
548	Specifier = X86::S_TLSLD;
549	break;
550	case X86::TLS_desc32:
551	case X86::TLS_desc64:
552	Specifier = X86::S_TLSDESC;
553	break;
554	default:
555	llvm_unreachable("unexpected opcode");
556	}
557
558	const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
559	Symbol: MCInstLowering.GetSymbolFromOperand(MO: MI.getOperand(i: `3`)), specifier: Specifier, Ctx);
560
561	// Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD
562	// code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
563	// attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
564	// only using GOT when GOTPCRELX is enabled.
565	// TODO Delete the workaround when rustc no longer relies on the hack
566	bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
567	Ctx.getTargetOptions()->X86RelaxRelocations;
568
569	if (Specifier == X86::S_TLSDESC) {
570	const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
571	Symbol: MCInstLowering.GetSymbolFromOperand(MO: MI.getOperand(i: `3`)), specifier: X86::S_TLSCALL,
572	Ctx);
573	EmitAndCountInstruction(
574	Inst&: MCInstBuilder (Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
575	.addReg(Reg: Is64BitsLP64 ? X86::RAX : X86::EAX)
576	.addReg(Reg: Is64Bits ? X86::RIP : X86::EBX)
577	.addImm(Val: `1`)
578	.addReg(Reg: `0`)
579	.addExpr(Val: Sym)
580	.addReg(Reg: `0`));
581	EmitAndCountInstruction(
582	Inst&: MCInstBuilder (Is64Bits ? X86::CALL64m : X86::CALL32m)
583	.addReg(Reg: Is64BitsLP64 ? X86::RAX : X86::EAX)
584	.addImm(Val: `1`)
585	.addReg(Reg: `0`)
586	.addExpr(Val: Expr)
587	.addReg(Reg: `0`));
588	} else if (Is64Bits) {
589	bool NeedsPadding = Specifier == X86::S_TLSGD;
590	if (NeedsPadding && Is64BitsLP64)
591	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::DATA16_PREFIX));
592	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::LEA64r)
593	.addReg(Reg: X86::RDI)
594	.addReg(Reg: X86::RIP)
595	.addImm(Val: `1`)
596	.addReg(Reg: `0`)
597	.addExpr(Val: Sym)
598	.addReg(Reg: `0`));
599	const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "__tls_get_addr");
600	if (NeedsPadding) {
601	if (!UseGot)
602	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::DATA16_PREFIX));
603	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::DATA16_PREFIX));
604	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::REX64_PREFIX));
605	}
606	if (UseGot) {
607	const MCExpr *Expr =
608	MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_GOTPCREL, Ctx);
609	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL64m)
610	.addReg(Reg: X86::RIP)
611	.addImm(Val: `1`)
612	.addReg(Reg: `0`)
613	.addExpr(Val: Expr)
614	.addReg(Reg: `0`));
615	} else {
616	EmitAndCountInstruction(
617	Inst&: MCInstBuilder (X86::CALL64pcrel32)
618	.addExpr(Val: MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_PLT, Ctx)));
619	}
620	} else {
621	if (Specifier == X86::S_TLSGD && !UseGot) {
622	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::LEA32r)
623	.addReg(Reg: X86::EAX)
624	.addReg(Reg: `0`)
625	.addImm(Val: `1`)
626	.addReg(Reg: X86::EBX)
627	.addExpr(Val: Sym)
628	.addReg(Reg: `0`));
629	} else {
630	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::LEA32r)
631	.addReg(Reg: X86::EAX)
632	.addReg(Reg: X86::EBX)
633	.addImm(Val: `1`)
634	.addReg(Reg: `0`)
635	.addExpr(Val: Sym)
636	.addReg(Reg: `0`));
637	}
638
639	const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "___tls_get_addr");
640	if (UseGot) {
641	const MCExpr *Expr = MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_GOT, Ctx);
642	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL32m)
643	.addReg(Reg: X86::EBX)
644	.addImm(Val: `1`)
645	.addReg(Reg: `0`)
646	.addExpr(Val: Expr)
647	.addReg(Reg: `0`));
648	} else {
649	EmitAndCountInstruction(
650	Inst&: MCInstBuilder (X86::CALLpcrel32)
651	.addExpr(Val: MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_PLT, Ctx)));
652	}
653	}
654	}
655
656	/// Emit the largest nop instruction smaller than or equal to \p NumBytes
657	/// bytes. Return the size of nop emitted.
658	static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
659	const X86Subtarget *Subtarget) {
660	// Determine the longest nop which can be efficiently decoded for the given
661	// target cpu. 15-bytes is the longest single NOP instruction, but some
662	// platforms can't decode the longest forms efficiently.
663	unsigned MaxNopLength = `1`;
664	if (Subtarget->is64Bit()) {
665	// FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
666	// IndexReg/BaseReg below need to be updated.
667	if (Subtarget->hasFeature(Feature: X86::TuningFast7ByteNOP))
668	MaxNopLength = `7`;
669	else if (Subtarget->hasFeature(Feature: X86::TuningFast15ByteNOP))
670	MaxNopLength = `15`;
671	else if (Subtarget->hasFeature(Feature: X86::TuningFast11ByteNOP))
672	MaxNopLength = `11`;
673	else
674	MaxNopLength = `10`;
675	} if (Subtarget->is32Bit())
676	MaxNopLength = `2`;
677
678	// Cap a single nop emission at the profitable value for the target
679	NumBytes = std::min(a: NumBytes, b: MaxNopLength);
680
681	unsigned NopSize;
682	unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
683	IndexReg = Displacement = SegmentReg = `0`;
684	BaseReg = X86::RAX;
685	ScaleVal = `1`;
686	switch (NumBytes) {
687	case `0`:
688	llvm_unreachable("Zero nops?");
689	break;
690	case `1`:
691	NopSize = `1`;
692	Opc = X86::NOOP;
693	break;
694	case `2`:
695	NopSize = `2`;
696	Opc = X86::XCHG16ar;
697	break;
698	case `3`:
699	NopSize = `3`;
700	Opc = X86::NOOPL;
701	break;
702	case `4`:
703	NopSize = `4`;
704	Opc = X86::NOOPL;
705	Displacement = `8`;
706	break;
707	case `5`:
708	NopSize = `5`;
709	Opc = X86::NOOPL;
710	Displacement = `8`;
711	IndexReg = X86::RAX;
712	break;
713	case `6`:
714	NopSize = `6`;
715	Opc = X86::NOOPW;
716	Displacement = `8`;
717	IndexReg = X86::RAX;
718	break;
719	case `7`:
720	NopSize = `7`;
721	Opc = X86::NOOPL;
722	Displacement = `512`;
723	break;
724	case `8`:
725	NopSize = `8`;
726	Opc = X86::NOOPL;
727	Displacement = `512`;
728	IndexReg = X86::RAX;
729	break;
730	case `9`:
731	NopSize = `9`;
732	Opc = X86::NOOPW;
733	Displacement = `512`;
734	IndexReg = X86::RAX;
735	break;
736	default:
737	NopSize = `10`;
738	Opc = X86::NOOPW;
739	Displacement = `512`;
740	IndexReg = X86::RAX;
741	SegmentReg = X86::CS;
742	break;
743	}
744
745	unsigned NumPrefixes = std::min(a: NumBytes - NopSize, b: `5U`);
746	NopSize += NumPrefixes;
747	for (unsigned i = `0`; i != NumPrefixes; ++i)
748	OS.emitBytes(Data: "\x66");
749
750	switch (Opc) {
751	default: llvm_unreachable("Unexpected opcode");
752	case X86::NOOP:
753	OS.emitInstruction(Inst: MCInstBuilder (Opc), STI: *Subtarget);
754	break;
755	case X86::XCHG16ar:
756	OS.emitInstruction(Inst: MCInstBuilder (Opc).addReg(Reg: X86::AX).addReg(Reg: X86::AX),
757	STI: *Subtarget);
758	break;
759	case X86::NOOPL:
760	case X86::NOOPW:
761	OS.emitInstruction(Inst: MCInstBuilder (Opc)
762	.addReg(Reg: BaseReg)
763	.addImm(Val: ScaleVal)
764	.addReg(Reg: IndexReg)
765	.addImm(Val: Displacement)
766	.addReg(Reg: SegmentReg),
767	STI: *Subtarget);
768	break;
769	}
770	assert(NopSize <= NumBytes && "We overemitted?");
771	return NopSize;
772	}
773
774	/// Emit the optimal amount of multi-byte nops on X86.
775	static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
776	const X86Subtarget *Subtarget) {
777	unsigned NopsToEmit = NumBytes;
778	(void)NopsToEmit;
779	while (NumBytes) {
780	NumBytes -= emitNop(OS, NumBytes, Subtarget);
781	assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
782	}
783	}
784
785	void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
786	X86MCInstLower &MCIL) {
787	assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
788
789	NoAutoPaddingScope NoPadScope(*OutStreamer);
790
791	StatepointOpers SOpers(&MI);
792	if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
793	emitX86Nops(OS&: *OutStreamer, NumBytes: PatchBytes, Subtarget);
794	} else {
795	// Lower call target and choose correct opcode
796	const MachineOperand &CallTarget = SOpers.getCallTarget();
797	MCOperand CallTargetMCOp;
798	unsigned CallOpcode;
799	switch (CallTarget.getType()) {
800	case MachineOperand::MO_GlobalAddress:
801	case MachineOperand::MO_ExternalSymbol:
802	CallTargetMCOp = MCIL.LowerSymbolOperand(
803	MO: CallTarget, Sym: MCIL.GetSymbolFromOperand(MO: CallTarget));
804	CallOpcode = X86::CALL64pcrel32;
805	// Currently, we only support relative addressing with statepoints.
806	// Otherwise, we'll need a scratch register to hold the target
807	// address. You'll fail asserts during load & relocation if this
808	// symbol is to far away. (TODO: support non-relative addressing)
809	break;
810	case MachineOperand::MO_Immediate:
811	CallTargetMCOp = MCOperand::createImm(Val: CallTarget.getImm());
812	CallOpcode = X86::CALL64pcrel32;
813	// Currently, we only support relative addressing with statepoints.
814	// Otherwise, we'll need a scratch register to hold the target
815	// immediate. You'll fail asserts during load & relocation if this
816	// address is to far away. (TODO: support non-relative addressing)
817	break;
818	case MachineOperand::MO_Register:
819	// FIXME: Add retpoline support and remove this.
820	if (Subtarget->useIndirectThunkCalls())
821	report_fatal_error(reason: "Lowering register statepoints with thunks not "
822	"yet implemented.");
823	CallTargetMCOp = MCOperand::createReg(Reg: CallTarget.getReg());
824	CallOpcode = X86::CALL64r;
825	break;
826	default:
827	llvm_unreachable("Unsupported operand type in statepoint call target");
828	break;
829	}
830
831	// Emit call
832	MCInst CallInst;
833	CallInst.setOpcode(CallOpcode);
834	CallInst.addOperand(Op: CallTargetMCOp);
835	OutStreamer ->emitInstruction(Inst: CallInst, STI: getSubtargetInfo());
836	}
837
838	// Record our statepoint node in the same section used by STACKMAP
839	// and PATCHPOINT
840	auto &Ctx = OutStreamer ->getContext();
841	MCSymbol *MILabel = Ctx.createTempSymbol();
842	OutStreamer ->emitLabel(Symbol: MILabel);
843	SM.recordStatepoint(L: *MILabel, MI);
844	}
845
846	void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
847	X86MCInstLower &MCIL) {
848	// FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
849	// <opcode>, <operands>
850
851	NoAutoPaddingScope NoPadScope(*OutStreamer);
852
853	Register DefRegister = FaultingMI.getOperand(i: `0`).getReg();
854	FaultMaps::FaultKind FK =
855	static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(i: `1`).getImm());
856	MCSymbol *HandlerLabel = FaultingMI.getOperand(i: `2`).getMBB()->getSymbol();
857	unsigned Opcode = FaultingMI.getOperand(i: `3`).getImm();
858	unsigned OperandsBeginIdx = `4`;
859
860	auto &Ctx = OutStreamer ->getContext();
861	MCSymbol *FaultingLabel = Ctx.createTempSymbol();
862	OutStreamer ->emitLabel(Symbol: FaultingLabel);
863
864	assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
865	FM.recordFaultingOp(FaultTy: FK, FaultingLabel, HandlerLabel);
866
867	MCInst MI;
868	MI.setOpcode(Opcode);
869
870	if (DefRegister != X86::NoRegister)
871	MI.addOperand(Op: MCOperand::createReg(Reg: DefRegister));
872
873	for (const MachineOperand &MO :
874	llvm::drop_begin(RangeOrContainer: FaultingMI.operands(), N: OperandsBeginIdx))
875	if (auto Op = MCIL.LowerMachineOperand(MI: &FaultingMI, MO); Op.isValid())
876	MI.addOperand(Op);
877
878	OutStreamer ->AddComment(T: "on-fault: " + HandlerLabel->getName());
879	OutStreamer ->emitInstruction(Inst: MI, STI: getSubtargetInfo());
880	}
881
882	void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
883	X86MCInstLower &MCIL) {
884	bool Is64Bits = Subtarget->is64Bit();
885	MCContext &Ctx = OutStreamer ->getContext();
886	MCSymbol *fentry = Ctx.getOrCreateSymbol(Name: "__fentry__");
887	const MCSymbolRefExpr *Op = MCSymbolRefExpr::create(Symbol: fentry, Ctx);
888
889	EmitAndCountInstruction(
890	Inst&: MCInstBuilder (Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
891	.addExpr(Val: Op));
892	}
893
894	void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
895	assert(std::next(MI.getIterator())->isCall() &&
896	"KCFI_CHECK not followed by a call instruction");
897
898	// Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
899	// returns a 1-byte X86::NOOP, which means the offset is the same in
900	// bytes. This assumes that patchable-function-prefix is the same for all
901	// functions.
902	const MachineFunction &MF = *MI.getMF();
903	int64_t PrefixNops = `0`;
904	(void)MF.getFunction()
905	.getFnAttribute(Kind: "patchable-function-prefix")
906	.getValueAsString()
907	.getAsInteger(Radix: `10`, Result&: PrefixNops);
908
909	// KCFI allows indirect calls to any location that's preceded by a valid
910	// type identifier. To avoid encoding the full constant into an instruction,
911	// and thus emitting potential call target gadgets at each indirect call
912	// site, load a negated constant to a register and compare that to the
913	// expected value at the call target.
914	const Register AddrReg = MI.getOperand(i: `0`).getReg();
915	const uint32_t Type = MI.getOperand(i: `1`).getImm();
916	// The check is immediately before the call. If the call target is in R10,
917	// we can clobber R11 for the check instead.
918	unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
919	EmitAndCountInstruction(
920	Inst&: MCInstBuilder (X86::MOV32ri).addReg(Reg: TempReg).addImm(Val: -MaskKCFIType(Value: Type)));
921	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::ADD32rm)
922	.addReg(Reg: X86::NoRegister)
923	.addReg(Reg: TempReg)
924	.addReg(Reg: AddrReg)
925	.addImm(Val: `1`)
926	.addReg(Reg: X86::NoRegister)
927	.addImm(Val: -(PrefixNops + `4`))
928	.addReg(Reg: X86::NoRegister));
929
930	MCSymbol *Pass = OutContext.createTempSymbol();
931	EmitAndCountInstruction(
932	Inst&: MCInstBuilder (X86::JCC_1)
933	.addExpr(Val: MCSymbolRefExpr::create(Symbol: Pass, Ctx&: OutContext))
934	.addImm(Val: X86::COND_E));
935
936	MCSymbol *Trap = OutContext.createTempSymbol();
937	OutStreamer ->emitLabel(Symbol: Trap);
938	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::TRAP));
939	emitKCFITrapEntry(MF, Symbol: Trap);
940	OutStreamer ->emitLabel(Symbol: Pass);
941	}
942
943	void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
944	// FIXME: Make this work on non-ELF.
945	if (!TM.getTargetTriple().isOSBinFormatELF()) {
946	report_fatal_error(reason: "llvm.asan.check.memaccess only supported on ELF");
947	return;
948	}
949
950	const auto &Reg = MI.getOperand(i: `0`).getReg();
951	ASanAccessInfo AccessInfo(MI.getOperand(i: `1`).getImm());
952
953	uint64_t ShadowBase;
954	int MappingScale;
955	bool OrShadowOffset;
956	getAddressSanitizerParams(TargetTriple: TM.getTargetTriple(), LongSize: `64`, IsKasan: AccessInfo.CompileKernel,
957	ShadowBase: &ShadowBase, MappingScale: &MappingScale, OrShadowOffset: &OrShadowOffset);
958
959	StringRef Name = AccessInfo.IsWrite ? "store" : "load";
960	StringRef Op = OrShadowOffset ? "or" : "add";
961	std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
962	Twine (`1ULL` << AccessInfo.AccessSizeIndex) + "_" +
963	TM.getMCRegisterInfo()->getName(RegNo: Reg.asMCReg()))
964	.str();
965	if (OrShadowOffset)
966	report_fatal_error(
967	reason: "OrShadowOffset is not supported with optimized callbacks");
968
969	EmitAndCountInstruction(
970	Inst&: MCInstBuilder (X86::CALL64pcrel32)
971	.addExpr(Val: MCSymbolRefExpr::create(
972	Symbol: OutContext.getOrCreateSymbol(Name: SymName), Ctx&: OutContext)));
973	}
974
975	void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
976	X86MCInstLower &MCIL) {
977	// PATCHABLE_OP minsize
978
979	NoAutoPaddingScope NoPadScope(*OutStreamer);
980
981	auto NextMI = std::find_if(first: std::next(x: MI.getIterator()),
982	last: MI.getParent()->end().getInstrIterator(),
983	pred: [](auto &II) { return !II.isMetaInstruction(); });
984
985	SmallString<`256`> Code;
986	unsigned MinSize = MI.getOperand(i: `0`).getImm();
987
988	if (NextMI != MI.getParent()->end() && !NextMI ->isInlineAsm()) {
989	// Lower the next MachineInstr to find its byte size.
990	// If the next instruction is inline assembly, we skip lowering it for now,
991	// and assume we should always generate NOPs.
992	MCInst MCI;
993	MCIL.Lower(MI: &*NextMI, OutMI&: MCI);
994
995	SmallVector<MCFixup, `4`> Fixups;
996	CodeEmitter ->encodeInstruction(Inst: MCI, CB&: Code, Fixups, STI: getSubtargetInfo());
997	}
998
999	if (Code.size() < MinSize) {
1000	if (MinSize == `2` && Subtarget->is32Bit() &&
1001	Subtarget->isTargetWindowsMSVC() &&
1002	(Subtarget->getCPU().empty() \|\| Subtarget->getCPU() == "pentium3")) {
1003	// For compatibility reasons, when targetting MSVC, it is important to
1004	// generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1005	// rely specifically on this pattern to be able to patch a function.
1006	// This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1007	OutStreamer ->emitInstruction(
1008	Inst: MCInstBuilder (X86::MOV32rr_REV).addReg(Reg: X86::EDI).addReg(Reg: X86::EDI),
1009	STI: *Subtarget);
1010	} else {
1011	unsigned NopSize = emitNop(OS&: *OutStreamer, NumBytes: MinSize, Subtarget);
1012	assert(NopSize == MinSize && "Could not implement MinSize!");
1013	(void)NopSize;
1014	}
1015	}
1016	}
1017
1018	// Lower a stackmap of the form:
1019	// <id>, <shadowBytes>, ...
1020	void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1021	SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo());
1022
1023	auto &Ctx = OutStreamer ->getContext();
1024	MCSymbol *MILabel = Ctx.createTempSymbol();
1025	OutStreamer ->emitLabel(Symbol: MILabel);
1026
1027	SM.recordStackMap(L: *MILabel, MI);
1028	unsigned NumShadowBytes = MI.getOperand(i: `1`).getImm();
1029	SMShadowTracker.reset(RequiredSize: NumShadowBytes);
1030	}
1031
1032	// Lower a patchpoint of the form:
1033	// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1034	void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1035	X86MCInstLower &MCIL) {
1036	assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1037
1038	SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo());
1039
1040	NoAutoPaddingScope NoPadScope(*OutStreamer);
1041
1042	auto &Ctx = OutStreamer ->getContext();
1043	MCSymbol *MILabel = Ctx.createTempSymbol();
1044	OutStreamer ->emitLabel(Symbol: MILabel);
1045	SM.recordPatchPoint(L: *MILabel, MI);
1046
1047	PatchPointOpers opers(&MI);
1048	unsigned ScratchIdx = opers.getNextScratchIdx();
1049	unsigned EncodedBytes = `0`;
1050	const MachineOperand &CalleeMO = opers.getCallTarget();
1051
1052	// Check for null target. If target is non-null (i.e. is non-zero or is
1053	// symbolic) then emit a call.
1054	if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1055	MCOperand CalleeMCOp;
1056	switch (CalleeMO.getType()) {
1057	default:
1058	/// FIXME: Add a verifier check for bad callee types.
1059	llvm_unreachable("Unrecognized callee operand type.");
1060	case MachineOperand::MO_Immediate:
1061	if (CalleeMO.getImm())
1062	CalleeMCOp = MCOperand::createImm(Val: CalleeMO.getImm());
1063	break;
1064	case MachineOperand::MO_ExternalSymbol:
1065	case MachineOperand::MO_GlobalAddress:
1066	CalleeMCOp = MCIL.LowerSymbolOperand(MO: CalleeMO,
1067	Sym: MCIL.GetSymbolFromOperand(MO: CalleeMO));
1068	break;
1069	}
1070
1071	// Emit MOV to materialize the target address and the CALL to target.
1072	// This is encoded with 12-13 bytes, depending on which register is used.
1073	Register ScratchReg = MI.getOperand(i: ScratchIdx).getReg();
1074	if (X86II::isX86_64ExtendedReg(Reg: ScratchReg))
1075	EncodedBytes = `13`;
1076	else
1077	EncodedBytes = `12`;
1078
1079	EmitAndCountInstruction(
1080	Inst&: MCInstBuilder (X86::MOV64ri).addReg(Reg: ScratchReg).addOperand(Op: CalleeMCOp));
1081	// FIXME: Add retpoline support and remove this.
1082	if (Subtarget->useIndirectThunkCalls())
1083	report_fatal_error(
1084	reason: "Lowering patchpoint with thunks not yet implemented.");
1085	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL64r).addReg(Reg: ScratchReg));
1086	}
1087
1088	// Emit padding.
1089	unsigned NumBytes = opers.getNumPatchBytes();
1090	assert(NumBytes >= EncodedBytes &&
1091	"Patchpoint can't request size less than the length of a call.");
1092
1093	emitX86Nops(OS&: *OutStreamer, NumBytes: NumBytes - EncodedBytes, Subtarget);
1094	}
1095
1096	void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1097	X86MCInstLower &MCIL) {
1098	assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1099
1100	NoAutoPaddingScope NoPadScope(*OutStreamer);
1101
1102	// We want to emit the following pattern, which follows the x86 calling
1103	// convention to prepare for the trampoline call to be patched in.
1104	//
1105	// .p2align 1, ...
1106	// .Lxray_event_sled_N:
1107	// jmp +N // jump across the instrumentation sled
1108	// ... // set up arguments in register
1109	// callq __xray_CustomEvent@plt // force dependency to symbol
1110	// ...
1111	// <jump here>
1112	//
1113	// After patching, it would look something like:
1114	//
1115	// nopw (2-byte nop)
1116	// ...
1117	// callq __xrayCustomEvent // already lowered
1118	// ...
1119	//
1120	// ---
1121	// First we emit the label and the jump.
1122	auto CurSled = OutContext.createTempSymbol(Name: "xray_event_sled_", AlwaysAddSuffix: true);
1123	OutStreamer ->AddComment(T: "# XRay Custom Event Log");
1124	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1125	OutStreamer ->emitLabel(Symbol: CurSled);
1126
1127	// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1128	// an operand (computed as an offset from the jmp instruction).
1129	// FIXME: Find another less hacky way do force the relative jump.
1130	OutStreamer ->emitBinaryData(Data: "\xeb\x0f");
1131
1132	// The default C calling convention will place two arguments into %rcx and
1133	// %rdx -- so we only work with those.
1134	const Register DestRegs[] = {X86::RDI, X86::RSI};
1135	bool UsedMask[] = {false, false};
1136	// Filled out in loop.
1137	Register SrcRegs[] = {`0`, `0`};
1138
1139	// Then we put the operands in the %rdi and %rsi registers. We spill the
1140	// values in the register before we clobber them, and mark them as used in
1141	// UsedMask. In case the arguments are already in the correct register, we use
1142	// emit nops appropriately sized to keep the sled the same size in every
1143	// situation.
1144	for (unsigned I = `0`; I < MI.getNumOperands(); ++I)
1145	if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I));
1146	Op.isValid()) {
1147	assert(Op.isReg() && "Only support arguments in registers");
1148	SrcRegs[I] = getX86SubSuperRegister(Reg: Op.getReg(), Size: `64`);
1149	assert(SrcRegs[I].isValid() && "Invalid operand");
1150	if (SrcRegs[I] != DestRegs[I]) {
1151	UsedMask[I] = true;
1152	EmitAndCountInstruction(
1153	Inst&: MCInstBuilder (X86::PUSH64r).addReg(Reg: DestRegs[I]));
1154	} else {
1155	emitX86Nops(OS&: *OutStreamer, NumBytes: `4`, Subtarget);
1156	}
1157	}
1158
1159	// Now that the register values are stashed, mov arguments into place.
1160	// FIXME: This doesn't work if one of the later SrcRegs is equal to an
1161	// earlier DestReg. We will have already overwritten over the register before
1162	// we can copy from it.
1163	for (unsigned I = `0`; I < MI.getNumOperands(); ++I)
1164	if (SrcRegs[I] != DestRegs[I])
1165	EmitAndCountInstruction(
1166	Inst&: MCInstBuilder (X86::MOV64rr).addReg(Reg: DestRegs[I]).addReg(Reg: SrcRegs[I]));
1167
1168	// We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1169	// name of the trampoline to be implemented by the XRay runtime.
1170	auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_CustomEvent");
1171	MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym);
1172	if (isPositionIndependent())
1173	TOp.setTargetFlags(X86II::MO_PLT);
1174
1175	// Emit the call instruction.
1176	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL64pcrel32)
1177	.addOperand(Op: MCIL.LowerSymbolOperand(MO: TOp, Sym: TSym)));
1178
1179	// Restore caller-saved and used registers.
1180	for (unsigned I = sizeof UsedMask; I-- > `0`;)
1181	if (UsedMask[I])
1182	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::POP64r).addReg(Reg: DestRegs[I]));
1183	else
1184	emitX86Nops(OS&: *OutStreamer, NumBytes: `1`, Subtarget);
1185
1186	OutStreamer ->AddComment(T: "xray custom event end.");
1187
1188	// Record the sled version. Version 0 of this sled was spelled differently, so
1189	// we let the runtime handle the different offsets we're using. Version 2
1190	// changed the absolute address to a PC-relative address.
1191	recordSled(Sled: CurSled, MI, Kind: SledKind::CUSTOM_EVENT, Version: `2`);
1192	}
1193
1194	void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1195	X86MCInstLower &MCIL) {
1196	assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1197
1198	NoAutoPaddingScope NoPadScope(*OutStreamer);
1199
1200	// We want to emit the following pattern, which follows the x86 calling
1201	// convention to prepare for the trampoline call to be patched in.
1202	//
1203	// .p2align 1, ...
1204	// .Lxray_event_sled_N:
1205	// jmp +N // jump across the instrumentation sled
1206	// ... // set up arguments in register
1207	// callq __xray_TypedEvent@plt // force dependency to symbol
1208	// ...
1209	// <jump here>
1210	//
1211	// After patching, it would look something like:
1212	//
1213	// nopw (2-byte nop)
1214	// ...
1215	// callq __xrayTypedEvent // already lowered
1216	// ...
1217	//
1218	// ---
1219	// First we emit the label and the jump.
1220	auto CurSled = OutContext.createTempSymbol(Name: "xray_typed_event_sled_", AlwaysAddSuffix: true);
1221	OutStreamer ->AddComment(T: "# XRay Typed Event Log");
1222	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1223	OutStreamer ->emitLabel(Symbol: CurSled);
1224
1225	// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1226	// an operand (computed as an offset from the jmp instruction).
1227	// FIXME: Find another less hacky way do force the relative jump.
1228	OutStreamer ->emitBinaryData(Data: "\xeb\x14");
1229
1230	// An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1231	// so we'll work with those. Or we may be called via SystemV, in which case
1232	// we don't have to do any translation.
1233	const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1234	bool UsedMask[] = {false, false, false};
1235
1236	// Will fill out src regs in the loop.
1237	Register SrcRegs[] = {`0`, `0`, `0`};
1238
1239	// Then we put the operands in the SystemV registers. We spill the values in
1240	// the registers before we clobber them, and mark them as used in UsedMask.
1241	// In case the arguments are already in the correct register, we emit nops
1242	// appropriately sized to keep the sled the same size in every situation.
1243	for (unsigned I = `0`; I < MI.getNumOperands(); ++I)
1244	if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I));
1245	Op.isValid()) {
1246	// TODO: Is register only support adequate?
1247	assert(Op.isReg() && "Only supports arguments in registers");
1248	SrcRegs[I] = getX86SubSuperRegister(Reg: Op.getReg(), Size: `64`);
1249	assert(SrcRegs[I].isValid() && "Invalid operand");
1250	if (SrcRegs[I] != DestRegs[I]) {
1251	UsedMask[I] = true;
1252	EmitAndCountInstruction(
1253	Inst&: MCInstBuilder (X86::PUSH64r).addReg(Reg: DestRegs[I]));
1254	} else {
1255	emitX86Nops(OS&: *OutStreamer, NumBytes: `4`, Subtarget);
1256	}
1257	}
1258
1259	// In the above loop we only stash all of the destination registers or emit
1260	// nops if the arguments are already in the right place. Doing the actually
1261	// moving is postponed until after all the registers are stashed so nothing
1262	// is clobbers. We've already added nops to account for the size of mov and
1263	// push if the register is in the right place, so we only have to worry about
1264	// emitting movs.
1265	// FIXME: This doesn't work if one of the later SrcRegs is equal to an
1266	// earlier DestReg. We will have already overwritten over the register before
1267	// we can copy from it.
1268	for (unsigned I = `0`; I < MI.getNumOperands(); ++I)
1269	if (UsedMask[I])
1270	EmitAndCountInstruction(
1271	Inst&: MCInstBuilder (X86::MOV64rr).addReg(Reg: DestRegs[I]).addReg(Reg: SrcRegs[I]));
1272
1273	// We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1274	// name of the trampoline to be implemented by the XRay runtime.
1275	auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_TypedEvent");
1276	MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym);
1277	if (isPositionIndependent())
1278	TOp.setTargetFlags(X86II::MO_PLT);
1279
1280	// Emit the call instruction.
1281	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL64pcrel32)
1282	.addOperand(Op: MCIL.LowerSymbolOperand(MO: TOp, Sym: TSym)));
1283
1284	// Restore caller-saved and used registers.
1285	for (unsigned I = sizeof UsedMask; I-- > `0`;)
1286	if (UsedMask[I])
1287	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::POP64r).addReg(Reg: DestRegs[I]));
1288	else
1289	emitX86Nops(OS&: *OutStreamer, NumBytes: `1`, Subtarget);
1290
1291	OutStreamer ->AddComment(T: "xray typed event end.");
1292
1293	// Record the sled version.
1294	recordSled(Sled: CurSled, MI, Kind: SledKind::TYPED_EVENT, Version: `2`);
1295	}
1296
1297	void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1298	X86MCInstLower &MCIL) {
1299
1300	NoAutoPaddingScope NoPadScope(*OutStreamer);
1301
1302	const Function &F = MF->getFunction();
1303	if (F.hasFnAttribute(Kind: "patchable-function-entry")) {
1304	unsigned Num;
1305	if (F.getFnAttribute(Kind: "patchable-function-entry")
1306	.getValueAsString()
1307	.getAsInteger(Radix: `10`, Result&: Num))
1308	return;
1309	emitX86Nops(OS&: *OutStreamer, NumBytes: Num, Subtarget);
1310	return;
1311	}
1312	// We want to emit the following pattern:
1313	//
1314	// .p2align 1, ...
1315	// .Lxray_sled_N:
1316	// jmp .tmpN
1317	// # 9 bytes worth of noops
1318	//
1319	// We need the 9 bytes because at runtime, we'd be patching over the full 11
1320	// bytes with the following pattern:
1321	//
1322	// mov %r10, <function id, 32-bit> // 6 bytes
1323	// call <relative offset, 32-bits> // 5 bytes
1324	//
1325	auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_", AlwaysAddSuffix: true);
1326	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1327	OutStreamer ->emitLabel(Symbol: CurSled);
1328
1329	// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1330	// an operand (computed as an offset from the jmp instruction).
1331	// FIXME: Find another less hacky way do force the relative jump.
1332	OutStreamer ->emitBytes(Data: "\xeb\x09");
1333	emitX86Nops(OS&: *OutStreamer, NumBytes: `9`, Subtarget);
1334	recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_ENTER, Version: `2`);
1335	}
1336
1337	void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1338	X86MCInstLower &MCIL) {
1339	NoAutoPaddingScope NoPadScope(*OutStreamer);
1340
1341	// Since PATCHABLE_RET takes the opcode of the return statement as an
1342	// argument, we use that to emit the correct form of the RET that we want.
1343	// i.e. when we see this:
1344	//
1345	// PATCHABLE_RET X86::RET ...
1346	//
1347	// We should emit the RET followed by sleds.
1348	//
1349	// .p2align 1, ...
1350	// .Lxray_sled_N:
1351	// ret # or equivalent instruction
1352	// # 10 bytes worth of noops
1353	//
1354	// This just makes sure that the alignment for the next instruction is 2.
1355	auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_", AlwaysAddSuffix: true);
1356	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1357	OutStreamer ->emitLabel(Symbol: CurSled);
1358	unsigned OpCode = MI.getOperand(i: `0`).getImm();
1359	MCInst Ret;
1360	Ret.setOpcode(OpCode);
1361	for (auto &MO : drop_begin(RangeOrContainer: MI.operands()))
1362	if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO); Op.isValid())
1363	Ret.addOperand(Op);
1364	OutStreamer ->emitInstruction(Inst: Ret, STI: getSubtargetInfo());
1365	emitX86Nops(OS&: *OutStreamer, NumBytes: `10`, Subtarget);
1366	recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_EXIT, Version: `2`);
1367	}
1368
1369	void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1370	X86MCInstLower &MCIL) {
1371	MCInst TC;
1372	TC.setOpcode(convertTailJumpOpcode(Opcode: MI.getOperand(i: `0`).getImm()));
1373	// Drop the tail jump opcode.
1374	auto TCOperands = drop_begin(RangeOrContainer: MI.operands());
1375	bool IsConditional = TC.getOpcode() == X86::JCC_1;
1376	MCSymbol *FallthroughLabel;
1377	if (IsConditional) {
1378	// Rewrite:
1379	// je target
1380	//
1381	// To:
1382	// jne .fallthrough
1383	// .p2align 1, ...
1384	// .Lxray_sled_N:
1385	// SLED_CODE
1386	// jmp target
1387	// .fallthrough:
1388	FallthroughLabel = OutContext.createTempSymbol();
1389	EmitToStreamer(
1390	S&: *OutStreamer,
1391	Inst: MCInstBuilder (X86::JCC_1)
1392	.addExpr(Val: MCSymbolRefExpr::create(Symbol: FallthroughLabel, Ctx&: OutContext))
1393	.addImm(Val: X86::GetOppositeBranchCondition(
1394	CC: static_cast<X86::CondCode>(MI.getOperand(i: `2`).getImm()))));
1395	TC.setOpcode(X86::JMP_1);
1396	// Drop the condition code.
1397	TCOperands = drop_end(RangeOrContainer&: TCOperands);
1398	}
1399
1400	NoAutoPaddingScope NoPadScope(*OutStreamer);
1401
1402	// Like PATCHABLE_RET, we have the actual instruction in the operands to this
1403	// instruction so we lower that particular instruction and its operands.
1404	// Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1405	// we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1406	// the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1407	// tail call much like how we have it in PATCHABLE_RET.
1408	auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_", AlwaysAddSuffix: true);
1409	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1410	OutStreamer ->emitLabel(Symbol: CurSled);
1411	auto Target = OutContext.createTempSymbol();
1412
1413	// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1414	// an operand (computed as an offset from the jmp instruction).
1415	// FIXME: Find another less hacky way do force the relative jump.
1416	OutStreamer ->emitBytes(Data: "\xeb\x09");
1417	emitX86Nops(OS&: *OutStreamer, NumBytes: `9`, Subtarget);
1418	OutStreamer ->emitLabel(Symbol: Target);
1419	recordSled(Sled: CurSled, MI, Kind: SledKind::TAIL_CALL, Version: `2`);
1420
1421	// Before emitting the instruction, add a comment to indicate that this is
1422	// indeed a tail call.
1423	OutStreamer ->AddComment(T: "TAILCALL");
1424	for (auto &MO : TCOperands)
1425	if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO); Op.isValid())
1426	TC.addOperand(Op);
1427	OutStreamer ->emitInstruction(Inst: TC, STI: getSubtargetInfo());
1428
1429	if (IsConditional)
1430	OutStreamer ->emitLabel(Symbol: FallthroughLabel);
1431	}
1432
1433	// Returns instruction preceding MBBI in MachineFunction.
1434	// If MBBI is the first instruction of the first basic block, returns null.
1435	static MachineBasicBlock::const_iterator
1436	PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1437	const MachineBasicBlock *MBB = MBBI ->getParent();
1438	while (MBBI == MBB->begin()) {
1439	if (MBB == &MBB->getParent()->front())
1440	return MachineBasicBlock::const_iterator ();
1441	MBB = MBB->getPrevNode();
1442	MBBI = MBB->end();
1443	}
1444	--MBBI;
1445	return MBBI;
1446	}
1447
1448	static unsigned getSrcIdx(const MachineInstr* MI, unsigned SrcIdx) {
1449	if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) {
1450	// Skip mask operand.
1451	++SrcIdx;
1452	if (X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) {
1453	// Skip passthru operand.
1454	++SrcIdx;
1455	}
1456	}
1457	return SrcIdx;
1458	}
1459
1460	static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI,
1461	unsigned SrcOpIdx) {
1462	const MachineOperand &DstOp = MI->getOperand(i: `0`);
1463	CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg());
1464
1465	// Handle AVX512 MASK/MASXZ write mask comments.
1466	// MASK: zmmX {%kY}
1467	// MASKZ: zmmX {%kY} {z}
1468	if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) {
1469	const MachineOperand &WriteMaskOp = MI->getOperand(i: SrcOpIdx - `1`);
1470	StringRef Mask = X86ATTInstPrinter::getRegisterName(Reg: WriteMaskOp.getReg());
1471	CS << " {%" << Mask << "}";
1472	if (!X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) {
1473	CS << " {z}";
1474	}
1475	}
1476	}
1477
1478	static void printShuffleMask(raw_ostream &CS, StringRef Src1Name,
1479	StringRef Src2Name, ArrayRef<int> Mask) {
1480	// One source operand, fix the mask to print all elements in one span.
1481	SmallVector<int, `8`> ShuffleMask(Mask);
1482	if (Src1Name == Src2Name)
1483	for (int i = `0`, e = ShuffleMask.size(); i != e; ++i)
1484	if (ShuffleMask [i] >= e)
1485	ShuffleMask [i] -= e;
1486
1487	for (int i = `0`, e = ShuffleMask.size(); i != e; ++i) {
1488	if (i != `0`)
1489	CS << ",";
1490	if (ShuffleMask [i] == SM_SentinelZero) {
1491	CS << "zero";
1492	continue;
1493	}
1494
1495	// Otherwise, it must come from src1 or src2. Print the span of elements
1496	// that comes from this src.
1497	bool isSrc1 = ShuffleMask [i] < (int)e;
1498	CS << (isSrc1 ? Src1Name : Src2Name) << `'['`;
1499
1500	bool IsFirst = true;
1501	while (i != e && ShuffleMask [i] != SM_SentinelZero &&
1502	(ShuffleMask [i] < (int)e) == isSrc1) {
1503	if (!IsFirst)
1504	CS << `','`;
1505	else
1506	IsFirst = false;
1507	if (ShuffleMask [i] == SM_SentinelUndef)
1508	CS << "u";
1509	else
1510	CS << ShuffleMask [i] % (int)e;
1511	++i;
1512	}
1513	CS << `']'`;
1514	--i; // For loop increments element #.
1515	}
1516	}
1517
1518	static std::string getShuffleComment(const MachineInstr MI, unsigned* SrcOp1Idx,
1519	unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1520	std::string Comment;
1521
1522	const MachineOperand &SrcOp1 = MI->getOperand(i: SrcOp1Idx);
1523	const MachineOperand &SrcOp2 = MI->getOperand(i: SrcOp2Idx);
1524	StringRef Src1Name = SrcOp1.isReg()
1525	? X86ATTInstPrinter::getRegisterName(Reg: SrcOp1.getReg())
1526	: "mem";
1527	StringRef Src2Name = SrcOp2.isReg()
1528	? X86ATTInstPrinter::getRegisterName(Reg: SrcOp2.getReg())
1529	: "mem";
1530
1531	raw_string_ostream CS(Comment);
1532	printDstRegisterName(CS, MI, SrcOpIdx: SrcOp1Idx);
1533	CS << " = ";
1534	printShuffleMask(CS, Src1Name, Src2Name, Mask);
1535
1536	return Comment;
1537	}
1538
1539	static void printConstant(const APInt &Val, raw_ostream &CS,
1540	bool PrintZero = false) {
1541	if (Val.getBitWidth() <= `64`) {
1542	CS << (PrintZero ? `0ULL` : Val.getZExtValue());
1543	} else {
1544	// print multi-word constant as (w0,w1)
1545	CS << "(";
1546	for (int i = `0`, N = Val.getNumWords(); i < N; ++i) {
1547	if (i > `0`)
1548	CS << ",";
1549	CS << (PrintZero ? `0ULL` : Val.getRawData()[i]);
1550	}
1551	CS << ")";
1552	}
1553	}
1554
1555	static void printConstant(const APFloat &Flt, raw_ostream &CS,
1556	bool PrintZero = false) {
1557	SmallString<`32`> Str;
1558	// Force scientific notation to distinguish from integers.
1559	if (PrintZero)
1560	APFloat::getZero(Sem: Flt.getSemantics()).toString(Str, FormatPrecision: `0`, FormatMaxPadding: `0`);
1561	else
1562	Flt.toString(Str, FormatPrecision: `0`, FormatMaxPadding: `0`);
1563	CS << Str;
1564	}
1565
1566	static void printConstant(const Constant COp, unsigned* BitWidth,
1567	raw_ostream &CS, bool PrintZero = false) {
1568	if (isa<UndefValue>(Val: COp)) {
1569	CS << "u";
1570	} else if (auto *CI = dyn_cast<ConstantInt>(Val: COp)) {
1571	if (auto VTy = dyn_cast<FixedVectorType>(Val: CI->getType())) {
1572	for (unsigned I = `0`, E = VTy->getNumElements(); I != E; ++I) {
1573	if (I != `0`)
1574	CS << `','`;
1575	printConstant(Val: CI->getValue(), CS, PrintZero);
1576	}
1577	} else
1578	printConstant(Val: CI->getValue(), CS, PrintZero);
1579	} else if (auto *CF = dyn_cast<ConstantFP>(Val: COp)) {
1580	if (auto VTy = dyn_cast<FixedVectorType>(Val: CF->getType())) {
1581	for (unsigned I = `0`, E = VTy->getNumElements(); I != E; ++I) {
1582	if (I != `0`)
1583	CS << `','`;
1584	printConstant(Flt: CF->getValueAPF(), CS, PrintZero);
1585	}
1586	} else
1587	printConstant(Flt: CF->getValueAPF(), CS, PrintZero);
1588	} else if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: COp)) {
1589	Type *EltTy = CDS->getElementType();
1590	bool IsInteger = EltTy->isIntegerTy();
1591	bool IsFP = EltTy->isHalfTy() \|\| EltTy->isFloatTy() \|\| EltTy->isDoubleTy();
1592	unsigned EltBits = EltTy->getPrimitiveSizeInBits();
1593	unsigned E = std::min(a: BitWidth / EltBits, b: (unsigned)CDS->getNumElements());
1594	if ((BitWidth % EltBits) == `0`) {
1595	for (unsigned I = `0`; I != E; ++I) {
1596	if (I != `0`)
1597	CS << ",";
1598	if (IsInteger)
1599	printConstant(Val: CDS->getElementAsAPInt(i: I), CS, PrintZero);
1600	else if (IsFP)
1601	printConstant(Flt: CDS->getElementAsAPFloat(i: I), CS, PrintZero);
1602	else
1603	CS << "?";
1604	}
1605	} else {
1606	CS << "?";
1607	}
1608	} else if (auto *CV = dyn_cast<ConstantVector>(Val: COp)) {
1609	unsigned EltBits = CV->getType()->getScalarSizeInBits();
1610	unsigned E = std::min(a: BitWidth / EltBits, b: CV->getNumOperands());
1611	if ((BitWidth % EltBits) == `0`) {
1612	for (unsigned I = `0`; I != E; ++I) {
1613	if (I != `0`)
1614	CS << ",";
1615	printConstant(COp: CV->getOperand(i_nocapture: I), BitWidth: EltBits, CS, PrintZero);
1616	}
1617	} else {
1618	CS << "?";
1619	}
1620	} else {
1621	CS << "?";
1622	}
1623	}
1624
1625	static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer,
1626	int SclWidth, int VecWidth,
1627	const char *ShuffleComment) {
1628	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1629
1630	std::string Comment;
1631	raw_string_ostream CS(Comment);
1632	printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx);
1633	CS << " = ";
1634
1635	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx)) {
1636	CS << "[";
1637	printConstant(COp: C, BitWidth: SclWidth, CS);
1638	for (int I = `1`, E = VecWidth / SclWidth; I < E; ++I) {
1639	CS << ",";
1640	printConstant(COp: C, BitWidth: SclWidth, CS, PrintZero: true);
1641	}
1642	CS << "]";
1643	OutStreamer.AddComment(T: CS.str());
1644	return; // early-out
1645	}
1646
1647	// We didn't find a constant load, fallback to a shuffle mask decode.
1648	CS << ShuffleComment;
1649	OutStreamer.AddComment(T: CS.str());
1650	}
1651
1652	static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
1653	int Repeats, int BitWidth) {
1654	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1655	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx)) {
1656	std::string Comment;
1657	raw_string_ostream CS(Comment);
1658	printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx);
1659	CS << " = [";
1660	for (int l = `0`; l != Repeats; ++l) {
1661	if (l != `0`)
1662	CS << ",";
1663	printConstant(COp: C, BitWidth, CS);
1664	}
1665	CS << "]";
1666	OutStreamer.AddComment(T: CS.str());
1667	}
1668	}
1669
1670	static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1671	int SrcEltBits, int DstEltBits, bool IsSext) {
1672	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1673	auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx);
1674	if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) {
1675	if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: C)) {
1676	int NumElts = CDS->getNumElements();
1677	std::string Comment;
1678	raw_string_ostream CS(Comment);
1679	printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx);
1680	CS << " = [";
1681	for (int i = `0`; i != NumElts; ++i) {
1682	if (i != `0`)
1683	CS << ",";
1684	if (CDS->getElementType()->isIntegerTy()) {
1685	APInt Elt = CDS->getElementAsAPInt(i);
1686	Elt = IsSext ? Elt.sext(width: DstEltBits) : Elt.zext(width: DstEltBits);
1687	printConstant(Val: Elt, CS);
1688	} else
1689	CS << "?";
1690	}
1691	CS << "]";
1692	OutStreamer.AddComment(T: CS.str());
1693	return true;
1694	}
1695	}
1696
1697	return false;
1698	}
1699	static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1700	int SrcEltBits, int DstEltBits) {
1701	printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: true);
1702	}
1703	static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1704	int SrcEltBits, int DstEltBits) {
1705	if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: false))
1706	return;
1707
1708	// We didn't find a constant load, fallback to a shuffle mask decode.
1709	std::string Comment;
1710	raw_string_ostream CS(Comment);
1711	printDstRegisterName(CS, MI, SrcOpIdx: getSrcIdx(MI, SrcIdx: `1`));
1712	CS << " = ";
1713
1714	SmallVector<int> Mask;
1715	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1716	assert((Width % DstEltBits) == `0` && (DstEltBits % SrcEltBits) == `0` &&
1717	"Illegal extension ratio");
1718	DecodeZeroExtendMask(SrcScalarBits: SrcEltBits, DstScalarBits: DstEltBits, NumDstElts: Width / DstEltBits, IsAnyExtend: false, ShuffleMask&: Mask);
1719	printShuffleMask(CS, Src1Name: "mem", Src2Name: "", Mask);
1720
1721	OutStreamer.AddComment(T: CS.str());
1722	}
1723
1724	void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1725	assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1726	assert((getSubtarget().isOSWindows() \|\| getSubtarget().isUEFI()) &&
1727	"SEH_ instruction Windows and UEFI only");
1728
1729	// Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1730	if (EmitFPOData) {
1731	X86TargetStreamer *XTS =
1732	static_cast<X86TargetStreamer *>(OutStreamer ->getTargetStreamer());
1733	switch (MI->getOpcode()) {
1734	case X86::SEH_PushReg:
1735	XTS->emitFPOPushReg(Reg: MI->getOperand(i: `0`).getImm());
1736	break;
1737	case X86::SEH_StackAlloc:
1738	XTS->emitFPOStackAlloc(StackAlloc: MI->getOperand(i: `0`).getImm());
1739	break;
1740	case X86::SEH_StackAlign:
1741	XTS->emitFPOStackAlign(Align: MI->getOperand(i: `0`).getImm());
1742	break;
1743	case X86::SEH_SetFrame:
1744	assert(MI->getOperand(`1`).getImm() == `0` &&
1745	".cv_fpo_setframe takes no offset");
1746	XTS->emitFPOSetFrame(Reg: MI->getOperand(i: `0`).getImm());
1747	break;
1748	case X86::SEH_EndPrologue:
1749	XTS->emitFPOEndPrologue();
1750	break;
1751	case X86::SEH_SaveReg:
1752	case X86::SEH_SaveXMM:
1753	case X86::SEH_PushFrame:
1754	llvm_unreachable("SEH_ directive incompatible with FPO");
1755	break;
1756	default:
1757	llvm_unreachable("expected SEH_ instruction");
1758	}
1759	return;
1760	}
1761
1762	// Otherwise, use the .seh_ directives for all other Windows platforms.
1763	switch (MI->getOpcode()) {
1764	case X86::SEH_PushReg:
1765	OutStreamer ->emitWinCFIPushReg(Register: MI->getOperand(i: `0`).getImm());
1766	break;
1767
1768	case X86::SEH_SaveReg:
1769	OutStreamer ->emitWinCFISaveReg(Register: MI->getOperand(i: `0`).getImm(),
1770	Offset: MI->getOperand(i: `1`).getImm());
1771	break;
1772
1773	case X86::SEH_SaveXMM:
1774	OutStreamer ->emitWinCFISaveXMM(Register: MI->getOperand(i: `0`).getImm(),
1775	Offset: MI->getOperand(i: `1`).getImm());
1776	break;
1777
1778	case X86::SEH_StackAlloc:
1779	OutStreamer ->emitWinCFIAllocStack(Size: MI->getOperand(i: `0`).getImm());
1780	break;
1781
1782	case X86::SEH_SetFrame:
1783	OutStreamer ->emitWinCFISetFrame(Register: MI->getOperand(i: `0`).getImm(),
1784	Offset: MI->getOperand(i: `1`).getImm());
1785	break;
1786
1787	case X86::SEH_PushFrame:
1788	OutStreamer ->emitWinCFIPushFrame(Code: MI->getOperand(i: `0`).getImm());
1789	break;
1790
1791	case X86::SEH_EndPrologue:
1792	OutStreamer ->emitWinCFIEndProlog();
1793	break;
1794
1795	case X86::SEH_BeginEpilogue:
1796	OutStreamer ->emitWinCFIBeginEpilogue();
1797	break;
1798
1799	case X86::SEH_EndEpilogue:
1800	OutStreamer ->emitWinCFIEndEpilogue();
1801	break;
1802
1803	case X86::SEH_UnwindV2Start:
1804	OutStreamer ->emitWinCFIUnwindV2Start();
1805	break;
1806
1807	case X86::SEH_UnwindVersion:
1808	OutStreamer ->emitWinCFIUnwindVersion(Version: MI->getOperand(i: `0`).getImm());
1809	break;
1810
1811	default:
1812	llvm_unreachable("expected SEH_ instruction");
1813	}
1814	}
1815
1816	static void addConstantComments(const MachineInstr *MI,
1817	MCStreamer &OutStreamer) {
1818	switch (MI->getOpcode()) {
1819	// Lower PSHUFB and VPERMILP normally but add a comment if we can find
1820	// a constant shuffle mask. We won't be able to do this at the MC layer
1821	// because the mask isn't an immediate.
1822	case X86::PSHUFBrm:
1823	case X86::VPSHUFBrm:
1824	case X86::VPSHUFBYrm:
1825	case X86::VPSHUFBZ128rm:
1826	case X86::VPSHUFBZ128rmk:
1827	case X86::VPSHUFBZ128rmkz:
1828	case X86::VPSHUFBZ256rm:
1829	case X86::VPSHUFBZ256rmk:
1830	case X86::VPSHUFBZ256rmkz:
1831	case X86::VPSHUFBZrm:
1832	case X86::VPSHUFBZrmk:
1833	case X86::VPSHUFBZrmkz: {
1834	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1835	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1836	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1837	SmallVector<int, `64`> Mask;
1838	DecodePSHUFBMask(C, Width, ShuffleMask&: Mask);
1839	if (!Mask.empty())
1840	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask));
1841	}
1842	break;
1843	}
1844
1845	case X86::VPERMILPSrm:
1846	case X86::VPERMILPSYrm:
1847	case X86::VPERMILPSZ128rm:
1848	case X86::VPERMILPSZ128rmk:
1849	case X86::VPERMILPSZ128rmkz:
1850	case X86::VPERMILPSZ256rm:
1851	case X86::VPERMILPSZ256rmk:
1852	case X86::VPERMILPSZ256rmkz:
1853	case X86::VPERMILPSZrm:
1854	case X86::VPERMILPSZrmk:
1855	case X86::VPERMILPSZrmkz: {
1856	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1857	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1858	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1859	SmallVector<int, `16`> Mask;
1860	DecodeVPERMILPMask(C, ElSize: `32`, Width, ShuffleMask&: Mask);
1861	if (!Mask.empty())
1862	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask));
1863	}
1864	break;
1865	}
1866	case X86::VPERMILPDrm:
1867	case X86::VPERMILPDYrm:
1868	case X86::VPERMILPDZ128rm:
1869	case X86::VPERMILPDZ128rmk:
1870	case X86::VPERMILPDZ128rmkz:
1871	case X86::VPERMILPDZ256rm:
1872	case X86::VPERMILPDZ256rmk:
1873	case X86::VPERMILPDZ256rmkz:
1874	case X86::VPERMILPDZrm:
1875	case X86::VPERMILPDZrmk:
1876	case X86::VPERMILPDZrmkz: {
1877	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1878	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1879	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1880	SmallVector<int, `16`> Mask;
1881	DecodeVPERMILPMask(C, ElSize: `64`, Width, ShuffleMask&: Mask);
1882	if (!Mask.empty())
1883	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask));
1884	}
1885	break;
1886	}
1887
1888	case X86::VPERMIL2PDrm:
1889	case X86::VPERMIL2PSrm:
1890	case X86::VPERMIL2PDYrm:
1891	case X86::VPERMIL2PSYrm: {
1892	assert(MI->getNumOperands() >= (`3` + X86::AddrNumOperands + `1`) &&
1893	"Unexpected number of operands!");
1894
1895	const MachineOperand &CtrlOp = MI->getOperand(i: MI->getNumOperands() - `1`);
1896	if (!CtrlOp.isImm())
1897	break;
1898
1899	unsigned ElSize;
1900	switch (MI->getOpcode()) {
1901	default: llvm_unreachable("Invalid opcode");
1902	case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = `32`; break;
1903	case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = `64`; break;
1904	}
1905
1906	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: `3`)) {
1907	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1908	SmallVector<int, `16`> Mask;
1909	DecodeVPERMIL2PMask(C, M2Z: (unsigned)CtrlOp.getImm(), ElSize, Width, ShuffleMask&: Mask);
1910	if (!Mask.empty())
1911	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: `1`, SrcOp2Idx: `2`, Mask));
1912	}
1913	break;
1914	}
1915
1916	case X86::VPPERMrrm: {
1917	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: `3`)) {
1918	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1919	SmallVector<int, `16`> Mask;
1920	DecodeVPPERMMask(C, Width, ShuffleMask&: Mask);
1921	if (!Mask.empty())
1922	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: `1`, SrcOp2Idx: `2`, Mask));
1923	}
1924	break;
1925	}
1926
1927	case X86::MMX_MOVQ64rm: {
1928	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: `1`)) {
1929	std::string Comment;
1930	raw_string_ostream CS(Comment);
1931	const MachineOperand &DstOp = MI->getOperand(i: `0`);
1932	CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()) << " = ";
1933	if (auto *CF = dyn_cast<ConstantFP>(Val: C)) {
1934	CS << "0x" << toString(I: CF->getValueAPF().bitcastToAPInt(), Radix: `16`, Signed: false);
1935	OutStreamer.AddComment(T: CS.str());
1936	}
1937	}
1938	break;
1939	}
1940
1941	#define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \
1942	case X86::Prefix##Instr##Suffix##rm##Postfix:
1943
1944	#define CASE_ARITH_RM(Instr) \
1945	INSTR_CASE(, Instr, , ) /* SSE */ \
1946	INSTR_CASE(V, Instr, , ) /* AVX-128 */ \
1947	INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \
1948	INSTR_CASE(V, Instr, Z128, ) \
1949	INSTR_CASE(V, Instr, Z128, k) \
1950	INSTR_CASE(V, Instr, Z128, kz) \
1951	INSTR_CASE(V, Instr, Z256, ) \
1952	INSTR_CASE(V, Instr, Z256, k) \
1953	INSTR_CASE(V, Instr, Z256, kz) \
1954	INSTR_CASE(V, Instr, Z, ) \
1955	INSTR_CASE(V, Instr, Z, k) \
1956	INSTR_CASE(V, Instr, Z, kz)
1957
1958	// TODO: Add additional instructions when useful.
1959	CASE_ARITH_RM(PMADDUBSW) {
1960	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1961	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1962	if (C->getType()->getScalarSizeInBits() == `8`) {
1963	std::string Comment;
1964	raw_string_ostream CS(Comment);
1965	unsigned VectorWidth =
1966	X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1967	CS << "[";
1968	printConstant(COp: C, BitWidth: VectorWidth, CS);
1969	CS << "]";
1970	OutStreamer.AddComment(T: CS.str());
1971	}
1972	}
1973	break;
1974	}
1975
1976	CASE_ARITH_RM(PMADDWD)
1977	CASE_ARITH_RM(PMULLW)
1978	CASE_ARITH_RM(PMULHW)
1979	CASE_ARITH_RM(PMULHUW)
1980	CASE_ARITH_RM(PMULHRSW) {
1981	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1982	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1983	if (C->getType()->getScalarSizeInBits() == `16`) {
1984	std::string Comment;
1985	raw_string_ostream CS(Comment);
1986	unsigned VectorWidth =
1987	X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1988	CS << "[";
1989	printConstant(COp: C, BitWidth: VectorWidth, CS);
1990	CS << "]";
1991	OutStreamer.AddComment(T: CS.str());
1992	}
1993	}
1994	break;
1995	}
1996
1997	#define MASK_AVX512_CASE(Instr) \
1998	case Instr: \
1999	case Instr##k: \
2000	case Instr##kz:
2001
2002	case X86::MOVSDrm:
2003	case X86::VMOVSDrm:
2004	MASK_AVX512_CASE(X86::VMOVSDZrm)
2005	case X86::MOVSDrm_alt:
2006	case X86::VMOVSDrm_alt:
2007	case X86::VMOVSDZrm_alt:
2008	case X86::MOVQI2PQIrm:
2009	case X86::VMOVQI2PQIrm:
2010	case X86::VMOVQI2PQIZrm:
2011	printZeroUpperMove(MI, OutStreamer, SclWidth: `64`, VecWidth: `128`, ShuffleComment: "mem[0],zero");
2012	break;
2013
2014	MASK_AVX512_CASE(X86::VMOVSHZrm)
2015	case X86::VMOVSHZrm_alt:
2016	printZeroUpperMove(MI, OutStreamer, SclWidth: `16`, VecWidth: `128`,
2017	ShuffleComment: "mem[0],zero,zero,zero,zero,zero,zero,zero");
2018	break;
2019
2020	case X86::MOVSSrm:
2021	case X86::VMOVSSrm:
2022	MASK_AVX512_CASE(X86::VMOVSSZrm)
2023	case X86::MOVSSrm_alt:
2024	case X86::VMOVSSrm_alt:
2025	case X86::VMOVSSZrm_alt:
2026	case X86::MOVDI2PDIrm:
2027	case X86::VMOVDI2PDIrm:
2028	case X86::VMOVDI2PDIZrm:
2029	printZeroUpperMove(MI, OutStreamer, SclWidth: `32`, VecWidth: `128`, ShuffleComment: "mem[0],zero,zero,zero");
2030	break;
2031
2032	#define MOV_CASE(Prefix, Suffix) \
2033	case X86::Prefix##MOVAPD##Suffix##rm: \
2034	case X86::Prefix##MOVAPS##Suffix##rm: \
2035	case X86::Prefix##MOVUPD##Suffix##rm: \
2036	case X86::Prefix##MOVUPS##Suffix##rm: \
2037	case X86::Prefix##MOVDQA##Suffix##rm: \
2038	case X86::Prefix##MOVDQU##Suffix##rm:
2039
2040	#define MOV_AVX512_CASE(Suffix, Postfix) \
2041	case X86::VMOVDQA64##Suffix##rm##Postfix: \
2042	case X86::VMOVDQA32##Suffix##rm##Postfix: \
2043	case X86::VMOVDQU64##Suffix##rm##Postfix: \
2044	case X86::VMOVDQU32##Suffix##rm##Postfix: \
2045	case X86::VMOVDQU16##Suffix##rm##Postfix: \
2046	case X86::VMOVDQU8##Suffix##rm##Postfix: \
2047	case X86::VMOVAPS##Suffix##rm##Postfix: \
2048	case X86::VMOVAPD##Suffix##rm##Postfix: \
2049	case X86::VMOVUPS##Suffix##rm##Postfix: \
2050	case X86::VMOVUPD##Suffix##rm##Postfix:
2051
2052	#define CASE_128_MOV_RM() \
2053	MOV_CASE(, ) /* SSE */ \
2054	MOV_CASE(V, ) /* AVX-128 */ \
2055	MOV_AVX512_CASE(Z128, ) \
2056	MOV_AVX512_CASE(Z128, k) \
2057	MOV_AVX512_CASE(Z128, kz)
2058
2059	#define CASE_256_MOV_RM() \
2060	MOV_CASE(V, Y) /* AVX-256 */ \
2061	MOV_AVX512_CASE(Z256, ) \
2062	MOV_AVX512_CASE(Z256, k) \
2063	MOV_AVX512_CASE(Z256, kz) \
2064
2065	#define CASE_512_MOV_RM() \
2066	MOV_AVX512_CASE(Z, ) \
2067	MOV_AVX512_CASE(Z, k) \
2068	MOV_AVX512_CASE(Z, kz) \
2069
2070	// For loads from a constant pool to a vector register, print the constant
2071	// loaded.
2072	CASE_128_MOV_RM()
2073	printBroadcast(MI, OutStreamer, Repeats: `1`, BitWidth: `128`);
2074	break;
2075	CASE_256_MOV_RM()
2076	printBroadcast(MI, OutStreamer, Repeats: `1`, BitWidth: `256`);
2077	break;
2078	CASE_512_MOV_RM()
2079	printBroadcast(MI, OutStreamer, Repeats: `1`, BitWidth: `512`);
2080	break;
2081	case X86::VBROADCASTF128rm:
2082	case X86::VBROADCASTI128rm:
2083	MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm)
2084	MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm)
2085	MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm)
2086	MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm)
2087	printBroadcast(MI, OutStreamer, Repeats: `2`, BitWidth: `128`);
2088	break;
2089	MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm)
2090	MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm)
2091	MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm)
2092	MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm)
2093	printBroadcast(MI, OutStreamer, Repeats: `4`, BitWidth: `128`);
2094	break;
2095	MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm)
2096	MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm)
2097	MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm)
2098	MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm)
2099	printBroadcast(MI, OutStreamer, Repeats: `2`, BitWidth: `256`);
2100	break;
2101
2102	// For broadcast loads from a constant pool to a vector register, repeatedly
2103	// print the constant loaded.
2104	case X86::MOVDDUPrm:
2105	case X86::VMOVDDUPrm:
2106	MASK_AVX512_CASE(X86::VMOVDDUPZ128rm)
2107	case X86::VPBROADCASTQrm:
2108	MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm)
2109	printBroadcast(MI, OutStreamer, Repeats: `2`, BitWidth: `64`);
2110	break;
2111	case X86::VBROADCASTSDYrm:
2112	MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm)
2113	case X86::VPBROADCASTQYrm:
2114	MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm)
2115	printBroadcast(MI, OutStreamer, Repeats: `4`, BitWidth: `64`);
2116	break;
2117	MASK_AVX512_CASE(X86::VBROADCASTSDZrm)
2118	MASK_AVX512_CASE(X86::VPBROADCASTQZrm)
2119	printBroadcast(MI, OutStreamer, Repeats: `8`, BitWidth: `64`);
2120	break;
2121	case X86::VBROADCASTSSrm:
2122	MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm)
2123	case X86::VPBROADCASTDrm:
2124	MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm)
2125	printBroadcast(MI, OutStreamer, Repeats: `4`, BitWidth: `32`);
2126	break;
2127	case X86::VBROADCASTSSYrm:
2128	MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm)
2129	case X86::VPBROADCASTDYrm:
2130	MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm)
2131	printBroadcast(MI, OutStreamer, Repeats: `8`, BitWidth: `32`);
2132	break;
2133	MASK_AVX512_CASE(X86::VBROADCASTSSZrm)
2134	MASK_AVX512_CASE(X86::VPBROADCASTDZrm)
2135	printBroadcast(MI, OutStreamer, Repeats: `16`, BitWidth: `32`);
2136	break;
2137	case X86::VPBROADCASTWrm:
2138	MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm)
2139	printBroadcast(MI, OutStreamer, Repeats: `8`, BitWidth: `16`);
2140	break;
2141	case X86::VPBROADCASTWYrm:
2142	MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm)
2143	printBroadcast(MI, OutStreamer, Repeats: `16`, BitWidth: `16`);
2144	break;
2145	MASK_AVX512_CASE(X86::VPBROADCASTWZrm)
2146	printBroadcast(MI, OutStreamer, Repeats: `32`, BitWidth: `16`);
2147	break;
2148	case X86::VPBROADCASTBrm:
2149	MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm)
2150	printBroadcast(MI, OutStreamer, Repeats: `16`, BitWidth: `8`);
2151	break;
2152	case X86::VPBROADCASTBYrm:
2153	MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm)
2154	printBroadcast(MI, OutStreamer, Repeats: `32`, BitWidth: `8`);
2155	break;
2156	MASK_AVX512_CASE(X86::VPBROADCASTBZrm)
2157	printBroadcast(MI, OutStreamer, Repeats: `64`, BitWidth: `8`);
2158	break;
2159
2160	#define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \
2161	case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix:
2162
2163	#define CASE_MOVX_RM(Ext, Type) \
2164	MOVX_CASE(, Ext, Type, , ) \
2165	MOVX_CASE(V, Ext, Type, , ) \
2166	MOVX_CASE(V, Ext, Type, Y, ) \
2167	MOVX_CASE(V, Ext, Type, Z128, ) \
2168	MOVX_CASE(V, Ext, Type, Z128, k ) \
2169	MOVX_CASE(V, Ext, Type, Z128, kz ) \
2170	MOVX_CASE(V, Ext, Type, Z256, ) \
2171	MOVX_CASE(V, Ext, Type, Z256, k ) \
2172	MOVX_CASE(V, Ext, Type, Z256, kz ) \
2173	MOVX_CASE(V, Ext, Type, Z, ) \
2174	MOVX_CASE(V, Ext, Type, Z, k ) \
2175	MOVX_CASE(V, Ext, Type, Z, kz )
2176
2177	CASE_MOVX_RM(SX, BD)
2178	printSignExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `32`);
2179	break;
2180	CASE_MOVX_RM(SX, BQ)
2181	printSignExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `64`);
2182	break;
2183	CASE_MOVX_RM(SX, BW)
2184	printSignExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `16`);
2185	break;
2186	CASE_MOVX_RM(SX, DQ)
2187	printSignExtend(MI, OutStreamer, SrcEltBits: `32`, DstEltBits: `64`);
2188	break;
2189	CASE_MOVX_RM(SX, WD)
2190	printSignExtend(MI, OutStreamer, SrcEltBits: `16`, DstEltBits: `32`);
2191	break;
2192	CASE_MOVX_RM(SX, WQ)
2193	printSignExtend(MI, OutStreamer, SrcEltBits: `16`, DstEltBits: `64`);
2194	break;
2195
2196	CASE_MOVX_RM(ZX, BD)
2197	printZeroExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `32`);
2198	break;
2199	CASE_MOVX_RM(ZX, BQ)
2200	printZeroExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `64`);
2201	break;
2202	CASE_MOVX_RM(ZX, BW)
2203	printZeroExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `16`);
2204	break;
2205	CASE_MOVX_RM(ZX, DQ)
2206	printZeroExtend(MI, OutStreamer, SrcEltBits: `32`, DstEltBits: `64`);
2207	break;
2208	CASE_MOVX_RM(ZX, WD)
2209	printZeroExtend(MI, OutStreamer, SrcEltBits: `16`, DstEltBits: `32`);
2210	break;
2211	CASE_MOVX_RM(ZX, WQ)
2212	printZeroExtend(MI, OutStreamer, SrcEltBits: `16`, DstEltBits: `64`);
2213	break;
2214	}
2215	}
2216
2217	// Does the given operand refer to a DLLIMPORT function?
2218	bool isImportedFunction(const MachineOperand &MO) {
2219	return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_DLLIMPORT);
2220	}
2221
2222	// Is the given instruction a call to a CFGuard function?
2223	bool isCallToCFGuardFunction(const MachineInstr *MI) {
2224	assert(MI->getOpcode() == X86::TAILJMPm64_REX \|\|
2225	MI->getOpcode() == X86::CALL64m);
2226	const MachineOperand &MO = MI->getOperand(i: `3`);
2227	return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_NO_FLAG) &&
2228	isCFGuardFunction(GV: MO.getGlobal());
2229	}
2230
2231	// Does the containing block for the given instruction contain any jump table
2232	// info (indicating that the block is a dispatch for a jump table)?
2233	bool hasJumpTableInfoInBlock(const llvm::MachineInstr *MI) {
2234	const MachineBasicBlock &MBB = *MI->getParent();
2235	for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I)
2236	if (I ->isJumpTableDebugInfo())
2237	return true;
2238
2239	return false;
2240	}
2241
2242	void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2243	// FIXME: Enable feature predicate checks once all the test pass.
2244	// X86_MC::verifyInstructionPredicates(MI->getOpcode(),
2245	// Subtarget->getFeatureBits());
2246
2247	X86MCInstLower MCInstLowering(MF, this);
2248	const X86RegisterInfo *RI =
2249	MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2250
2251	if (MI->getOpcode() == X86::OR64rm) {
2252	for (auto &Opd : MI->operands()) {
2253	if (Opd.isSymbol() && StringRef (Opd.getSymbolName()) ==
2254	"swift_async_extendedFramePointerFlags") {
2255	ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2256	}
2257	}
2258	}
2259
2260	// Add comments for values loaded from constant pool.
2261	if (OutStreamer ->isVerboseAsm())
2262	addConstantComments(MI, OutStreamer&: *OutStreamer);
2263
2264	// Add a comment about EVEX compression
2265	if (TM.Options.MCOptions.ShowMCEncoding) {
2266	if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
2267	OutStreamer ->AddComment(T: "EVEX TO LEGACY Compression ", EOL: false);
2268	else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2269	OutStreamer ->AddComment(T: "EVEX TO VEX Compression ", EOL: false);
2270	else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
2271	OutStreamer ->AddComment(T: "EVEX TO EVEX Compression ", EOL: false);
2272	}
2273
2274	switch (MI->getOpcode()) {
2275	case TargetOpcode::DBG_VALUE:
2276	llvm_unreachable("Should be handled target independently");
2277
2278	case X86::EH_RETURN:
2279	case X86::EH_RETURN64: {
2280	// Lower these as normal, but add some comments.
2281	Register Reg = MI->getOperand(i: `0`).getReg();
2282	OutStreamer ->AddComment(T: StringRef ("eh_return, addr: %") +
2283	X86ATTInstPrinter::getRegisterName(Reg));
2284	break;
2285	}
2286	case X86::CLEANUPRET: {
2287	// Lower these as normal, but add some comments.
2288	OutStreamer ->AddComment(T: "CLEANUPRET");
2289	break;
2290	}
2291
2292	case X86::CATCHRET: {
2293	// Lower these as normal, but add some comments.
2294	OutStreamer ->AddComment(T: "CATCHRET");
2295	break;
2296	}
2297
2298	case X86::ENDBR32:
2299	case X86::ENDBR64: {
2300	// CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2301	// -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2302	// non-empty. If MI is the initial ENDBR, place the
2303	// __patchable_function_entries label after ENDBR.
2304	if (CurrentPatchableFunctionEntrySym &&
2305	CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2306	MI == &MF->front().front()) {
2307	MCInst Inst;
2308	MCInstLowering.Lower(MI, OutMI&: Inst);
2309	EmitAndCountInstruction(Inst);
2310	CurrentPatchableFunctionEntrySym = createTempSymbol(Name: "patch");
2311	OutStreamer ->emitLabel(Symbol: CurrentPatchableFunctionEntrySym);
2312	return;
2313	}
2314	break;
2315	}
2316
2317	case X86::TAILJMPd64:
2318	if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(Reg: X86::R11))
2319	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CS_PREFIX));
2320
2321	if (EnableImportCallOptimization && isImportedFunction(MO: MI->getOperand(i: `0`))) {
2322	emitLabelAndRecordForImportCallOptimization(
2323	Kind: IMAGE_RETPOLINE_AMD64_IMPORT_BR);
2324	}
2325
2326	// Lower this as normal, but add a comment.
2327	OutStreamer ->AddComment(T: "TAILCALL");
2328	break;
2329
2330	case X86::TAILJMPr:
2331	case X86::TAILJMPm:
2332	case X86::TAILJMPd:
2333	case X86::TAILJMPd_CC:
2334	case X86::TAILJMPr64:
2335	case X86::TAILJMPm64:
2336	case X86::TAILJMPd64_CC:
2337	if (EnableImportCallOptimization)
2338	report_fatal_error(reason: "Unexpected TAILJMP instruction was emitted when "
2339	"import call optimization was enabled");
2340
2341	// Lower these as normal, but add some comments.
2342	OutStreamer ->AddComment(T: "TAILCALL");
2343	break;
2344
2345	case X86::TAILJMPm64_REX:
2346	if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
2347	emitLabelAndRecordForImportCallOptimization(
2348	Kind: IMAGE_RETPOLINE_AMD64_CFG_BR_REX);
2349	}
2350
2351	OutStreamer ->AddComment(T: "TAILCALL");
2352	break;
2353
2354	case X86::TAILJMPr64_REX: {
2355	if (EnableImportCallOptimization) {
2356	assert(MI->getOperand(`0`).getReg() == X86::RAX &&
2357	"Indirect tail calls with impcall enabled must go through RAX (as "
2358	"enforced by TCRETURNImpCallri64)");
2359	emitLabelAndRecordForImportCallOptimization(
2360	Kind: IMAGE_RETPOLINE_AMD64_INDIR_BR);
2361	}
2362
2363	OutStreamer ->AddComment(T: "TAILCALL");
2364	break;
2365	}
2366
2367	case X86::JMP64r:
2368	if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI)) {
2369	uint16_t EncodedReg =
2370	this->getSubtarget().getRegisterInfo()->getEncodingValue(
2371	Reg: MI->getOperand(i: `0`).getReg().asMCReg());
2372	emitLabelAndRecordForImportCallOptimization(
2373	Kind: (ImportCallKind)(IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST +
2374	EncodedReg));
2375	}
2376	break;
2377
2378	case X86::JMP16r:
2379	case X86::JMP16m:
2380	case X86::JMP32r:
2381	case X86::JMP32m:
2382	case X86::JMP64m:
2383	if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI))
2384	report_fatal_error(
2385	reason: "Unexpected JMP instruction was emitted for a jump-table when import "
2386	"call optimization was enabled");
2387	break;
2388
2389	case X86::TLS_addr32:
2390	case X86::TLS_addr64:
2391	case X86::TLS_addrX32:
2392	case X86::TLS_base_addr32:
2393	case X86::TLS_base_addr64:
2394	case X86::TLS_base_addrX32:
2395	case X86::TLS_desc32:
2396	case X86::TLS_desc64:
2397	return LowerTlsAddr(MCInstLowering, MI: *MI);
2398
2399	case X86::MOVPC32r: {
2400	// This is a pseudo op for a two instruction sequence with a label, which
2401	// looks like:
2402	// call "L1$pb"
2403	// "L1$pb":
2404	// popl %esi
2405
2406	// Emit the call.
2407	MCSymbol *PICBase = MF->getPICBaseSymbol();
2408	// FIXME: We would like an efficient form for this, so we don't have to do a
2409	// lot of extra uniquing.
2410	EmitAndCountInstruction(
2411	Inst&: MCInstBuilder (X86::CALLpcrel32)
2412	.addExpr(Val: MCSymbolRefExpr::create(Symbol: PICBase, Ctx&: OutContext)));
2413
2414	const X86FrameLowering *FrameLowering =
2415	MF->getSubtarget<X86Subtarget>().getFrameLowering();
2416	bool hasFP = FrameLowering->hasFP(MF: *MF);
2417
2418	// TODO: This is needed only if we require precise CFA.
2419	bool HasActiveDwarfFrame = OutStreamer ->getNumFrameInfos() &&
2420	!OutStreamer ->getDwarfFrameInfos().back().End;
2421
2422	int stackGrowth = -RI->getSlotSize();
2423
2424	if (HasActiveDwarfFrame && !hasFP) {
2425	OutStreamer ->emitCFIAdjustCfaOffset(Adjustment: -stackGrowth);
2426	MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
2427	}
2428
2429	// Emit the label.
2430	OutStreamer ->emitLabel(Symbol: PICBase);
2431
2432	// popl $reg
2433	EmitAndCountInstruction(
2434	Inst&: MCInstBuilder (X86::POP32r).addReg(Reg: MI->getOperand(i: `0`).getReg()));
2435
2436	if (HasActiveDwarfFrame && !hasFP) {
2437	OutStreamer ->emitCFIAdjustCfaOffset(Adjustment: stackGrowth);
2438	}
2439	return;
2440	}
2441
2442	case X86::ADD32ri: {
2443	// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2444	if (MI->getOperand(i: `2`).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2445	break;
2446
2447	// Okay, we have something like:
2448	// EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2449
2450	// For this, we want to print something like:
2451	// MYGLOBAL + (. - PICBASE)
2452	// However, we can't generate a ".", so just emit a new label here and refer
2453	// to it.
2454	MCSymbol *DotSym = OutContext.createTempSymbol();
2455	OutStreamer ->emitLabel(Symbol: DotSym);
2456
2457	// Now that we have emitted the label, lower the complex operand expression.
2458	MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MO: MI->getOperand(i: `2`));
2459
2460	const MCExpr *DotExpr = MCSymbolRefExpr::create(Symbol: DotSym, Ctx&: OutContext);
2461	const MCExpr *PICBase =
2462	MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx&: OutContext);
2463	DotExpr = MCBinaryExpr::createSub(LHS: DotExpr, RHS: PICBase, Ctx&: OutContext);
2464
2465	DotExpr = MCBinaryExpr::createAdd(
2466	LHS: MCSymbolRefExpr::create(Symbol: OpSym, Ctx&: OutContext), RHS: DotExpr, Ctx&: OutContext);
2467
2468	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::ADD32ri)
2469	.addReg(Reg: MI->getOperand(i: `0`).getReg())
2470	.addReg(Reg: MI->getOperand(i: `1`).getReg())
2471	.addExpr(Val: DotExpr));
2472	return;
2473	}
2474	case TargetOpcode::STATEPOINT:
2475	return LowerSTATEPOINT(MI: *MI, MCIL&: MCInstLowering);
2476
2477	case TargetOpcode::FAULTING_OP:
2478	return LowerFAULTING_OP(FaultingMI: *MI, MCIL&: MCInstLowering);
2479
2480	case TargetOpcode::FENTRY_CALL:
2481	return LowerFENTRY_CALL(MI: *MI, MCIL&: MCInstLowering);
2482
2483	case TargetOpcode::PATCHABLE_OP:
2484	return LowerPATCHABLE_OP(MI: *MI, MCIL&: MCInstLowering);
2485
2486	case TargetOpcode::STACKMAP:
2487	return LowerSTACKMAP(MI: *MI);
2488
2489	case TargetOpcode::PATCHPOINT:
2490	return LowerPATCHPOINT(MI: *MI, MCIL&: MCInstLowering);
2491
2492	case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2493	return LowerPATCHABLE_FUNCTION_ENTER(MI: *MI, MCIL&: MCInstLowering);
2494
2495	case TargetOpcode::PATCHABLE_RET:
2496	return LowerPATCHABLE_RET(MI: *MI, MCIL&: MCInstLowering);
2497
2498	case TargetOpcode::PATCHABLE_TAIL_CALL:
2499	return LowerPATCHABLE_TAIL_CALL(MI: *MI, MCIL&: MCInstLowering);
2500
2501	case TargetOpcode::PATCHABLE_EVENT_CALL:
2502	return LowerPATCHABLE_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering);
2503
2504	case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2505	return LowerPATCHABLE_TYPED_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering);
2506
2507	case X86::MORESTACK_RET:
2508	EmitAndCountInstruction(Inst&: MCInstBuilder (getRetOpcode(Subtarget: *Subtarget)));
2509	return;
2510
2511	case X86::KCFI_CHECK:
2512	return LowerKCFI_CHECK(MI: *MI);
2513
2514	case X86::ASAN_CHECK_MEMACCESS:
2515	return LowerASAN_CHECK_MEMACCESS(MI: *MI);
2516
2517	case X86::MORESTACK_RET_RESTORE_R10:
2518	// Return, then restore R10.
2519	EmitAndCountInstruction(Inst&: MCInstBuilder (getRetOpcode(Subtarget: *Subtarget)));
2520	EmitAndCountInstruction(
2521	Inst&: MCInstBuilder (X86::MOV64rr).addReg(Reg: X86::R10).addReg(Reg: X86::RAX));
2522	return;
2523
2524	case X86::SEH_PushReg:
2525	case X86::SEH_SaveReg:
2526	case X86::SEH_SaveXMM:
2527	case X86::SEH_StackAlloc:
2528	case X86::SEH_StackAlign:
2529	case X86::SEH_SetFrame:
2530	case X86::SEH_PushFrame:
2531	case X86::SEH_EndPrologue:
2532	case X86::SEH_EndEpilogue:
2533	case X86::SEH_UnwindV2Start:
2534	case X86::SEH_UnwindVersion:
2535	EmitSEHInstruction(MI);
2536	return;
2537
2538	case X86::SEH_BeginEpilogue: {
2539	assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2540	// Windows unwinder will not invoke function's exception handler if IP is
2541	// either in prologue or in epilogue. This behavior causes a problem when a
2542	// call immediately precedes an epilogue, because the return address points
2543	// into the epilogue. To cope with that, we insert a 'nop' if it ends up
2544	// immediately after a CALL in the final emitted code.
2545	MachineBasicBlock::const_iterator MBBI(MI);
2546	// Check if preceded by a call and emit nop if so.
2547	for (MBBI = PrevCrossBBInst(MBBI);
2548	MBBI != MachineBasicBlock::const_iterator ();
2549	MBBI = PrevCrossBBInst(MBBI)) {
2550	// Pseudo instructions that aren't a call are assumed to not emit any
2551	// code. If they do, we worst case generate unnecessary noops after a
2552	// call.
2553	if (MBBI ->isCall() \|\| !MBBI ->isPseudo()) {
2554	if (MBBI ->isCall())
2555	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::NOOP));
2556	break;
2557	}
2558	}
2559
2560	EmitSEHInstruction(MI);
2561	return;
2562	}
2563	case X86::UBSAN_UD1:
2564	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::UD1Lm)
2565	.addReg(Reg: X86::EAX)
2566	.addReg(Reg: X86::EAX)
2567	.addImm(Val: `1`)
2568	.addReg(Reg: X86::NoRegister)
2569	.addImm(Val: MI->getOperand(i: `0`).getImm())
2570	.addReg(Reg: X86::NoRegister));
2571	return;
2572	case X86::CALL64pcrel32:
2573	if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(Reg: X86::R11))
2574	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CS_PREFIX));
2575
2576	if (EnableImportCallOptimization && isImportedFunction(MO: MI->getOperand(i: `0`))) {
2577	emitLabelAndRecordForImportCallOptimization(
2578	Kind: IMAGE_RETPOLINE_AMD64_IMPORT_CALL);
2579
2580	MCInst TmpInst;
2581	MCInstLowering.Lower(MI, OutMI&: TmpInst);
2582
2583	// For Import Call Optimization to work, we need a the call instruction
2584	// with a rex prefix, and a 5-byte nop after the call instruction.
2585	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::REX64_PREFIX));
2586	emitCallInstruction(MCI: TmpInst);
2587	emitNop(OS&: *OutStreamer, NumBytes: `5`, Subtarget);
2588	return;
2589	}
2590
2591	break;
2592
2593	case X86::CALL64r:
2594	if (EnableImportCallOptimization) {
2595	assert(MI->getOperand(`0`).getReg() == X86::RAX &&
2596	"Indirect calls with impcall enabled must go through RAX (as "
2597	"enforced by CALL64r_ImpCall)");
2598
2599	emitLabelAndRecordForImportCallOptimization(
2600	Kind: IMAGE_RETPOLINE_AMD64_INDIR_CALL);
2601	MCInst TmpInst;
2602	MCInstLowering.Lower(MI, OutMI&: TmpInst);
2603	emitCallInstruction(MCI: TmpInst);
2604
2605	// For Import Call Optimization to work, we need a 3-byte nop after the
2606	// call instruction.
2607	emitNop(OS&: *OutStreamer, NumBytes: `3`, Subtarget);
2608	return;
2609	}
2610	break;
2611
2612	case X86::CALL64m:
2613	if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
2614	emitLabelAndRecordForImportCallOptimization(
2615	Kind: IMAGE_RETPOLINE_AMD64_CFG_CALL);
2616	}
2617	break;
2618
2619	case X86::JCC_1:
2620	// Two instruction prefixes (2EH for branch not-taken and 3EH for branch
2621	// taken) are used as branch hints. Here we add branch taken prefix for
2622	// jump instruction with higher probability than threshold.
2623	if (getSubtarget().hasBranchHint() && EnableBranchHint) {
2624	const MachineBranchProbabilityInfo *MBPI =
2625	&getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
2626	MachineBasicBlock *DestBB = MI->getOperand(i: `0`).getMBB();
2627	BranchProbability EdgeProb =
2628	MBPI->getEdgeProbability(Src: MI->getParent(), Dst: DestBB);
2629	BranchProbability Threshold(BranchHintProbabilityThreshold, `100`);
2630	if (EdgeProb > Threshold)
2631	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::DS_PREFIX));
2632	}
2633	break;
2634	}
2635
2636	MCInst TmpInst;
2637	MCInstLowering.Lower(MI, OutMI&: TmpInst);
2638
2639	if (MI->isCall()) {
2640	emitCallInstruction(MCI: TmpInst);
2641	return;
2642	}
2643
2644	EmitAndCountInstruction(Inst&: TmpInst);
2645	}
2646
2647	void X86AsmPrinter::emitCallInstruction(const llvm::MCInst &MCI) {
2648	// Stackmap shadows cannot include branch targets, so we can count the bytes
2649	// in a call towards the shadow, but must ensure that the no thread returns
2650	// in to the stackmap shadow. The only way to achieve this is if the call
2651	// is at the end of the shadow.
2652
2653	// Count then size of the call towards the shadow
2654	SMShadowTracker.count(Inst: MCI, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get());
2655	// Then flush the shadow so that we fill with nops before the call, not
2656	// after it.
2657	SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo());
2658	// Then emit the call
2659	OutStreamer ->emitInstruction(Inst: MCI, STI: getSubtargetInfo());
2660	}
2661
2662	void X86AsmPrinter::emitLabelAndRecordForImportCallOptimization(
2663	ImportCallKind Kind) {
2664	assert(EnableImportCallOptimization);
2665
2666	MCSymbol *CallSiteSymbol = MMI->getContext().createNamedTempSymbol(Name: "impcall");
2667	OutStreamer ->emitLabel(Symbol: CallSiteSymbol);
2668
2669	SectionToImportedFunctionCalls [OutStreamer ->getCurrentSectionOnly()]
2670	.push_back(x: {.CalleeSymbol: CallSiteSymbol, .Kind: Kind});
2671	}
2672

Browse the source code of llvm_projects/llvm/lib/Target/X86/X86MCInstLower.cpp