X86MCInstLower.cpp source code [llvm_projects/llvm/lib/Target/X86/X86MCInstLower.cpp]

1	//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains code to lower X86 MachineInstrs to their corresponding
10	// MCInst records.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "MCTargetDesc/X86ATTInstPrinter.h"
15	#include "MCTargetDesc/X86BaseInfo.h"
16	#include "MCTargetDesc/X86EncodingOptimization.h"
17	#include "MCTargetDesc/X86InstComments.h"
18	#include "MCTargetDesc/X86MCAsmInfo.h"
19	#include "MCTargetDesc/X86ShuffleDecode.h"
20	#include "MCTargetDesc/X86TargetStreamer.h"
21	#include "X86AsmPrinter.h"
22	#include "X86MachineFunctionInfo.h"
23	#include "X86RegisterInfo.h"
24	#include "X86ShuffleDecodeConstantPool.h"
25	#include "X86Subtarget.h"
26	#include "llvm/ADT/STLExtras.h"
27	#include "llvm/ADT/SmallString.h"
28	#include "llvm/ADT/StringExtras.h"
29	#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
30	#include "llvm/CodeGen/MachineConstantPool.h"
31	#include "llvm/CodeGen/MachineFunction.h"
32	#include "llvm/CodeGen/MachineModuleInfoImpls.h"
33	#include "llvm/CodeGen/MachineOperand.h"
34	#include "llvm/CodeGen/StackMaps.h"
35	#include "llvm/CodeGen/WinEHFuncInfo.h"
36	#include "llvm/IR/DataLayout.h"
37	#include "llvm/IR/GlobalValue.h"
38	#include "llvm/IR/Mangler.h"
39	#include "llvm/MC/MCAsmInfo.h"
40	#include "llvm/MC/MCCodeEmitter.h"
41	#include "llvm/MC/MCContext.h"
42	#include "llvm/MC/MCExpr.h"
43	#include "llvm/MC/MCFixup.h"
44	#include "llvm/MC/MCInst.h"
45	#include "llvm/MC/MCInstBuilder.h"
46	#include "llvm/MC/MCSection.h"
47	#include "llvm/MC/MCStreamer.h"
48	#include "llvm/MC/MCSymbol.h"
49	#include "llvm/MC/TargetRegistry.h"
50	#include "llvm/Target/TargetLoweringObjectFile.h"
51	#include "llvm/Target/TargetMachine.h"
52	#include "llvm/Transforms/CFGuard.h"
53	#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
54	#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
55	#include <string>
56
57	using namespace llvm;
58
59	static cl::opt<bool> EnableBranchHint("enable-branch-hint",
60	cl::desc ("Enable branch hint."),
61	cl::init(Val: false), cl::Hidden);
62	static cl::opt<unsigned> BranchHintProbabilityThreshold(
63	"branch-hint-probability-threshold",
64	cl::desc ("The probability threshold of enabling branch hint."),
65	cl::init(Val: `50`), cl::Hidden);
66
67	namespace {
68
69	/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
70	class X86MCInstLower {
71	MCContext &Ctx;
72	const MachineFunction &MF;
73	const TargetMachine &TM;
74	const MCAsmInfo &MAI;
75	X86AsmPrinter &AsmPrinter;
76
77	public:
78	X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
79
80	MCOperand LowerMachineOperand(const MachineInstr *MI,
81	const MachineOperand &MO) const;
82	void Lower(const MachineInstr MI, MCInst &OutMI) const*;
83
84	MCSymbol GetSymbolFromOperand(const* MachineOperand &MO) const;
85	MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol Sym) const*;
86
87	private:
88	MachineModuleInfoMachO &getMachOMMI() const;
89	};
90
91	} // end anonymous namespace
92
93	/// A RAII helper which defines a region of instructions which can't have
94	/// padding added between them for correctness.
95	struct NoAutoPaddingScope {
96	MCStreamer &OS;
97	const bool OldAllowAutoPadding;
98	NoAutoPaddingScope(MCStreamer &OS)
99	: OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
100	changeAndComment(b: false);
101	}
102	~NoAutoPaddingScope() { changeAndComment(b: OldAllowAutoPadding); }
103	void changeAndComment(bool b) {
104	if (b == OS.getAllowAutoPadding())
105	return;
106	OS.setAllowAutoPadding(b);
107	if (b)
108	OS.emitRawComment(T: "autopadding");
109	else
110	OS.emitRawComment(T: "noautopadding");
111	}
112	};
113
// Emit a minimal sequence of nops spanning NumBytes bytes.
// Forward declaration: the definition appears further down in this file, but
// StackMapShadowTracker::emitShadowPadding below needs to call it first.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);
117
118	void X86AsmPrinter::StackMapShadowTracker::count(const MCInst &Inst,
119	const MCSubtargetInfo &STI,
120	MCCodeEmitter *CodeEmitter) {
121	if (InShadow) {
122	SmallString<`256`> Code;
123	SmallVector<MCFixup, `4`> Fixups;
124	CodeEmitter->encodeInstruction(Inst, CB&: Code, Fixups, STI);
125	CurrentShadowSize += Code.size();
126	if (CurrentShadowSize >= RequiredShadowSize)
127	InShadow = false; // The shadow is big enough. Stop counting.
128	}
129	}
130
131	void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
132	MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
133	if (InShadow && CurrentShadowSize < RequiredShadowSize) {
134	InShadow = false;
135	emitX86Nops(OS&: OutStreamer, NumBytes: RequiredShadowSize - CurrentShadowSize,
136	Subtarget: &MF->getSubtarget<X86Subtarget>());
137	}
138	}
139
140	void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
141	OutStreamer ->emitInstruction(Inst, STI: getSubtargetInfo());
142	SMShadowTracker.count(Inst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get());
143	}
144
145	X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
146	X86AsmPrinter &asmprinter)
147	: Ctx(asmprinter.OutContext), MF(mf), TM(mf.getTarget()),
148	MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
149
150	MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
151	return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
152	}
153
154	/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
155	/// operand to an MCSymbol.
156	MCSymbol X86MCInstLower::GetSymbolFromOperand(const* MachineOperand &MO) const {
157	const Triple &TT = TM.getTargetTriple();
158	if (MO.isGlobal() && TT.isOSBinFormatELF())
159	return AsmPrinter.getSymbolPreferLocal(GV: *MO.getGlobal());
160
161	const DataLayout &DL = MF.getDataLayout();
162	assert((MO.isGlobal() \|\| MO.isSymbol() \|\| MO.isMBB()) &&
163	"Isn't a symbol reference");
164
165	MCSymbol Sym = nullptr*;
166	SmallString<`128`> Name;
167	StringRef Suffix;
168
169	switch (MO.getTargetFlags()) {
170	case X86II::MO_DLLIMPORT:
171	// Handle dllimport linkage.
172	Name += "__imp_";
173	break;
174	case X86II::MO_COFFSTUB:
175	Name += ".refptr.";
176	break;
177	case X86II::MO_DARWIN_NONLAZY:
178	case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
179	Suffix = "$non_lazy_ptr";
180	break;
181	}
182
183	if (!Suffix.empty())
184	Name += DL.getInternalSymbolPrefix();
185
186	if (MO.isGlobal()) {
187	const GlobalValue *GV = MO.getGlobal();
188	AsmPrinter.getNameWithPrefix(Name, GV);
189	} else if (MO.isSymbol()) {
190	Mangler::getNameWithPrefix(OutName&: Name, GVName: MO.getSymbolName(), DL);
191	} else if (MO.isMBB()) {
192	assert(Suffix.empty());
193	Sym = MO.getMBB()->getSymbol();
194	}
195
196	Name += Suffix;
197	if (!Sym)
198	Sym = Ctx.getOrCreateSymbol(Name);
199
200	// If the target flags on the operand changes the name of the symbol, do that
201	// before we return the symbol.
202	switch (MO.getTargetFlags()) {
203	default:
204	break;
205	case X86II::MO_COFFSTUB: {
206	MachineModuleInfoCOFF &MMICOFF =
207	AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
208	MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
209	if (!StubSym.getPointer()) {
210	assert(MO.isGlobal() && "Extern symbol not handled yet");
211	StubSym = MachineModuleInfoImpl::StubValueTy (
212	AsmPrinter.getSymbol(GV: MO.getGlobal()), true);
213	}
214	break;
215	}
216	case X86II::MO_DARWIN_NONLAZY:
217	case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
218	MachineModuleInfoImpl::StubValueTy &StubSym =
219	getMachOMMI().getGVStubEntry(Sym);
220	if (!StubSym.getPointer()) {
221	assert(MO.isGlobal() && "Extern symbol not handled yet");
222	StubSym = MachineModuleInfoImpl::StubValueTy (
223	AsmPrinter.getSymbol(GV: MO.getGlobal()),
224	!MO.getGlobal()->hasInternalLinkage());
225	}
226	break;
227	}
228	}
229
230	return Sym;
231	}
232
233	MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
234	MCSymbol Sym) const* {
235	// FIXME: We would like an efficient form for this, so we don't have to do a
236	// lot of extra uniquing.
237	const MCExpr Expr = nullptr*;
238	uint16_t Specifier = X86::S_None;
239
240	switch (MO.getTargetFlags()) {
241	default:
242	llvm_unreachable("Unknown target flag on GV operand");
243	case X86II::MO_NO_FLAG: // No flag.
244	// These affect the name of the symbol, not any suffix.
245	case X86II::MO_DARWIN_NONLAZY:
246	case X86II::MO_DLLIMPORT:
247	case X86II::MO_COFFSTUB:
248	break;
249
250	case X86II::MO_TLVP:
251	Specifier = X86::S_TLVP;
252	break;
253	case X86II::MO_TLVP_PIC_BASE:
254	Expr = MCSymbolRefExpr::create(Symbol: Sym, specifier: X86::S_TLVP, Ctx);
255	// Subtract the pic base.
256	Expr = MCBinaryExpr::createSub(
257	LHS: Expr, RHS: MCSymbolRefExpr::create(Symbol: MF.getPICBaseSymbol(), Ctx), Ctx);
258	break;
259	case X86II::MO_SECREL:
260	Specifier = uint16_t(X86::S_COFF_SECREL);
261	break;
262	case X86II::MO_TLSGD:
263	Specifier = X86::S_TLSGD;
264	break;
265	case X86II::MO_TLSLD:
266	Specifier = X86::S_TLSLD;
267	break;
268	case X86II::MO_TLSLDM:
269	Specifier = X86::S_TLSLDM;
270	break;
271	case X86II::MO_GOTTPOFF:
272	Specifier = X86::S_GOTTPOFF;
273	break;
274	case X86II::MO_INDNTPOFF:
275	Specifier = X86::S_INDNTPOFF;
276	break;
277	case X86II::MO_TPOFF:
278	Specifier = X86::S_TPOFF;
279	break;
280	case X86II::MO_DTPOFF:
281	Specifier = X86::S_DTPOFF;
282	break;
283	case X86II::MO_NTPOFF:
284	Specifier = X86::S_NTPOFF;
285	break;
286	case X86II::MO_GOTNTPOFF:
287	Specifier = X86::S_GOTNTPOFF;
288	break;
289	case X86II::MO_GOTPCREL:
290	Specifier = X86::S_GOTPCREL;
291	break;
292	case X86II::MO_GOTPCREL_NORELAX:
293	Specifier = X86::S_GOTPCREL_NORELAX;
294	break;
295	case X86II::MO_GOT:
296	Specifier = X86::S_GOT;
297	break;
298	case X86II::MO_GOTOFF:
299	Specifier = X86::S_GOTOFF;
300	break;
301	case X86II::MO_PLT:
302	Specifier = X86::S_PLT;
303	break;
304	case X86II::MO_ABS8:
305	Specifier = X86::S_ABS8;
306	break;
307	case X86II::MO_PIC_BASE_OFFSET:
308	case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
309	Expr = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
310	// Subtract the pic base.
311	Expr = MCBinaryExpr::createSub(
312	LHS: Expr, RHS: MCSymbolRefExpr::create(Symbol: MF.getPICBaseSymbol(), Ctx), Ctx);
313	if (MO.isJTI()) {
314	assert(MAI.doesSetDirectiveSuppressReloc());
315	// If .set directive is supported, use it to reduce the number of
316	// relocations the assembler will generate for differences between
317	// local labels. This is only safe when the symbols are in the same
318	// section so we are restricting it to jumptable references.
319	MCSymbol *Label = Ctx.createTempSymbol();
320	AsmPrinter.OutStreamer ->emitAssignment(Symbol: Label, Value: Expr);
321	Expr = MCSymbolRefExpr::create(Symbol: Label, Ctx);
322	}
323	break;
324	}
325
326	if (!Expr)
327	Expr = MCSymbolRefExpr::create(Symbol: Sym, specifier: Specifier, Ctx);
328
329	if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
330	Expr = MCBinaryExpr::createAdd(
331	LHS: Expr, RHS: MCConstantExpr::create(Value: MO.getOffset(), Ctx), Ctx);
332	return MCOperand::createExpr(Val: Expr);
333	}
334
335	static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
336	return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
337	}
338
339	MCOperand X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
340	const MachineOperand &MO) const {
341	switch (MO.getType()) {
342	default:
343	MI->print(OS&: errs());
344	llvm_unreachable("unknown operand type");
345	case MachineOperand::MO_Register:
346	// Ignore all implicit register operands.
347	if (MO.isImplicit())
348	return MCOperand ();
349	return MCOperand::createReg(Reg: MO.getReg());
350	case MachineOperand::MO_Immediate:
351	return MCOperand::createImm(Val: MO.getImm());
352	case MachineOperand::MO_MachineBasicBlock:
353	case MachineOperand::MO_GlobalAddress:
354	case MachineOperand::MO_ExternalSymbol:
355	return LowerSymbolOperand(MO, Sym: GetSymbolFromOperand(MO));
356	case MachineOperand::MO_MCSymbol:
357	return LowerSymbolOperand(MO, Sym: MO.getMCSymbol());
358	case MachineOperand::MO_JumpTableIndex:
359	return LowerSymbolOperand(MO, Sym: AsmPrinter.GetJTISymbol(JTID: MO.getIndex()));
360	case MachineOperand::MO_ConstantPoolIndex:
361	return LowerSymbolOperand(MO, Sym: AsmPrinter.GetCPISymbol(CPID: MO.getIndex()));
362	case MachineOperand::MO_BlockAddress:
363	return LowerSymbolOperand(
364	MO, Sym: AsmPrinter.GetBlockAddressSymbol(BA: MO.getBlockAddress()));
365	case MachineOperand::MO_RegisterMask:
366	// Ignore call clobbers.
367	return MCOperand ();
368	}
369	}
370
371	// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
372	// information.
373	static unsigned convertTailJumpOpcode(unsigned Opcode, bool IsLarge = false) {
374	switch (Opcode) {
375	case X86::TAILJMPr:
376	Opcode = X86::JMP32r;
377	break;
378	case X86::TAILJMPm:
379	Opcode = X86::JMP32m;
380	break;
381	case X86::TAILJMPr64:
382	Opcode = X86::JMP64r;
383	break;
384	case X86::TAILJMPm64:
385	Opcode = X86::JMP64m;
386	break;
387	case X86::TAILJMPr64_REX:
388	Opcode = X86::JMP64r_REX;
389	break;
390	case X86::TAILJMPm64_REX:
391	Opcode = X86::JMP64m_REX;
392	break;
393	case X86::TAILJMPd:
394	case X86::TAILJMPd64:
395	Opcode = IsLarge ? X86::JMPABS64i : X86::JMP_1;
396	break;
397	case X86::TAILJMPd_CC:
398	case X86::TAILJMPd64_CC:
399	Opcode = X86::JCC_1;
400	break;
401	}
402
403	return Opcode;
404	}
405
406	void X86MCInstLower::Lower(const MachineInstr MI, MCInst &OutMI) const* {
407	OutMI.setOpcode(MI->getOpcode());
408
409	for (const MachineOperand &MO : MI->operands())
410	if (auto Op = LowerMachineOperand(MI, MO); Op.isValid())
411	OutMI.addOperand(Op);
412
413	bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
414	if (X86::optimizeInstFromVEX3ToVEX2(MI&: OutMI, Desc: MI->getDesc()) \|\|
415	X86::optimizeShiftRotateWithImmediateOne(MI&: OutMI) \|\|
416	X86::optimizeVPCMPWithImmediateOneOrSix(MI&: OutMI) \|\|
417	X86::optimizeMOVSX(MI&: OutMI) \|\| X86::optimizeINCDEC(MI&: OutMI, In64BitMode) \|\|
418	X86::optimizeMOV(MI&: OutMI, In64BitMode) \|\|
419	X86::optimizeToFixedRegisterOrShortImmediateForm(MI&: OutMI))
420	return;
421
422	// Handle a few special cases to eliminate operand modifiers.
423	switch (OutMI.getOpcode()) {
424	case X86::LEA64_32r:
425	case X86::LEA64r:
426	case X86::LEA16r:
427	case X86::LEA32r:
428	// LEA should have a segment register, but it must be empty.
429	assert(OutMI.getNumOperands() == `1` + X86::AddrNumOperands &&
430	"Unexpected # of LEA operands");
431	assert(OutMI.getOperand(`1` + X86::AddrSegmentReg).getReg() == `0` &&
432	"LEA has segment specified!");
433	break;
434	case X86::MULX32Hrr:
435	case X86::MULX32Hrm:
436	case X86::MULX64Hrr:
437	case X86::MULX64Hrm: {
438	// Turn into regular MULX by duplicating the destination.
439	unsigned NewOpc;
440	switch (OutMI.getOpcode()) {
441	default: llvm_unreachable("Invalid opcode");
442	case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
443	case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
444	case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
445	case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
446	}
447	OutMI.setOpcode(NewOpc);
448	// Duplicate the destination.
449	MCRegister DestReg = OutMI.getOperand(i: `0`).getReg();
450	OutMI.insert(I: OutMI.begin(), Op: MCOperand::createReg(Reg: DestReg));
451	break;
452	}
453	// CALL64r, CALL64pcrel32 - These instructions used to have
454	// register inputs modeled as normal uses instead of implicit uses. As such,
455	// they we used to truncate off all but the first operand (the callee). This
456	// issue seems to have been fixed at some point. This assert verifies that.
457	case X86::CALL64r:
458	case X86::CALL64pcrel32:
459	assert(OutMI.getNumOperands() == `1` && "Unexpected number of operands!");
460	break;
461	case X86::EH_RETURN:
462	case X86::EH_RETURN64: {
463	OutMI = MCInst ();
464	OutMI.setOpcode(getRetOpcode(Subtarget: AsmPrinter.getSubtarget()));
465	break;
466	}
467	case X86::CLEANUPRET: {
468	// Replace CLEANUPRET with the appropriate RET.
469	OutMI = MCInst ();
470	OutMI.setOpcode(getRetOpcode(Subtarget: AsmPrinter.getSubtarget()));
471	break;
472	}
473	case X86::CATCHRET: {
474	// Replace CATCHRET with the appropriate RET.
475	const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
476	unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
477	OutMI = MCInst ();
478	OutMI.setOpcode(getRetOpcode(Subtarget));
479	OutMI.addOperand(Op: MCOperand::createReg(Reg: ReturnReg));
480	break;
481	}
482	// TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
483	// instruction.
484	case X86::TAILJMPr:
485	case X86::TAILJMPr64:
486	case X86::TAILJMPr64_REX:
487	case X86::TAILJMPd:
488	assert(OutMI.getNumOperands() == `1` && "Unexpected number of operands!");
489	OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode()));
490	break;
491	case X86::TAILJMPd64: {
492	assert(OutMI.getNumOperands() == `1` && "Unexpected number of operands!");
493	bool IsLarge = TM.getCodeModel() == CodeModel::Large;
494	assert((!IsLarge \|\| AsmPrinter.getSubtarget().hasJMPABS()) &&
495	"Unexpected TAILJMPd64 in large code model without JMPABS");
496	OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode(), IsLarge));
497	break;
498	}
499	case X86::TAILJMPd_CC:
500	case X86::TAILJMPd64_CC:
501	assert(OutMI.getNumOperands() == `2` && "Unexpected number of operands!");
502	OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode()));
503	break;
504	case X86::TAILJMPm:
505	case X86::TAILJMPm64:
506	case X86::TAILJMPm64_REX:
507	assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
508	"Unexpected number of operands!");
509	OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode()));
510	break;
511	case X86::MASKMOVDQU:
512	case X86::VMASKMOVDQU:
513	if (In64BitMode)
514	OutMI.setFlags(X86::IP_HAS_AD_SIZE);
515	break;
516	case X86::BSF16rm:
517	case X86::BSF16rr:
518	case X86::BSF32rm:
519	case X86::BSF32rr:
520	case X86::BSF64rm:
521	case X86::BSF64rr: {
522	// Add an REP prefix to BSF instructions so that new processors can
523	// recognize as TZCNT, which has better performance than BSF.
524	// BSF and TZCNT have different interpretations on ZF bit. So make sure
525	// it won't be used later.
526	const MachineOperand *FlagDef =
527	MI->findRegisterDefOperand(Reg: X86::EFLAGS, /TRI=/nullptr);
528	if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
529	OutMI.setFlags(X86::IP_HAS_REPEAT);
530	break;
531	}
532	default:
533	break;
534	}
535	}
536
537	void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
538	const MachineInstr &MI) {
539	NoAutoPaddingScope NoPadScope(*OutStreamer);
540	bool Is64Bits = getSubtarget().is64Bit();
541	bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64();
542	MCContext &Ctx = OutStreamer ->getContext();
543
544	X86::Specifier Specifier;
545	switch (MI.getOpcode()) {
546	case X86::TLS_addr32:
547	case X86::TLS_addr64:
548	case X86::TLS_addrX32:
549	Specifier = X86::S_TLSGD;
550	break;
551	case X86::TLS_base_addr32:
552	Specifier = X86::S_TLSLDM;
553	break;
554	case X86::TLS_base_addr64:
555	case X86::TLS_base_addrX32:
556	Specifier = X86::S_TLSLD;
557	break;
558	case X86::TLS_desc32:
559	case X86::TLS_desc64:
560	Specifier = X86::S_TLSDESC;
561	break;
562	default:
563	llvm_unreachable("unexpected opcode");
564	}
565
566	const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
567	Symbol: MCInstLowering.GetSymbolFromOperand(MO: MI.getOperand(i: `3`)), specifier: Specifier, Ctx);
568
569	// Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD
570	// code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
571	// attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
572	// only using GOT when GOTPCRELX is enabled.
573	// TODO Delete the workaround when rustc no longer relies on the hack
574	bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
575	Ctx.getTargetOptions()->X86RelaxRelocations;
576
577	if (Specifier == X86::S_TLSDESC) {
578	const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
579	Symbol: MCInstLowering.GetSymbolFromOperand(MO: MI.getOperand(i: `3`)), specifier: X86::S_TLSCALL,
580	Ctx);
581	EmitAndCountInstruction(
582	Inst&: MCInstBuilder (Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
583	.addReg(Reg: Is64BitsLP64 ? X86::RAX : X86::EAX)
584	.addReg(Reg: Is64Bits ? X86::RIP : X86::EBX)
585	.addImm(Val: `1`)
586	.addReg(Reg: `0`)
587	.addExpr(Val: Sym)
588	.addReg(Reg: `0`));
589	EmitAndCountInstruction(
590	Inst&: MCInstBuilder (Is64Bits ? X86::CALL64m : X86::CALL32m)
591	.addReg(Reg: Is64BitsLP64 ? X86::RAX : X86::EAX)
592	.addImm(Val: `1`)
593	.addReg(Reg: `0`)
594	.addExpr(Val: Expr)
595	.addReg(Reg: `0`));
596	} else if (Is64Bits) {
597	bool NeedsPadding = Specifier == X86::S_TLSGD;
598	if (NeedsPadding && Is64BitsLP64)
599	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::DATA16_PREFIX));
600	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::LEA64r)
601	.addReg(Reg: X86::RDI)
602	.addReg(Reg: X86::RIP)
603	.addImm(Val: `1`)
604	.addReg(Reg: `0`)
605	.addExpr(Val: Sym)
606	.addReg(Reg: `0`));
607	const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "__tls_get_addr");
608	if (NeedsPadding) {
609	if (!UseGot)
610	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::DATA16_PREFIX));
611	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::DATA16_PREFIX));
612	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::REX64_PREFIX));
613	}
614	if (UseGot) {
615	const MCExpr *Expr =
616	MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_GOTPCREL, Ctx);
617	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL64m)
618	.addReg(Reg: X86::RIP)
619	.addImm(Val: `1`)
620	.addReg(Reg: `0`)
621	.addExpr(Val: Expr)
622	.addReg(Reg: `0`));
623	} else {
624	EmitAndCountInstruction(
625	Inst&: MCInstBuilder (X86::CALL64pcrel32)
626	.addExpr(Val: MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_PLT, Ctx)));
627	}
628	} else {
629	if (Specifier == X86::S_TLSGD && !UseGot) {
630	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::LEA32r)
631	.addReg(Reg: X86::EAX)
632	.addReg(Reg: `0`)
633	.addImm(Val: `1`)
634	.addReg(Reg: X86::EBX)
635	.addExpr(Val: Sym)
636	.addReg(Reg: `0`));
637	} else {
638	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::LEA32r)
639	.addReg(Reg: X86::EAX)
640	.addReg(Reg: X86::EBX)
641	.addImm(Val: `1`)
642	.addReg(Reg: `0`)
643	.addExpr(Val: Sym)
644	.addReg(Reg: `0`));
645	}
646
647	const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "___tls_get_addr");
648	if (UseGot) {
649	const MCExpr *Expr = MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_GOT, Ctx);
650	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL32m)
651	.addReg(Reg: X86::EBX)
652	.addImm(Val: `1`)
653	.addReg(Reg: `0`)
654	.addExpr(Val: Expr)
655	.addReg(Reg: `0`));
656	} else {
657	EmitAndCountInstruction(
658	Inst&: MCInstBuilder (X86::CALLpcrel32)
659	.addExpr(Val: MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_PLT, Ctx)));
660	}
661	}
662	}
663
664	/// Emit the largest nop instruction smaller than or equal to \p NumBytes
665	/// bytes. Return the size of nop emitted.
666	static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
667	const X86Subtarget *Subtarget) {
668	// Determine the longest nop which can be efficiently decoded for the given
669	// target cpu. 15-bytes is the longest single NOP instruction, but some
670	// platforms can't decode the longest forms efficiently.
671	unsigned MaxNopLength = `1`;
672	if (Subtarget->is64Bit()) {
673	// FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
674	// IndexReg/BaseReg below need to be updated.
675	if (Subtarget->hasFeature(Feature: X86::TuningFast7ByteNOP))
676	MaxNopLength = `7`;
677	else if (Subtarget->hasFeature(Feature: X86::TuningFast15ByteNOP))
678	MaxNopLength = `15`;
679	else if (Subtarget->hasFeature(Feature: X86::TuningFast11ByteNOP))
680	MaxNopLength = `11`;
681	else
682	MaxNopLength = `10`;
683	} if (Subtarget->is32Bit())
684	MaxNopLength = `2`;
685
686	// Cap a single nop emission at the profitable value for the target
687	NumBytes = std::min(a: NumBytes, b: MaxNopLength);
688
689	unsigned NopSize;
690	unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
691	IndexReg = Displacement = SegmentReg = `0`;
692	BaseReg = X86::RAX;
693	ScaleVal = `1`;
694	switch (NumBytes) {
695	case `0`:
696	llvm_unreachable("Zero nops?");
697	break;
698	case `1`:
699	NopSize = `1`;
700	Opc = X86::NOOP;
701	break;
702	case `2`:
703	NopSize = `2`;
704	Opc = X86::XCHG16ar;
705	break;
706	case `3`:
707	NopSize = `3`;
708	Opc = X86::NOOPL;
709	break;
710	case `4`:
711	NopSize = `4`;
712	Opc = X86::NOOPL;
713	Displacement = `8`;
714	break;
715	case `5`:
716	NopSize = `5`;
717	Opc = X86::NOOPL;
718	Displacement = `8`;
719	IndexReg = X86::RAX;
720	break;
721	case `6`:
722	NopSize = `6`;
723	Opc = X86::NOOPW;
724	Displacement = `8`;
725	IndexReg = X86::RAX;
726	break;
727	case `7`:
728	NopSize = `7`;
729	Opc = X86::NOOPL;
730	Displacement = `512`;
731	break;
732	case `8`:
733	NopSize = `8`;
734	Opc = X86::NOOPL;
735	Displacement = `512`;
736	IndexReg = X86::RAX;
737	break;
738	case `9`:
739	NopSize = `9`;
740	Opc = X86::NOOPW;
741	Displacement = `512`;
742	IndexReg = X86::RAX;
743	break;
744	default:
745	NopSize = `10`;
746	Opc = X86::NOOPW;
747	Displacement = `512`;
748	IndexReg = X86::RAX;
749	SegmentReg = X86::CS;
750	break;
751	}
752
753	unsigned NumPrefixes = std::min(a: NumBytes - NopSize, b: `5U`);
754	NopSize += NumPrefixes;
755	for (unsigned i = `0`; i != NumPrefixes; ++i)
756	OS.emitBytes(Data: "\x66");
757
758	switch (Opc) {
759	default: llvm_unreachable("Unexpected opcode");
760	case X86::NOOP:
761	OS.emitInstruction(Inst: MCInstBuilder (Opc), STI: *Subtarget);
762	break;
763	case X86::XCHG16ar:
764	OS.emitInstruction(Inst: MCInstBuilder (Opc).addReg(Reg: X86::AX).addReg(Reg: X86::AX),
765	STI: *Subtarget);
766	break;
767	case X86::NOOPL:
768	case X86::NOOPW:
769	OS.emitInstruction(Inst: MCInstBuilder (Opc)
770	.addReg(Reg: BaseReg)
771	.addImm(Val: ScaleVal)
772	.addReg(Reg: IndexReg)
773	.addImm(Val: Displacement)
774	.addReg(Reg: SegmentReg),
775	STI: *Subtarget);
776	break;
777	}
778	assert(NopSize <= NumBytes && "We overemitted?");
779	return NopSize;
780	}
781
782	/// Emit the optimal amount of multi-byte nops on X86.
783	static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
784	const X86Subtarget *Subtarget) {
785	unsigned NopsToEmit = NumBytes;
786	(void)NopsToEmit;
787	while (NumBytes) {
788	NumBytes -= emitNop(OS, NumBytes, Subtarget);
789	assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
790	}
791	}
792
793	void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
794	X86MCInstLower &MCIL) {
795	assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
796
797	NoAutoPaddingScope NoPadScope(*OutStreamer);
798
799	StatepointOpers SOpers(&MI);
800	if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
801	emitX86Nops(OS&: *OutStreamer, NumBytes: PatchBytes, Subtarget);
802	} else {
803	// Lower call target and choose correct opcode
804	const MachineOperand &CallTarget = SOpers.getCallTarget();
805	MCOperand CallTargetMCOp;
806	unsigned CallOpcode;
807	switch (CallTarget.getType()) {
808	case MachineOperand::MO_GlobalAddress:
809	case MachineOperand::MO_ExternalSymbol:
810	CallTargetMCOp = MCIL.LowerSymbolOperand(
811	MO: CallTarget, Sym: MCIL.GetSymbolFromOperand(MO: CallTarget));
812	CallOpcode = X86::CALL64pcrel32;
813	// Currently, we only support relative addressing with statepoints.
814	// Otherwise, we'll need a scratch register to hold the target
815	// address. You'll fail asserts during load & relocation if this
816	// symbol is to far away. (TODO: support non-relative addressing)
817	break;
818	case MachineOperand::MO_Immediate:
819	CallTargetMCOp = MCOperand::createImm(Val: CallTarget.getImm());
820	CallOpcode = X86::CALL64pcrel32;
821	// Currently, we only support relative addressing with statepoints.
822	// Otherwise, we'll need a scratch register to hold the target
823	// immediate. You'll fail asserts during load & relocation if this
824	// address is to far away. (TODO: support non-relative addressing)
825	break;
826	case MachineOperand::MO_Register:
827	// FIXME: Add retpoline support and remove this.
828	if (Subtarget->useIndirectThunkCalls())
829	report_fatal_error(reason: "Lowering register statepoints with thunks not "
830	"yet implemented.");
831	CallTargetMCOp = MCOperand::createReg(Reg: CallTarget.getReg());
832	CallOpcode = X86::CALL64r;
833	break;
834	default:
835	llvm_unreachable("Unsupported operand type in statepoint call target");
836	break;
837	}
838
839	// Emit call
840	MCInst CallInst;
841	CallInst.setOpcode(CallOpcode);
842	CallInst.addOperand(Op: CallTargetMCOp);
843	OutStreamer ->emitInstruction(Inst: CallInst, STI: getSubtargetInfo());
844	maybeEmitNopAfterCallForWindowsEH(MI: &MI);
845	}
846
847	// Record our statepoint node in the same section used by STACKMAP
848	// and PATCHPOINT
849	auto &Ctx = OutStreamer ->getContext();
850	MCSymbol *MILabel = Ctx.createTempSymbol();
851	OutStreamer ->emitLabel(Symbol: MILabel);
852	SM.recordStatepoint(L: *MILabel, MI);
853	}
854
855	void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
856	X86MCInstLower &MCIL) {
857	// FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
858	// <opcode>, <operands>
859
860	NoAutoPaddingScope NoPadScope(*OutStreamer);
861
862	Register DefRegister = FaultingMI.getOperand(i: `0`).getReg();
863	FaultMaps::FaultKind FK =
864	static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(i: `1`).getImm());
865	MCSymbol *HandlerLabel = FaultingMI.getOperand(i: `2`).getMBB()->getSymbol();
866	unsigned Opcode = FaultingMI.getOperand(i: `3`).getImm();
867	unsigned OperandsBeginIdx = `4`;
868
869	auto &Ctx = OutStreamer ->getContext();
870	MCSymbol *FaultingLabel = Ctx.createTempSymbol();
871	OutStreamer ->emitLabel(Symbol: FaultingLabel);
872
873	assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
874	FM.recordFaultingOp(FaultTy: FK, FaultingLabel, HandlerLabel);
875
876	MCInst MI;
877	MI.setOpcode(Opcode);
878
879	if (DefRegister != X86::NoRegister)
880	MI.addOperand(Op: MCOperand::createReg(Reg: DefRegister));
881
882	for (const MachineOperand &MO :
883	llvm::drop_begin(RangeOrContainer: FaultingMI.operands(), N: OperandsBeginIdx))
884	if (auto Op = MCIL.LowerMachineOperand(MI: &FaultingMI, MO); Op.isValid())
885	MI.addOperand(Op);
886
887	OutStreamer ->AddComment(T: "on-fault: " + HandlerLabel->getName());
888	OutStreamer ->emitInstruction(Inst: MI, STI: getSubtargetInfo());
889	}
890
891	void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
892	X86MCInstLower &MCIL) {
893	bool Is64Bits = Subtarget->is64Bit();
894	MCContext &Ctx = OutStreamer ->getContext();
895	MCSymbol *fentry = Ctx.getOrCreateSymbol(Name: "__fentry__");
896	const MCSymbolRefExpr *Op = MCSymbolRefExpr::create(Symbol: fentry, Ctx);
897
898	EmitAndCountInstruction(
899	Inst&: MCInstBuilder (Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
900	.addExpr(Val: Op));
901	}
902
903	void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
904	assert(std::next(MI.getIterator())->isCall() &&
905	"KCFI_CHECK not followed by a call instruction");
906
907	// Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
908	// returns a 1-byte X86::NOOP, which means the offset is the same in
909	// bytes. This assumes that patchable-function-prefix is the same for all
910	// functions.
911	const MachineFunction &MF = *MI.getMF();
912	int64_t PrefixNops = `0`;
913	(void)MF.getFunction()
914	.getFnAttribute(Kind: "patchable-function-prefix")
915	.getValueAsString()
916	.getAsInteger(Radix: `10`, Result&: PrefixNops);
917
918	// KCFI allows indirect calls to any location that's preceded by a valid
919	// type identifier. To avoid encoding the full constant into an instruction,
920	// and thus emitting potential call target gadgets at each indirect call
921	// site, load a negated constant to a register and compare that to the
922	// expected value at the call target.
923	const Register AddrReg = MI.getOperand(i: `0`).getReg();
924	const uint32_t Type = MI.getOperand(i: `1`).getImm();
925	// The check is immediately before the call. If the call target is in R10,
926	// we can clobber R11 for the check instead.
927	unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
928	EmitAndCountInstruction(
929	Inst&: MCInstBuilder (X86::MOV32ri).addReg(Reg: TempReg).addImm(Val: -MaskKCFIType(Value: Type)));
930	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::ADD32rm)
931	.addReg(Reg: X86::NoRegister)
932	.addReg(Reg: TempReg)
933	.addReg(Reg: AddrReg)
934	.addImm(Val: `1`)
935	.addReg(Reg: X86::NoRegister)
936	.addImm(Val: -(PrefixNops + `4`))
937	.addReg(Reg: X86::NoRegister));
938
939	MCSymbol *Pass = OutContext.createTempSymbol();
940	EmitAndCountInstruction(
941	Inst&: MCInstBuilder (X86::JCC_1)
942	.addExpr(Val: MCSymbolRefExpr::create(Symbol: Pass, Ctx&: OutContext))
943	.addImm(Val: X86::COND_E));
944
945	MCSymbol *Trap = OutContext.createTempSymbol();
946	OutStreamer ->emitLabel(Symbol: Trap);
947	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::TRAP));
948	emitKCFITrapEntry(MF, Symbol: Trap);
949	OutStreamer ->emitLabel(Symbol: Pass);
950	}
951
952	void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
953	// FIXME: Make this work on non-ELF.
954	if (!TM.getTargetTriple().isOSBinFormatELF()) {
955	report_fatal_error(reason: "llvm.asan.check.memaccess only supported on ELF");
956	return;
957	}
958
959	const auto &Reg = MI.getOperand(i: `0`).getReg();
960	ASanAccessInfo AccessInfo(MI.getOperand(i: `1`).getImm());
961
962	uint64_t ShadowBase;
963	int MappingScale;
964	bool OrShadowOffset;
965	getAddressSanitizerParams(TargetTriple: TM.getTargetTriple(), LongSize: `64`, IsKasan: AccessInfo.CompileKernel,
966	ShadowBase: &ShadowBase, MappingScale: &MappingScale, OrShadowOffset: &OrShadowOffset);
967
968	StringRef Name = AccessInfo.IsWrite ? "store" : "load";
969	StringRef Op = OrShadowOffset ? "or" : "add";
970	std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
971	Twine (`1ULL` << AccessInfo.AccessSizeIndex) + "_" +
972	TM.getMCRegisterInfo()->getName(RegNo: Reg.asMCReg()))
973	.str();
974	if (OrShadowOffset)
975	report_fatal_error(
976	reason: "OrShadowOffset is not supported with optimized callbacks");
977
978	EmitAndCountInstruction(
979	Inst&: MCInstBuilder (X86::CALL64pcrel32)
980	.addExpr(Val: MCSymbolRefExpr::create(
981	Symbol: OutContext.getOrCreateSymbol(Name: SymName), Ctx&: OutContext)));
982	}
983
984	void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
985	X86MCInstLower &MCIL) {
986	// PATCHABLE_OP minsize
987
988	NoAutoPaddingScope NoPadScope(*OutStreamer);
989
990	auto NextMI = std::find_if(first: std::next(x: MI.getIterator()),
991	last: MI.getParent()->end().getInstrIterator(),
992	pred: [](auto &II) { return !II.isMetaInstruction(); });
993
994	SmallString<`256`> Code;
995	unsigned MinSize = MI.getOperand(i: `0`).getImm();
996
997	if (NextMI != MI.getParent()->end() && !NextMI ->isInlineAsm()) {
998	// Lower the next MachineInstr to find its byte size.
999	// If the next instruction is inline assembly, we skip lowering it for now,
1000	// and assume we should always generate NOPs.
1001	MCInst MCI;
1002	MCIL.Lower(MI: &*NextMI, OutMI&: MCI);
1003
1004	SmallVector<MCFixup, `4`> Fixups;
1005	CodeEmitter ->encodeInstruction(Inst: MCI, CB&: Code, Fixups, STI: getSubtargetInfo());
1006	}
1007
1008	if (Code.size() < MinSize) {
1009	if (MinSize == `2` && Subtarget->is32Bit() &&
1010	Subtarget->isTargetWindowsMSVC() &&
1011	(Subtarget->getCPU().empty() \|\| Subtarget->getCPU() == "pentium3")) {
1012	// For compatibility reasons, when targetting MSVC, it is important to
1013	// generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1014	// rely specifically on this pattern to be able to patch a function.
1015	// This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1016	OutStreamer ->emitInstruction(
1017	Inst: MCInstBuilder (X86::MOV32rr_REV).addReg(Reg: X86::EDI).addReg(Reg: X86::EDI),
1018	STI: *Subtarget);
1019	} else {
1020	unsigned NopSize = emitNop(OS&: *OutStreamer, NumBytes: MinSize, Subtarget);
1021	assert(NopSize == MinSize && "Could not implement MinSize!");
1022	(void)NopSize;
1023	}
1024	}
1025	}
1026
1027	// Lower a stackmap of the form:
1028	// <id>, <shadowBytes>, ...
1029	void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1030	SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo());
1031
1032	auto &Ctx = OutStreamer ->getContext();
1033	MCSymbol *MILabel = Ctx.createTempSymbol();
1034	OutStreamer ->emitLabel(Symbol: MILabel);
1035
1036	SM.recordStackMap(L: *MILabel, MI);
1037	unsigned NumShadowBytes = MI.getOperand(i: `1`).getImm();
1038	SMShadowTracker.reset(RequiredSize: NumShadowBytes);
1039	}
1040
1041	// Lower a patchpoint of the form:
1042	// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1043	void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1044	X86MCInstLower &MCIL) {
1045	assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1046
1047	SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo());
1048
1049	NoAutoPaddingScope NoPadScope(*OutStreamer);
1050
1051	auto &Ctx = OutStreamer ->getContext();
1052	MCSymbol *MILabel = Ctx.createTempSymbol();
1053	OutStreamer ->emitLabel(Symbol: MILabel);
1054	SM.recordPatchPoint(L: *MILabel, MI);
1055
1056	PatchPointOpers opers(&MI);
1057	unsigned ScratchIdx = opers.getNextScratchIdx();
1058	unsigned EncodedBytes = `0`;
1059	const MachineOperand &CalleeMO = opers.getCallTarget();
1060
1061	// Check for null target. If target is non-null (i.e. is non-zero or is
1062	// symbolic) then emit a call.
1063	if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1064	MCOperand CalleeMCOp;
1065	switch (CalleeMO.getType()) {
1066	default:
1067	/// FIXME: Add a verifier check for bad callee types.
1068	llvm_unreachable("Unrecognized callee operand type.");
1069	case MachineOperand::MO_Immediate:
1070	if (CalleeMO.getImm())
1071	CalleeMCOp = MCOperand::createImm(Val: CalleeMO.getImm());
1072	break;
1073	case MachineOperand::MO_ExternalSymbol:
1074	case MachineOperand::MO_GlobalAddress:
1075	CalleeMCOp = MCIL.LowerSymbolOperand(MO: CalleeMO,
1076	Sym: MCIL.GetSymbolFromOperand(MO: CalleeMO));
1077	break;
1078	}
1079
1080	// Emit MOV to materialize the target address and the CALL to target.
1081	// This is encoded with 12-13 bytes, depending on which register is used.
1082	Register ScratchReg = MI.getOperand(i: ScratchIdx).getReg();
1083	if (X86II::isX86_64ExtendedReg(Reg: ScratchReg))
1084	EncodedBytes = `13`;
1085	else
1086	EncodedBytes = `12`;
1087
1088	EmitAndCountInstruction(
1089	Inst&: MCInstBuilder (X86::MOV64ri).addReg(Reg: ScratchReg).addOperand(Op: CalleeMCOp));
1090	// FIXME: Add retpoline support and remove this.
1091	if (Subtarget->useIndirectThunkCalls())
1092	report_fatal_error(
1093	reason: "Lowering patchpoint with thunks not yet implemented.");
1094	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL64r).addReg(Reg: ScratchReg));
1095	}
1096
1097	// Emit padding.
1098	unsigned NumBytes = opers.getNumPatchBytes();
1099	assert(NumBytes >= EncodedBytes &&
1100	"Patchpoint can't request size less than the length of a call.");
1101
1102	emitX86Nops(OS&: *OutStreamer, NumBytes: NumBytes - EncodedBytes, Subtarget);
1103	}
1104
1105	void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1106	X86MCInstLower &MCIL) {
1107	assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1108
1109	NoAutoPaddingScope NoPadScope(*OutStreamer);
1110
1111	// We want to emit the following pattern, which follows the x86 calling
1112	// convention to prepare for the trampoline call to be patched in.
1113	//
1114	// .p2align 1, ...
1115	// .Lxray_event_sled_N:
1116	// jmp +N // jump across the instrumentation sled
1117	// ... // set up arguments in register
1118	// callq __xray_CustomEvent@plt // force dependency to symbol
1119	// ...
1120	// <jump here>
1121	//
1122	// After patching, it would look something like:
1123	//
1124	// nopw (2-byte nop)
1125	// ...
1126	// callq __xrayCustomEvent // already lowered
1127	// ...
1128	//
1129	// ---
1130	// First we emit the label and the jump.
1131	auto CurSled = OutContext.createTempSymbol(Name: "xray_event_sled_", AlwaysAddSuffix: true);
1132	OutStreamer ->AddComment(T: "# XRay Custom Event Log");
1133	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1134	OutStreamer ->emitLabel(Symbol: CurSled);
1135
1136	// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1137	// an operand (computed as an offset from the jmp instruction).
1138	// FIXME: Find another less hacky way do force the relative jump.
1139	OutStreamer ->emitBinaryData(Data: "\xeb\x0f");
1140
1141	// The default C calling convention will place two arguments into %rcx and
1142	// %rdx -- so we only work with those.
1143	const Register DestRegs[] = {X86::RDI, X86::RSI};
1144	bool UsedMask[] = {false, false};
1145	// Filled out in loop.
1146	Register SrcRegs[] = {`0`, `0`};
1147
1148	// Then we put the operands in the %rdi and %rsi registers. We spill the
1149	// values in the register before we clobber them, and mark them as used in
1150	// UsedMask. In case the arguments are already in the correct register, we use
1151	// emit nops appropriately sized to keep the sled the same size in every
1152	// situation.
1153	for (unsigned I = `0`; I < MI.getNumOperands(); ++I)
1154	if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I));
1155	Op.isValid()) {
1156	assert(Op.isReg() && "Only support arguments in registers");
1157	SrcRegs[I] = getX86SubSuperRegister(Reg: Op.getReg(), Size: `64`);
1158	assert(SrcRegs[I].isValid() && "Invalid operand");
1159	if (SrcRegs[I] != DestRegs[I]) {
1160	UsedMask[I] = true;
1161	EmitAndCountInstruction(
1162	Inst&: MCInstBuilder (X86::PUSH64r).addReg(Reg: DestRegs[I]));
1163	} else {
1164	emitX86Nops(OS&: *OutStreamer, NumBytes: `4`, Subtarget);
1165	}
1166	}
1167
1168	// Now that the register values are stashed, mov arguments into place.
1169	// FIXME: This doesn't work if one of the later SrcRegs is equal to an
1170	// earlier DestReg. We will have already overwritten over the register before
1171	// we can copy from it.
1172	for (unsigned I = `0`; I < MI.getNumOperands(); ++I)
1173	if (SrcRegs[I] != DestRegs[I])
1174	EmitAndCountInstruction(
1175	Inst&: MCInstBuilder (X86::MOV64rr).addReg(Reg: DestRegs[I]).addReg(Reg: SrcRegs[I]));
1176
1177	// We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1178	// name of the trampoline to be implemented by the XRay runtime.
1179	auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_CustomEvent");
1180	MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym);
1181	if (isPositionIndependent())
1182	TOp.setTargetFlags(X86II::MO_PLT);
1183
1184	// Emit the call instruction.
1185	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL64pcrel32)
1186	.addOperand(Op: MCIL.LowerSymbolOperand(MO: TOp, Sym: TSym)));
1187
1188	// Restore caller-saved and used registers.
1189	for (unsigned I = sizeof UsedMask; I-- > `0`;)
1190	if (UsedMask[I])
1191	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::POP64r).addReg(Reg: DestRegs[I]));
1192	else
1193	emitX86Nops(OS&: *OutStreamer, NumBytes: `1`, Subtarget);
1194
1195	OutStreamer ->AddComment(T: "xray custom event end.");
1196
1197	// Record the sled version. Version 0 of this sled was spelled differently, so
1198	// we let the runtime handle the different offsets we're using. Version 2
1199	// changed the absolute address to a PC-relative address.
1200	recordSled(Sled: CurSled, MI, Kind: SledKind::CUSTOM_EVENT, Version: `2`);
1201	}
1202
1203	void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1204	X86MCInstLower &MCIL) {
1205	assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1206
1207	NoAutoPaddingScope NoPadScope(*OutStreamer);
1208
1209	// We want to emit the following pattern, which follows the x86 calling
1210	// convention to prepare for the trampoline call to be patched in.
1211	//
1212	// .p2align 1, ...
1213	// .Lxray_event_sled_N:
1214	// jmp +N // jump across the instrumentation sled
1215	// ... // set up arguments in register
1216	// callq __xray_TypedEvent@plt // force dependency to symbol
1217	// ...
1218	// <jump here>
1219	//
1220	// After patching, it would look something like:
1221	//
1222	// nopw (2-byte nop)
1223	// ...
1224	// callq __xrayTypedEvent // already lowered
1225	// ...
1226	//
1227	// ---
1228	// First we emit the label and the jump.
1229	auto CurSled = OutContext.createTempSymbol(Name: "xray_typed_event_sled_", AlwaysAddSuffix: true);
1230	OutStreamer ->AddComment(T: "# XRay Typed Event Log");
1231	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1232	OutStreamer ->emitLabel(Symbol: CurSled);
1233
1234	// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1235	// an operand (computed as an offset from the jmp instruction).
1236	// FIXME: Find another less hacky way do force the relative jump.
1237	OutStreamer ->emitBinaryData(Data: "\xeb\x14");
1238
1239	// An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1240	// so we'll work with those. Or we may be called via SystemV, in which case
1241	// we don't have to do any translation.
1242	const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1243	bool UsedMask[] = {false, false, false};
1244
1245	// Will fill out src regs in the loop.
1246	Register SrcRegs[] = {`0`, `0`, `0`};
1247
1248	// Then we put the operands in the SystemV registers. We spill the values in
1249	// the registers before we clobber them, and mark them as used in UsedMask.
1250	// In case the arguments are already in the correct register, we emit nops
1251	// appropriately sized to keep the sled the same size in every situation.
1252	for (unsigned I = `0`; I < MI.getNumOperands(); ++I)
1253	if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I));
1254	Op.isValid()) {
1255	// TODO: Is register only support adequate?
1256	assert(Op.isReg() && "Only supports arguments in registers");
1257	SrcRegs[I] = getX86SubSuperRegister(Reg: Op.getReg(), Size: `64`);
1258	assert(SrcRegs[I].isValid() && "Invalid operand");
1259	if (SrcRegs[I] != DestRegs[I]) {
1260	UsedMask[I] = true;
1261	EmitAndCountInstruction(
1262	Inst&: MCInstBuilder (X86::PUSH64r).addReg(Reg: DestRegs[I]));
1263	} else {
1264	emitX86Nops(OS&: *OutStreamer, NumBytes: `4`, Subtarget);
1265	}
1266	}
1267
1268	// In the above loop we only stash all of the destination registers or emit
1269	// nops if the arguments are already in the right place. Doing the actually
1270	// moving is postponed until after all the registers are stashed so nothing
1271	// is clobbers. We've already added nops to account for the size of mov and
1272	// push if the register is in the right place, so we only have to worry about
1273	// emitting movs.
1274	// FIXME: This doesn't work if one of the later SrcRegs is equal to an
1275	// earlier DestReg. We will have already overwritten over the register before
1276	// we can copy from it.
1277	for (unsigned I = `0`; I < MI.getNumOperands(); ++I)
1278	if (UsedMask[I])
1279	EmitAndCountInstruction(
1280	Inst&: MCInstBuilder (X86::MOV64rr).addReg(Reg: DestRegs[I]).addReg(Reg: SrcRegs[I]));
1281
1282	// We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1283	// name of the trampoline to be implemented by the XRay runtime.
1284	auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_TypedEvent");
1285	MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym);
1286	if (isPositionIndependent())
1287	TOp.setTargetFlags(X86II::MO_PLT);
1288
1289	// Emit the call instruction.
1290	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CALL64pcrel32)
1291	.addOperand(Op: MCIL.LowerSymbolOperand(MO: TOp, Sym: TSym)));
1292
1293	// Restore caller-saved and used registers.
1294	for (unsigned I = sizeof UsedMask; I-- > `0`;)
1295	if (UsedMask[I])
1296	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::POP64r).addReg(Reg: DestRegs[I]));
1297	else
1298	emitX86Nops(OS&: *OutStreamer, NumBytes: `1`, Subtarget);
1299
1300	OutStreamer ->AddComment(T: "xray typed event end.");
1301
1302	// Record the sled version.
1303	recordSled(Sled: CurSled, MI, Kind: SledKind::TYPED_EVENT, Version: `2`);
1304	}
1305
1306	void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1307	X86MCInstLower &MCIL) {
1308
1309	NoAutoPaddingScope NoPadScope(*OutStreamer);
1310
1311	const Function &F = MF->getFunction();
1312	if (F.hasFnAttribute(Kind: "patchable-function-entry")) {
1313	unsigned Num;
1314	if (F.getFnAttribute(Kind: "patchable-function-entry")
1315	.getValueAsString()
1316	.getAsInteger(Radix: `10`, Result&: Num))
1317	return;
1318	emitX86Nops(OS&: *OutStreamer, NumBytes: Num, Subtarget);
1319	return;
1320	}
1321	// We want to emit the following pattern:
1322	//
1323	// .p2align 1, ...
1324	// .Lxray_sled_N:
1325	// jmp .tmpN
1326	// # 9 bytes worth of noops
1327	//
1328	// We need the 9 bytes because at runtime, we'd be patching over the full 11
1329	// bytes with the following pattern:
1330	//
1331	// mov %r10, <function id, 32-bit> // 6 bytes
1332	// call <relative offset, 32-bits> // 5 bytes
1333	//
1334	auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_", AlwaysAddSuffix: true);
1335	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1336	OutStreamer ->emitLabel(Symbol: CurSled);
1337
1338	// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1339	// an operand (computed as an offset from the jmp instruction).
1340	// FIXME: Find another less hacky way do force the relative jump.
1341	OutStreamer ->emitBytes(Data: "\xeb\x09");
1342	emitX86Nops(OS&: *OutStreamer, NumBytes: `9`, Subtarget);
1343	recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_ENTER, Version: `2`);
1344	}
1345
1346	void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1347	X86MCInstLower &MCIL) {
1348	NoAutoPaddingScope NoPadScope(*OutStreamer);
1349
1350	// Since PATCHABLE_RET takes the opcode of the return statement as an
1351	// argument, we use that to emit the correct form of the RET that we want.
1352	// i.e. when we see this:
1353	//
1354	// PATCHABLE_RET X86::RET ...
1355	//
1356	// We should emit the RET followed by sleds.
1357	//
1358	// .p2align 1, ...
1359	// .Lxray_sled_N:
1360	// ret # or equivalent instruction
1361	// # 10 bytes worth of noops
1362	//
1363	// This just makes sure that the alignment for the next instruction is 2.
1364	auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_", AlwaysAddSuffix: true);
1365	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1366	OutStreamer ->emitLabel(Symbol: CurSled);
1367	unsigned OpCode = MI.getOperand(i: `0`).getImm();
1368	MCInst Ret;
1369	Ret.setOpcode(OpCode);
1370	for (auto &MO : drop_begin(RangeOrContainer: MI.operands()))
1371	if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO); Op.isValid())
1372	Ret.addOperand(Op);
1373	OutStreamer ->emitInstruction(Inst: Ret, STI: getSubtargetInfo());
1374	emitX86Nops(OS&: *OutStreamer, NumBytes: `10`, Subtarget);
1375	recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_EXIT, Version: `2`);
1376	}
1377
1378	void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1379	X86MCInstLower &MCIL) {
1380	MCInst TC;
1381	TC.setOpcode(convertTailJumpOpcode(Opcode: MI.getOperand(i: `0`).getImm()));
1382	// Drop the tail jump opcode.
1383	auto TCOperands = drop_begin(RangeOrContainer: MI.operands());
1384	bool IsConditional = TC.getOpcode() == X86::JCC_1;
1385	MCSymbol *FallthroughLabel;
1386	if (IsConditional) {
1387	// Rewrite:
1388	// je target
1389	//
1390	// To:
1391	// jne .fallthrough
1392	// .p2align 1, ...
1393	// .Lxray_sled_N:
1394	// SLED_CODE
1395	// jmp target
1396	// .fallthrough:
1397	FallthroughLabel = OutContext.createTempSymbol();
1398	EmitToStreamer(
1399	S&: *OutStreamer,
1400	Inst: MCInstBuilder (X86::JCC_1)
1401	.addExpr(Val: MCSymbolRefExpr::create(Symbol: FallthroughLabel, Ctx&: OutContext))
1402	.addImm(Val: X86::GetOppositeBranchCondition(
1403	CC: static_cast<X86::CondCode>(MI.getOperand(i: `2`).getImm()))));
1404	TC.setOpcode(X86::JMP_1);
1405	// Drop the condition code.
1406	TCOperands = drop_end(RangeOrContainer&: TCOperands);
1407	}
1408
1409	NoAutoPaddingScope NoPadScope(*OutStreamer);
1410
1411	// Like PATCHABLE_RET, we have the actual instruction in the operands to this
1412	// instruction so we lower that particular instruction and its operands.
1413	// Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1414	// we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1415	// the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1416	// tail call much like how we have it in PATCHABLE_RET.
1417	auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_", AlwaysAddSuffix: true);
1418	OutStreamer ->emitCodeAlignment(Alignment: Align (`2`), STI: &getSubtargetInfo());
1419	OutStreamer ->emitLabel(Symbol: CurSled);
1420	auto Target = OutContext.createTempSymbol();
1421
1422	// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1423	// an operand (computed as an offset from the jmp instruction).
1424	// FIXME: Find another less hacky way do force the relative jump.
1425	OutStreamer ->emitBytes(Data: "\xeb\x09");
1426	emitX86Nops(OS&: *OutStreamer, NumBytes: `9`, Subtarget);
1427	OutStreamer ->emitLabel(Symbol: Target);
1428	recordSled(Sled: CurSled, MI, Kind: SledKind::TAIL_CALL, Version: `2`);
1429
1430	// Before emitting the instruction, add a comment to indicate that this is
1431	// indeed a tail call.
1432	OutStreamer ->AddComment(T: "TAILCALL");
1433	for (auto &MO : TCOperands)
1434	if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO); Op.isValid())
1435	TC.addOperand(Op);
1436	OutStreamer ->emitInstruction(Inst: TC, STI: getSubtargetInfo());
1437
1438	if (IsConditional)
1439	OutStreamer ->emitLabel(Symbol: FallthroughLabel);
1440	}
1441
1442	static unsigned getSrcIdx(const MachineInstr* MI, unsigned SrcIdx) {
1443	if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) {
1444	// Skip mask operand.
1445	++SrcIdx;
1446	if (X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) {
1447	// Skip passthru operand.
1448	++SrcIdx;
1449	}
1450	}
1451	return SrcIdx;
1452	}
1453
1454	static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI,
1455	unsigned SrcOpIdx) {
1456	const MachineOperand &DstOp = MI->getOperand(i: `0`);
1457	CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg());
1458
1459	// Handle AVX512 MASK/MASXZ write mask comments.
1460	// MASK: zmmX {%kY}
1461	// MASKZ: zmmX {%kY} {z}
1462	if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) {
1463	const MachineOperand &WriteMaskOp = MI->getOperand(i: SrcOpIdx - `1`);
1464	StringRef Mask = X86ATTInstPrinter::getRegisterName(Reg: WriteMaskOp.getReg());
1465	CS << " {%" << Mask << "}";
1466	if (!X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) {
1467	CS << " {z}";
1468	}
1469	}
1470	}
1471
1472	static void printShuffleMask(raw_ostream &CS, StringRef Src1Name,
1473	StringRef Src2Name, ArrayRef<int> Mask) {
1474	// One source operand, fix the mask to print all elements in one span.
1475	SmallVector<int, `8`> ShuffleMask(Mask);
1476	if (Src1Name == Src2Name)
1477	for (int i = `0`, e = ShuffleMask.size(); i != e; ++i)
1478	if (ShuffleMask [i] >= e)
1479	ShuffleMask [i] -= e;
1480
1481	for (int i = `0`, e = ShuffleMask.size(); i != e; ++i) {
1482	if (i != `0`)
1483	CS << ",";
1484	if (ShuffleMask [i] == SM_SentinelZero) {
1485	CS << "zero";
1486	continue;
1487	}
1488
1489	// Otherwise, it must come from src1 or src2. Print the span of elements
1490	// that comes from this src.
1491	bool isSrc1 = ShuffleMask [i] < (int)e;
1492	CS << (isSrc1 ? Src1Name : Src2Name) << `'['`;
1493
1494	bool IsFirst = true;
1495	while (i != e && ShuffleMask [i] != SM_SentinelZero &&
1496	(ShuffleMask [i] < (int)e) == isSrc1) {
1497	if (!IsFirst)
1498	CS << `','`;
1499	else
1500	IsFirst = false;
1501	if (ShuffleMask [i] == SM_SentinelUndef)
1502	CS << "u";
1503	else
1504	CS << ShuffleMask [i] % (int)e;
1505	++i;
1506	}
1507	CS << `']'`;
1508	--i; // For loop increments element #.
1509	}
1510	}
1511
1512	static std::string getShuffleComment(const MachineInstr MI, unsigned* SrcOp1Idx,
1513	unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1514	std::string Comment;
1515
1516	const MachineOperand &SrcOp1 = MI->getOperand(i: SrcOp1Idx);
1517	const MachineOperand &SrcOp2 = MI->getOperand(i: SrcOp2Idx);
1518	StringRef Src1Name = SrcOp1.isReg()
1519	? X86ATTInstPrinter::getRegisterName(Reg: SrcOp1.getReg())
1520	: "mem";
1521	StringRef Src2Name = SrcOp2.isReg()
1522	? X86ATTInstPrinter::getRegisterName(Reg: SrcOp2.getReg())
1523	: "mem";
1524
1525	raw_string_ostream CS(Comment);
1526	printDstRegisterName(CS, MI, SrcOpIdx: SrcOp1Idx);
1527	CS << " = ";
1528	printShuffleMask(CS, Src1Name, Src2Name, Mask);
1529
1530	return Comment;
1531	}
1532
1533	static void printConstant(const APInt &Val, raw_ostream &CS,
1534	bool PrintZero = false) {
1535	if (Val.getBitWidth() <= `64`) {
1536	CS << (PrintZero ? `0ULL` : Val.getZExtValue());
1537	} else {
1538	// print multi-word constant as (w0,w1)
1539	CS << "(";
1540	for (int i = `0`, N = Val.getNumWords(); i < N; ++i) {
1541	if (i > `0`)
1542	CS << ",";
1543	CS << (PrintZero ? `0ULL` : Val.getRawData()[i]);
1544	}
1545	CS << ")";
1546	}
1547	}
1548
1549	static void printConstant(const APFloat &Flt, raw_ostream &CS,
1550	bool PrintZero = false) {
1551	SmallString<`32`> Str;
1552	// Force scientific notation to distinguish from integers.
1553	if (PrintZero)
1554	APFloat::getZero(Sem: Flt.getSemantics()).toString(Str, FormatPrecision: `0`, FormatMaxPadding: `0`);
1555	else
1556	Flt.toString(Str, FormatPrecision: `0`, FormatMaxPadding: `0`);
1557	CS << Str;
1558	}
1559
1560	static void printConstant(const Constant COp, unsigned* BitWidth,
1561	raw_ostream &CS, bool PrintZero = false) {
1562	if (isa<UndefValue>(Val: COp)) {
1563	CS << "u";
1564	} else if (auto *CI = dyn_cast<ConstantInt>(Val: COp)) {
1565	if (auto VTy = dyn_cast<FixedVectorType>(Val: CI->getType())) {
1566	for (unsigned I = `0`, E = VTy->getNumElements(); I != E; ++I) {
1567	if (I != `0`)
1568	CS << `','`;
1569	printConstant(Val: CI->getValue(), CS, PrintZero);
1570	}
1571	} else
1572	printConstant(Val: CI->getValue(), CS, PrintZero);
1573	} else if (auto *CF = dyn_cast<ConstantFP>(Val: COp)) {
1574	if (auto VTy = dyn_cast<FixedVectorType>(Val: CF->getType())) {
1575	unsigned EltBits = VTy->getScalarSizeInBits();
1576	unsigned E = std::min(a: BitWidth / EltBits, b: VTy->getNumElements());
1577	if ((BitWidth % EltBits) == `0`) {
1578	for (unsigned I = `0`; I != E; ++I) {
1579	if (I != `0`)
1580	CS << ",";
1581	printConstant(Flt: CF->getValueAPF(), CS, PrintZero);
1582	}
1583	} else {
1584	CS << "?";
1585	}
1586	} else
1587	printConstant(Flt: CF->getValueAPF(), CS, PrintZero);
1588	} else if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: COp)) {
1589	Type *EltTy = CDS->getElementType();
1590	bool IsInteger = EltTy->isIntegerTy();
1591	bool IsFP = EltTy->isHalfTy() \|\| EltTy->isFloatTy() \|\| EltTy->isDoubleTy();
1592	unsigned EltBits = EltTy->getPrimitiveSizeInBits();
1593	unsigned E = std::min(a: BitWidth / EltBits, b: (unsigned)CDS->getNumElements());
1594	if ((BitWidth % EltBits) == `0`) {
1595	for (unsigned I = `0`; I != E; ++I) {
1596	if (I != `0`)
1597	CS << ",";
1598	if (IsInteger)
1599	printConstant(Val: CDS->getElementAsAPInt(i: I), CS, PrintZero);
1600	else if (IsFP)
1601	printConstant(Flt: CDS->getElementAsAPFloat(i: I), CS, PrintZero);
1602	else
1603	CS << "?";
1604	}
1605	} else {
1606	CS << "?";
1607	}
1608	} else if (auto *CV = dyn_cast<ConstantVector>(Val: COp)) {
1609	unsigned EltBits = CV->getType()->getScalarSizeInBits();
1610	unsigned E = std::min(a: BitWidth / EltBits, b: CV->getNumOperands());
1611	if ((BitWidth % EltBits) == `0`) {
1612	for (unsigned I = `0`; I != E; ++I) {
1613	if (I != `0`)
1614	CS << ",";
1615	printConstant(COp: CV->getOperand(i_nocapture: I), BitWidth: EltBits, CS, PrintZero);
1616	}
1617	} else {
1618	CS << "?";
1619	}
1620	} else {
1621	CS << "?";
1622	}
1623	}
1624
1625	static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer,
1626	int SclWidth, int VecWidth,
1627	const char *ShuffleComment) {
1628	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1629
1630	std::string Comment;
1631	raw_string_ostream CS(Comment);
1632	printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx);
1633	CS << " = ";
1634
1635	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx)) {
1636	CS << "[";
1637	printConstant(COp: C, BitWidth: SclWidth, CS);
1638	for (int I = `1`, E = VecWidth / SclWidth; I < E; ++I) {
1639	CS << ",";
1640	printConstant(COp: C, BitWidth: SclWidth, CS, PrintZero: true);
1641	}
1642	CS << "]";
1643	OutStreamer.AddComment(T: CS.str());
1644	return; // early-out
1645	}
1646
1647	// We didn't find a constant load, fallback to a shuffle mask decode.
1648	CS << ShuffleComment;
1649	OutStreamer.AddComment(T: CS.str());
1650	}
1651
1652	static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
1653	int Repeats, int BitWidth) {
1654	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1655	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx)) {
1656	std::string Comment;
1657	raw_string_ostream CS(Comment);
1658	printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx);
1659	CS << " = [";
1660	for (int l = `0`; l != Repeats; ++l) {
1661	if (l != `0`)
1662	CS << ",";
1663	printConstant(COp: C, BitWidth, CS);
1664	}
1665	CS << "]";
1666	OutStreamer.AddComment(T: CS.str());
1667	}
1668	}
1669
1670	static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1671	int SrcEltBits, int DstEltBits, bool IsSext) {
1672	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1673	auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx);
1674	if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) {
1675	if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: C)) {
1676	int NumElts = CDS->getNumElements();
1677	std::string Comment;
1678	raw_string_ostream CS(Comment);
1679	printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx);
1680	CS << " = [";
1681	for (int i = `0`; i != NumElts; ++i) {
1682	if (i != `0`)
1683	CS << ",";
1684	if (CDS->getElementType()->isIntegerTy()) {
1685	APInt Elt = CDS->getElementAsAPInt(i);
1686	Elt = IsSext ? Elt.sext(width: DstEltBits) : Elt.zext(width: DstEltBits);
1687	printConstant(Val: Elt, CS);
1688	} else
1689	CS << "?";
1690	}
1691	CS << "]";
1692	OutStreamer.AddComment(T: CS.str());
1693	return true;
1694	}
1695	}
1696
1697	return false;
1698	}
1699	static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1700	int SrcEltBits, int DstEltBits) {
1701	printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: true);
1702	}
1703	static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1704	int SrcEltBits, int DstEltBits) {
1705	if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: false))
1706	return;
1707
1708	// We didn't find a constant load, fallback to a shuffle mask decode.
1709	std::string Comment;
1710	raw_string_ostream CS(Comment);
1711	printDstRegisterName(CS, MI, SrcOpIdx: getSrcIdx(MI, SrcIdx: `1`));
1712	CS << " = ";
1713
1714	SmallVector<int> Mask;
1715	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1716	assert((Width % DstEltBits) == `0` && (DstEltBits % SrcEltBits) == `0` &&
1717	"Illegal extension ratio");
1718	DecodeZeroExtendMask(SrcScalarBits: SrcEltBits, DstScalarBits: DstEltBits, NumDstElts: Width / DstEltBits, IsAnyExtend: false, ShuffleMask&: Mask);
1719	printShuffleMask(CS, Src1Name: "mem", Src2Name: "", Mask);
1720
1721	OutStreamer.AddComment(T: CS.str());
1722	}
1723
1724	void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1725	assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1726	assert((getSubtarget().isOSWindows() \|\| getSubtarget().isUEFI()) &&
1727	"SEH_ instruction Windows and UEFI only");
1728
1729	// Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1730	if (EmitFPOData) {
1731	X86TargetStreamer *XTS =
1732	static_cast<X86TargetStreamer *>(OutStreamer ->getTargetStreamer());
1733	switch (MI->getOpcode()) {
1734	case X86::SEH_PushReg:
1735	XTS->emitFPOPushReg(Reg: MI->getOperand(i: `0`).getImm());
1736	break;
1737	case X86::SEH_StackAlloc:
1738	XTS->emitFPOStackAlloc(StackAlloc: MI->getOperand(i: `0`).getImm());
1739	break;
1740	case X86::SEH_StackAlign:
1741	XTS->emitFPOStackAlign(Align: MI->getOperand(i: `0`).getImm());
1742	break;
1743	case X86::SEH_SetFrame:
1744	assert(MI->getOperand(`1`).getImm() == `0` &&
1745	".cv_fpo_setframe takes no offset");
1746	XTS->emitFPOSetFrame(Reg: MI->getOperand(i: `0`).getImm());
1747	break;
1748	case X86::SEH_EndPrologue:
1749	XTS->emitFPOEndPrologue();
1750	break;
1751	case X86::SEH_SaveReg:
1752	case X86::SEH_SaveXMM:
1753	case X86::SEH_PushFrame:
1754	llvm_unreachable("SEH_ directive incompatible with FPO");
1755	break;
1756	default:
1757	llvm_unreachable("expected SEH_ instruction");
1758	}
1759	return;
1760	}
1761
1762	// Otherwise, use the .seh_ directives for all other Windows platforms.
1763	switch (MI->getOpcode()) {
1764	case X86::SEH_PushReg:
1765	OutStreamer ->emitWinCFIPushReg(Register: MI->getOperand(i: `0`).getImm());
1766	break;
1767
1768	case X86::SEH_SaveReg:
1769	OutStreamer ->emitWinCFISaveReg(Register: MI->getOperand(i: `0`).getImm(),
1770	Offset: MI->getOperand(i: `1`).getImm());
1771	break;
1772
1773	case X86::SEH_SaveXMM:
1774	OutStreamer ->emitWinCFISaveXMM(Register: MI->getOperand(i: `0`).getImm(),
1775	Offset: MI->getOperand(i: `1`).getImm());
1776	break;
1777
1778	case X86::SEH_StackAlloc:
1779	OutStreamer ->emitWinCFIAllocStack(Size: MI->getOperand(i: `0`).getImm());
1780	break;
1781
1782	case X86::SEH_SetFrame:
1783	OutStreamer ->emitWinCFISetFrame(Register: MI->getOperand(i: `0`).getImm(),
1784	Offset: MI->getOperand(i: `1`).getImm());
1785	break;
1786
1787	case X86::SEH_PushFrame:
1788	OutStreamer ->emitWinCFIPushFrame(Code: MI->getOperand(i: `0`).getImm());
1789	break;
1790
1791	case X86::SEH_EndPrologue:
1792	OutStreamer ->emitWinCFIEndProlog();
1793	break;
1794
1795	case X86::SEH_BeginEpilogue:
1796	OutStreamer ->emitWinCFIBeginEpilogue();
1797	break;
1798
1799	case X86::SEH_EndEpilogue:
1800	OutStreamer ->emitWinCFIEndEpilogue();
1801	break;
1802
1803	case X86::SEH_UnwindV2Start:
1804	OutStreamer ->emitWinCFIUnwindV2Start();
1805	break;
1806
1807	case X86::SEH_UnwindVersion:
1808	OutStreamer ->emitWinCFIUnwindVersion(Version: MI->getOperand(i: `0`).getImm());
1809	break;
1810
1811	default:
1812	llvm_unreachable("expected SEH_ instruction");
1813	}
1814	}
1815
// Adds a comment to OutStreamer describing the constant operand of MI, for the
// opcodes listed in the switch below. The cases handled inline fetch the
// constant with X86::getConstantFromPool and add nothing when that lookup
// fails (or when a decoded shuffle mask comes back empty); the remaining cases
// delegate to printZeroUpperMove, printBroadcast, printSignExtend and
// printZeroExtend. The switch has no default: opcodes without a case label are
// left without a comment.
//
// NOTE: the helper macros #define'd inside this function are not #undef'd
// within it.
1816	static void addConstantComments(const MachineInstr *MI,
1817	MCStreamer &OutStreamer) {
1818	switch (MI->getOpcode()) {
1819	// Lower PSHUFB and VPERMILP normally but add a comment if we can find
1820	// a constant shuffle mask. We won't be able to do this at the MC layer
1821	// because the mask isn't an immediate.
1822	case X86::PSHUFBrm:
1823	case X86::VPSHUFBrm:
1824	case X86::VPSHUFBYrm:
1825	case X86::VPSHUFBZ128rm:
1826	case X86::VPSHUFBZ128rmk:
1827	case X86::VPSHUFBZ128rmkz:
1828	case X86::VPSHUFBZ256rm:
1829	case X86::VPSHUFBZ256rmk:
1830	case X86::VPSHUFBZ256rmkz:
1831	case X86::VPSHUFBZrm:
1832	case X86::VPSHUFBZrmk:
1833	case X86::VPSHUFBZrmkz: {
// The mask constant is looked up at operand SrcIdx + 1; the shuffle comment
// names operand SrcIdx as both sources.
1834	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1835	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1836	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1837	SmallVector<int, `64`> Mask;
1838	DecodePSHUFBMask(C, Width, ShuffleMask&: Mask);
1839	if (!Mask.empty())
1840	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask));
1841	}
1842	break;
1843	}
1844
1845	case X86::VPERMILPSrm:
1846	case X86::VPERMILPSYrm:
1847	case X86::VPERMILPSZ128rm:
1848	case X86::VPERMILPSZ128rmk:
1849	case X86::VPERMILPSZ128rmkz:
1850	case X86::VPERMILPSZ256rm:
1851	case X86::VPERMILPSZ256rmk:
1852	case X86::VPERMILPSZ256rmkz:
1853	case X86::VPERMILPSZrm:
1854	case X86::VPERMILPSZrmk:
1855	case X86::VPERMILPSZrmkz: {
// Same shape as the PSHUFB case, decoding with an element size of 32.
1856	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1857	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1858	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1859	SmallVector<int, `16`> Mask;
1860	DecodeVPERMILPMask(C, ElSize: `32`, Width, ShuffleMask&: Mask);
1861	if (!Mask.empty())
1862	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask));
1863	}
1864	break;
1865	}
1866	case X86::VPERMILPDrm:
1867	case X86::VPERMILPDYrm:
1868	case X86::VPERMILPDZ128rm:
1869	case X86::VPERMILPDZ128rmk:
1870	case X86::VPERMILPDZ128rmkz:
1871	case X86::VPERMILPDZ256rm:
1872	case X86::VPERMILPDZ256rmk:
1873	case X86::VPERMILPDZ256rmkz:
1874	case X86::VPERMILPDZrm:
1875	case X86::VPERMILPDZrmk:
1876	case X86::VPERMILPDZrmkz: {
// Identical to the VPERMILPS case except for the element size of 64.
1877	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1878	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1879	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1880	SmallVector<int, `16`> Mask;
1881	DecodeVPERMILPMask(C, ElSize: `64`, Width, ShuffleMask&: Mask);
1882	if (!Mask.empty())
1883	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask));
1884	}
1885	break;
1886	}
1887
1888	case X86::VPERMIL2PDrm:
1889	case X86::VPERMIL2PSrm:
1890	case X86::VPERMIL2PDYrm:
1891	case X86::VPERMIL2PSYrm: {
1892	assert(MI->getNumOperands() >= (`3` + X86::AddrNumOperands + `1`) &&
1893	"Unexpected number of operands!");
1894
// The control value is taken from the last operand; without an immediate
// there, no comment is produced.
1895	const MachineOperand &CtrlOp = MI->getOperand(i: MI->getNumOperands() - `1`);
1896	if (!CtrlOp.isImm())
1897	break;
1898
// Element size follows the opcode: 32 for the PS forms, 64 for the PD forms.
1899	unsigned ElSize;
1900	switch (MI->getOpcode()) {
1901	default: llvm_unreachable("Invalid opcode");
1902	case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = `32`; break;
1903	case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = `64`; break;
1904	}
1905
// The selector constant is looked up at operand 3; the shuffle comment names
// operands 1 and 2 as the two sources.
1906	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: `3`)) {
1907	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1908	SmallVector<int, `16`> Mask;
1909	DecodeVPERMIL2PMask(C, M2Z: (unsigned)CtrlOp.getImm(), ElSize, Width, ShuffleMask&: Mask);
1910	if (!Mask.empty())
1911	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: `1`, SrcOp2Idx: `2`, Mask));
1912	}
1913	break;
1914	}
1915
1916	case X86::VPPERMrrm: {
// Mask constant at operand 3, sources at operands 1 and 2.
1917	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: `3`)) {
1918	unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1919	SmallVector<int, `16`> Mask;
1920	DecodeVPPERMMask(C, Width, ShuffleMask&: Mask);
1921	if (!Mask.empty())
1922	OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: `1`, SrcOp2Idx: `2`, Mask));
1923	}
1924	break;
1925	}
1926
1927	case X86::MMX_MOVQ64rm: {
// Prints "<dst> = 0x<hex bits>". The comment is only added when the pool
// entry is a ConstantFP; any other kind of constant produces no comment.
1928	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: `1`)) {
1929	std::string Comment;
1930	raw_string_ostream CS(Comment);
1931	const MachineOperand &DstOp = MI->getOperand(i: `0`);
1932	CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()) << " = ";
1933	if (auto *CF = dyn_cast<ConstantFP>(Val: C)) {
1934	CS << "0x" << toString(I: CF->getValueAPF().bitcastToAPInt(), Radix: `16`, Signed: false);
1935	OutStreamer.AddComment(T: CS.str());
1936	}
1937	}
1938	break;
1939	}
1940
// INSTR_CASE expands to the single case label
// X86::<Prefix><Instr><Suffix>rm<Postfix>.
1941	#define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \
1942	case X86::Prefix##Instr##Suffix##rm##Postfix:
1943
// Case labels for the nine V-prefixed Z128/Z256/Z forms of Instr, each in
// plain, k and kz variants.
1944	#define CASE_AVX512_ARITH_RM(Instr) \
1945	INSTR_CASE(V, Instr, Z128, ) \
1946	INSTR_CASE(V, Instr, Z128, k) \
1947	INSTR_CASE(V, Instr, Z128, kz) \
1948	INSTR_CASE(V, Instr, Z256, ) \
1949	INSTR_CASE(V, Instr, Z256, k) \
1950	INSTR_CASE(V, Instr, Z256, kz) \
1951	INSTR_CASE(V, Instr, Z, ) \
1952	INSTR_CASE(V, Instr, Z, k) \
1953	INSTR_CASE(V, Instr, Z, kz)
1954
// As CASE_AVX512_ARITH_RM, plus the unprefixed, V and VY forms.
1955	#define CASE_ARITH_RM(Instr) \
1956	INSTR_CASE(, Instr, , ) /* SSE */ \
1957	INSTR_CASE(V, Instr, , ) /* AVX-128 */ \
1958	INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \
1959	INSTR_CASE(V, Instr, Z128, ) \
1960	INSTR_CASE(V, Instr, Z128, k) \
1961	INSTR_CASE(V, Instr, Z128, kz) \
1962	INSTR_CASE(V, Instr, Z256, ) \
1963	INSTR_CASE(V, Instr, Z256, k) \
1964	INSTR_CASE(V, Instr, Z256, kz) \
1965	INSTR_CASE(V, Instr, Z, ) \
1966	INSTR_CASE(V, Instr, Z, k) \
1967	INSTR_CASE(V, Instr, Z, kz)
1968
1969	// TODO: Add additional instructions when useful.
1970	CASE_ARITH_RM(PMADDUBSW)
1971	CASE_ARITH_RM(PMADDWD)
1972	CASE_ARITH_RM(PMULDQ)
1973	CASE_ARITH_RM(PMULUDQ)
1974	CASE_ARITH_RM(PMULLD)
1975	CASE_AVX512_ARITH_RM(PMULLQ)
1976	CASE_ARITH_RM(PMULLW)
1977	CASE_ARITH_RM(PMULHW)
1978	CASE_ARITH_RM(PMULHUW)
1979	CASE_ARITH_RM(PMULHRSW) {
// Looks up the constant at operand SrcIdx + 1 and prints it in brackets,
// passing the width of the destination register as the bit width. Unlike the
// broadcast comments, no "<dst> = " prefix is written here.
1980	unsigned SrcIdx = getSrcIdx(MI, SrcIdx: `1`);
1981	if (auto C = X86::getConstantFromPool(MI: MI, OpNo: SrcIdx + `1`)) {
1982	std::string Comment;
1983	raw_string_ostream CS(Comment);
1984	unsigned VectorWidth =
1985	X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[`0`]);
1986	CS << "[";
1987	printConstant(COp: C, BitWidth: VectorWidth, CS);
1988	CS << "]";
1989	OutStreamer.AddComment(T: CS.str());
1990	}
1991	break;
1992	}
1993
// Case labels for Instr and its k / kz suffixed variants.
1994	#define MASK_AVX512_CASE(Instr) \
1995	case Instr: \
1996	case Instr##k: \
1997	case Instr##kz:
1998
// Scalar loads into a 128-bit destination, delegated to printZeroUpperMove
// with the scalar width (64, 16 or 32) and the shuffle text to show.
1999	case X86::MOVSDrm:
2000	case X86::VMOVSDrm:
2001	MASK_AVX512_CASE(X86::VMOVSDZrm)
2002	case X86::MOVSDrm_alt:
2003	case X86::VMOVSDrm_alt:
2004	case X86::VMOVSDZrm_alt:
2005	case X86::MOVQI2PQIrm:
2006	case X86::VMOVQI2PQIrm:
2007	case X86::VMOVQI2PQIZrm:
2008	printZeroUpperMove(MI, OutStreamer, SclWidth: `64`, VecWidth: `128`, ShuffleComment: "mem[0],zero");
2009	break;
2010
2011	MASK_AVX512_CASE(X86::VMOVSHZrm)
2012	case X86::VMOVSHZrm_alt:
2013	printZeroUpperMove(MI, OutStreamer, SclWidth: `16`, VecWidth: `128`,
2014	ShuffleComment: "mem[0],zero,zero,zero,zero,zero,zero,zero");
2015	break;
2016
2017	case X86::MOVSSrm:
2018	case X86::VMOVSSrm:
2019	MASK_AVX512_CASE(X86::VMOVSSZrm)
2020	case X86::MOVSSrm_alt:
2021	case X86::VMOVSSrm_alt:
2022	case X86::VMOVSSZrm_alt:
2023	case X86::MOVDI2PDIrm:
2024	case X86::VMOVDI2PDIrm:
2025	case X86::VMOVDI2PDIZrm:
2026	printZeroUpperMove(MI, OutStreamer, SclWidth: `32`, VecWidth: `128`, ShuffleComment: "mem[0],zero,zero,zero");
2027	break;
2028
// Case labels for the six MOVAPD/MOVAPS/MOVUPD/MOVUPS/MOVDQA/MOVDQU "rm"
// opcodes with the given prefix and suffix.
2029	#define MOV_CASE(Prefix, Suffix) \
2030	case X86::Prefix##MOVAPD##Suffix##rm: \
2031	case X86::Prefix##MOVAPS##Suffix##rm: \
2032	case X86::Prefix##MOVUPD##Suffix##rm: \
2033	case X86::Prefix##MOVUPS##Suffix##rm: \
2034	case X86::Prefix##MOVDQA##Suffix##rm: \
2035	case X86::Prefix##MOVDQU##Suffix##rm:
2036
// Case labels for the ten VMOVDQA*/VMOVDQU*/VMOVAP*/VMOVUP* "rm" opcodes with
// the given suffix and postfix.
2037	#define MOV_AVX512_CASE(Suffix, Postfix) \
2038	case X86::VMOVDQA64##Suffix##rm##Postfix: \
2039	case X86::VMOVDQA32##Suffix##rm##Postfix: \
2040	case X86::VMOVDQU64##Suffix##rm##Postfix: \
2041	case X86::VMOVDQU32##Suffix##rm##Postfix: \
2042	case X86::VMOVDQU16##Suffix##rm##Postfix: \
2043	case X86::VMOVDQU8##Suffix##rm##Postfix: \
2044	case X86::VMOVAPS##Suffix##rm##Postfix: \
2045	case X86::VMOVAPD##Suffix##rm##Postfix: \
2046	case X86::VMOVUPS##Suffix##rm##Postfix: \
2047	case X86::VMOVUPD##Suffix##rm##Postfix:
2048
2049	#define CASE_128_MOV_RM() \
2050	MOV_CASE(, ) /* SSE */ \
2051	MOV_CASE(V, ) /* AVX-128 */ \
2052	MOV_AVX512_CASE(Z128, ) \
2053	MOV_AVX512_CASE(Z128, k) \
2054	MOV_AVX512_CASE(Z128, kz)
2055
// NOTE: the last line of the next two macros ends in a line continuation; the
// empty line that follows each one is what terminates the definition.
2056	#define CASE_256_MOV_RM() \
2057	MOV_CASE(V, Y) /* AVX-256 */ \
2058	MOV_AVX512_CASE(Z256, ) \
2059	MOV_AVX512_CASE(Z256, k) \
2060	MOV_AVX512_CASE(Z256, kz) \
2061
2062	#define CASE_512_MOV_RM() \
2063	MOV_AVX512_CASE(Z, ) \
2064	MOV_AVX512_CASE(Z, k) \
2065	MOV_AVX512_CASE(Z, kz) \
2066
2067	// For loads from a constant pool to a vector register, print the constant
2068	// loaded.
2069	CASE_128_MOV_RM()
2070	printBroadcast(MI, OutStreamer, Repeats: `1`, BitWidth: `128`);
2071	break;
2072	CASE_256_MOV_RM()
2073	printBroadcast(MI, OutStreamer, Repeats: `1`, BitWidth: `256`);
2074	break;
2075	CASE_512_MOV_RM()
2076	printBroadcast(MI, OutStreamer, Repeats: `1`, BitWidth: `512`);
2077	break;
// Subvector broadcasts: in each group below Repeats * BitWidth is 256 or 512.
2078	case X86::VBROADCASTF128rm:
2079	case X86::VBROADCASTI128rm:
2080	MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm)
2081	MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm)
2082	MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm)
2083	MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm)
2084	printBroadcast(MI, OutStreamer, Repeats: `2`, BitWidth: `128`);
2085	break;
2086	MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm)
2087	MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm)
2088	MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm)
2089	MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm)
2090	printBroadcast(MI, OutStreamer, Repeats: `4`, BitWidth: `128`);
2091	break;
2092	MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm)
2093	MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm)
2094	MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm)
2095	MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm)
2096	printBroadcast(MI, OutStreamer, Repeats: `2`, BitWidth: `256`);
2097	break;
2098
2099	// For broadcast loads from a constant pool to a vector register, repeatedly
2100	// print the constant loaded.
2101	case X86::MOVDDUPrm:
2102	case X86::VMOVDDUPrm:
2103	MASK_AVX512_CASE(X86::VMOVDDUPZ128rm)
2104	case X86::VPBROADCASTQrm:
2105	MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm)
2106	printBroadcast(MI, OutStreamer, Repeats: `2`, BitWidth: `64`);
2107	break;
2108	case X86::VBROADCASTSDYrm:
2109	MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm)
2110	case X86::VPBROADCASTQYrm:
2111	MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm)
2112	printBroadcast(MI, OutStreamer, Repeats: `4`, BitWidth: `64`);
2113	break;
2114	MASK_AVX512_CASE(X86::VBROADCASTSDZrm)
2115	MASK_AVX512_CASE(X86::VPBROADCASTQZrm)
2116	printBroadcast(MI, OutStreamer, Repeats: `8`, BitWidth: `64`);
2117	break;
2118	case X86::VBROADCASTSSrm:
2119	MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm)
2120	case X86::VPBROADCASTDrm:
2121	MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm)
2122	printBroadcast(MI, OutStreamer, Repeats: `4`, BitWidth: `32`);
2123	break;
2124	case X86::VBROADCASTSSYrm:
2125	MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm)
2126	case X86::VPBROADCASTDYrm:
2127	MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm)
2128	printBroadcast(MI, OutStreamer, Repeats: `8`, BitWidth: `32`);
2129	break;
2130	MASK_AVX512_CASE(X86::VBROADCASTSSZrm)
2131	MASK_AVX512_CASE(X86::VPBROADCASTDZrm)
2132	printBroadcast(MI, OutStreamer, Repeats: `16`, BitWidth: `32`);
2133	break;
2134	case X86::VPBROADCASTWrm:
2135	MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm)
2136	printBroadcast(MI, OutStreamer, Repeats: `8`, BitWidth: `16`);
2137	break;
2138	case X86::VPBROADCASTWYrm:
2139	MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm)
2140	printBroadcast(MI, OutStreamer, Repeats: `16`, BitWidth: `16`);
2141	break;
2142	MASK_AVX512_CASE(X86::VPBROADCASTWZrm)
2143	printBroadcast(MI, OutStreamer, Repeats: `32`, BitWidth: `16`);
2144	break;
2145	case X86::VPBROADCASTBrm:
2146	MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm)
2147	printBroadcast(MI, OutStreamer, Repeats: `16`, BitWidth: `8`);
2148	break;
2149	case X86::VPBROADCASTBYrm:
2150	MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm)
2151	printBroadcast(MI, OutStreamer, Repeats: `32`, BitWidth: `8`);
2152	break;
2153	MASK_AVX512_CASE(X86::VPBROADCASTBZrm)
2154	printBroadcast(MI, OutStreamer, Repeats: `64`, BitWidth: `8`);
2155	break;
2156
// MOVX_CASE expands to the single case label
// X86::<Prefix>PMOV<Ext><Type><Suffix>rm<Postfix>.
2157	#define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \
2158	case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix:
2159
// Case labels for all twelve forms of PMOV<Ext><Type>: unprefixed, V, VY, and
// the Z128/Z256/Z forms in plain, k and kz variants.
2160	#define CASE_MOVX_RM(Ext, Type) \
2161	MOVX_CASE(, Ext, Type, , ) \
2162	MOVX_CASE(V, Ext, Type, , ) \
2163	MOVX_CASE(V, Ext, Type, Y, ) \
2164	MOVX_CASE(V, Ext, Type, Z128, ) \
2165	MOVX_CASE(V, Ext, Type, Z128, k ) \
2166	MOVX_CASE(V, Ext, Type, Z128, kz ) \
2167	MOVX_CASE(V, Ext, Type, Z256, ) \
2168	MOVX_CASE(V, Ext, Type, Z256, k ) \
2169	MOVX_CASE(V, Ext, Type, Z256, kz ) \
2170	MOVX_CASE(V, Ext, Type, Z, ) \
2171	MOVX_CASE(V, Ext, Type, Z, k ) \
2172	MOVX_CASE(V, Ext, Type, Z, kz )
2173
// Sign-extending loads: the two-letter type suffix selects the source and
// destination element widths passed on (B = 8, W = 16, D = 32, Q = 64).
2174	CASE_MOVX_RM(SX, BD)
2175	printSignExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `32`);
2176	break;
2177	CASE_MOVX_RM(SX, BQ)
2178	printSignExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `64`);
2179	break;
2180	CASE_MOVX_RM(SX, BW)
2181	printSignExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `16`);
2182	break;
2183	CASE_MOVX_RM(SX, DQ)
2184	printSignExtend(MI, OutStreamer, SrcEltBits: `32`, DstEltBits: `64`);
2185	break;
2186	CASE_MOVX_RM(SX, WD)
2187	printSignExtend(MI, OutStreamer, SrcEltBits: `16`, DstEltBits: `32`);
2188	break;
2189	CASE_MOVX_RM(SX, WQ)
2190	printSignExtend(MI, OutStreamer, SrcEltBits: `16`, DstEltBits: `64`);
2191	break;
2192
// Zero-extending loads, with the same width mapping as above.
2193	CASE_MOVX_RM(ZX, BD)
2194	printZeroExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `32`);
2195	break;
2196	CASE_MOVX_RM(ZX, BQ)
2197	printZeroExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `64`);
2198	break;
2199	CASE_MOVX_RM(ZX, BW)
2200	printZeroExtend(MI, OutStreamer, SrcEltBits: `8`, DstEltBits: `16`);
2201	break;
2202	CASE_MOVX_RM(ZX, DQ)
2203	printZeroExtend(MI, OutStreamer, SrcEltBits: `32`, DstEltBits: `64`);
2204	break;
2205	CASE_MOVX_RM(ZX, WD)
2206	printZeroExtend(MI, OutStreamer, SrcEltBits: `16`, DstEltBits: `32`);
2207	break;
2208	CASE_MOVX_RM(ZX, WQ)
2209	printZeroExtend(MI, OutStreamer, SrcEltBits: `16`, DstEltBits: `64`);
2210	break;
2211	}
2212	}
2213
2214	// Does the given operand refer to a DLLIMPORT function?
2215	bool isImportedFunction(const MachineOperand &MO) {
2216	return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_DLLIMPORT);
2217	}
2218
2219	// Is the given instruction a call to a CFGuard function?
2220	bool isCallToCFGuardFunction(const MachineInstr *MI) {
2221	assert(MI->getOpcode() == X86::TAILJMPm64_REX \|\|
2222	MI->getOpcode() == X86::CALL64m);
2223	const MachineOperand &MO = MI->getOperand(i: `3`);
2224	return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_NO_FLAG) &&
2225	isCFGuardFunction(GV: MO.getGlobal());
2226	}
2227
2228	// Does the containing block for the given instruction contain any jump table
2229	// info (indicating that the block is a dispatch for a jump table)?
2230	bool hasJumpTableInfoInBlock(const llvm::MachineInstr *MI) {
2231	const MachineBasicBlock &MBB = *MI->getParent();
2232	for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I)
2233	if (I ->isJumpTableDebugInfo())
2234	return true;
2235
2236	return false;
2237	}
2238
2239	void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2240	// FIXME: Enable feature predicate checks once all the tests pass.
2241	// X86_MC::verifyInstructionPredicates(MI->getOpcode(),
2242	// Subtarget->getFeatureBits());
2243
2244	X86MCInstLower MCInstLowering(MF, this);
2245	const X86RegisterInfo *RI =
2246	MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2247
2248	if (MI->getOpcode() == X86::OR64rm) {
2249	for (auto &Opd : MI->operands()) {
2250	if (Opd.isSymbol() && StringRef (Opd.getSymbolName()) ==
2251	"swift_async_extendedFramePointerFlags") {
2252	ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2253	}
2254	}
2255	}
2256
2257	// Add comments for values loaded from constant pool.
2258	if (OutStreamer ->isVerboseAsm())
2259	addConstantComments(MI, OutStreamer&: *OutStreamer);
2260
2261	// Add a comment about EVEX compression
2262	if (TM.Options.MCOptions.ShowMCEncoding) {
2263	if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
2264	OutStreamer ->AddComment(T: "EVEX TO LEGACY Compression ", EOL: false);
2265	else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2266	OutStreamer ->AddComment(T: "EVEX TO VEX Compression ", EOL: false);
2267	else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
2268	OutStreamer ->AddComment(T: "EVEX TO EVEX Compression ", EOL: false);
2269	}
2270
2271	// We use this to suppress NOP padding for Windows EH.
2272	bool IsTailJump = false;
2273
2274	switch (MI->getOpcode()) {
2275	case TargetOpcode::DBG_VALUE:
2276	llvm_unreachable("Should be handled target independently");
2277
2278	case X86::EH_RETURN:
2279	case X86::EH_RETURN64: {
2280	// Lower these as normal, but add some comments.
2281	Register Reg = MI->getOperand(i: `0`).getReg();
2282	OutStreamer ->AddComment(T: StringRef ("eh_return, addr: %") +
2283	X86ATTInstPrinter::getRegisterName(Reg));
2284	break;
2285	}
2286	case X86::CLEANUPRET: {
2287	// Lower these as normal, but add some comments.
2288	OutStreamer ->AddComment(T: "CLEANUPRET");
2289	break;
2290	}
2291
2292	case X86::CATCHRET: {
2293	// Lower these as normal, but add some comments.
2294	OutStreamer ->AddComment(T: "CATCHRET");
2295	break;
2296	}
2297
2298	case X86::ENDBR32:
2299	case X86::ENDBR64: {
2300	// CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2301	// -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2302	// non-empty. If MI is the initial ENDBR, place the
2303	// __patchable_function_entries label after ENDBR.
2304	if (CurrentPatchableFunctionEntrySym &&
2305	CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2306	MI == &MF->front().front()) {
2307	MCInst Inst;
2308	MCInstLowering.Lower(MI, OutMI&: Inst);
2309	EmitAndCountInstruction(Inst);
2310	CurrentPatchableFunctionEntrySym = createTempSymbol(Name: "patch");
2311	OutStreamer ->emitLabel(Symbol: CurrentPatchableFunctionEntrySym);
2312	return;
2313	}
2314	break;
2315	}
2316
2317	case X86::TAILJMPd64:
2318	if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(Reg: X86::R11))
2319	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CS_PREFIX));
2320
2321	if (EnableImportCallOptimization && isImportedFunction(MO: MI->getOperand(i: `0`))) {
2322	emitLabelAndRecordForImportCallOptimization(
2323	Kind: IMAGE_RETPOLINE_AMD64_IMPORT_BR);
2324	}
2325
2326	// Lower this as normal, but add a comment.
2327	OutStreamer ->AddComment(T: "TAILCALL");
2328	IsTailJump = true;
2329	break;
2330
2331	case X86::TAILJMPr:
2332	case X86::TAILJMPm:
2333	case X86::TAILJMPd:
2334	case X86::TAILJMPd_CC:
2335	case X86::TAILJMPr64:
2336	case X86::TAILJMPm64:
2337	case X86::TAILJMPd64_CC:
2338	if (EnableImportCallOptimization)
2339	report_fatal_error(reason: "Unexpected TAILJMP instruction was emitted when "
2340	"import call optimization was enabled");
2341
2342	// Lower these as normal, but add some comments.
2343	OutStreamer ->AddComment(T: "TAILCALL");
2344	IsTailJump = true;
2345	break;
2346
2347	case X86::TAILJMPm64_REX:
2348	if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
2349	emitLabelAndRecordForImportCallOptimization(
2350	Kind: IMAGE_RETPOLINE_AMD64_CFG_BR_REX);
2351	}
2352
2353	OutStreamer ->AddComment(T: "TAILCALL");
2354	IsTailJump = true;
2355	break;
2356
2357	case X86::TAILJMPr64_REX: {
2358	if (EnableImportCallOptimization) {
2359	assert(MI->getOperand(`0`).getReg() == X86::RAX &&
2360	"Indirect tail calls with impcall enabled must go through RAX (as "
2361	"enforced by TCRETURNImpCallri64)");
2362	emitLabelAndRecordForImportCallOptimization(
2363	Kind: IMAGE_RETPOLINE_AMD64_INDIR_BR);
2364	}
2365
2366	OutStreamer ->AddComment(T: "TAILCALL");
2367	IsTailJump = true;
2368	break;
2369	}
2370
2371	case X86::JMP64r:
2372	if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI)) {
2373	uint16_t EncodedReg =
2374	this->getSubtarget().getRegisterInfo()->getEncodingValue(
2375	Reg: MI->getOperand(i: `0`).getReg().asMCReg());
2376	emitLabelAndRecordForImportCallOptimization(
2377	Kind: (ImportCallKind)(IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST +
2378	EncodedReg));
2379	}
2380	break;
2381
2382	case X86::JMP16r:
2383	case X86::JMP16m:
2384	case X86::JMP32r:
2385	case X86::JMP32m:
2386	case X86::JMP64m:
2387	if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI))
2388	report_fatal_error(
2389	reason: "Unexpected JMP instruction was emitted for a jump-table when import "
2390	"call optimization was enabled");
2391	break;
2392
2393	case X86::TLS_addr32:
2394	case X86::TLS_addr64:
2395	case X86::TLS_addrX32:
2396	case X86::TLS_base_addr32:
2397	case X86::TLS_base_addr64:
2398	case X86::TLS_base_addrX32:
2399	case X86::TLS_desc32:
2400	case X86::TLS_desc64:
2401	return LowerTlsAddr(MCInstLowering, MI: *MI);
2402
2403	case X86::MOVPC32r: {
2404	// This is a pseudo op for a two instruction sequence with a label, which
2405	// looks like:
2406	// call "L1$pb"
2407	// "L1$pb":
2408	// popl %esi
2409
2410	// Emit the call.
2411	MCSymbol *PICBase = MF->getPICBaseSymbol();
2412	// FIXME: We would like an efficient form for this, so we don't have to do a
2413	// lot of extra uniquing.
2414	EmitAndCountInstruction(
2415	Inst&: MCInstBuilder (X86::CALLpcrel32)
2416	.addExpr(Val: MCSymbolRefExpr::create(Symbol: PICBase, Ctx&: OutContext)));
2417
2418	const X86FrameLowering *FrameLowering =
2419	MF->getSubtarget<X86Subtarget>().getFrameLowering();
2420	bool hasFP = FrameLowering->hasFP(MF: *MF);
2421
2422	// TODO: This is needed only if we require precise CFA.
2423	bool HasActiveDwarfFrame = OutStreamer ->getNumFrameInfos() &&
2424	!OutStreamer ->getDwarfFrameInfos().back().End;
2425
2426	int stackGrowth = -RI->getSlotSize();
2427
2428	if (HasActiveDwarfFrame && !hasFP) {
2429	OutStreamer ->emitCFIAdjustCfaOffset(Adjustment: -stackGrowth);
2430	MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
2431	}
2432
2433	// Emit the label.
2434	OutStreamer ->emitLabel(Symbol: PICBase);
2435
2436	// popl $reg
2437	EmitAndCountInstruction(
2438	Inst&: MCInstBuilder (X86::POP32r).addReg(Reg: MI->getOperand(i: `0`).getReg()));
2439
2440	if (HasActiveDwarfFrame && !hasFP) {
2441	OutStreamer ->emitCFIAdjustCfaOffset(Adjustment: stackGrowth);
2442	}
2443	return;
2444	}
2445
2446	case X86::ADD32ri: {
2447	// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2448	if (MI->getOperand(i: `2`).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2449	break;
2450
2451	// Okay, we have something like:
2452	// EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2453
2454	// For this, we want to print something like:
2455	// MYGLOBAL + (. - PICBASE)
2456	// However, we can't generate a ".", so just emit a new label here and refer
2457	// to it.
2458	MCSymbol *DotSym = OutContext.createTempSymbol();
2459	OutStreamer ->emitLabel(Symbol: DotSym);
2460
2461	// Now that we have emitted the label, lower the complex operand expression.
2462	MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MO: MI->getOperand(i: `2`));
2463
2464	const MCExpr *DotExpr = MCSymbolRefExpr::create(Symbol: DotSym, Ctx&: OutContext);
2465	const MCExpr *PICBase =
2466	MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx&: OutContext);
2467	DotExpr = MCBinaryExpr::createSub(LHS: DotExpr, RHS: PICBase, Ctx&: OutContext);
2468
2469	DotExpr = MCBinaryExpr::createAdd(
2470	LHS: MCSymbolRefExpr::create(Symbol: OpSym, Ctx&: OutContext), RHS: DotExpr, Ctx&: OutContext);
2471
2472	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::ADD32ri)
2473	.addReg(Reg: MI->getOperand(i: `0`).getReg())
2474	.addReg(Reg: MI->getOperand(i: `1`).getReg())
2475	.addExpr(Val: DotExpr));
2476	return;
2477	}
2478	case TargetOpcode::STATEPOINT:
2479	return LowerSTATEPOINT(MI: *MI, MCIL&: MCInstLowering);
2480
2481	case TargetOpcode::FAULTING_OP:
2482	return LowerFAULTING_OP(FaultingMI: *MI, MCIL&: MCInstLowering);
2483
2484	case TargetOpcode::FENTRY_CALL:
2485	return LowerFENTRY_CALL(MI: *MI, MCIL&: MCInstLowering);
2486
2487	case TargetOpcode::PATCHABLE_OP:
2488	return LowerPATCHABLE_OP(MI: *MI, MCIL&: MCInstLowering);
2489
2490	case TargetOpcode::STACKMAP:
2491	return LowerSTACKMAP(MI: *MI);
2492
2493	case TargetOpcode::PATCHPOINT:
2494	return LowerPATCHPOINT(MI: *MI, MCIL&: MCInstLowering);
2495
2496	case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2497	return LowerPATCHABLE_FUNCTION_ENTER(MI: *MI, MCIL&: MCInstLowering);
2498
2499	case TargetOpcode::PATCHABLE_RET:
2500	return LowerPATCHABLE_RET(MI: *MI, MCIL&: MCInstLowering);
2501
2502	case TargetOpcode::PATCHABLE_TAIL_CALL:
2503	return LowerPATCHABLE_TAIL_CALL(MI: *MI, MCIL&: MCInstLowering);
2504
2505	case TargetOpcode::PATCHABLE_EVENT_CALL:
2506	return LowerPATCHABLE_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering);
2507
2508	case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2509	return LowerPATCHABLE_TYPED_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering);
2510
2511	case X86::MORESTACK_RET:
2512	EmitAndCountInstruction(Inst&: MCInstBuilder (getRetOpcode(Subtarget: *Subtarget)));
2513	return;
2514
2515	case X86::KCFI_CHECK:
2516	return LowerKCFI_CHECK(MI: *MI);
2517
2518	case X86::ASAN_CHECK_MEMACCESS:
2519	return LowerASAN_CHECK_MEMACCESS(MI: *MI);
2520
2521	case X86::MORESTACK_RET_RESTORE_R10:
2522	// Return, then restore R10.
2523	EmitAndCountInstruction(Inst&: MCInstBuilder (getRetOpcode(Subtarget: *Subtarget)));
2524	EmitAndCountInstruction(
2525	Inst&: MCInstBuilder (X86::MOV64rr).addReg(Reg: X86::R10).addReg(Reg: X86::RAX));
2526	return;
2527
2528	case X86::SEH_PushReg:
2529	case X86::SEH_SaveReg:
2530	case X86::SEH_SaveXMM:
2531	case X86::SEH_StackAlloc:
2532	case X86::SEH_StackAlign:
2533	case X86::SEH_SetFrame:
2534	case X86::SEH_PushFrame:
2535	case X86::SEH_EndPrologue:
2536	case X86::SEH_EndEpilogue:
2537	case X86::SEH_UnwindV2Start:
2538	case X86::SEH_UnwindVersion:
2539	EmitSEHInstruction(MI);
2540	return;
2541
2542	case X86::SEH_SplitChainedAtEndOfBlock:
2543	assert(!SplitChainedAtEndOfBlock &&
2544	"Duplicate SEH_SplitChainedAtEndOfBlock in a current block");
2545	SplitChainedAtEndOfBlock = true;
2546	return;
2547
2548	case X86::SEH_BeginEpilogue: {
2549	assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2550	EmitSEHInstruction(MI);
2551	return;
2552	}
2553	case X86::UBSAN_UD1:
2554	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::UD1Lm)
2555	.addReg(Reg: X86::EAX)
2556	.addReg(Reg: X86::EAX)
2557	.addImm(Val: `1`)
2558	.addReg(Reg: X86::NoRegister)
2559	.addImm(Val: MI->getOperand(i: `0`).getImm())
2560	.addReg(Reg: X86::NoRegister));
2561	return;
2562	case X86::CALL64pcrel32:
2563	if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(Reg: X86::R11))
2564	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::CS_PREFIX));
2565
2566	if (EnableImportCallOptimization && isImportedFunction(MO: MI->getOperand(i: `0`))) {
2567	emitLabelAndRecordForImportCallOptimization(
2568	Kind: IMAGE_RETPOLINE_AMD64_IMPORT_CALL);
2569
2570	MCInst TmpInst;
2571	MCInstLowering.Lower(MI, OutMI&: TmpInst);
2572
2573	// For Import Call Optimization to work, we need the call instruction
2574	// to have a REX prefix, and a 5-byte nop after the call instruction.
2575	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::REX64_PREFIX));
2576	emitCallInstruction(MCI: TmpInst);
2577	emitNop(OS&: *OutStreamer, NumBytes: `5`, Subtarget);
2578	maybeEmitNopAfterCallForWindowsEH(MI);
2579	return;
2580	}
2581
2582	break;
2583
2584	case X86::CALL64r:
2585	if (EnableImportCallOptimization) {
2586	assert(MI->getOperand(`0`).getReg() == X86::RAX &&
2587	"Indirect calls with impcall enabled must go through RAX (as "
2588	"enforced by CALL64r_ImpCall)");
2589
2590	emitLabelAndRecordForImportCallOptimization(
2591	Kind: IMAGE_RETPOLINE_AMD64_INDIR_CALL);
2592	MCInst TmpInst;
2593	MCInstLowering.Lower(MI, OutMI&: TmpInst);
2594	emitCallInstruction(MCI: TmpInst);
2595
2596	// For Import Call Optimization to work, we need a 3-byte nop after the
2597	// call instruction.
2598	emitNop(OS&: *OutStreamer, NumBytes: `3`, Subtarget);
2599	maybeEmitNopAfterCallForWindowsEH(MI);
2600	return;
2601	}
2602	break;
2603
2604	case X86::CALL64m:
2605	if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
2606	emitLabelAndRecordForImportCallOptimization(
2607	Kind: IMAGE_RETPOLINE_AMD64_CFG_CALL);
2608	}
2609	break;
2610
2611	case X86::JCC_1:
2612	// Two instruction prefixes (2EH for branch not-taken and 3EH for branch
2613	// taken) are used as branch hints. Here we add branch taken prefix for
2614	// jump instruction with higher probability than threshold.
2615	if (getSubtarget().hasBranchHint() && EnableBranchHint) {
2616	const MachineBranchProbabilityInfo *MBPI =
2617	&getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
2618	MachineBasicBlock *DestBB = MI->getOperand(i: `0`).getMBB();
2619	BranchProbability EdgeProb =
2620	MBPI->getEdgeProbability(Src: MI->getParent(), Dst: DestBB);
2621	BranchProbability Threshold(BranchHintProbabilityThreshold, `100`);
2622	if (EdgeProb > Threshold)
2623	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::DS_PREFIX));
2624	}
2625	break;
2626
2627	case X86::JCC_SELF:
2628	MCSymbol *Sym = OutContext.createTempSymbol();
2629	OutStreamer ->emitLabel(Symbol: Sym);
2630	EmitAndCountInstruction(
2631	Inst&: MCInstBuilder (X86::JCC_1)
2632	.addExpr(Val: MCSymbolRefExpr::create(Symbol: Sym, Ctx&: OutContext))
2633	.addImm(Val: MI->getOperand(i: `0`).getImm()));
2634	return;
2635	}
2636
2637	MCInst TmpInst;
2638	MCInstLowering.Lower(MI, OutMI&: TmpInst);
2639
2640	if (MI->isCall()) {
2641	emitCallInstruction(MCI: TmpInst);
2642	// Since tail calls transfer control without leaving a stack frame, there is
2643	// never a need for NOP padding tail calls.
2644	if (!IsTailJump)
2645	maybeEmitNopAfterCallForWindowsEH(MI);
2646	return;
2647	}
2648
2649	EmitAndCountInstruction(Inst&: TmpInst);
2650	}
2651
2652	void X86AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
2653	const MCSubtargetInfo *EndInfo,
2654	const MachineInstr *MI) {
2655	if (MI) {
2656	// If unwinding inline asm ends on a call, wineh may require insertion of
2657	// a nop.
2658	unsigned ExtraInfo = MI->getOperand(i: InlineAsm::MIOp_ExtraInfo).getImm();
2659	if (ExtraInfo & InlineAsm::Extra_MayUnwind)
2660	maybeEmitNopAfterCallForWindowsEH(MI);
2661	}
2662	}
2663
2664	void X86AsmPrinter::emitCallInstruction(const llvm::MCInst &MCI) {
2665	// Stackmap shadows cannot include branch targets, so we can count the bytes
2666	// in a call towards the shadow, but must ensure that the no thread returns
2667	// in to the stackmap shadow. The only way to achieve this is if the call
2668	// is at the end of the shadow.
2669
2670	// Count then size of the call towards the shadow
2671	SMShadowTracker.count(Inst: MCI, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get());
2672	// Then flush the shadow so that we fill with nops before the call, not
2673	// after it.
2674	SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo());
2675	// Then emit the call
2676	OutStreamer ->emitInstruction(Inst: MCI, STI: getSubtargetInfo());
2677	}
2678
2679	// Determines whether a NOP is required after a CALL, so that Windows EH
2680	// IP2State tables have the correct information.
2681	//
2682	// On most Windows platforms (AMD64, ARM64, ARM32, IA64, but not* x86-32),*
2683	// exception handling works by looking up instruction pointers in lookup
2684	// tables. These lookup tables are stored in .xdata sections in executables.
2685	// One element of the lookup tables are the "IP2State" tables (Instruction
2686	// Pointer to State).
2687	//
2688	// If a function has any instructions that require cleanup during exception
2689	// unwinding, then it will have an IP2State table. Each entry in the IP2State
2690	// table describes a range of bytes in the function's instruction stream, and
2691	// associates an "EH state number" with that range of instructions. A value of
2692	// -1 means "the null state", which does not require any code to execute.
2693	// A value other than -1 is an index into the State table.
2694	//
2695	// The entries in the IP2State table contain byte offsets within the instruction
2696	// stream of the function. The Windows ABI requires that these offsets are
2697	// aligned to instruction boundaries; they are not permitted to point to a byte
2698	// that is not the first byte of an instruction.
2699	//
2700	// Unfortunately, CALL instructions present a problem during unwinding. CALL
2701	// instructions push the address of the instruction after the CALL instruction,
2702	// so that execution can resume after the CALL. If the CALL is the last
2703	// instruction within an IP2State region, then the return address (on the stack)
2704	// points to the next* IP2State region. This means that the unwinder will*
2705	// use the wrong cleanup funclet during unwinding.
2706	//
2707	// To fix this problem, the Windows AMD64 ABI requires that CALL instructions
2708	// are never placed at the end of an IP2State region. Stated equivalently, the
2709	// end of a CALL instruction cannot be aligned to an IP2State boundary. If a
2710	// CALL instruction would occur at the end of an IP2State region, then the
2711	// compiler must insert a NOP instruction after the CALL. The NOP instruction
2712	// is placed in the same EH region as the CALL instruction, so that the return
2713	// address points to the NOP and the unwinder will locate the correct region.
2714	//
2715	// NOP padding is only necessary on Windows AMD64 targets. On ARM64 and ARM32,
2716	// instructions have a fixed size so the unwinder knows how to "back up" by
2717	// one instruction.
2718	//
2719	// Interaction with Import Call Optimization (ICO):
2720	//
2721	// Import Call Optimization (ICO) is a compiler + OS feature on Windows which
2722	// improves the performance and security of DLL imports. ICO relies on using a
2723	// specific CALL idiom that can be replaced by the OS DLL loader. This removes
2724	// a load and indirect CALL and replaces it with a single direct CALL.
2725	//
2726	// To achieve this, ICO also inserts NOPs after the CALL instruction. If the
2727	// end of the CALL is aligned with an EH state transition, we also* insert*
2728	// a single-byte NOP. Both forms of NOPs must be preserved.* They cannot*
2729	// be combined into a single larger NOP; nor can the second NOP be removed.
2730	//
2731	// This is necessary because, if ICO is active and the call site is modified
2732	// by the loader, the loader will end up overwriting the NOPs that were inserted
2733	// for ICO. That means that those NOPs cannot be used for the correct
2734	// termination of the exception handling region (the IP2State transition),
2735	// so we still need an additional NOP instruction. The NOPs cannot be combined
2736	// into a longer NOP (which is ordinarily desirable) because then ICO would
2737	// split one instruction, producing a malformed instruction after the ICO call.
2738	void X86AsmPrinter::maybeEmitNopAfterCallForWindowsEH(const MachineInstr *MI) {
2739	// We only need to insert NOPs after CALLs when targeting Windows on AMD64.
2740	// (Don't let the name fool you: Itanium refers to table-based exception
2741	// handling, not the Itanium architecture.)
2742	if (MAI->getExceptionHandlingType() != ExceptionHandling::WinEH \|\|
2743	MAI->getWinEHEncodingType() != WinEH::EncodingType::Itanium) {
2744	return;
2745	}
2746
2747	bool HasEHPersonality = MF->getWinEHFuncInfo() != nullptr;
2748
2749	// Set up MBB iterator, initially positioned on the same MBB as MI.
2750	MachineFunction::const_iterator MFI(MI->getParent());
2751	MachineFunction::const_iterator MFE(MF->end());
2752
2753	// Set up instruction iterator, positioned immediately after* MI.*
2754	MachineBasicBlock::const_iterator MBBI(MI);
2755	MachineBasicBlock::const_iterator MBBE = MI->getParent()->end();
2756	++MBBI; // Step over MI
2757
2758	// This loop iterates MBBs
2759	for (;;) {
2760	// This loop iterates instructions
2761	for (; MBBI != MBBE; ++MBBI) {
2762	// Check the instruction that follows this CALL.
2763	const MachineInstr &NextMI = *MBBI;
2764
2765	// If there is an EH_LABEL after this CALL, then there is an EH state
2766	// transition after this CALL. This is exactly the situation which
2767	// requires NOP padding.
2768	if (NextMI.isEHLabel()) {
2769	if (HasEHPersonality) {
2770	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::NOOP));
2771	return;
2772	}
2773	// We actually want to continue, in case there is an SEH_BeginEpilogue
2774	// instruction after the EH_LABEL. In some situations, IR is produced
2775	// that contains EH_LABEL pseudo-instructions, even when we are not
2776	// generating IP2State tables. We still need to insert a NOP before
2777	// SEH_BeginEpilogue in that case.
2778	continue;
2779	}
2780
2781	// Somewhat similarly, if the CALL is the last instruction before the
2782	// SEH prologue, then we also need a NOP. This is necessary because the
2783	// Windows stack unwinder will not invoke a function's exception handler
2784	// if the instruction pointer is in the function prologue or epilogue.
2785	//
2786	// We always emit a NOP before SEH_BeginEpilogue, even if there is no
2787	// personality function (unwind info) for this frame. This is the same
2788	// behavior as MSVC.
2789	if (NextMI.getOpcode() == X86::SEH_BeginEpilogue) {
2790	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::NOOP));
2791	return;
2792	}
2793
2794	if (!NextMI.isPseudo() && !NextMI.isMetaInstruction()) {
2795	// We found a real instruction. During the CALL, the return IP will
2796	// point to this instruction. Since this instruction has the same EH
2797	// state as the call itself (because there is no intervening EH_LABEL),
2798	// the IP2State table will be accurate; there is no need to insert a
2799	// NOP.
2800	return;
2801	}
2802
2803	// The next instruction is a pseudo-op. Ignore it and keep searching.
2804	// Because these instructions do not generate any machine code, they
2805	// cannot prevent the IP2State table from pointing at the wrong
2806	// instruction during a CALL.
2807	}
2808
2809	// We've reached the end of this MBB. Find the next MBB in program order.
2810	// MBB order should be finalized by this point, so falling across MBBs is
2811	// expected.
2812	++MFI;
2813	if (MFI == MFE) {
2814	// No more blocks; we've reached the end of the function. This should
2815	// only happen with no-return functions, but double-check to be sure.
2816	if (HasEHPersonality) {
2817	// If the CALL has no successors, then it is a noreturn function.
2818	// Insert an INT3 instead of a NOP. This accomplishes the same purpose,
2819	// but is more clear to read. Also, analysis tools will understand
2820	// that they should not continue disassembling after the CALL (unless
2821	// there are other branches to that label).
2822	if (MI->getParent()->succ_empty())
2823	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::INT3));
2824	else
2825	EmitAndCountInstruction(Inst&: MCInstBuilder (X86::NOOP));
2826	}
2827	return;
2828	}
2829
2830	// Set up iterator to scan the next basic block.
2831	const MachineBasicBlock NextMBB = &MFI;
2832	MBBI = NextMBB->instr_begin();
2833	MBBE = NextMBB->instr_end();
2834	}
2835	}
2836
2837	void X86AsmPrinter::emitLabelAndRecordForImportCallOptimization(
2838	ImportCallKind Kind) {
2839	assert(EnableImportCallOptimization);
2840
2841	MCSymbol *CallSiteSymbol = MMI->getContext().createNamedTempSymbol(Name: "impcall");
2842	OutStreamer ->emitLabel(Symbol: CallSiteSymbol);
2843
2844	SectionToImportedFunctionCalls [OutStreamer ->getCurrentSectionOnly()]
2845	.push_back(x: {.CalleeSymbol: CallSiteSymbol, .Kind: Kind});
2846	}
2847

Browse the source code of llvm_projects/llvm/lib/Target/X86/X86MCInstLower.cpp