1 | //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains code to lower X86 MachineInstrs to their corresponding |
10 | // MCInst records. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "MCTargetDesc/X86ATTInstPrinter.h" |
15 | #include "MCTargetDesc/X86BaseInfo.h" |
16 | #include "MCTargetDesc/X86EncodingOptimization.h" |
17 | #include "MCTargetDesc/X86InstComments.h" |
18 | #include "MCTargetDesc/X86MCAsmInfo.h" |
19 | #include "MCTargetDesc/X86ShuffleDecode.h" |
20 | #include "MCTargetDesc/X86TargetStreamer.h" |
21 | #include "X86AsmPrinter.h" |
22 | #include "X86MachineFunctionInfo.h" |
23 | #include "X86RegisterInfo.h" |
24 | #include "X86ShuffleDecodeConstantPool.h" |
25 | #include "X86Subtarget.h" |
26 | #include "llvm/ADT/STLExtras.h" |
27 | #include "llvm/ADT/SmallString.h" |
28 | #include "llvm/ADT/StringExtras.h" |
29 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
30 | #include "llvm/CodeGen/MachineConstantPool.h" |
31 | #include "llvm/CodeGen/MachineFunction.h" |
32 | #include "llvm/CodeGen/MachineModuleInfoImpls.h" |
33 | #include "llvm/CodeGen/MachineOperand.h" |
34 | #include "llvm/CodeGen/StackMaps.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/GlobalValue.h" |
37 | #include "llvm/IR/Mangler.h" |
38 | #include "llvm/MC/MCAsmInfo.h" |
39 | #include "llvm/MC/MCCodeEmitter.h" |
40 | #include "llvm/MC/MCContext.h" |
41 | #include "llvm/MC/MCExpr.h" |
42 | #include "llvm/MC/MCFixup.h" |
43 | #include "llvm/MC/MCInst.h" |
44 | #include "llvm/MC/MCInstBuilder.h" |
45 | #include "llvm/MC/MCSection.h" |
46 | #include "llvm/MC/MCStreamer.h" |
47 | #include "llvm/MC/MCSymbol.h" |
48 | #include "llvm/MC/TargetRegistry.h" |
49 | #include "llvm/Target/TargetLoweringObjectFile.h" |
50 | #include "llvm/Target/TargetMachine.h" |
51 | #include "llvm/Transforms/CFGuard.h" |
52 | #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" |
53 | #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" |
54 | #include <string> |
55 | |
56 | using namespace llvm; |
57 | |
58 | static cl::opt<bool> EnableBranchHint("enable-branch-hint" , |
59 | cl::desc("Enable branch hint." ), |
60 | cl::init(Val: false), cl::Hidden); |
61 | static cl::opt<unsigned> BranchHintProbabilityThreshold( |
62 | "branch-hint-probability-threshold" , |
63 | cl::desc("The probability threshold of enabling branch hint." ), |
64 | cl::init(Val: 50), cl::Hidden); |
65 | |
66 | namespace { |
67 | |
68 | /// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst. |
69 | class X86MCInstLower { |
70 | MCContext &Ctx; |
71 | const MachineFunction &MF; |
72 | const TargetMachine &TM; |
73 | const MCAsmInfo &MAI; |
74 | X86AsmPrinter &AsmPrinter; |
75 | |
76 | public: |
77 | X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); |
78 | |
79 | MCOperand LowerMachineOperand(const MachineInstr *MI, |
80 | const MachineOperand &MO) const; |
81 | void Lower(const MachineInstr *MI, MCInst &OutMI) const; |
82 | |
83 | MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; |
84 | MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; |
85 | |
86 | private: |
87 | MachineModuleInfoMachO &getMachOMMI() const; |
88 | }; |
89 | |
90 | } // end anonymous namespace |
91 | |
92 | /// A RAII helper which defines a region of instructions which can't have |
93 | /// padding added between them for correctness. |
94 | struct NoAutoPaddingScope { |
95 | MCStreamer &OS; |
96 | const bool OldAllowAutoPadding; |
97 | NoAutoPaddingScope(MCStreamer &OS) |
98 | : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { |
99 | changeAndComment(b: false); |
100 | } |
101 | ~NoAutoPaddingScope() { changeAndComment(b: OldAllowAutoPadding); } |
102 | void changeAndComment(bool b) { |
103 | if (b == OS.getAllowAutoPadding()) |
104 | return; |
105 | OS.setAllowAutoPadding(b); |
106 | if (b) |
107 | OS.emitRawComment(T: "autopadding" ); |
108 | else |
109 | OS.emitRawComment(T: "noautopadding" ); |
110 | } |
111 | }; |
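// A minimal usage sketch (illustrative only): wrap the scope around a
// fixed-size sequence so the assembler cannot insert alignment padding
// between the instructions, e.g.
//   {
//     NoAutoPaddingScope NoPadScope(*OutStreamer);
//     // ... emit instructions that must stay contiguous ...
//   } // the previous auto-padding setting is restored here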
112 | |
113 | // Emit a minimal sequence of nops spanning NumBytes bytes. |
114 | static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, |
115 | const X86Subtarget *Subtarget); |
116 | |
117 | void X86AsmPrinter::StackMapShadowTracker::count(const MCInst &Inst, |
118 | const MCSubtargetInfo &STI, |
119 | MCCodeEmitter *CodeEmitter) { |
120 | if (InShadow) { |
121 | SmallString<256> Code; |
122 | SmallVector<MCFixup, 4> Fixups; |
123 | CodeEmitter->encodeInstruction(Inst, CB&: Code, Fixups, STI); |
124 | CurrentShadowSize += Code.size(); |
125 | if (CurrentShadowSize >= RequiredShadowSize) |
126 | InShadow = false; // The shadow is big enough. Stop counting. |
127 | } |
128 | } |
129 | |
130 | void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( |
131 | MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { |
132 | if (InShadow && CurrentShadowSize < RequiredShadowSize) { |
133 | InShadow = false; |
134 | emitX86Nops(OS&: OutStreamer, NumBytes: RequiredShadowSize - CurrentShadowSize, |
135 | Subtarget: &MF->getSubtarget<X86Subtarget>()); |
136 | } |
137 | } |
138 | |
139 | void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { |
140 | OutStreamer->emitInstruction(Inst, STI: getSubtargetInfo()); |
141 | SMShadowTracker.count(Inst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
142 | } |
143 | |
144 | X86MCInstLower::X86MCInstLower(const MachineFunction &mf, |
145 | X86AsmPrinter &asmprinter) |
146 | : Ctx(asmprinter.OutContext), MF(mf), TM(mf.getTarget()), |
147 | MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {} |
148 | |
149 | MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { |
150 | return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); |
151 | } |
152 | |
153 | /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol |
154 | /// operand to an MCSymbol. |
155 | MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { |
156 | const Triple &TT = TM.getTargetTriple(); |
157 | if (MO.isGlobal() && TT.isOSBinFormatELF()) |
158 | return AsmPrinter.getSymbolPreferLocal(GV: *MO.getGlobal()); |
159 | |
160 | const DataLayout &DL = MF.getDataLayout(); |
161 | assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && |
162 | "Isn't a symbol reference" ); |
163 | |
164 | MCSymbol *Sym = nullptr; |
165 | SmallString<128> Name; |
166 | StringRef Suffix; |
167 | |
168 | switch (MO.getTargetFlags()) { |
169 | case X86II::MO_DLLIMPORT: |
170 | // Handle dllimport linkage. |
171 | Name += "__imp_" ; |
172 | break; |
173 | case X86II::MO_COFFSTUB: |
174 | Name += ".refptr." ; |
175 | break; |
176 | case X86II::MO_DARWIN_NONLAZY: |
177 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: |
178 | Suffix = "$non_lazy_ptr" ; |
179 | break; |
180 | } |
181 | |
182 | if (!Suffix.empty()) |
183 | Name += DL.getPrivateGlobalPrefix(); |
184 | |
185 | if (MO.isGlobal()) { |
186 | const GlobalValue *GV = MO.getGlobal(); |
187 | AsmPrinter.getNameWithPrefix(Name, GV); |
188 | } else if (MO.isSymbol()) { |
189 | Mangler::getNameWithPrefix(OutName&: Name, GVName: MO.getSymbolName(), DL); |
190 | } else if (MO.isMBB()) { |
191 | assert(Suffix.empty()); |
192 | Sym = MO.getMBB()->getSymbol(); |
193 | } |
194 | |
195 | Name += Suffix; |
196 | if (!Sym) |
197 | Sym = Ctx.getOrCreateSymbol(Name); |
198 | |
199 | // If the target flags on the operand change the name of the symbol, do that |
200 | // before we return the symbol. |
201 | switch (MO.getTargetFlags()) { |
202 | default: |
203 | break; |
204 | case X86II::MO_COFFSTUB: { |
205 | MachineModuleInfoCOFF &MMICOFF = |
206 | AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>(); |
207 | MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym); |
208 | if (!StubSym.getPointer()) { |
209 | assert(MO.isGlobal() && "Extern symbol not handled yet" ); |
210 | StubSym = MachineModuleInfoImpl::StubValueTy( |
211 | AsmPrinter.getSymbol(GV: MO.getGlobal()), true); |
212 | } |
213 | break; |
214 | } |
215 | case X86II::MO_DARWIN_NONLAZY: |
216 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { |
217 | MachineModuleInfoImpl::StubValueTy &StubSym = |
218 | getMachOMMI().getGVStubEntry(Sym); |
219 | if (!StubSym.getPointer()) { |
220 | assert(MO.isGlobal() && "Extern symbol not handled yet" ); |
221 | StubSym = MachineModuleInfoImpl::StubValueTy( |
222 | AsmPrinter.getSymbol(GV: MO.getGlobal()), |
223 | !MO.getGlobal()->hasInternalLinkage()); |
224 | } |
225 | break; |
226 | } |
227 | } |
228 | |
229 | return Sym; |
230 | } |
231 | |
232 | MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, |
233 | MCSymbol *Sym) const { |
234 | // FIXME: We would like an efficient form for this, so we don't have to do a |
235 | // lot of extra uniquing. |
236 | const MCExpr *Expr = nullptr; |
237 | uint16_t Specifier = X86::S_None; |
238 | |
239 | switch (MO.getTargetFlags()) { |
240 | default: |
241 | llvm_unreachable("Unknown target flag on GV operand" ); |
242 | case X86II::MO_NO_FLAG: // No flag. |
243 | // These affect the name of the symbol, not any suffix. |
244 | case X86II::MO_DARWIN_NONLAZY: |
245 | case X86II::MO_DLLIMPORT: |
246 | case X86II::MO_COFFSTUB: |
247 | break; |
248 | |
249 | case X86II::MO_TLVP: |
250 | Specifier = X86::S_TLVP; |
251 | break; |
252 | case X86II::MO_TLVP_PIC_BASE: |
253 | Expr = MCSymbolRefExpr::create(Symbol: Sym, specifier: X86::S_TLVP, Ctx); |
254 | // Subtract the pic base. |
255 | Expr = MCBinaryExpr::createSub( |
256 | LHS: Expr, RHS: MCSymbolRefExpr::create(Symbol: MF.getPICBaseSymbol(), Ctx), Ctx); |
257 | break; |
258 | case X86II::MO_SECREL: |
259 | Specifier = uint16_t(X86::S_COFF_SECREL); |
260 | break; |
261 | case X86II::MO_TLSGD: |
262 | Specifier = X86::S_TLSGD; |
263 | break; |
264 | case X86II::MO_TLSLD: |
265 | Specifier = X86::S_TLSLD; |
266 | break; |
267 | case X86II::MO_TLSLDM: |
268 | Specifier = X86::S_TLSLDM; |
269 | break; |
270 | case X86II::MO_GOTTPOFF: |
271 | Specifier = X86::S_GOTTPOFF; |
272 | break; |
273 | case X86II::MO_INDNTPOFF: |
274 | Specifier = X86::S_INDNTPOFF; |
275 | break; |
276 | case X86II::MO_TPOFF: |
277 | Specifier = X86::S_TPOFF; |
278 | break; |
279 | case X86II::MO_DTPOFF: |
280 | Specifier = X86::S_DTPOFF; |
281 | break; |
282 | case X86II::MO_NTPOFF: |
283 | Specifier = X86::S_NTPOFF; |
284 | break; |
285 | case X86II::MO_GOTNTPOFF: |
286 | Specifier = X86::S_GOTNTPOFF; |
287 | break; |
288 | case X86II::MO_GOTPCREL: |
289 | Specifier = X86::S_GOTPCREL; |
290 | break; |
291 | case X86II::MO_GOTPCREL_NORELAX: |
292 | Specifier = X86::S_GOTPCREL_NORELAX; |
293 | break; |
294 | case X86II::MO_GOT: |
295 | Specifier = X86::S_GOT; |
296 | break; |
297 | case X86II::MO_GOTOFF: |
298 | Specifier = X86::S_GOTOFF; |
299 | break; |
300 | case X86II::MO_PLT: |
301 | Specifier = X86::S_PLT; |
302 | break; |
303 | case X86II::MO_ABS8: |
304 | Specifier = X86::S_ABS8; |
305 | break; |
306 | case X86II::MO_PIC_BASE_OFFSET: |
307 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: |
308 | Expr = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
309 | // Subtract the pic base. |
310 | Expr = MCBinaryExpr::createSub( |
311 | LHS: Expr, RHS: MCSymbolRefExpr::create(Symbol: MF.getPICBaseSymbol(), Ctx), Ctx); |
312 | if (MO.isJTI()) { |
313 | assert(MAI.doesSetDirectiveSuppressReloc()); |
314 | // If .set directive is supported, use it to reduce the number of |
315 | // relocations the assembler will generate for differences between |
316 | // local labels. This is only safe when the symbols are in the same |
317 | // section so we are restricting it to jumptable references. |
318 | MCSymbol *Label = Ctx.createTempSymbol(); |
319 | AsmPrinter.OutStreamer->emitAssignment(Symbol: Label, Value: Expr); |
320 | Expr = MCSymbolRefExpr::create(Symbol: Label, Ctx); |
321 | } |
322 | break; |
323 | } |
324 | |
325 | if (!Expr) |
326 | Expr = MCSymbolRefExpr::create(Symbol: Sym, specifier: Specifier, Ctx); |
327 | |
328 | if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) |
329 | Expr = MCBinaryExpr::createAdd( |
330 | LHS: Expr, RHS: MCConstantExpr::create(Value: MO.getOffset(), Ctx), Ctx); |
331 | return MCOperand::createExpr(Val: Expr); |
332 | } |
333 | |
334 | static unsigned getRetOpcode(const X86Subtarget &Subtarget) { |
335 | return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; |
336 | } |
337 | |
338 | MCOperand X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, |
339 | const MachineOperand &MO) const { |
340 | switch (MO.getType()) { |
341 | default: |
342 | MI->print(OS&: errs()); |
343 | llvm_unreachable("unknown operand type" ); |
344 | case MachineOperand::MO_Register: |
345 | // Ignore all implicit register operands. |
346 | if (MO.isImplicit()) |
347 | return MCOperand(); |
348 | return MCOperand::createReg(Reg: MO.getReg()); |
349 | case MachineOperand::MO_Immediate: |
350 | return MCOperand::createImm(Val: MO.getImm()); |
351 | case MachineOperand::MO_MachineBasicBlock: |
352 | case MachineOperand::MO_GlobalAddress: |
353 | case MachineOperand::MO_ExternalSymbol: |
354 | return LowerSymbolOperand(MO, Sym: GetSymbolFromOperand(MO)); |
355 | case MachineOperand::MO_MCSymbol: |
356 | return LowerSymbolOperand(MO, Sym: MO.getMCSymbol()); |
357 | case MachineOperand::MO_JumpTableIndex: |
358 | return LowerSymbolOperand(MO, Sym: AsmPrinter.GetJTISymbol(JTID: MO.getIndex())); |
359 | case MachineOperand::MO_ConstantPoolIndex: |
360 | return LowerSymbolOperand(MO, Sym: AsmPrinter.GetCPISymbol(CPID: MO.getIndex())); |
361 | case MachineOperand::MO_BlockAddress: |
362 | return LowerSymbolOperand( |
363 | MO, Sym: AsmPrinter.GetBlockAddressSymbol(BA: MO.getBlockAddress())); |
364 | case MachineOperand::MO_RegisterMask: |
365 | // Ignore call clobbers. |
366 | return MCOperand(); |
367 | } |
368 | } |
369 | |
370 | // Replace TAILJMP opcodes with their equivalent opcodes that have encoding |
371 | // information. |
372 | static unsigned convertTailJumpOpcode(unsigned Opcode) { |
373 | switch (Opcode) { |
374 | case X86::TAILJMPr: |
375 | Opcode = X86::JMP32r; |
376 | break; |
377 | case X86::TAILJMPm: |
378 | Opcode = X86::JMP32m; |
379 | break; |
380 | case X86::TAILJMPr64: |
381 | Opcode = X86::JMP64r; |
382 | break; |
383 | case X86::TAILJMPm64: |
384 | Opcode = X86::JMP64m; |
385 | break; |
386 | case X86::TAILJMPr64_REX: |
387 | Opcode = X86::JMP64r_REX; |
388 | break; |
389 | case X86::TAILJMPm64_REX: |
390 | Opcode = X86::JMP64m_REX; |
391 | break; |
392 | case X86::TAILJMPd: |
393 | case X86::TAILJMPd64: |
394 | Opcode = X86::JMP_1; |
395 | break; |
396 | case X86::TAILJMPd_CC: |
397 | case X86::TAILJMPd64_CC: |
398 | Opcode = X86::JCC_1; |
399 | break; |
400 | } |
401 | |
402 | return Opcode; |
403 | } |
404 | |
405 | void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { |
406 | OutMI.setOpcode(MI->getOpcode()); |
407 | |
408 | for (const MachineOperand &MO : MI->operands()) |
409 | if (auto Op = LowerMachineOperand(MI, MO); Op.isValid()) |
410 | OutMI.addOperand(Op); |
411 | |
412 | bool In64BitMode = AsmPrinter.getSubtarget().is64Bit(); |
413 | if (X86::optimizeInstFromVEX3ToVEX2(MI&: OutMI, Desc: MI->getDesc()) || |
414 | X86::optimizeShiftRotateWithImmediateOne(MI&: OutMI) || |
415 | X86::optimizeVPCMPWithImmediateOneOrSix(MI&: OutMI) || |
416 | X86::optimizeMOVSX(MI&: OutMI) || X86::optimizeINCDEC(MI&: OutMI, In64BitMode) || |
417 | X86::optimizeMOV(MI&: OutMI, In64BitMode) || |
418 | X86::optimizeToFixedRegisterOrShortImmediateForm(MI&: OutMI)) |
419 | return; |
420 | |
421 | // Handle a few special cases to eliminate operand modifiers. |
422 | switch (OutMI.getOpcode()) { |
423 | case X86::LEA64_32r: |
424 | case X86::LEA64r: |
425 | case X86::LEA16r: |
426 | case X86::LEA32r: |
427 | // LEA should have a segment register, but it must be empty. |
428 | assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands && |
429 | "Unexpected # of LEA operands" ); |
430 | assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && |
431 | "LEA has segment specified!" ); |
432 | break; |
433 | case X86::MULX32Hrr: |
434 | case X86::MULX32Hrm: |
435 | case X86::MULX64Hrr: |
436 | case X86::MULX64Hrm: { |
437 | // Turn into regular MULX by duplicating the destination. |
438 | unsigned NewOpc; |
439 | switch (OutMI.getOpcode()) { |
440 | default: llvm_unreachable("Invalid opcode" ); |
441 | case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; |
442 | case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; |
443 | case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; |
444 | case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; |
445 | } |
446 | OutMI.setOpcode(NewOpc); |
447 | // Duplicate the destination. |
448 | MCRegister DestReg = OutMI.getOperand(i: 0).getReg(); |
449 | OutMI.insert(I: OutMI.begin(), Op: MCOperand::createReg(Reg: DestReg)); |
450 | break; |
451 | } |
452 | // CALL64r, CALL64pcrel32 - These instructions used to have |
453 | // register inputs modeled as normal uses instead of implicit uses. As such, |
454 | // we used to truncate off all but the first operand (the callee). This |
455 | // issue seems to have been fixed at some point. This assert verifies that. |
456 | case X86::CALL64r: |
457 | case X86::CALL64pcrel32: |
458 | assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!" ); |
459 | break; |
460 | case X86::EH_RETURN: |
461 | case X86::EH_RETURN64: { |
462 | OutMI = MCInst(); |
463 | OutMI.setOpcode(getRetOpcode(Subtarget: AsmPrinter.getSubtarget())); |
464 | break; |
465 | } |
466 | case X86::CLEANUPRET: { |
467 | // Replace CLEANUPRET with the appropriate RET. |
468 | OutMI = MCInst(); |
469 | OutMI.setOpcode(getRetOpcode(Subtarget: AsmPrinter.getSubtarget())); |
470 | break; |
471 | } |
472 | case X86::CATCHRET: { |
473 | // Replace CATCHRET with the appropriate RET. |
474 | const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); |
475 | unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX; |
476 | OutMI = MCInst(); |
477 | OutMI.setOpcode(getRetOpcode(Subtarget)); |
478 | OutMI.addOperand(Op: MCOperand::createReg(Reg: ReturnReg)); |
479 | break; |
480 | } |
481 | // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump |
482 | // instruction. |
483 | case X86::TAILJMPr: |
484 | case X86::TAILJMPr64: |
485 | case X86::TAILJMPr64_REX: |
486 | case X86::TAILJMPd: |
487 | case X86::TAILJMPd64: |
488 | assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!" ); |
489 | OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode())); |
490 | break; |
491 | case X86::TAILJMPd_CC: |
492 | case X86::TAILJMPd64_CC: |
493 | assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!" ); |
494 | OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode())); |
495 | break; |
496 | case X86::TAILJMPm: |
497 | case X86::TAILJMPm64: |
498 | case X86::TAILJMPm64_REX: |
499 | assert(OutMI.getNumOperands() == X86::AddrNumOperands && |
500 | "Unexpected number of operands!" ); |
501 | OutMI.setOpcode(convertTailJumpOpcode(Opcode: OutMI.getOpcode())); |
502 | break; |
503 | case X86::MASKMOVDQU: |
504 | case X86::VMASKMOVDQU: |
505 | if (In64BitMode) |
506 | OutMI.setFlags(X86::IP_HAS_AD_SIZE); |
507 | break; |
508 | case X86::BSF16rm: |
509 | case X86::BSF16rr: |
510 | case X86::BSF32rm: |
511 | case X86::BSF32rr: |
512 | case X86::BSF64rm: |
513 | case X86::BSF64rr: { |
514 | // Add a REP prefix to BSF instructions so that newer processors can |
515 | // recognize them as TZCNT, which has better performance than BSF. |
516 | // BSF and TZCNT have different interpretations of the ZF bit, so make sure |
517 | // it won't be used later. |
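// As an illustration (assuming a BMI-capable CPU): the bytes emitted for
// "rep; bsf %ecx, %eax" decode as "tzcnt %ecx, %eax", while older CPUs
// simply ignore the REP prefix and execute a plain BSF.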
518 | const MachineOperand *FlagDef = |
519 | MI->findRegisterDefOperand(Reg: X86::EFLAGS, /*TRI=*/nullptr); |
520 | if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead()) |
521 | OutMI.setFlags(X86::IP_HAS_REPEAT); |
522 | break; |
523 | } |
524 | default: |
525 | break; |
526 | } |
527 | } |
528 | |
529 | void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, |
530 | const MachineInstr &MI) { |
531 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
532 | bool Is64Bits = getSubtarget().is64Bit(); |
533 | bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64(); |
534 | MCContext &Ctx = OutStreamer->getContext(); |
535 | |
536 | X86::Specifier Specifier; |
537 | switch (MI.getOpcode()) { |
538 | case X86::TLS_addr32: |
539 | case X86::TLS_addr64: |
540 | case X86::TLS_addrX32: |
541 | Specifier = X86::S_TLSGD; |
542 | break; |
543 | case X86::TLS_base_addr32: |
544 | Specifier = X86::S_TLSLDM; |
545 | break; |
546 | case X86::TLS_base_addr64: |
547 | case X86::TLS_base_addrX32: |
548 | Specifier = X86::S_TLSLD; |
549 | break; |
550 | case X86::TLS_desc32: |
551 | case X86::TLS_desc64: |
552 | Specifier = X86::S_TLSDESC; |
553 | break; |
554 | default: |
555 | llvm_unreachable("unexpected opcode" ); |
556 | } |
557 | |
558 | const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create( |
559 | Symbol: MCInstLowering.GetSymbolFromOperand(MO: MI.getOperand(i: 3)), specifier: Specifier, Ctx); |
560 | |
561 | // Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD |
562 | // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is |
563 | // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by |
564 | // only using GOT when GOTPCRELX is enabled. |
565 | // TODO Delete the workaround when rustc no longer relies on the hack |
566 | bool UseGot = MMI->getModule()->getRtLibUseGOT() && |
567 | Ctx.getTargetOptions()->X86RelaxRelocations; |
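// For reference, the x86-64 (LP64) general-dynamic sequence emitted below
// looks roughly like this when not calling through the GOT:
//   .byte 0x66
//   leaq   sym@tlsgd(%rip), %rdi
//   .byte  0x66, 0x66
//   rex64  call __tls_get_addr@PLT
// The prefixes pad the sequence to the fixed size linkers expect when
// relaxing it to an initial-exec or local-exec access.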
568 | |
569 | if (Specifier == X86::S_TLSDESC) { |
570 | const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create( |
571 | Symbol: MCInstLowering.GetSymbolFromOperand(MO: MI.getOperand(i: 3)), specifier: X86::S_TLSCALL, |
572 | Ctx); |
573 | EmitAndCountInstruction( |
574 | Inst&: MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r) |
575 | .addReg(Reg: Is64BitsLP64 ? X86::RAX : X86::EAX) |
576 | .addReg(Reg: Is64Bits ? X86::RIP : X86::EBX) |
577 | .addImm(Val: 1) |
578 | .addReg(Reg: 0) |
579 | .addExpr(Val: Sym) |
580 | .addReg(Reg: 0)); |
581 | EmitAndCountInstruction( |
582 | Inst&: MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m) |
583 | .addReg(Reg: Is64BitsLP64 ? X86::RAX : X86::EAX) |
584 | .addImm(Val: 1) |
585 | .addReg(Reg: 0) |
586 | .addExpr(Val: Expr) |
587 | .addReg(Reg: 0)); |
588 | } else if (Is64Bits) { |
589 | bool NeedsPadding = Specifier == X86::S_TLSGD; |
590 | if (NeedsPadding && Is64BitsLP64) |
591 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::DATA16_PREFIX)); |
592 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::LEA64r) |
593 | .addReg(Reg: X86::RDI) |
594 | .addReg(Reg: X86::RIP) |
595 | .addImm(Val: 1) |
596 | .addReg(Reg: 0) |
597 | .addExpr(Val: Sym) |
598 | .addReg(Reg: 0)); |
599 | const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "__tls_get_addr" ); |
600 | if (NeedsPadding) { |
601 | if (!UseGot) |
602 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::DATA16_PREFIX)); |
603 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::DATA16_PREFIX)); |
604 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::REX64_PREFIX)); |
605 | } |
606 | if (UseGot) { |
607 | const MCExpr *Expr = |
608 | MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_GOTPCREL, Ctx); |
609 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL64m) |
610 | .addReg(Reg: X86::RIP) |
611 | .addImm(Val: 1) |
612 | .addReg(Reg: 0) |
613 | .addExpr(Val: Expr) |
614 | .addReg(Reg: 0)); |
615 | } else { |
616 | EmitAndCountInstruction( |
617 | Inst&: MCInstBuilder(X86::CALL64pcrel32) |
618 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_PLT, Ctx))); |
619 | } |
620 | } else { |
621 | if (Specifier == X86::S_TLSGD && !UseGot) { |
622 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::LEA32r) |
623 | .addReg(Reg: X86::EAX) |
624 | .addReg(Reg: 0) |
625 | .addImm(Val: 1) |
626 | .addReg(Reg: X86::EBX) |
627 | .addExpr(Val: Sym) |
628 | .addReg(Reg: 0)); |
629 | } else { |
630 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::LEA32r) |
631 | .addReg(Reg: X86::EAX) |
632 | .addReg(Reg: X86::EBX) |
633 | .addImm(Val: 1) |
634 | .addReg(Reg: 0) |
635 | .addExpr(Val: Sym) |
636 | .addReg(Reg: 0)); |
637 | } |
638 | |
639 | const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "___tls_get_addr" ); |
640 | if (UseGot) { |
641 | const MCExpr *Expr = MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_GOT, Ctx); |
642 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL32m) |
643 | .addReg(Reg: X86::EBX) |
644 | .addImm(Val: 1) |
645 | .addReg(Reg: 0) |
646 | .addExpr(Val: Expr) |
647 | .addReg(Reg: 0)); |
648 | } else { |
649 | EmitAndCountInstruction( |
650 | Inst&: MCInstBuilder(X86::CALLpcrel32) |
651 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: TlsGetAddr, specifier: X86::S_PLT, Ctx))); |
652 | } |
653 | } |
654 | } |
655 | |
656 | /// Emit the largest nop instruction smaller than or equal to \p NumBytes |
657 | /// bytes. Return the size of the nop emitted. |
658 | static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, |
659 | const X86Subtarget *Subtarget) { |
660 | // Determine the longest nop which can be efficiently decoded for the given |
661 | // target cpu. 15 bytes is the longest single NOP instruction, but some |
662 | // platforms can't decode the longest forms efficiently. |
663 | unsigned MaxNopLength = 1; |
664 | if (Subtarget->is64Bit()) { |
665 | // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the |
666 | // IndexReg/BaseReg below need to be updated. |
667 | if (Subtarget->hasFeature(Feature: X86::TuningFast7ByteNOP)) |
668 | MaxNopLength = 7; |
669 | else if (Subtarget->hasFeature(Feature: X86::TuningFast15ByteNOP)) |
670 | MaxNopLength = 15; |
671 | else if (Subtarget->hasFeature(Feature: X86::TuningFast11ByteNOP)) |
672 | MaxNopLength = 11; |
673 | else |
674 | MaxNopLength = 10; |
675 | } if (Subtarget->is32Bit()) |
676 | MaxNopLength = 2; |
677 | |
678 | // Cap a single nop emission at the profitable value for the target |
679 | NumBytes = std::min(a: NumBytes, b: MaxNopLength); |
680 | |
681 | unsigned NopSize; |
682 | unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; |
683 | IndexReg = Displacement = SegmentReg = 0; |
684 | BaseReg = X86::RAX; |
685 | ScaleVal = 1; |
686 | switch (NumBytes) { |
687 | case 0: |
688 | llvm_unreachable("Zero nops?" ); |
689 | break; |
690 | case 1: |
691 | NopSize = 1; |
692 | Opc = X86::NOOP; |
693 | break; |
694 | case 2: |
695 | NopSize = 2; |
696 | Opc = X86::XCHG16ar; |
697 | break; |
698 | case 3: |
699 | NopSize = 3; |
700 | Opc = X86::NOOPL; |
701 | break; |
702 | case 4: |
703 | NopSize = 4; |
704 | Opc = X86::NOOPL; |
705 | Displacement = 8; |
706 | break; |
707 | case 5: |
708 | NopSize = 5; |
709 | Opc = X86::NOOPL; |
710 | Displacement = 8; |
711 | IndexReg = X86::RAX; |
712 | break; |
713 | case 6: |
714 | NopSize = 6; |
715 | Opc = X86::NOOPW; |
716 | Displacement = 8; |
717 | IndexReg = X86::RAX; |
718 | break; |
719 | case 7: |
720 | NopSize = 7; |
721 | Opc = X86::NOOPL; |
722 | Displacement = 512; |
723 | break; |
724 | case 8: |
725 | NopSize = 8; |
726 | Opc = X86::NOOPL; |
727 | Displacement = 512; |
728 | IndexReg = X86::RAX; |
729 | break; |
730 | case 9: |
731 | NopSize = 9; |
732 | Opc = X86::NOOPW; |
733 | Displacement = 512; |
734 | IndexReg = X86::RAX; |
735 | break; |
736 | default: |
737 | NopSize = 10; |
738 | Opc = X86::NOOPW; |
739 | Displacement = 512; |
740 | IndexReg = X86::RAX; |
741 | SegmentReg = X86::CS; |
742 | break; |
743 | } |
744 | |
745 | unsigned NumPrefixes = std::min(a: NumBytes - NopSize, b: 5U); |
746 | NopSize += NumPrefixes; |
747 | for (unsigned i = 0; i != NumPrefixes; ++i) |
748 | OS.emitBytes(Data: "\x66" ); |
749 | |
750 | switch (Opc) { |
751 | default: llvm_unreachable("Unexpected opcode" ); |
752 | case X86::NOOP: |
753 | OS.emitInstruction(Inst: MCInstBuilder(Opc), STI: *Subtarget); |
754 | break; |
755 | case X86::XCHG16ar: |
756 | OS.emitInstruction(Inst: MCInstBuilder(Opc).addReg(Reg: X86::AX).addReg(Reg: X86::AX), |
757 | STI: *Subtarget); |
758 | break; |
759 | case X86::NOOPL: |
760 | case X86::NOOPW: |
761 | OS.emitInstruction(Inst: MCInstBuilder(Opc) |
762 | .addReg(Reg: BaseReg) |
763 | .addImm(Val: ScaleVal) |
764 | .addReg(Reg: IndexReg) |
765 | .addImm(Val: Displacement) |
766 | .addReg(Reg: SegmentReg), |
767 | STI: *Subtarget); |
768 | break; |
769 | } |
770 | assert(NopSize <= NumBytes && "We overemitted?" ); |
771 | return NopSize; |
772 | } |
773 | |
774 | /// Emit the optimal amount of multi-byte nops on X86. |
775 | static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, |
776 | const X86Subtarget *Subtarget) { |
777 | unsigned NopsToEmit = NumBytes; |
778 | (void)NopsToEmit; |
779 | while (NumBytes) { |
780 | NumBytes -= emitNop(OS, NumBytes, Subtarget); |
781 | assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!" ); |
782 | } |
783 | } |
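// Illustrative example: on a 64-bit target with no fast-NOP tuning features
// (MaxNopLength == 10), a request for 13 bytes is emitted as one 10-byte NOP
// followed by a 3-byte NOPL.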
784 | |
785 | void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, |
786 | X86MCInstLower &MCIL) { |
787 | assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64" ); |
788 | |
789 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
790 | |
791 | StatepointOpers SOpers(&MI); |
792 | if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { |
793 | emitX86Nops(OS&: *OutStreamer, NumBytes: PatchBytes, Subtarget); |
794 | } else { |
795 | // Lower call target and choose correct opcode |
796 | const MachineOperand &CallTarget = SOpers.getCallTarget(); |
797 | MCOperand CallTargetMCOp; |
798 | unsigned CallOpcode; |
799 | switch (CallTarget.getType()) { |
800 | case MachineOperand::MO_GlobalAddress: |
801 | case MachineOperand::MO_ExternalSymbol: |
802 | CallTargetMCOp = MCIL.LowerSymbolOperand( |
803 | MO: CallTarget, Sym: MCIL.GetSymbolFromOperand(MO: CallTarget)); |
804 | CallOpcode = X86::CALL64pcrel32; |
805 | // Currently, we only support relative addressing with statepoints. |
806 | // Otherwise, we'll need a scratch register to hold the target |
807 | // address. You'll fail asserts during load & relocation if this |
808 | // symbol is too far away. (TODO: support non-relative addressing) |
809 | break; |
810 | case MachineOperand::MO_Immediate: |
811 | CallTargetMCOp = MCOperand::createImm(Val: CallTarget.getImm()); |
812 | CallOpcode = X86::CALL64pcrel32; |
813 | // Currently, we only support relative addressing with statepoints. |
814 | // Otherwise, we'll need a scratch register to hold the target |
815 | // immediate. You'll fail asserts during load & relocation if this |
816 | // address is too far away. (TODO: support non-relative addressing) |
817 | break; |
818 | case MachineOperand::MO_Register: |
819 | // FIXME: Add retpoline support and remove this. |
820 | if (Subtarget->useIndirectThunkCalls()) |
821 | report_fatal_error(reason: "Lowering register statepoints with thunks not " |
822 | "yet implemented." ); |
823 | CallTargetMCOp = MCOperand::createReg(Reg: CallTarget.getReg()); |
824 | CallOpcode = X86::CALL64r; |
825 | break; |
826 | default: |
827 | llvm_unreachable("Unsupported operand type in statepoint call target" ); |
828 | break; |
829 | } |
830 | |
831 | // Emit call |
832 | MCInst CallInst; |
833 | CallInst.setOpcode(CallOpcode); |
834 | CallInst.addOperand(Op: CallTargetMCOp); |
835 | OutStreamer->emitInstruction(Inst: CallInst, STI: getSubtargetInfo()); |
836 | } |
837 | |
838 | // Record our statepoint node in the same section used by STACKMAP |
839 | // and PATCHPOINT |
840 | auto &Ctx = OutStreamer->getContext(); |
841 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
842 | OutStreamer->emitLabel(Symbol: MILabel); |
843 | SM.recordStatepoint(L: *MILabel, MI); |
844 | } |
845 | |
846 | void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, |
847 | X86MCInstLower &MCIL) { |
848 | // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>, |
849 | // <opcode>, <operands> |
850 | |
851 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
852 | |
853 | Register DefRegister = FaultingMI.getOperand(i: 0).getReg(); |
854 | FaultMaps::FaultKind FK = |
855 | static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(i: 1).getImm()); |
856 | MCSymbol *HandlerLabel = FaultingMI.getOperand(i: 2).getMBB()->getSymbol(); |
857 | unsigned Opcode = FaultingMI.getOperand(i: 3).getImm(); |
858 | unsigned OperandsBeginIdx = 4; |
859 | |
860 | auto &Ctx = OutStreamer->getContext(); |
861 | MCSymbol *FaultingLabel = Ctx.createTempSymbol(); |
862 | OutStreamer->emitLabel(Symbol: FaultingLabel); |
863 | |
864 | assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!" ); |
865 | FM.recordFaultingOp(FaultTy: FK, FaultingLabel, HandlerLabel); |
866 | |
867 | MCInst MI; |
868 | MI.setOpcode(Opcode); |
869 | |
870 | if (DefRegister != X86::NoRegister) |
871 | MI.addOperand(Op: MCOperand::createReg(Reg: DefRegister)); |
872 | |
873 | for (const MachineOperand &MO : |
874 | llvm::drop_begin(RangeOrContainer: FaultingMI.operands(), N: OperandsBeginIdx)) |
875 | if (auto Op = MCIL.LowerMachineOperand(MI: &FaultingMI, MO); Op.isValid()) |
876 | MI.addOperand(Op); |
877 | |
878 | OutStreamer->AddComment(T: "on-fault: " + HandlerLabel->getName()); |
879 | OutStreamer->emitInstruction(Inst: MI, STI: getSubtargetInfo()); |
880 | } |
881 | |
882 | void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, |
883 | X86MCInstLower &MCIL) { |
884 | bool Is64Bits = Subtarget->is64Bit(); |
885 | MCContext &Ctx = OutStreamer->getContext(); |
886 | MCSymbol *fentry = Ctx.getOrCreateSymbol(Name: "__fentry__" ); |
887 | const MCSymbolRefExpr *Op = MCSymbolRefExpr::create(Symbol: fentry, Ctx); |
888 | |
889 | EmitAndCountInstruction( |
890 | Inst&: MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) |
891 | .addExpr(Val: Op)); |
892 | } |
893 | |
894 | void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { |
895 | assert(std::next(MI.getIterator())->isCall() && |
896 | "KCFI_CHECK not followed by a call instruction" ); |
897 | |
898 | // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop() |
899 | // returns a 1-byte X86::NOOP, which means the offset is the same in |
900 | // bytes. This assumes that patchable-function-prefix is the same for all |
901 | // functions. |
902 | const MachineFunction &MF = *MI.getMF(); |
903 | int64_t PrefixNops = 0; |
904 | (void)MF.getFunction() |
905 | .getFnAttribute(Kind: "patchable-function-prefix" ) |
906 | .getValueAsString() |
907 | .getAsInteger(Radix: 10, Result&: PrefixNops); |
908 | |
909 | // KCFI allows indirect calls to any location that's preceded by a valid |
910 | // type identifier. To avoid encoding the full constant into an instruction, |
911 | // and thus emitting potential call target gadgets at each indirect call |
912 | // site, load a negated constant to a register and compare that to the |
913 | // expected value at the call target. |
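// A sketch of the emitted check (register names are illustrative; the actual
// selection is below):
//   movl  $<-type>, %r10d                 # negated expected type hash
//   addl  -(PrefixNops + 4)(%rax), %r10d  # add the hash stored before the target
//   je    .Lpass                          # zero sum <=> hashes match
// .Ltrap:
//   ud2                                   # recorded as a KCFI trap location
// .Lpass: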
914 | const Register AddrReg = MI.getOperand(i: 0).getReg(); |
915 | const uint32_t Type = MI.getOperand(i: 1).getImm(); |
916 | // The check is immediately before the call. If the call target is in R10, |
917 | // we can clobber R11 for the check instead. |
918 | unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D; |
919 | EmitAndCountInstruction( |
920 | Inst&: MCInstBuilder(X86::MOV32ri).addReg(Reg: TempReg).addImm(Val: -MaskKCFIType(Value: Type))); |
921 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::ADD32rm) |
922 | .addReg(Reg: X86::NoRegister) |
923 | .addReg(Reg: TempReg) |
924 | .addReg(Reg: AddrReg) |
925 | .addImm(Val: 1) |
926 | .addReg(Reg: X86::NoRegister) |
927 | .addImm(Val: -(PrefixNops + 4)) |
928 | .addReg(Reg: X86::NoRegister)); |
929 | |
930 | MCSymbol *Pass = OutContext.createTempSymbol(); |
931 | EmitAndCountInstruction( |
932 | Inst&: MCInstBuilder(X86::JCC_1) |
933 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: Pass, Ctx&: OutContext)) |
934 | .addImm(Val: X86::COND_E)); |
935 | |
936 | MCSymbol *Trap = OutContext.createTempSymbol(); |
937 | OutStreamer->emitLabel(Symbol: Trap); |
938 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::TRAP)); |
939 | emitKCFITrapEntry(MF, Symbol: Trap); |
940 | OutStreamer->emitLabel(Symbol: Pass); |
941 | } |
942 | |
943 | void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { |
944 | // FIXME: Make this work on non-ELF. |
945 | if (!TM.getTargetTriple().isOSBinFormatELF()) { |
946 | report_fatal_error(reason: "llvm.asan.check.memaccess only supported on ELF" ); |
947 | return; |
948 | } |
949 | |
950 | const auto &Reg = MI.getOperand(i: 0).getReg(); |
951 | ASanAccessInfo AccessInfo(MI.getOperand(i: 1).getImm()); |
952 | |
953 | uint64_t ShadowBase; |
954 | int MappingScale; |
955 | bool OrShadowOffset; |
956 | getAddressSanitizerParams(TargetTriple: TM.getTargetTriple(), LongSize: 64, IsKasan: AccessInfo.CompileKernel, |
957 | ShadowBase: &ShadowBase, MappingScale: &MappingScale, OrShadowOffset: &OrShadowOffset); |
958 | |
959 | StringRef Name = AccessInfo.IsWrite ? "store" : "load" ; |
960 | StringRef Op = OrShadowOffset ? "or" : "add" ; |
961 | std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + |
962 | Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + |
963 | TM.getMCRegisterInfo()->getName(RegNo: Reg.asMCReg())) |
964 | .str(); |
965 | if (OrShadowOffset) |
966 | report_fatal_error( |
967 | reason: "OrShadowOffset is not supported with optimized callbacks" ); |
968 | |
969 | EmitAndCountInstruction( |
970 | Inst&: MCInstBuilder(X86::CALL64pcrel32) |
971 | .addExpr(Val: MCSymbolRefExpr::create( |
972 | Symbol: OutContext.getOrCreateSymbol(Name: SymName), Ctx&: OutContext))); |
973 | } |
974 | |
975 | void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, |
976 | X86MCInstLower &MCIL) { |
977 | // PATCHABLE_OP minsize |
978 | |
979 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
980 | |
981 | auto NextMI = std::find_if(first: std::next(x: MI.getIterator()), |
982 | last: MI.getParent()->end().getInstrIterator(), |
983 | pred: [](auto &II) { return !II.isMetaInstruction(); }); |
984 | |
985 | SmallString<256> Code; |
986 | unsigned MinSize = MI.getOperand(i: 0).getImm(); |
987 | |
988 | if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) { |
989 | // Lower the next MachineInstr to find its byte size. |
990 | // If the next instruction is inline assembly, we skip lowering it for now, |
991 | // and assume we should always generate NOPs. |
992 | MCInst MCI; |
993 | MCIL.Lower(MI: &*NextMI, OutMI&: MCI); |
994 | |
995 | SmallVector<MCFixup, 4> Fixups; |
996 | CodeEmitter->encodeInstruction(Inst: MCI, CB&: Code, Fixups, STI: getSubtargetInfo()); |
997 | } |
998 | |
999 | if (Code.size() < MinSize) { |
1000 | if (MinSize == 2 && Subtarget->is32Bit() && |
1001 | Subtarget->isTargetWindowsMSVC() && |
1002 | (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3" )) { |
1003 | // For compatibility reasons, when targeting MSVC, it is important to |
1004 | // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some tools |
1005 | // rely specifically on this pattern to be able to patch a function. |
1006 | // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE. |
1007 | OutStreamer->emitInstruction( |
1008 | Inst: MCInstBuilder(X86::MOV32rr_REV).addReg(Reg: X86::EDI).addReg(Reg: X86::EDI), |
1009 | STI: *Subtarget); |
1010 | } else { |
1011 | unsigned NopSize = emitNop(OS&: *OutStreamer, NumBytes: MinSize, Subtarget); |
1012 | assert(NopSize == MinSize && "Could not implement MinSize!" ); |
1013 | (void)NopSize; |
1014 | } |
1015 | } |
1016 | } |
1017 | |
1018 | // Lower a stackmap of the form: |
1019 | // <id>, <shadowBytes>, ... |
1020 | void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { |
1021 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
1022 | |
1023 | auto &Ctx = OutStreamer->getContext(); |
1024 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
1025 | OutStreamer->emitLabel(Symbol: MILabel); |
1026 | |
1027 | SM.recordStackMap(L: *MILabel, MI); |
1028 | unsigned NumShadowBytes = MI.getOperand(i: 1).getImm(); |
1029 | SMShadowTracker.reset(RequiredSize: NumShadowBytes); |
1030 | } |
1031 | |
1032 | // Lower a patchpoint of the form: |
1033 | // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... |
1034 | void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, |
1035 | X86MCInstLower &MCIL) { |
1036 | assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64" ); |
1037 | |
1038 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
1039 | |
1040 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1041 | |
1042 | auto &Ctx = OutStreamer->getContext(); |
1043 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
1044 | OutStreamer->emitLabel(Symbol: MILabel); |
1045 | SM.recordPatchPoint(L: *MILabel, MI); |
1046 | |
1047 | PatchPointOpers opers(&MI); |
1048 | unsigned ScratchIdx = opers.getNextScratchIdx(); |
1049 | unsigned EncodedBytes = 0; |
1050 | const MachineOperand &CalleeMO = opers.getCallTarget(); |
1051 | |
1052 | // Check for null target. If target is non-null (i.e. is non-zero or is |
1053 | // symbolic) then emit a call. |
1054 | if (!(CalleeMO.isImm() && !CalleeMO.getImm())) { |
1055 | MCOperand CalleeMCOp; |
1056 | switch (CalleeMO.getType()) { |
1057 | default: |
1058 | /// FIXME: Add a verifier check for bad callee types. |
1059 | llvm_unreachable("Unrecognized callee operand type." ); |
1060 | case MachineOperand::MO_Immediate: |
1061 | if (CalleeMO.getImm()) |
1062 | CalleeMCOp = MCOperand::createImm(Val: CalleeMO.getImm()); |
1063 | break; |
1064 | case MachineOperand::MO_ExternalSymbol: |
1065 | case MachineOperand::MO_GlobalAddress: |
1066 | CalleeMCOp = MCIL.LowerSymbolOperand(MO: CalleeMO, |
1067 | Sym: MCIL.GetSymbolFromOperand(MO: CalleeMO)); |
1068 | break; |
1069 | } |
1070 | |
1071 | // Emit MOV to materialize the target address and the CALL to target. |
1072 | // This is encoded with 12-13 bytes, depending on which register is used. |
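// Illustrative example: if the scratch register is %r11, the emitted pair is
// "movabsq $<target>, %r11" (10 bytes) followed by "callq *%r11" (3 bytes),
// and the remaining patch bytes are filled with NOPs below.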
1073 | Register ScratchReg = MI.getOperand(i: ScratchIdx).getReg(); |
1074 | if (X86II::isX86_64ExtendedReg(Reg: ScratchReg)) |
1075 | EncodedBytes = 13; |
1076 | else |
1077 | EncodedBytes = 12; |
1078 | |
1079 | EmitAndCountInstruction( |
1080 | Inst&: MCInstBuilder(X86::MOV64ri).addReg(Reg: ScratchReg).addOperand(Op: CalleeMCOp)); |
1081 | // FIXME: Add retpoline support and remove this. |
1082 | if (Subtarget->useIndirectThunkCalls()) |
1083 | report_fatal_error( |
1084 | reason: "Lowering patchpoint with thunks not yet implemented." ); |
1085 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL64r).addReg(Reg: ScratchReg)); |
1086 | } |
1087 | |
1088 | // Emit padding. |
1089 | unsigned NumBytes = opers.getNumPatchBytes(); |
1090 | assert(NumBytes >= EncodedBytes && |
1091 | "Patchpoint can't request size less than the length of a call." ); |
1092 | |
1093 | emitX86Nops(OS&: *OutStreamer, NumBytes: NumBytes - EncodedBytes, Subtarget); |
1094 | } |
1095 | |
1096 | void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, |
1097 | X86MCInstLower &MCIL) { |
1098 | assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64" ); |
1099 | |
1100 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1101 | |
1102 | // We want to emit the following pattern, which follows the x86 calling |
1103 | // convention to prepare for the trampoline call to be patched in. |
1104 | // |
1105 | // .p2align 1, ... |
1106 | // .Lxray_event_sled_N: |
1107 | // jmp +N // jump across the instrumentation sled |
1108 | // ... // set up arguments in register |
1109 | // callq __xray_CustomEvent@plt // force dependency to symbol |
1110 | // ... |
1111 | // <jump here> |
1112 | // |
1113 | // After patching, it would look something like: |
1114 | // |
1115 | // nopw (2-byte nop) |
1116 | // ... |
1117 | // callq __xrayCustomEvent // already lowered |
1118 | // ... |
1119 | // |
1120 | // --- |
1121 | // First we emit the label and the jump. |
1122 | auto CurSled = OutContext.createTempSymbol(Name: "xray_event_sled_" , AlwaysAddSuffix: true); |
1123 | OutStreamer->AddComment(T: "# XRay Custom Event Log" ); |
1124 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1125 | OutStreamer->emitLabel(Symbol: CurSled); |
1126 | |
1127 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1128 | // an operand (computed as an offset from the jmp instruction). |
1129 | // FIXME: Find another less hacky way to force the relative jump. |
1130 | OutStreamer->emitBinaryData(Data: "\xeb\x0f" ); |
1131 | |
1132 | // The trampoline expects its two arguments in %rdi and %rsi (the SysV C |
1133 | // calling convention) -- so we only work with those. |
1134 | const Register DestRegs[] = {X86::RDI, X86::RSI}; |
1135 | bool UsedMask[] = {false, false}; |
1136 | // Filled out in loop. |
1137 | Register SrcRegs[] = {0, 0}; |
1138 | |
1139 | // Then we put the operands in the %rdi and %rsi registers. We spill the |
1140 | // values in the registers before we clobber them, and mark them as used in |
1141 | // UsedMask. In case the arguments are already in the correct registers, we |
1142 | // emit nops appropriately sized to keep the sled the same size in every |
1143 | // situation. |
1144 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1145 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I)); |
1146 | Op.isValid()) { |
1147 | assert(Op.isReg() && "Only support arguments in registers" ); |
1148 | SrcRegs[I] = getX86SubSuperRegister(Reg: Op.getReg(), Size: 64); |
1149 | assert(SrcRegs[I].isValid() && "Invalid operand" ); |
1150 | if (SrcRegs[I] != DestRegs[I]) { |
1151 | UsedMask[I] = true; |
1152 | EmitAndCountInstruction( |
1153 | Inst&: MCInstBuilder(X86::PUSH64r).addReg(Reg: DestRegs[I])); |
1154 | } else { |
1155 | emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget); |
1156 | } |
1157 | } |
1158 | |
1159 | // Now that the register values are stashed, mov arguments into place. |
1160 | // FIXME: This doesn't work if one of the later SrcRegs is equal to an |
1161 | // earlier DestReg. We will have already overwritten the register before |
1162 | // we can copy from it. |
1163 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1164 | if (SrcRegs[I] != DestRegs[I]) |
1165 | EmitAndCountInstruction( |
1166 | Inst&: MCInstBuilder(X86::MOV64rr).addReg(Reg: DestRegs[I]).addReg(Reg: SrcRegs[I])); |
1167 | |
1168 | // We emit a hard dependency on the __xray_CustomEvent symbol, which is the |
1169 | // name of the trampoline to be implemented by the XRay runtime. |
1170 | auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_CustomEvent" ); |
1171 | MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym); |
1172 | if (isPositionIndependent()) |
1173 | TOp.setTargetFlags(X86II::MO_PLT); |
1174 | |
1175 | // Emit the call instruction. |
1176 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL64pcrel32) |
1177 | .addOperand(Op: MCIL.LowerSymbolOperand(MO: TOp, Sym: TSym))); |
1178 | |
1179 | // Restore caller-saved and used registers. |
1180 | for (unsigned I = sizeof UsedMask; I-- > 0;) |
1181 | if (UsedMask[I]) |
1182 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::POP64r).addReg(Reg: DestRegs[I])); |
1183 | else |
1184 | emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget); |
1185 | |
1186 | OutStreamer->AddComment(T: "xray custom event end." ); |
1187 | |
1188 | // Record the sled version. Version 0 of this sled was spelled differently, so |
1189 | // we let the runtime handle the different offsets we're using. Version 2 |
1190 | // changed the absolute address to a PC-relative address. |
1191 | recordSled(Sled: CurSled, MI, Kind: SledKind::CUSTOM_EVENT, Version: 2); |
1192 | } |
1193 | |
1194 | void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, |
1195 | X86MCInstLower &MCIL) { |
1196 | assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64" ); |
1197 | |
1198 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1199 | |
1200 | // We want to emit the following pattern, which follows the x86 calling |
1201 | // convention to prepare for the trampoline call to be patched in. |
1202 | // |
1203 | // .p2align 1, ... |
1204 | // .Lxray_event_sled_N: |
1205 | // jmp +N // jump across the instrumentation sled |
1206 | // ... // set up arguments in register |
1207 | // callq __xray_TypedEvent@plt // force dependency to symbol |
1208 | // ... |
1209 | // <jump here> |
1210 | // |
1211 | // After patching, it would look something like: |
1212 | // |
1213 | // nopw (2-byte nop) |
1214 | // ... |
1215 | // callq __xrayTypedEvent // already lowered |
1216 | // ... |
1217 | // |
1218 | // --- |
1219 | // First we emit the label and the jump. |
1220 | auto CurSled = OutContext.createTempSymbol(Name: "xray_typed_event_sled_" , AlwaysAddSuffix: true); |
1221 | OutStreamer->AddComment(T: "# XRay Typed Event Log" ); |
1222 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1223 | OutStreamer->emitLabel(Symbol: CurSled); |
1224 | |
1225 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1226 | // an operand (computed as an offset from the jmp instruction). |
1227 | // FIXME: Find another less hacky way to force the relative jump. |
1228 | OutStreamer->emitBinaryData(Data: "\xeb\x14" ); |
1229 | |
1230 | // A Win64-style convention would place the three arguments in %rcx, %rdx, |
1231 | // and %r8. Under the SysV convention they are already in %rdi, %rsi, and |
1232 | // %rdx, in which case we don't have to do any translation. |
1233 | const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX}; |
1234 | bool UsedMask[] = {false, false, false}; |
1235 | |
1236 | // Will fill out src regs in the loop. |
1237 | Register SrcRegs[] = {0, 0, 0}; |
1238 | |
1239 | // Then we put the operands in the SystemV registers. We spill the values in |
1240 | // the registers before we clobber them, and mark them as used in UsedMask. |
1241 | // In case the arguments are already in the correct register, we emit nops |
1242 | // appropriately sized to keep the sled the same size in every situation. |
1243 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1244 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I)); |
1245 | Op.isValid()) { |
1246 | // TODO: Is register-only support adequate? |
1247 | assert(Op.isReg() && "Only supports arguments in registers" ); |
1248 | SrcRegs[I] = getX86SubSuperRegister(Reg: Op.getReg(), Size: 64); |
1249 | assert(SrcRegs[I].isValid() && "Invalid operand" ); |
1250 | if (SrcRegs[I] != DestRegs[I]) { |
1251 | UsedMask[I] = true; |
1252 | EmitAndCountInstruction( |
1253 | Inst&: MCInstBuilder(X86::PUSH64r).addReg(Reg: DestRegs[I])); |
1254 | } else { |
1255 | emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget); |
1256 | } |
1257 | } |
1258 | |
1259 | // In the above loop we only stash all of the destination registers or emit |
1260 | // nops if the arguments are already in the right place. Doing the actual |
1261 | // moving is postponed until after all the registers are stashed so nothing |
1262 | // is clobbered. We've already added nops to account for the size of mov and |
1263 | // push if the register is in the right place, so we only have to worry about |
1264 | // emitting movs. |
1265 | // FIXME: This doesn't work if one of the later SrcRegs is equal to an |
1266 | // earlier DestReg. We will have already overwritten the register before |
1267 | // we can copy from it. |
1268 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1269 | if (UsedMask[I]) |
1270 | EmitAndCountInstruction( |
1271 | Inst&: MCInstBuilder(X86::MOV64rr).addReg(Reg: DestRegs[I]).addReg(Reg: SrcRegs[I])); |
1272 | |
1273 | // We emit a hard dependency on the __xray_TypedEvent symbol, which is the |
1274 | // name of the trampoline to be implemented by the XRay runtime. |
1275 | auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_TypedEvent" ); |
1276 | MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym); |
1277 | if (isPositionIndependent()) |
1278 | TOp.setTargetFlags(X86II::MO_PLT); |
1279 | |
1280 | // Emit the call instruction. |
1281 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL64pcrel32) |
1282 | .addOperand(Op: MCIL.LowerSymbolOperand(MO: TOp, Sym: TSym))); |
1283 | |
1284 | // Restore caller-saved and used registers. |
1285 | for (unsigned I = sizeof UsedMask; I-- > 0;) |
1286 | if (UsedMask[I]) |
1287 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::POP64r).addReg(Reg: DestRegs[I])); |
1288 | else |
1289 | emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget); |
1290 | |
1291 | OutStreamer->AddComment(T: "xray typed event end." ); |
1292 | |
1293 | // Record the sled version. |
1294 | recordSled(Sled: CurSled, MI, Kind: SledKind::TYPED_EVENT, Version: 2); |
1295 | } |
1296 | |
1297 | void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, |
1298 | X86MCInstLower &MCIL) { |
1299 | |
1300 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1301 | |
1302 | const Function &F = MF->getFunction(); |
1303 | if (F.hasFnAttribute(Kind: "patchable-function-entry" )) { |
1304 | unsigned Num; |
1305 | if (F.getFnAttribute(Kind: "patchable-function-entry" ) |
1306 | .getValueAsString() |
1307 | .getAsInteger(Radix: 10, Result&: Num)) |
1308 | return; |
1309 | emitX86Nops(OS&: *OutStreamer, NumBytes: Num, Subtarget); |
1310 | return; |
1311 | } |
1312 | // We want to emit the following pattern: |
1313 | // |
1314 | // .p2align 1, ... |
1315 | // .Lxray_sled_N: |
1316 | // jmp .tmpN |
1317 | // # 9 bytes worth of noops |
1318 | // |
1319 | // We need the 9 bytes because at runtime, we'd be patching over the full 11 |
1320 | // bytes with the following pattern: |
1321 | // |
1322 | // mov %r10, <function id, 32-bit> // 6 bytes |
1323 | // call <relative offset, 32-bits> // 5 bytes |
1324 | // |
1325 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1326 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1327 | OutStreamer->emitLabel(Symbol: CurSled); |
1328 | |
1329 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1330 | // an operand (computed as an offset from the jmp instruction). |
1331 | // FIXME: Find another less hacky way to force the relative jump. |
1332 | OutStreamer->emitBytes(Data: "\xeb\x09" ); |
1333 | emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget); |
1334 | recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_ENTER, Version: 2); |
1335 | } |
1336 | |
1337 | void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, |
1338 | X86MCInstLower &MCIL) { |
1339 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1340 | |
1341 | // Since PATCHABLE_RET takes the opcode of the return statement as an |
1342 | // argument, we use that to emit the correct form of the RET that we want. |
1343 | // i.e. when we see this: |
1344 | // |
1345 | // PATCHABLE_RET X86::RET ... |
1346 | // |
1347 | // We should emit the RET followed by sleds. |
1348 | // |
1349 | // .p2align 1, ... |
1350 | // .Lxray_sled_N: |
1351 | // ret # or equivalent instruction |
1352 | // # 10 bytes worth of noops |
1353 | // |
1354 | // This just makes sure that the alignment for the next instruction is 2. |
1355 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1356 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1357 | OutStreamer->emitLabel(Symbol: CurSled); |
1358 | unsigned OpCode = MI.getOperand(i: 0).getImm(); |
1359 | MCInst Ret; |
1360 | Ret.setOpcode(OpCode); |
1361 | for (auto &MO : drop_begin(RangeOrContainer: MI.operands())) |
1362 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO); Op.isValid()) |
1363 | Ret.addOperand(Op); |
1364 | OutStreamer->emitInstruction(Inst: Ret, STI: getSubtargetInfo()); |
1365 | emitX86Nops(OS&: *OutStreamer, NumBytes: 10, Subtarget); |
1366 | recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_EXIT, Version: 2); |
1367 | } |
1368 | |
1369 | void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, |
1370 | X86MCInstLower &MCIL) { |
1371 | MCInst TC; |
1372 | TC.setOpcode(convertTailJumpOpcode(Opcode: MI.getOperand(i: 0).getImm())); |
1373 | // Drop the tail jump opcode. |
1374 | auto TCOperands = drop_begin(RangeOrContainer: MI.operands()); |
1375 | bool IsConditional = TC.getOpcode() == X86::JCC_1; |
1376 | MCSymbol *FallthroughLabel; |
1377 | if (IsConditional) { |
1378 | // Rewrite: |
1379 | // je target |
1380 | // |
1381 | // To: |
1382 | // jne .fallthrough |
1383 | // .p2align 1, ... |
1384 | // .Lxray_sled_N: |
1385 | // SLED_CODE |
1386 | // jmp target |
1387 | // .fallthrough: |
1388 | FallthroughLabel = OutContext.createTempSymbol(); |
1389 | EmitToStreamer( |
1390 | S&: *OutStreamer, |
1391 | Inst: MCInstBuilder(X86::JCC_1) |
1392 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: FallthroughLabel, Ctx&: OutContext)) |
1393 | .addImm(Val: X86::GetOppositeBranchCondition( |
1394 | CC: static_cast<X86::CondCode>(MI.getOperand(i: 2).getImm())))); |
1395 | TC.setOpcode(X86::JMP_1); |
1396 | // Drop the condition code. |
1397 | TCOperands = drop_end(RangeOrContainer&: TCOperands); |
1398 | } |
1399 | |
1400 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1401 | |
1402 | // Like PATCHABLE_RET, we have the actual instruction in the operands to this |
1403 | // instruction so we lower that particular instruction and its operands. |
1404 | // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how |
1405 | // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to |
1406 | // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual |
1407 | // tail call much like how we have it in PATCHABLE_RET. |
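     | // For the unconditional case, the code below therefore emits roughly the |
     | // following (illustrative; the temporary label names are placeholders): |
     | // |
     | //   .p2align 1, ... |
     | //   .Lxray_sled_N: |
     | //     jmp .tmpN                # 2-byte jump over the 9 nop bytes |
     | //     # 9 bytes worth of noops |
     | //   .tmpN: |
     | //     jmp <tail call target>   # annotated with a TAILCALL comment |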
1408 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1409 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1410 | OutStreamer->emitLabel(Symbol: CurSled); |
1411 | auto Target = OutContext.createTempSymbol(); |
1412 | |
1413 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1414 | // an operand (computed as an offset from the jmp instruction). |
1415 | // FIXME: Find another, less hacky way to force the relative jump. |
1416 | OutStreamer->emitBytes(Data: "\xeb\x09" ); |
1417 | emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget); |
1418 | OutStreamer->emitLabel(Symbol: Target); |
1419 | recordSled(Sled: CurSled, MI, Kind: SledKind::TAIL_CALL, Version: 2); |
1420 | |
1421 | // Before emitting the instruction, add a comment to indicate that this is |
1422 | // indeed a tail call. |
1423 | OutStreamer->AddComment(T: "TAILCALL" ); |
1424 | for (auto &MO : TCOperands) |
1425 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO); Op.isValid()) |
1426 | TC.addOperand(Op); |
1427 | OutStreamer->emitInstruction(Inst: TC, STI: getSubtargetInfo()); |
1428 | |
1429 | if (IsConditional) |
1430 | OutStreamer->emitLabel(Symbol: FallthroughLabel); |
1431 | } |
1432 | |
1433 | // Returns instruction preceding MBBI in MachineFunction. |
1434 | // If MBBI is the first instruction of the first basic block, returns null. |
1435 | static MachineBasicBlock::const_iterator |
1436 | PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { |
1437 | const MachineBasicBlock *MBB = MBBI->getParent(); |
1438 | while (MBBI == MBB->begin()) { |
1439 | if (MBB == &MBB->getParent()->front()) |
1440 | return MachineBasicBlock::const_iterator(); |
1441 | MBB = MBB->getPrevNode(); |
1442 | MBBI = MBB->end(); |
1443 | } |
1444 | --MBBI; |
1445 | return MBBI; |
1446 | } |
1447 | |
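     | // Return the index of the first real source operand, stepping over the |
     | // AVX512 write-mask operand (and, for merge masking, the passthru operand) |
     | // when one is present. |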
1448 | static unsigned getSrcIdx(const MachineInstr* MI, unsigned SrcIdx) { |
1449 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1450 | // Skip mask operand. |
1451 | ++SrcIdx; |
1452 | if (X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1453 | // Skip passthru operand. |
1454 | ++SrcIdx; |
1455 | } |
1456 | } |
1457 | return SrcIdx; |
1458 | } |
1459 | |
1460 | static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI, |
1461 | unsigned SrcOpIdx) { |
1462 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1463 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()); |
1464 | |
1465 | // Handle AVX512 MASK/MASKZ write mask comments. |
1466 | // MASK: zmmX {%kY} |
1467 | // MASKZ: zmmX {%kY} {z} |
1468 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1469 | const MachineOperand &WriteMaskOp = MI->getOperand(i: SrcOpIdx - 1); |
1470 | StringRef Mask = X86ATTInstPrinter::getRegisterName(Reg: WriteMaskOp.getReg()); |
1471 | CS << " {%" << Mask << "}" ; |
1472 | if (!X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1473 | CS << " {z}" ; |
1474 | } |
1475 | } |
1476 | } |
1477 | |
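     | // Print a decoded shuffle mask as an asm comment. Elements are grouped into |
     | // per-source spans, e.g. (illustrative) "xmm1[0,u],zero,mem[2,3]", where |
     | // "zero" marks zeroed lanes and "u" marks undef lanes. |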
1478 | static void printShuffleMask(raw_ostream &CS, StringRef Src1Name, |
1479 | StringRef Src2Name, ArrayRef<int> Mask) { |
1480 | // One source operand, fix the mask to print all elements in one span. |
1481 | SmallVector<int, 8> ShuffleMask(Mask); |
1482 | if (Src1Name == Src2Name) |
1483 | for (int i = 0, e = ShuffleMask.size(); i != e; ++i) |
1484 | if (ShuffleMask[i] >= e) |
1485 | ShuffleMask[i] -= e; |
1486 | |
1487 | for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { |
1488 | if (i != 0) |
1489 | CS << "," ; |
1490 | if (ShuffleMask[i] == SM_SentinelZero) { |
1491 | CS << "zero" ; |
1492 | continue; |
1493 | } |
1494 | |
1495 | // Otherwise, it must come from src1 or src2. Print the span of elements |
1496 | // that comes from this src. |
1497 | bool isSrc1 = ShuffleMask[i] < (int)e; |
1498 | CS << (isSrc1 ? Src1Name : Src2Name) << '['; |
1499 | |
1500 | bool IsFirst = true; |
1501 | while (i != e && ShuffleMask[i] != SM_SentinelZero && |
1502 | (ShuffleMask[i] < (int)e) == isSrc1) { |
1503 | if (!IsFirst) |
1504 | CS << ','; |
1505 | else |
1506 | IsFirst = false; |
1507 | if (ShuffleMask[i] == SM_SentinelUndef) |
1508 | CS << "u" ; |
1509 | else |
1510 | CS << ShuffleMask[i] % (int)e; |
1511 | ++i; |
1512 | } |
1513 | CS << ']'; |
1514 | --i; // For loop increments element #. |
1515 | } |
1516 | } |
1517 | |
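     | // Build the "<dst> = <decoded shuffle>" comment string for a shuffle |
     | // instruction. Register sources are printed by name; memory sources are |
     | // printed as "mem". |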
1518 | static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx, |
1519 | unsigned SrcOp2Idx, ArrayRef<int> Mask) { |
1520 | std::string Comment; |
1521 | |
1522 | const MachineOperand &SrcOp1 = MI->getOperand(i: SrcOp1Idx); |
1523 | const MachineOperand &SrcOp2 = MI->getOperand(i: SrcOp2Idx); |
1524 | StringRef Src1Name = SrcOp1.isReg() |
1525 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp1.getReg()) |
1526 | : "mem" ; |
1527 | StringRef Src2Name = SrcOp2.isReg() |
1528 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp2.getReg()) |
1529 | : "mem" ; |
1530 | |
1531 | raw_string_ostream CS(Comment); |
1532 | printDstRegisterName(CS, MI, SrcOpIdx: SrcOp1Idx); |
1533 | CS << " = " ; |
1534 | printShuffleMask(CS, Src1Name, Src2Name, Mask); |
1535 | |
1536 | return Comment; |
1537 | } |
1538 | |
1539 | static void printConstant(const APInt &Val, raw_ostream &CS, |
1540 | bool PrintZero = false) { |
1541 | if (Val.getBitWidth() <= 64) { |
1542 | CS << (PrintZero ? 0ULL : Val.getZExtValue()); |
1543 | } else { |
1544 | // print multi-word constant as (w0,w1) |
1545 | CS << "(" ; |
1546 | for (int i = 0, N = Val.getNumWords(); i < N; ++i) { |
1547 | if (i > 0) |
1548 | CS << "," ; |
1549 | CS << (PrintZero ? 0ULL : Val.getRawData()[i]); |
1550 | } |
1551 | CS << ")" ; |
1552 | } |
1553 | } |
1554 | |
1555 | static void printConstant(const APFloat &Flt, raw_ostream &CS, |
1556 | bool PrintZero = false) { |
1557 | SmallString<32> Str; |
1558 | // Force scientific notation to distinguish from integers. |
1559 | if (PrintZero) |
1560 | APFloat::getZero(Sem: Flt.getSemantics()).toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1561 | else |
1562 | Flt.toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1563 | CS << Str; |
1564 | } |
1565 | |
1566 | static void printConstant(const Constant *COp, unsigned BitWidth, |
1567 | raw_ostream &CS, bool PrintZero = false) { |
1568 | if (isa<UndefValue>(Val: COp)) { |
1569 | CS << "u" ; |
1570 | } else if (auto *CI = dyn_cast<ConstantInt>(Val: COp)) { |
1571 | if (auto VTy = dyn_cast<FixedVectorType>(Val: CI->getType())) { |
1572 | for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { |
1573 | if (I != 0) |
1574 | CS << ','; |
1575 | printConstant(Val: CI->getValue(), CS, PrintZero); |
1576 | } |
1577 | } else |
1578 | printConstant(Val: CI->getValue(), CS, PrintZero); |
1579 | } else if (auto *CF = dyn_cast<ConstantFP>(Val: COp)) { |
1580 | if (auto VTy = dyn_cast<FixedVectorType>(Val: CF->getType())) { |
1581 | for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { |
1582 | if (I != 0) |
1583 | CS << ','; |
1584 | printConstant(Flt: CF->getValueAPF(), CS, PrintZero); |
1585 | } |
1586 | } else |
1587 | printConstant(Flt: CF->getValueAPF(), CS, PrintZero); |
1588 | } else if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: COp)) { |
1589 | Type *EltTy = CDS->getElementType(); |
1590 | bool IsInteger = EltTy->isIntegerTy(); |
1591 | bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); |
1592 | unsigned EltBits = EltTy->getPrimitiveSizeInBits(); |
1593 | unsigned E = std::min(a: BitWidth / EltBits, b: (unsigned)CDS->getNumElements()); |
1594 | if ((BitWidth % EltBits) == 0) { |
1595 | for (unsigned I = 0; I != E; ++I) { |
1596 | if (I != 0) |
1597 | CS << "," ; |
1598 | if (IsInteger) |
1599 | printConstant(Val: CDS->getElementAsAPInt(i: I), CS, PrintZero); |
1600 | else if (IsFP) |
1601 | printConstant(Flt: CDS->getElementAsAPFloat(i: I), CS, PrintZero); |
1602 | else |
1603 | CS << "?" ; |
1604 | } |
1605 | } else { |
1606 | CS << "?" ; |
1607 | } |
1608 | } else if (auto *CV = dyn_cast<ConstantVector>(Val: COp)) { |
1609 | unsigned EltBits = CV->getType()->getScalarSizeInBits(); |
1610 | unsigned E = std::min(a: BitWidth / EltBits, b: CV->getNumOperands()); |
1611 | if ((BitWidth % EltBits) == 0) { |
1612 | for (unsigned I = 0; I != E; ++I) { |
1613 | if (I != 0) |
1614 | CS << "," ; |
1615 | printConstant(COp: CV->getOperand(i_nocapture: I), BitWidth: EltBits, CS, PrintZero); |
1616 | } |
1617 | } else { |
1618 | CS << "?" ; |
1619 | } |
1620 | } else { |
1621 | CS << "?" ; |
1622 | } |
1623 | } |
1624 | |
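     | // Emit an asm comment for scalar loads that zero the upper vector elements. |
     | // If the source is a constant-pool load, print the loaded value followed by |
     | // explicit zeros for the upper elements; otherwise fall back to the supplied |
     | // shuffle-style comment string. |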
1625 | static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, |
1626 | int SclWidth, int VecWidth, |
1627 | const char *ShuffleComment) { |
1628 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1629 | |
1630 | std::string Comment; |
1631 | raw_string_ostream CS(Comment); |
1632 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1633 | CS << " = " ; |
1634 | |
1635 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1636 | CS << "[" ; |
1637 | printConstant(COp: C, BitWidth: SclWidth, CS); |
1638 | for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) { |
1639 | CS << "," ; |
1640 | printConstant(COp: C, BitWidth: SclWidth, CS, PrintZero: true); |
1641 | } |
1642 | CS << "]" ; |
1643 | OutStreamer.AddComment(T: CS.str()); |
1644 | return; // early-out |
1645 | } |
1646 | |
1647 | // We didn't find a constant load; fall back to a shuffle mask decode. |
1648 | CS << ShuffleComment; |
1649 | OutStreamer.AddComment(T: CS.str()); |
1650 | } |
1651 | |
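     | // If the source is a constant-pool load, print the constant repeated |
     | // 'Repeats' times, e.g. (illustrative) "ymm0 = [1,2,1,2]" for a 2 x 128-bit |
     | // broadcast of <2 x i64> <1, 2>. |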
1652 | static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, |
1653 | int Repeats, int BitWidth) { |
1654 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1655 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1656 | std::string Comment; |
1657 | raw_string_ostream CS(Comment); |
1658 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1659 | CS << " = [" ; |
1660 | for (int l = 0; l != Repeats; ++l) { |
1661 | if (l != 0) |
1662 | CS << "," ; |
1663 | printConstant(COp: C, BitWidth, CS); |
1664 | } |
1665 | CS << "]" ; |
1666 | OutStreamer.AddComment(T: CS.str()); |
1667 | } |
1668 | } |
1669 | |
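     | // For sign/zero-extending loads of a constant-pool vector, print the |
     | // per-element extended values. Returns false if no suitable constant was |
     | // found, so the caller can fall back to another comment form. |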
1670 | static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1671 | int SrcEltBits, int DstEltBits, bool IsSext) { |
1672 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1673 | auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx); |
1674 | if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) { |
1675 | if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: C)) { |
1676 | int NumElts = CDS->getNumElements(); |
1677 | std::string Comment; |
1678 | raw_string_ostream CS(Comment); |
1679 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1680 | CS << " = [" ; |
1681 | for (int i = 0; i != NumElts; ++i) { |
1682 | if (i != 0) |
1683 | CS << "," ; |
1684 | if (CDS->getElementType()->isIntegerTy()) { |
1685 | APInt Elt = CDS->getElementAsAPInt(i); |
1686 | Elt = IsSext ? Elt.sext(width: DstEltBits) : Elt.zext(width: DstEltBits); |
1687 | printConstant(Val: Elt, CS); |
1688 | } else |
1689 | CS << "?" ; |
1690 | } |
1691 | CS << "]" ; |
1692 | OutStreamer.AddComment(T: CS.str()); |
1693 | return true; |
1694 | } |
1695 | } |
1696 | |
1697 | return false; |
1698 | } |
1699 | static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1700 | int SrcEltBits, int DstEltBits) { |
1701 | printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: true); |
1702 | } |
1703 | static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1704 | int SrcEltBits, int DstEltBits) { |
1705 | if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: false)) |
1706 | return; |
1707 | |
1708 | // We didn't find a constant load; fall back to a shuffle mask decode. |
1709 | std::string Comment; |
1710 | raw_string_ostream CS(Comment); |
1711 | printDstRegisterName(CS, MI, SrcOpIdx: getSrcIdx(MI, SrcIdx: 1)); |
1712 | CS << " = " ; |
1713 | |
1714 | SmallVector<int> Mask; |
1715 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1716 | assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 && |
1717 | "Illegal extension ratio" ); |
1718 | DecodeZeroExtendMask(SrcScalarBits: SrcEltBits, DstScalarBits: DstEltBits, NumDstElts: Width / DstEltBits, IsAnyExtend: false, ShuffleMask&: Mask); |
1719 | printShuffleMask(CS, Src1Name: "mem" , Src2Name: "" , Mask); |
1720 | |
1721 | OutStreamer.AddComment(T: CS.str()); |
1722 | } |
1723 | |
1724 | void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { |
1725 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
1726 | assert((getSubtarget().isOSWindows() || getSubtarget().isUEFI()) && |
1727 | "SEH_ instructions are Windows and UEFI only" ); |
1728 | |
1729 | // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. |
1730 | if (EmitFPOData) { |
1731 | X86TargetStreamer *XTS = |
1732 | static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); |
1733 | switch (MI->getOpcode()) { |
1734 | case X86::SEH_PushReg: |
1735 | XTS->emitFPOPushReg(Reg: MI->getOperand(i: 0).getImm()); |
1736 | break; |
1737 | case X86::SEH_StackAlloc: |
1738 | XTS->emitFPOStackAlloc(StackAlloc: MI->getOperand(i: 0).getImm()); |
1739 | break; |
1740 | case X86::SEH_StackAlign: |
1741 | XTS->emitFPOStackAlign(Align: MI->getOperand(i: 0).getImm()); |
1742 | break; |
1743 | case X86::SEH_SetFrame: |
1744 | assert(MI->getOperand(1).getImm() == 0 && |
1745 | ".cv_fpo_setframe takes no offset" ); |
1746 | XTS->emitFPOSetFrame(Reg: MI->getOperand(i: 0).getImm()); |
1747 | break; |
1748 | case X86::SEH_EndPrologue: |
1749 | XTS->emitFPOEndPrologue(); |
1750 | break; |
1751 | case X86::SEH_SaveReg: |
1752 | case X86::SEH_SaveXMM: |
1753 | case X86::SEH_PushFrame: |
1754 | llvm_unreachable("SEH_ directive incompatible with FPO" ); |
1755 | break; |
1756 | default: |
1757 | llvm_unreachable("expected SEH_ instruction" ); |
1758 | } |
1759 | return; |
1760 | } |
1761 | |
1762 | // Otherwise, use the .seh_ directives for all other Windows platforms. |
1763 | switch (MI->getOpcode()) { |
1764 | case X86::SEH_PushReg: |
1765 | OutStreamer->emitWinCFIPushReg(Register: MI->getOperand(i: 0).getImm()); |
1766 | break; |
1767 | |
1768 | case X86::SEH_SaveReg: |
1769 | OutStreamer->emitWinCFISaveReg(Register: MI->getOperand(i: 0).getImm(), |
1770 | Offset: MI->getOperand(i: 1).getImm()); |
1771 | break; |
1772 | |
1773 | case X86::SEH_SaveXMM: |
1774 | OutStreamer->emitWinCFISaveXMM(Register: MI->getOperand(i: 0).getImm(), |
1775 | Offset: MI->getOperand(i: 1).getImm()); |
1776 | break; |
1777 | |
1778 | case X86::SEH_StackAlloc: |
1779 | OutStreamer->emitWinCFIAllocStack(Size: MI->getOperand(i: 0).getImm()); |
1780 | break; |
1781 | |
1782 | case X86::SEH_SetFrame: |
1783 | OutStreamer->emitWinCFISetFrame(Register: MI->getOperand(i: 0).getImm(), |
1784 | Offset: MI->getOperand(i: 1).getImm()); |
1785 | break; |
1786 | |
1787 | case X86::SEH_PushFrame: |
1788 | OutStreamer->emitWinCFIPushFrame(Code: MI->getOperand(i: 0).getImm()); |
1789 | break; |
1790 | |
1791 | case X86::SEH_EndPrologue: |
1792 | OutStreamer->emitWinCFIEndProlog(); |
1793 | break; |
1794 | |
1795 | case X86::SEH_BeginEpilogue: |
1796 | OutStreamer->emitWinCFIBeginEpilogue(); |
1797 | break; |
1798 | |
1799 | case X86::SEH_EndEpilogue: |
1800 | OutStreamer->emitWinCFIEndEpilogue(); |
1801 | break; |
1802 | |
1803 | case X86::SEH_UnwindV2Start: |
1804 | OutStreamer->emitWinCFIUnwindV2Start(); |
1805 | break; |
1806 | |
1807 | case X86::SEH_UnwindVersion: |
1808 | OutStreamer->emitWinCFIUnwindVersion(Version: MI->getOperand(i: 0).getImm()); |
1809 | break; |
1810 | |
1811 | default: |
1812 | llvm_unreachable("expected SEH_ instruction" ); |
1813 | } |
1814 | } |
1815 | |
1816 | static void addConstantComments(const MachineInstr *MI, |
1817 | MCStreamer &OutStreamer) { |
1818 | switch (MI->getOpcode()) { |
1819 | // Lower PSHUFB and VPERMILP normally but add a comment if we can find |
1820 | // a constant shuffle mask. We won't be able to do this at the MC layer |
1821 | // because the mask isn't an immediate. |
1822 | case X86::PSHUFBrm: |
1823 | case X86::VPSHUFBrm: |
1824 | case X86::VPSHUFBYrm: |
1825 | case X86::VPSHUFBZ128rm: |
1826 | case X86::VPSHUFBZ128rmk: |
1827 | case X86::VPSHUFBZ128rmkz: |
1828 | case X86::VPSHUFBZ256rm: |
1829 | case X86::VPSHUFBZ256rmk: |
1830 | case X86::VPSHUFBZ256rmkz: |
1831 | case X86::VPSHUFBZrm: |
1832 | case X86::VPSHUFBZrmk: |
1833 | case X86::VPSHUFBZrmkz: { |
1834 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1835 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1836 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1837 | SmallVector<int, 64> Mask; |
1838 | DecodePSHUFBMask(C, Width, ShuffleMask&: Mask); |
1839 | if (!Mask.empty()) |
1840 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1841 | } |
1842 | break; |
1843 | } |
1844 | |
1845 | case X86::VPERMILPSrm: |
1846 | case X86::VPERMILPSYrm: |
1847 | case X86::VPERMILPSZ128rm: |
1848 | case X86::VPERMILPSZ128rmk: |
1849 | case X86::VPERMILPSZ128rmkz: |
1850 | case X86::VPERMILPSZ256rm: |
1851 | case X86::VPERMILPSZ256rmk: |
1852 | case X86::VPERMILPSZ256rmkz: |
1853 | case X86::VPERMILPSZrm: |
1854 | case X86::VPERMILPSZrmk: |
1855 | case X86::VPERMILPSZrmkz: { |
1856 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1857 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1858 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1859 | SmallVector<int, 16> Mask; |
1860 | DecodeVPERMILPMask(C, ElSize: 32, Width, ShuffleMask&: Mask); |
1861 | if (!Mask.empty()) |
1862 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1863 | } |
1864 | break; |
1865 | } |
1866 | case X86::VPERMILPDrm: |
1867 | case X86::VPERMILPDYrm: |
1868 | case X86::VPERMILPDZ128rm: |
1869 | case X86::VPERMILPDZ128rmk: |
1870 | case X86::VPERMILPDZ128rmkz: |
1871 | case X86::VPERMILPDZ256rm: |
1872 | case X86::VPERMILPDZ256rmk: |
1873 | case X86::VPERMILPDZ256rmkz: |
1874 | case X86::VPERMILPDZrm: |
1875 | case X86::VPERMILPDZrmk: |
1876 | case X86::VPERMILPDZrmkz: { |
1877 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1878 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1879 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1880 | SmallVector<int, 16> Mask; |
1881 | DecodeVPERMILPMask(C, ElSize: 64, Width, ShuffleMask&: Mask); |
1882 | if (!Mask.empty()) |
1883 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1884 | } |
1885 | break; |
1886 | } |
1887 | |
1888 | case X86::VPERMIL2PDrm: |
1889 | case X86::VPERMIL2PSrm: |
1890 | case X86::VPERMIL2PDYrm: |
1891 | case X86::VPERMIL2PSYrm: { |
1892 | assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && |
1893 | "Unexpected number of operands!" ); |
1894 | |
1895 | const MachineOperand &CtrlOp = MI->getOperand(i: MI->getNumOperands() - 1); |
1896 | if (!CtrlOp.isImm()) |
1897 | break; |
1898 | |
1899 | unsigned ElSize; |
1900 | switch (MI->getOpcode()) { |
1901 | default: llvm_unreachable("Invalid opcode" ); |
1902 | case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break; |
1903 | case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; |
1904 | } |
1905 | |
1906 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1907 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1908 | SmallVector<int, 16> Mask; |
1909 | DecodeVPERMIL2PMask(C, M2Z: (unsigned)CtrlOp.getImm(), ElSize, Width, ShuffleMask&: Mask); |
1910 | if (!Mask.empty()) |
1911 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1912 | } |
1913 | break; |
1914 | } |
1915 | |
1916 | case X86::VPPERMrrm: { |
1917 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1918 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1919 | SmallVector<int, 16> Mask; |
1920 | DecodeVPPERMMask(C, Width, ShuffleMask&: Mask); |
1921 | if (!Mask.empty()) |
1922 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1923 | } |
1924 | break; |
1925 | } |
1926 | |
1927 | case X86::MMX_MOVQ64rm: { |
1928 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 1)) { |
1929 | std::string Comment; |
1930 | raw_string_ostream CS(Comment); |
1931 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1932 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()) << " = " ; |
1933 | if (auto *CF = dyn_cast<ConstantFP>(Val: C)) { |
1934 | CS << "0x" << toString(I: CF->getValueAPF().bitcastToAPInt(), Radix: 16, Signed: false); |
1935 | OutStreamer.AddComment(T: CS.str()); |
1936 | } |
1937 | } |
1938 | break; |
1939 | } |
1940 | |
1941 | #define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \ |
1942 | case X86::Prefix##Instr##Suffix##rm##Postfix: |
1943 | |
1944 | #define CASE_ARITH_RM(Instr) \ |
1945 | INSTR_CASE(, Instr, , ) /* SSE */ \ |
1946 | INSTR_CASE(V, Instr, , ) /* AVX-128 */ \ |
1947 | INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \ |
1948 | INSTR_CASE(V, Instr, Z128, ) \ |
1949 | INSTR_CASE(V, Instr, Z128, k) \ |
1950 | INSTR_CASE(V, Instr, Z128, kz) \ |
1951 | INSTR_CASE(V, Instr, Z256, ) \ |
1952 | INSTR_CASE(V, Instr, Z256, k) \ |
1953 | INSTR_CASE(V, Instr, Z256, kz) \ |
1954 | INSTR_CASE(V, Instr, Z, ) \ |
1955 | INSTR_CASE(V, Instr, Z, k) \ |
1956 | INSTR_CASE(V, Instr, Z, kz) |
1957 | |
1958 | // TODO: Add additional instructions when useful. |
1959 | CASE_ARITH_RM(PMADDUBSW) { |
1960 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1961 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1962 | if (C->getType()->getScalarSizeInBits() == 8) { |
1963 | std::string Comment; |
1964 | raw_string_ostream CS(Comment); |
1965 | unsigned VectorWidth = |
1966 | X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1967 | CS << "[" ; |
1968 | printConstant(COp: C, BitWidth: VectorWidth, CS); |
1969 | CS << "]" ; |
1970 | OutStreamer.AddComment(T: CS.str()); |
1971 | } |
1972 | } |
1973 | break; |
1974 | } |
1975 | |
1976 | CASE_ARITH_RM(PMADDWD) |
1977 | CASE_ARITH_RM(PMULLW) |
1978 | CASE_ARITH_RM(PMULHW) |
1979 | CASE_ARITH_RM(PMULHUW) |
1980 | CASE_ARITH_RM(PMULHRSW) { |
1981 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1982 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1983 | if (C->getType()->getScalarSizeInBits() == 16) { |
1984 | std::string Comment; |
1985 | raw_string_ostream CS(Comment); |
1986 | unsigned VectorWidth = |
1987 | X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1988 | CS << "[" ; |
1989 | printConstant(COp: C, BitWidth: VectorWidth, CS); |
1990 | CS << "]" ; |
1991 | OutStreamer.AddComment(T: CS.str()); |
1992 | } |
1993 | } |
1994 | break; |
1995 | } |
1996 | |
1997 | #define MASK_AVX512_CASE(Instr) \ |
1998 | case Instr: \ |
1999 | case Instr##k: \ |
2000 | case Instr##kz: |
2001 | |
2002 | case X86::MOVSDrm: |
2003 | case X86::VMOVSDrm: |
2004 | MASK_AVX512_CASE(X86::VMOVSDZrm) |
2005 | case X86::MOVSDrm_alt: |
2006 | case X86::VMOVSDrm_alt: |
2007 | case X86::VMOVSDZrm_alt: |
2008 | case X86::MOVQI2PQIrm: |
2009 | case X86::VMOVQI2PQIrm: |
2010 | case X86::VMOVQI2PQIZrm: |
2011 | printZeroUpperMove(MI, OutStreamer, SclWidth: 64, VecWidth: 128, ShuffleComment: "mem[0],zero" ); |
2012 | break; |
2013 | |
2014 | MASK_AVX512_CASE(X86::VMOVSHZrm) |
2015 | case X86::VMOVSHZrm_alt: |
2016 | printZeroUpperMove(MI, OutStreamer, SclWidth: 16, VecWidth: 128, |
2017 | ShuffleComment: "mem[0],zero,zero,zero,zero,zero,zero,zero" ); |
2018 | break; |
2019 | |
2020 | case X86::MOVSSrm: |
2021 | case X86::VMOVSSrm: |
2022 | MASK_AVX512_CASE(X86::VMOVSSZrm) |
2023 | case X86::MOVSSrm_alt: |
2024 | case X86::VMOVSSrm_alt: |
2025 | case X86::VMOVSSZrm_alt: |
2026 | case X86::MOVDI2PDIrm: |
2027 | case X86::VMOVDI2PDIrm: |
2028 | case X86::VMOVDI2PDIZrm: |
2029 | printZeroUpperMove(MI, OutStreamer, SclWidth: 32, VecWidth: 128, ShuffleComment: "mem[0],zero,zero,zero" ); |
2030 | break; |
2031 | |
2032 | #define MOV_CASE(Prefix, Suffix) \ |
2033 | case X86::Prefix##MOVAPD##Suffix##rm: \ |
2034 | case X86::Prefix##MOVAPS##Suffix##rm: \ |
2035 | case X86::Prefix##MOVUPD##Suffix##rm: \ |
2036 | case X86::Prefix##MOVUPS##Suffix##rm: \ |
2037 | case X86::Prefix##MOVDQA##Suffix##rm: \ |
2038 | case X86::Prefix##MOVDQU##Suffix##rm: |
2039 | |
2040 | #define MOV_AVX512_CASE(Suffix, Postfix) \ |
2041 | case X86::VMOVDQA64##Suffix##rm##Postfix: \ |
2042 | case X86::VMOVDQA32##Suffix##rm##Postfix: \ |
2043 | case X86::VMOVDQU64##Suffix##rm##Postfix: \ |
2044 | case X86::VMOVDQU32##Suffix##rm##Postfix: \ |
2045 | case X86::VMOVDQU16##Suffix##rm##Postfix: \ |
2046 | case X86::VMOVDQU8##Suffix##rm##Postfix: \ |
2047 | case X86::VMOVAPS##Suffix##rm##Postfix: \ |
2048 | case X86::VMOVAPD##Suffix##rm##Postfix: \ |
2049 | case X86::VMOVUPS##Suffix##rm##Postfix: \ |
2050 | case X86::VMOVUPD##Suffix##rm##Postfix: |
2051 | |
2052 | #define CASE_128_MOV_RM() \ |
2053 | MOV_CASE(, ) /* SSE */ \ |
2054 | MOV_CASE(V, ) /* AVX-128 */ \ |
2055 | MOV_AVX512_CASE(Z128, ) \ |
2056 | MOV_AVX512_CASE(Z128, k) \ |
2057 | MOV_AVX512_CASE(Z128, kz) |
2058 | |
2059 | #define CASE_256_MOV_RM() \ |
2060 | MOV_CASE(V, Y) /* AVX-256 */ \ |
2061 | MOV_AVX512_CASE(Z256, ) \ |
2062 | MOV_AVX512_CASE(Z256, k) \ |
2063 | MOV_AVX512_CASE(Z256, kz) |
2064 | |
2065 | #define CASE_512_MOV_RM() \ |
2066 | MOV_AVX512_CASE(Z, ) \ |
2067 | MOV_AVX512_CASE(Z, k) \ |
2068 | MOV_AVX512_CASE(Z, kz) |
2069 | |
2070 | // For loads from a constant pool to a vector register, print the constant |
2071 | // loaded. |
2072 | CASE_128_MOV_RM() |
2073 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 128); |
2074 | break; |
2075 | CASE_256_MOV_RM() |
2076 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 256); |
2077 | break; |
2078 | CASE_512_MOV_RM() |
2079 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 512); |
2080 | break; |
2081 | case X86::VBROADCASTF128rm: |
2082 | case X86::VBROADCASTI128rm: |
2083 | MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm) |
2084 | MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm) |
2085 | MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm) |
2086 | MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm) |
2087 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 128); |
2088 | break; |
2089 | MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm) |
2090 | MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm) |
2091 | MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm) |
2092 | MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm) |
2093 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 128); |
2094 | break; |
2095 | MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm) |
2096 | MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm) |
2097 | MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm) |
2098 | MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm) |
2099 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 256); |
2100 | break; |
2101 | |
2102 | // For broadcast loads from a constant pool to a vector register, repeatedly |
2103 | // print the constant loaded. |
2104 | case X86::MOVDDUPrm: |
2105 | case X86::VMOVDDUPrm: |
2106 | MASK_AVX512_CASE(X86::VMOVDDUPZ128rm) |
2107 | case X86::VPBROADCASTQrm: |
2108 | MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm) |
2109 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 64); |
2110 | break; |
2111 | case X86::VBROADCASTSDYrm: |
2112 | MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm) |
2113 | case X86::VPBROADCASTQYrm: |
2114 | MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm) |
2115 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 64); |
2116 | break; |
2117 | MASK_AVX512_CASE(X86::VBROADCASTSDZrm) |
2118 | MASK_AVX512_CASE(X86::VPBROADCASTQZrm) |
2119 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 64); |
2120 | break; |
2121 | case X86::VBROADCASTSSrm: |
2122 | MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm) |
2123 | case X86::VPBROADCASTDrm: |
2124 | MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm) |
2125 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 32); |
2126 | break; |
2127 | case X86::VBROADCASTSSYrm: |
2128 | MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm) |
2129 | case X86::VPBROADCASTDYrm: |
2130 | MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm) |
2131 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 32); |
2132 | break; |
2133 | MASK_AVX512_CASE(X86::VBROADCASTSSZrm) |
2134 | MASK_AVX512_CASE(X86::VPBROADCASTDZrm) |
2135 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 32); |
2136 | break; |
2137 | case X86::VPBROADCASTWrm: |
2138 | MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm) |
2139 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 16); |
2140 | break; |
2141 | case X86::VPBROADCASTWYrm: |
2142 | MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm) |
2143 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 16); |
2144 | break; |
2145 | MASK_AVX512_CASE(X86::VPBROADCASTWZrm) |
2146 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 16); |
2147 | break; |
2148 | case X86::VPBROADCASTBrm: |
2149 | MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm) |
2150 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 8); |
2151 | break; |
2152 | case X86::VPBROADCASTBYrm: |
2153 | MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm) |
2154 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 8); |
2155 | break; |
2156 | MASK_AVX512_CASE(X86::VPBROADCASTBZrm) |
2157 | printBroadcast(MI, OutStreamer, Repeats: 64, BitWidth: 8); |
2158 | break; |
2159 | |
2160 | #define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \ |
2161 | case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix: |
2162 | |
2163 | #define CASE_MOVX_RM(Ext, Type) \ |
2164 | MOVX_CASE(, Ext, Type, , ) \ |
2165 | MOVX_CASE(V, Ext, Type, , ) \ |
2166 | MOVX_CASE(V, Ext, Type, Y, ) \ |
2167 | MOVX_CASE(V, Ext, Type, Z128, ) \ |
2168 | MOVX_CASE(V, Ext, Type, Z128, k ) \ |
2169 | MOVX_CASE(V, Ext, Type, Z128, kz ) \ |
2170 | MOVX_CASE(V, Ext, Type, Z256, ) \ |
2171 | MOVX_CASE(V, Ext, Type, Z256, k ) \ |
2172 | MOVX_CASE(V, Ext, Type, Z256, kz ) \ |
2173 | MOVX_CASE(V, Ext, Type, Z, ) \ |
2174 | MOVX_CASE(V, Ext, Type, Z, k ) \ |
2175 | MOVX_CASE(V, Ext, Type, Z, kz ) |
2176 | |
2177 | CASE_MOVX_RM(SX, BD) |
2178 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2179 | break; |
2180 | CASE_MOVX_RM(SX, BQ) |
2181 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2182 | break; |
2183 | CASE_MOVX_RM(SX, BW) |
2184 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2185 | break; |
2186 | CASE_MOVX_RM(SX, DQ) |
2187 | printSignExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2188 | break; |
2189 | CASE_MOVX_RM(SX, WD) |
2190 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2191 | break; |
2192 | CASE_MOVX_RM(SX, WQ) |
2193 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2194 | break; |
2195 | |
2196 | CASE_MOVX_RM(ZX, BD) |
2197 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2198 | break; |
2199 | CASE_MOVX_RM(ZX, BQ) |
2200 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2201 | break; |
2202 | CASE_MOVX_RM(ZX, BW) |
2203 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2204 | break; |
2205 | CASE_MOVX_RM(ZX, DQ) |
2206 | printZeroExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2207 | break; |
2208 | CASE_MOVX_RM(ZX, WD) |
2209 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2210 | break; |
2211 | CASE_MOVX_RM(ZX, WQ) |
2212 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2213 | break; |
2214 | } |
2215 | } |
2216 | |
2217 | // Does the given operand refer to a DLLIMPORT function? |
2218 | bool isImportedFunction(const MachineOperand &MO) { |
2219 | return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_DLLIMPORT); |
2220 | } |
2221 | |
2222 | // Is the given instruction a call to a CFGuard function? |
2223 | bool isCallToCFGuardFunction(const MachineInstr *MI) { |
2224 | assert(MI->getOpcode() == X86::TAILJMPm64_REX || |
2225 | MI->getOpcode() == X86::CALL64m); |
2226 | const MachineOperand &MO = MI->getOperand(i: 3); |
2227 | return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_NO_FLAG) && |
2228 | isCFGuardFunction(GV: MO.getGlobal()); |
2229 | } |
2230 | |
2231 | // Does the containing block for the given instruction contain any jump table |
2232 | // info (indicating that the block is a dispatch for a jump table)? |
2233 | bool hasJumpTableInfoInBlock(const llvm::MachineInstr *MI) { |
2234 | const MachineBasicBlock &MBB = *MI->getParent(); |
2235 | for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I) |
2236 | if (I->isJumpTableDebugInfo()) |
2237 | return true; |
2238 | |
2239 | return false; |
2240 | } |
2241 | |
2242 | void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { |
2243 | // FIXME: Enable feature predicate checks once all the tests pass. |
2244 | // X86_MC::verifyInstructionPredicates(MI->getOpcode(), |
2245 | // Subtarget->getFeatureBits()); |
2246 | |
2247 | X86MCInstLower MCInstLowering(*MF, *this); |
2248 | const X86RegisterInfo *RI = |
2249 | MF->getSubtarget<X86Subtarget>().getRegisterInfo(); |
2250 | |
2251 | if (MI->getOpcode() == X86::OR64rm) { |
2252 | for (auto &Opd : MI->operands()) { |
2253 | if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) == |
2254 | "swift_async_extendedFramePointerFlags" ) { |
2255 | ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true; |
2256 | } |
2257 | } |
2258 | } |
2259 | |
2260 | // Add comments for values loaded from constant pool. |
2261 | if (OutStreamer->isVerboseAsm()) |
2262 | addConstantComments(MI, OutStreamer&: *OutStreamer); |
2263 | |
2264 | // Add a comment about EVEX compression |
2265 | if (TM.Options.MCOptions.ShowMCEncoding) { |
2266 | if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY) |
2267 | OutStreamer->AddComment(T: "EVEX TO LEGACY Compression " , EOL: false); |
2268 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) |
2269 | OutStreamer->AddComment(T: "EVEX TO VEX Compression " , EOL: false); |
2270 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX) |
2271 | OutStreamer->AddComment(T: "EVEX TO EVEX Compression " , EOL: false); |
2272 | } |
2273 | |
2274 | switch (MI->getOpcode()) { |
2275 | case TargetOpcode::DBG_VALUE: |
2276 | llvm_unreachable("Should be handled target independently" ); |
2277 | |
2278 | case X86::EH_RETURN: |
2279 | case X86::EH_RETURN64: { |
2280 | // Lower these as normal, but add some comments. |
2281 | Register Reg = MI->getOperand(i: 0).getReg(); |
2282 | OutStreamer->AddComment(T: StringRef("eh_return, addr: %" ) + |
2283 | X86ATTInstPrinter::getRegisterName(Reg)); |
2284 | break; |
2285 | } |
2286 | case X86::CLEANUPRET: { |
2287 | // Lower these as normal, but add some comments. |
2288 | OutStreamer->AddComment(T: "CLEANUPRET" ); |
2289 | break; |
2290 | } |
2291 | |
2292 | case X86::CATCHRET: { |
2293 | // Lower these as normal, but add some comments. |
2294 | OutStreamer->AddComment(T: "CATCHRET" ); |
2295 | break; |
2296 | } |
2297 | |
2298 | case X86::ENDBR32: |
2299 | case X86::ENDBR64: { |
2300 | // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for |
2301 | // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be |
2302 | // non-empty. If MI is the initial ENDBR, place the |
2303 | // __patchable_function_entries label after ENDBR. |
2304 | if (CurrentPatchableFunctionEntrySym && |
2305 | CurrentPatchableFunctionEntrySym == CurrentFnBegin && |
2306 | MI == &MF->front().front()) { |
2307 | MCInst Inst; |
2308 | MCInstLowering.Lower(MI, OutMI&: Inst); |
2309 | EmitAndCountInstruction(Inst); |
2310 | CurrentPatchableFunctionEntrySym = createTempSymbol(Name: "patch" ); |
2311 | OutStreamer->emitLabel(Symbol: CurrentPatchableFunctionEntrySym); |
2312 | return; |
2313 | } |
2314 | break; |
2315 | } |
2316 | |
2317 | case X86::TAILJMPd64: |
2318 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(Reg: X86::R11)) |
2319 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CS_PREFIX)); |
2320 | |
2321 | if (EnableImportCallOptimization && isImportedFunction(MO: MI->getOperand(i: 0))) { |
2322 | emitLabelAndRecordForImportCallOptimization( |
2323 | Kind: IMAGE_RETPOLINE_AMD64_IMPORT_BR); |
2324 | } |
2325 | |
2326 | // Lower this as normal, but add a comment. |
2327 | OutStreamer->AddComment(T: "TAILCALL" ); |
2328 | break; |
2329 | |
2330 | case X86::TAILJMPr: |
2331 | case X86::TAILJMPm: |
2332 | case X86::TAILJMPd: |
2333 | case X86::TAILJMPd_CC: |
2334 | case X86::TAILJMPr64: |
2335 | case X86::TAILJMPm64: |
2336 | case X86::TAILJMPd64_CC: |
2337 | if (EnableImportCallOptimization) |
2338 | report_fatal_error(reason: "Unexpected TAILJMP instruction was emitted when " |
2339 | "import call optimization was enabled" ); |
2340 | |
2341 | // Lower these as normal, but add some comments. |
2342 | OutStreamer->AddComment(T: "TAILCALL" ); |
2343 | break; |
2344 | |
2345 | case X86::TAILJMPm64_REX: |
2346 | if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) { |
2347 | emitLabelAndRecordForImportCallOptimization( |
2348 | Kind: IMAGE_RETPOLINE_AMD64_CFG_BR_REX); |
2349 | } |
2350 | |
2351 | OutStreamer->AddComment(T: "TAILCALL" ); |
2352 | break; |
2353 | |
2354 | case X86::TAILJMPr64_REX: { |
2355 | if (EnableImportCallOptimization) { |
2356 | assert(MI->getOperand(0).getReg() == X86::RAX && |
2357 | "Indirect tail calls with impcall enabled must go through RAX (as " |
2358 | "enforced by TCRETURNImpCallri64)" ); |
2359 | emitLabelAndRecordForImportCallOptimization( |
2360 | Kind: IMAGE_RETPOLINE_AMD64_INDIR_BR); |
2361 | } |
2362 | |
2363 | OutStreamer->AddComment(T: "TAILCALL" ); |
2364 | break; |
2365 | } |
2366 | |
2367 | case X86::JMP64r: |
2368 | if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI)) { |
2369 | uint16_t EncodedReg = |
2370 | this->getSubtarget().getRegisterInfo()->getEncodingValue( |
2371 | Reg: MI->getOperand(i: 0).getReg().asMCReg()); |
2372 | emitLabelAndRecordForImportCallOptimization( |
2373 | Kind: (ImportCallKind)(IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST + |
2374 | EncodedReg)); |
2375 | } |
2376 | break; |
2377 | |
2378 | case X86::JMP16r: |
2379 | case X86::JMP16m: |
2380 | case X86::JMP32r: |
2381 | case X86::JMP32m: |
2382 | case X86::JMP64m: |
2383 | if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI)) |
2384 | report_fatal_error( |
2385 | reason: "Unexpected JMP instruction was emitted for a jump-table when import " |
2386 | "call optimization was enabled" ); |
2387 | break; |
2388 | |
2389 | case X86::TLS_addr32: |
2390 | case X86::TLS_addr64: |
2391 | case X86::TLS_addrX32: |
2392 | case X86::TLS_base_addr32: |
2393 | case X86::TLS_base_addr64: |
2394 | case X86::TLS_base_addrX32: |
2395 | case X86::TLS_desc32: |
2396 | case X86::TLS_desc64: |
2397 | return LowerTlsAddr(MCInstLowering, MI: *MI); |
2398 | |
2399 | case X86::MOVPC32r: { |
2400 | // This is a pseudo op for a two instruction sequence with a label, which |
2401 | // looks like: |
2402 | // call "L1$pb" |
2403 | // "L1$pb": |
2404 | // popl %esi |
2405 | |
2406 | // Emit the call. |
2407 | MCSymbol *PICBase = MF->getPICBaseSymbol(); |
2408 | // FIXME: We would like an efficient form for this, so we don't have to do a |
2409 | // lot of extra uniquing. |
2410 | EmitAndCountInstruction( |
2411 | Inst&: MCInstBuilder(X86::CALLpcrel32) |
2412 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: PICBase, Ctx&: OutContext))); |
2413 | |
2414 | const X86FrameLowering *FrameLowering = |
2415 | MF->getSubtarget<X86Subtarget>().getFrameLowering(); |
2416 | bool hasFP = FrameLowering->hasFP(MF: *MF); |
2417 | |
2418 | // TODO: This is needed only if we require precise CFA. |
2419 | bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && |
2420 | !OutStreamer->getDwarfFrameInfos().back().End; |
2421 | |
2422 | int stackGrowth = -RI->getSlotSize(); |
2423 | |
2424 | if (HasActiveDwarfFrame && !hasFP) { |
2425 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: -stackGrowth); |
2426 | MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); |
2427 | } |
2428 | |
2429 | // Emit the label. |
2430 | OutStreamer->emitLabel(Symbol: PICBase); |
2431 | |
2432 | // popl $reg |
2433 | EmitAndCountInstruction( |
2434 | Inst&: MCInstBuilder(X86::POP32r).addReg(Reg: MI->getOperand(i: 0).getReg())); |
2435 | |
2436 | if (HasActiveDwarfFrame && !hasFP) { |
2437 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: stackGrowth); |
2438 | } |
2439 | return; |
2440 | } |
2441 | |
2442 | case X86::ADD32ri: { |
2443 | // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. |
2444 | if (MI->getOperand(i: 2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) |
2445 | break; |
2446 | |
2447 | // Okay, we have something like: |
2448 | // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) |
2449 | |
2450 | // For this, we want to print something like: |
2451 | // MYGLOBAL + (. - PICBASE) |
2452 | // However, we can't generate a ".", so just emit a new label here and refer |
2453 | // to it. |
2454 | MCSymbol *DotSym = OutContext.createTempSymbol(); |
2455 | OutStreamer->emitLabel(Symbol: DotSym); |
2456 | |
2457 | // Now that we have emitted the label, lower the complex operand expression. |
2458 | MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MO: MI->getOperand(i: 2)); |
2459 | |
2460 | const MCExpr *DotExpr = MCSymbolRefExpr::create(Symbol: DotSym, Ctx&: OutContext); |
2461 | const MCExpr *PICBase = |
2462 | MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx&: OutContext); |
2463 | DotExpr = MCBinaryExpr::createSub(LHS: DotExpr, RHS: PICBase, Ctx&: OutContext); |
2464 | |
2465 | DotExpr = MCBinaryExpr::createAdd( |
2466 | LHS: MCSymbolRefExpr::create(Symbol: OpSym, Ctx&: OutContext), RHS: DotExpr, Ctx&: OutContext); |
2467 | |
2468 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::ADD32ri) |
2469 | .addReg(Reg: MI->getOperand(i: 0).getReg()) |
2470 | .addReg(Reg: MI->getOperand(i: 1).getReg()) |
2471 | .addExpr(Val: DotExpr)); |
2472 | return; |
2473 | } |
2474 | case TargetOpcode::STATEPOINT: |
2475 | return LowerSTATEPOINT(MI: *MI, MCIL&: MCInstLowering); |
2476 | |
2477 | case TargetOpcode::FAULTING_OP: |
2478 | return LowerFAULTING_OP(FaultingMI: *MI, MCIL&: MCInstLowering); |
2479 | |
2480 | case TargetOpcode::FENTRY_CALL: |
2481 | return LowerFENTRY_CALL(MI: *MI, MCIL&: MCInstLowering); |
2482 | |
2483 | case TargetOpcode::PATCHABLE_OP: |
2484 | return LowerPATCHABLE_OP(MI: *MI, MCIL&: MCInstLowering); |
2485 | |
2486 | case TargetOpcode::STACKMAP: |
2487 | return LowerSTACKMAP(MI: *MI); |
2488 | |
2489 | case TargetOpcode::PATCHPOINT: |
2490 | return LowerPATCHPOINT(MI: *MI, MCIL&: MCInstLowering); |
2491 | |
2492 | case TargetOpcode::PATCHABLE_FUNCTION_ENTER: |
2493 | return LowerPATCHABLE_FUNCTION_ENTER(MI: *MI, MCIL&: MCInstLowering); |
2494 | |
2495 | case TargetOpcode::PATCHABLE_RET: |
2496 | return LowerPATCHABLE_RET(MI: *MI, MCIL&: MCInstLowering); |
2497 | |
2498 | case TargetOpcode::PATCHABLE_TAIL_CALL: |
2499 | return LowerPATCHABLE_TAIL_CALL(MI: *MI, MCIL&: MCInstLowering); |
2500 | |
2501 | case TargetOpcode::PATCHABLE_EVENT_CALL: |
2502 | return LowerPATCHABLE_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2503 | |
2504 | case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: |
2505 | return LowerPATCHABLE_TYPED_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2506 | |
2507 | case X86::MORESTACK_RET: |
2508 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2509 | return; |
2510 | |
2511 | case X86::KCFI_CHECK: |
2512 | return LowerKCFI_CHECK(MI: *MI); |
2513 | |
2514 | case X86::ASAN_CHECK_MEMACCESS: |
2515 | return LowerASAN_CHECK_MEMACCESS(MI: *MI); |
2516 | |
2517 | case X86::MORESTACK_RET_RESTORE_R10: |
2518 | // Return, then restore R10. |
2519 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2520 | EmitAndCountInstruction( |
2521 | Inst&: MCInstBuilder(X86::MOV64rr).addReg(Reg: X86::R10).addReg(Reg: X86::RAX)); |
2522 | return; |
2523 | |
2524 | case X86::SEH_PushReg: |
2525 | case X86::SEH_SaveReg: |
2526 | case X86::SEH_SaveXMM: |
2527 | case X86::SEH_StackAlloc: |
2528 | case X86::SEH_StackAlign: |
2529 | case X86::SEH_SetFrame: |
2530 | case X86::SEH_PushFrame: |
2531 | case X86::SEH_EndPrologue: |
2532 | case X86::SEH_EndEpilogue: |
2533 | case X86::SEH_UnwindV2Start: |
2534 | case X86::SEH_UnwindVersion: |
2535 | EmitSEHInstruction(MI); |
2536 | return; |
2537 | |
2538 | case X86::SEH_BeginEpilogue: { |
2539 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
2540 | // The Windows unwinder will not invoke a function's exception handler if the |
2541 | // IP is in either the prologue or the epilogue. This causes a problem when a |
2542 | // call immediately precedes an epilogue, because the return address points |
2543 | // into the epilogue. To cope with that, we insert a 'nop' if the epilogue |
2544 | // would otherwise start immediately after a CALL in the final emitted code. |
2545 | MachineBasicBlock::const_iterator MBBI(MI); |
2546 | // Check if preceded by a call and emit nop if so. |
2547 | for (MBBI = PrevCrossBBInst(MBBI); |
2548 | MBBI != MachineBasicBlock::const_iterator(); |
2549 | MBBI = PrevCrossBBInst(MBBI)) { |
2550 | // Pseudo instructions that aren't a call are assumed not to emit any |
2551 | // code. If they do, we will, at worst, generate unnecessary noops after |
2552 | // a call. |
2553 | if (MBBI->isCall() || !MBBI->isPseudo()) { |
2554 | if (MBBI->isCall()) |
2555 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::NOOP)); |
2556 | break; |
2557 | } |
2558 | } |
2559 | |
2560 | EmitSEHInstruction(MI); |
2561 | return; |
2562 | } |
2563 | case X86::UBSAN_UD1: |
2564 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::UD1Lm) |
2565 | .addReg(Reg: X86::EAX) |
2566 | .addReg(Reg: X86::EAX) |
2567 | .addImm(Val: 1) |
2568 | .addReg(Reg: X86::NoRegister) |
2569 | .addImm(Val: MI->getOperand(i: 0).getImm()) |
2570 | .addReg(Reg: X86::NoRegister)); |
2571 | return; |
2572 | case X86::CALL64pcrel32: |
2573 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(Reg: X86::R11)) |
2574 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CS_PREFIX)); |
2575 | |
2576 | if (EnableImportCallOptimization && isImportedFunction(MO: MI->getOperand(i: 0))) { |
2577 | emitLabelAndRecordForImportCallOptimization( |
2578 | Kind: IMAGE_RETPOLINE_AMD64_IMPORT_CALL); |
2579 | |
2580 | MCInst TmpInst; |
2581 | MCInstLowering.Lower(MI, OutMI&: TmpInst); |
2582 | |
2583 | // For Import Call Optimization to work, we need the call instruction to |
2584 | // carry a rex prefix, and a 5-byte nop after the call instruction. |
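     | // The sequence emitted below is therefore roughly (label name illustrative): |
     | //   .Limpcall<N>: |
     | //     rex64 |
     | //     call <imported function> |
     | //     # 5-byte nop |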
2585 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::REX64_PREFIX)); |
2586 | emitCallInstruction(MCI: TmpInst); |
2587 | emitNop(OS&: *OutStreamer, NumBytes: 5, Subtarget); |
2588 | return; |
2589 | } |
2590 | |
2591 | break; |
2592 | |
2593 | case X86::CALL64r: |
2594 | if (EnableImportCallOptimization) { |
2595 | assert(MI->getOperand(0).getReg() == X86::RAX && |
2596 | "Indirect calls with impcall enabled must go through RAX (as " |
2597 | "enforced by CALL64r_ImpCall)" ); |
2598 | |
2599 | emitLabelAndRecordForImportCallOptimization( |
2600 | Kind: IMAGE_RETPOLINE_AMD64_INDIR_CALL); |
2601 | MCInst TmpInst; |
2602 | MCInstLowering.Lower(MI, OutMI&: TmpInst); |
2603 | emitCallInstruction(MCI: TmpInst); |
2604 | |
2605 | // For Import Call Optimization to work, we need a 3-byte nop after the |
2606 | // call instruction. |
2607 | emitNop(OS&: *OutStreamer, NumBytes: 3, Subtarget); |
2608 | return; |
2609 | } |
2610 | break; |
2611 | |
2612 | case X86::CALL64m: |
2613 | if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) { |
2614 | emitLabelAndRecordForImportCallOptimization( |
2615 | Kind: IMAGE_RETPOLINE_AMD64_CFG_CALL); |
2616 | } |
2617 | break; |
2618 | |
2619 | case X86::JCC_1: |
2620 | // Two instruction prefixes (2EH for branch not taken and 3EH for branch |
2621 | // taken) are used as branch hints. Here we add the branch-taken prefix to |
2622 | // jump instructions whose taken probability exceeds the threshold. |
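     | // With the default 50% threshold, a likely-taken JCC is therefore preceded |
     | // by a DS segment-override prefix (3EH), which encodes the branch-taken hint. |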
2623 | if (getSubtarget().hasBranchHint() && EnableBranchHint) { |
2624 | const MachineBranchProbabilityInfo *MBPI = |
2625 | &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); |
2626 | MachineBasicBlock *DestBB = MI->getOperand(i: 0).getMBB(); |
2627 | BranchProbability EdgeProb = |
2628 | MBPI->getEdgeProbability(Src: MI->getParent(), Dst: DestBB); |
2629 | BranchProbability Threshold(BranchHintProbabilityThreshold, 100); |
2630 | if (EdgeProb > Threshold) |
2631 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::DS_PREFIX)); |
2632 | } |
2633 | break; |
2634 | } |
2635 | |
2636 | MCInst TmpInst; |
2637 | MCInstLowering.Lower(MI, OutMI&: TmpInst); |
2638 | |
2639 | if (MI->isCall()) { |
2640 | emitCallInstruction(MCI: TmpInst); |
2641 | return; |
2642 | } |
2643 | |
2644 | EmitAndCountInstruction(Inst&: TmpInst); |
2645 | } |
2646 | |
2647 | void X86AsmPrinter::emitCallInstruction(const llvm::MCInst &MCI) { |
2648 | // Stackmap shadows cannot include branch targets, so we can count the bytes |
2649 | // in a call towards the shadow, but must ensure that no thread returns |
2650 | // into the stackmap shadow. The only way to achieve this is if the call |
2651 | // is at the end of the shadow. |
2652 | |
2653 | // Count the size of the call towards the shadow. |
2654 | SMShadowTracker.count(Inst: MCI, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
2655 | // Then flush the shadow so that we fill with nops before the call, not |
2656 | // after it. |
2657 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
2658 | // Then emit the call |
2659 | OutStreamer->emitInstruction(Inst: MCI, STI: getSubtargetInfo()); |
2660 | } |
2661 | |
2662 | void X86AsmPrinter::emitLabelAndRecordForImportCallOptimization( |
2663 | ImportCallKind Kind) { |
2664 | assert(EnableImportCallOptimization); |
2665 | |
2666 | MCSymbol *CallSiteSymbol = MMI->getContext().createNamedTempSymbol(Name: "impcall" ); |
2667 | OutStreamer->emitLabel(Symbol: CallSiteSymbol); |
2668 | |
2669 | SectionToImportedFunctionCalls[OutStreamer->getCurrentSectionOnly()] |
2670 | .push_back(x: {.CalleeSymbol: CallSiteSymbol, .Kind: Kind}); |
2671 | } |
2672 | |