1 | //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains code to lower X86 MachineInstrs to their corresponding |
10 | // MCInst records. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "MCTargetDesc/X86ATTInstPrinter.h" |
15 | #include "MCTargetDesc/X86BaseInfo.h" |
16 | #include "MCTargetDesc/X86EncodingOptimization.h" |
17 | #include "MCTargetDesc/X86InstComments.h" |
18 | #include "MCTargetDesc/X86ShuffleDecode.h" |
19 | #include "MCTargetDesc/X86TargetStreamer.h" |
20 | #include "X86AsmPrinter.h" |
21 | #include "X86MachineFunctionInfo.h" |
22 | #include "X86RegisterInfo.h" |
23 | #include "X86ShuffleDecodeConstantPool.h" |
24 | #include "X86Subtarget.h" |
25 | #include "llvm/ADT/STLExtras.h" |
26 | #include "llvm/ADT/SmallString.h" |
27 | #include "llvm/ADT/StringExtras.h" |
28 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
29 | #include "llvm/CodeGen/MachineConstantPool.h" |
30 | #include "llvm/CodeGen/MachineFunction.h" |
31 | #include "llvm/CodeGen/MachineModuleInfoImpls.h" |
32 | #include "llvm/CodeGen/MachineOperand.h" |
33 | #include "llvm/CodeGen/StackMaps.h" |
34 | #include "llvm/IR/DataLayout.h" |
35 | #include "llvm/IR/GlobalValue.h" |
36 | #include "llvm/IR/Mangler.h" |
37 | #include "llvm/MC/MCAsmInfo.h" |
38 | #include "llvm/MC/MCCodeEmitter.h" |
39 | #include "llvm/MC/MCContext.h" |
40 | #include "llvm/MC/MCExpr.h" |
41 | #include "llvm/MC/MCFixup.h" |
42 | #include "llvm/MC/MCInst.h" |
43 | #include "llvm/MC/MCInstBuilder.h" |
44 | #include "llvm/MC/MCSection.h" |
45 | #include "llvm/MC/MCSectionELF.h" |
46 | #include "llvm/MC/MCStreamer.h" |
47 | #include "llvm/MC/MCSymbol.h" |
48 | #include "llvm/MC/MCSymbolELF.h" |
49 | #include "llvm/MC/TargetRegistry.h" |
50 | #include "llvm/Target/TargetLoweringObjectFile.h" |
51 | #include "llvm/Target/TargetMachine.h" |
52 | #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" |
53 | #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" |
54 | #include <string> |
55 | |
56 | using namespace llvm; |
57 | |
static cl::opt<bool> EnableBranchHint("enable-branch-hint",
                                      cl::desc("Enable branch hint."),
                                      cl::init(false), cl::Hidden);
static cl::opt<unsigned> BranchHintProbabilityThreshold(
    "branch-hint-probability-threshold",
    cl::desc("The probability threshold of enabling branch hint."),
    cl::init(50), cl::Hidden);
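// Usage sketch (assumption: standard cl::opt plumbing): both are internal
// llc/codegen options, e.g.
//   llc -enable-branch-hint -branch-hint-probability-threshold=60 foo.ll
// They are consulted later in this file when deciding whether to emit a
// branch hint for a conditional jump; the threshold (default 50) appears to
// be interpreted as a percentage.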
65 | |
66 | namespace { |
67 | |
/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
69 | class X86MCInstLower { |
70 | MCContext &Ctx; |
71 | const MachineFunction &MF; |
72 | const TargetMachine &TM; |
73 | const MCAsmInfo &MAI; |
74 | X86AsmPrinter &AsmPrinter; |
75 | |
76 | public: |
77 | X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); |
78 | |
79 | MCOperand LowerMachineOperand(const MachineInstr *MI, |
80 | const MachineOperand &MO) const; |
81 | void Lower(const MachineInstr *MI, MCInst &OutMI) const; |
82 | |
83 | MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; |
84 | MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; |
85 | |
86 | private: |
87 | MachineModuleInfoMachO &getMachOMMI() const; |
88 | }; |
89 | |
90 | } // end anonymous namespace |
91 | |
92 | /// A RAII helper which defines a region of instructions which can't have |
93 | /// padding added between them for correctness. |
94 | struct NoAutoPaddingScope { |
95 | MCStreamer &OS; |
96 | const bool OldAllowAutoPadding; |
97 | NoAutoPaddingScope(MCStreamer &OS) |
98 | : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { |
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
110 | } |
111 | }; |
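// A minimal usage sketch: wrap the emission of a fixed-size sequence in a
// scope so the streamer cannot grow it with alignment padding.
//
//   {
//     NoAutoPaddingScope NoPadScope(*OutStreamer);
//     // ... emit instructions whose byte count must not change ...
//   } // the previous auto-padding setting is restored here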
112 | |
113 | // Emit a minimal sequence of nops spanning NumBytes bytes. |
114 | static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, |
115 | const X86Subtarget *Subtarget); |
116 | |
117 | void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, |
118 | const MCSubtargetInfo &STI, |
119 | MCCodeEmitter *CodeEmitter) { |
120 | if (InShadow) { |
121 | SmallString<256> Code; |
122 | SmallVector<MCFixup, 4> Fixups; |
    CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI);
124 | CurrentShadowSize += Code.size(); |
125 | if (CurrentShadowSize >= RequiredShadowSize) |
126 | InShadow = false; // The shadow is big enough. Stop counting. |
127 | } |
128 | } |
129 | |
130 | void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( |
131 | MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { |
132 | if (InShadow && CurrentShadowSize < RequiredShadowSize) { |
133 | InShadow = false; |
    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                &MF->getSubtarget<X86Subtarget>());
136 | } |
137 | } |
138 | |
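// Emit an instruction and, while a stackmap shadow is open, credit its encoded
// size against the shadow so emitShadowPadding() only adds the nops that are
// still missing.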
139 | void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { |
  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
142 | } |
143 | |
144 | X86MCInstLower::X86MCInstLower(const MachineFunction &mf, |
145 | X86AsmPrinter &asmprinter) |
146 | : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()), |
147 | AsmPrinter(asmprinter) {} |
148 | |
149 | MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { |
150 | return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); |
151 | } |
152 | |
153 | /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol |
154 | /// operand to an MCSymbol. |
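/// For example, a dllimport reference to @foo is lowered to the symbol
/// __imp_foo, and a Darwin non-lazy reference becomes a private stub symbol
/// such as L_foo$non_lazy_ptr (using the DataLayout's private global prefix).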
155 | MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { |
156 | const Triple &TT = TM.getTargetTriple(); |
157 | if (MO.isGlobal() && TT.isOSBinFormatELF()) |
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
159 | |
160 | const DataLayout &DL = MF.getDataLayout(); |
161 | assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && |
162 | "Isn't a symbol reference" ); |
163 | |
164 | MCSymbol *Sym = nullptr; |
165 | SmallString<128> Name; |
166 | StringRef Suffix; |
167 | |
168 | switch (MO.getTargetFlags()) { |
169 | case X86II::MO_DLLIMPORT: |
170 | // Handle dllimport linkage. |
171 | Name += "__imp_" ; |
172 | break; |
173 | case X86II::MO_COFFSTUB: |
174 | Name += ".refptr." ; |
175 | break; |
176 | case X86II::MO_DARWIN_NONLAZY: |
177 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: |
178 | Suffix = "$non_lazy_ptr" ; |
179 | break; |
180 | } |
181 | |
182 | if (!Suffix.empty()) |
183 | Name += DL.getPrivateGlobalPrefix(); |
184 | |
185 | if (MO.isGlobal()) { |
186 | const GlobalValue *GV = MO.getGlobal(); |
187 | AsmPrinter.getNameWithPrefix(Name, GV); |
188 | } else if (MO.isSymbol()) { |
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
190 | } else if (MO.isMBB()) { |
191 | assert(Suffix.empty()); |
192 | Sym = MO.getMBB()->getSymbol(); |
193 | } |
194 | |
195 | Name += Suffix; |
196 | if (!Sym) |
197 | Sym = Ctx.getOrCreateSymbol(Name); |
198 | |
  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
201 | switch (MO.getTargetFlags()) { |
202 | default: |
203 | break; |
204 | case X86II::MO_COFFSTUB: { |
205 | MachineModuleInfoCOFF &MMICOFF = |
206 | AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>(); |
207 | MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym); |
208 | if (!StubSym.getPointer()) { |
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
212 | } |
213 | break; |
214 | } |
215 | case X86II::MO_DARWIN_NONLAZY: |
216 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { |
217 | MachineModuleInfoImpl::StubValueTy &StubSym = |
218 | getMachOMMI().getGVStubEntry(Sym); |
219 | if (!StubSym.getPointer()) { |
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
224 | } |
225 | break; |
226 | } |
227 | } |
228 | |
229 | return Sym; |
230 | } |
231 | |
232 | MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, |
233 | MCSymbol *Sym) const { |
234 | // FIXME: We would like an efficient form for this, so we don't have to do a |
235 | // lot of extra uniquing. |
236 | const MCExpr *Expr = nullptr; |
237 | MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; |
238 | |
239 | switch (MO.getTargetFlags()) { |
240 | default: |
241 | llvm_unreachable("Unknown target flag on GV operand" ); |
242 | case X86II::MO_NO_FLAG: // No flag. |
243 | // These affect the name of the symbol, not any suffix. |
244 | case X86II::MO_DARWIN_NONLAZY: |
245 | case X86II::MO_DLLIMPORT: |
246 | case X86II::MO_COFFSTUB: |
247 | break; |
248 | |
249 | case X86II::MO_TLVP: |
250 | RefKind = MCSymbolRefExpr::VK_TLVP; |
251 | break; |
252 | case X86II::MO_TLVP_PIC_BASE: |
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
257 | break; |
258 | case X86II::MO_SECREL: |
259 | RefKind = MCSymbolRefExpr::VK_SECREL; |
260 | break; |
261 | case X86II::MO_TLSGD: |
262 | RefKind = MCSymbolRefExpr::VK_TLSGD; |
263 | break; |
264 | case X86II::MO_TLSLD: |
265 | RefKind = MCSymbolRefExpr::VK_TLSLD; |
266 | break; |
267 | case X86II::MO_TLSLDM: |
268 | RefKind = MCSymbolRefExpr::VK_TLSLDM; |
269 | break; |
270 | case X86II::MO_GOTTPOFF: |
271 | RefKind = MCSymbolRefExpr::VK_GOTTPOFF; |
272 | break; |
273 | case X86II::MO_INDNTPOFF: |
274 | RefKind = MCSymbolRefExpr::VK_INDNTPOFF; |
275 | break; |
276 | case X86II::MO_TPOFF: |
277 | RefKind = MCSymbolRefExpr::VK_TPOFF; |
278 | break; |
279 | case X86II::MO_DTPOFF: |
280 | RefKind = MCSymbolRefExpr::VK_DTPOFF; |
281 | break; |
282 | case X86II::MO_NTPOFF: |
283 | RefKind = MCSymbolRefExpr::VK_NTPOFF; |
284 | break; |
285 | case X86II::MO_GOTNTPOFF: |
286 | RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; |
287 | break; |
288 | case X86II::MO_GOTPCREL: |
289 | RefKind = MCSymbolRefExpr::VK_GOTPCREL; |
290 | break; |
291 | case X86II::MO_GOTPCREL_NORELAX: |
292 | RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX; |
293 | break; |
294 | case X86II::MO_GOT: |
295 | RefKind = MCSymbolRefExpr::VK_GOT; |
296 | break; |
297 | case X86II::MO_GOTOFF: |
298 | RefKind = MCSymbolRefExpr::VK_GOTOFF; |
299 | break; |
300 | case X86II::MO_PLT: |
301 | RefKind = MCSymbolRefExpr::VK_PLT; |
302 | break; |
303 | case X86II::MO_ABS8: |
304 | RefKind = MCSymbolRefExpr::VK_X86_ABS8; |
305 | break; |
306 | case X86II::MO_PIC_BASE_OFFSET: |
307 | case X86II::MO_DARWIN_NONLAZY_PIC_BASE: |
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If the .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section, so we restrict it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
321 | } |
322 | break; |
323 | } |
324 | |
325 | if (!Expr) |
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
327 | |
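  // Fold any machine-operand offset into the expression; e.g. a MO_GOTPCREL
  // reference to @foo with offset 8 lowers to the expression foo@GOTPCREL + 8.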
  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
332 | } |
333 | |
334 | static unsigned getRetOpcode(const X86Subtarget &Subtarget) { |
335 | return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; |
336 | } |
337 | |
338 | MCOperand X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, |
339 | const MachineOperand &MO) const { |
340 | switch (MO.getType()) { |
341 | default: |
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return MCOperand();
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
364 | case MachineOperand::MO_RegisterMask: |
365 | // Ignore call clobbers. |
366 | return MCOperand(); |
367 | } |
368 | } |
369 | |
370 | // Replace TAILJMP opcodes with their equivalent opcodes that have encoding |
371 | // information. |
372 | static unsigned convertTailJumpOpcode(unsigned Opcode) { |
373 | switch (Opcode) { |
374 | case X86::TAILJMPr: |
375 | Opcode = X86::JMP32r; |
376 | break; |
377 | case X86::TAILJMPm: |
378 | Opcode = X86::JMP32m; |
379 | break; |
380 | case X86::TAILJMPr64: |
381 | Opcode = X86::JMP64r; |
382 | break; |
383 | case X86::TAILJMPm64: |
384 | Opcode = X86::JMP64m; |
385 | break; |
386 | case X86::TAILJMPr64_REX: |
387 | Opcode = X86::JMP64r_REX; |
388 | break; |
389 | case X86::TAILJMPm64_REX: |
390 | Opcode = X86::JMP64m_REX; |
391 | break; |
392 | case X86::TAILJMPd: |
393 | case X86::TAILJMPd64: |
394 | Opcode = X86::JMP_1; |
395 | break; |
396 | case X86::TAILJMPd_CC: |
397 | case X86::TAILJMPd64_CC: |
398 | Opcode = X86::JCC_1; |
399 | break; |
400 | } |
401 | |
402 | return Opcode; |
403 | } |
404 | |
405 | void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { |
406 | OutMI.setOpcode(MI->getOpcode()); |
407 | |
408 | for (const MachineOperand &MO : MI->operands()) |
409 | if (auto Op = LowerMachineOperand(MI, MO); Op.isValid()) |
410 | OutMI.addOperand(Op); |
411 | |
412 | bool In64BitMode = AsmPrinter.getSubtarget().is64Bit(); |
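  // Try the MC-level encoding optimizations first (e.g. preferring a 2-byte
  // VEX prefix over a 3-byte one, or the short fixed-register/immediate
  // forms); if any of them rewrote the instruction we are done.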
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
419 | return; |
420 | |
421 | // Handle a few special cases to eliminate operand modifiers. |
422 | switch (OutMI.getOpcode()) { |
423 | case X86::LEA64_32r: |
424 | case X86::LEA64r: |
425 | case X86::LEA16r: |
426 | case X86::LEA32r: |
427 | // LEA should have a segment register, but it must be empty. |
428 | assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands && |
429 | "Unexpected # of LEA operands" ); |
430 | assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && |
431 | "LEA has segment specified!" ); |
432 | break; |
433 | case X86::MULX32Hrr: |
434 | case X86::MULX32Hrm: |
435 | case X86::MULX64Hrr: |
436 | case X86::MULX64Hrm: { |
437 | // Turn into regular MULX by duplicating the destination. |
438 | unsigned NewOpc; |
439 | switch (OutMI.getOpcode()) { |
440 | default: llvm_unreachable("Invalid opcode" ); |
441 | case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; |
442 | case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; |
443 | case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; |
444 | case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; |
445 | } |
446 | OutMI.setOpcode(NewOpc); |
447 | // Duplicate the destination. |
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
450 | break; |
451 | } |
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
456 | case X86::CALL64r: |
457 | case X86::CALL64pcrel32: |
458 | assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!" ); |
459 | break; |
460 | case X86::EH_RETURN: |
461 | case X86::EH_RETURN64: { |
462 | OutMI = MCInst(); |
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
464 | break; |
465 | } |
466 | case X86::CLEANUPRET: { |
467 | // Replace CLEANUPRET with the appropriate RET. |
468 | OutMI = MCInst(); |
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
470 | break; |
471 | } |
472 | case X86::CATCHRET: { |
473 | // Replace CATCHRET with the appropriate RET. |
474 | const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); |
475 | unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX; |
476 | OutMI = MCInst(); |
477 | OutMI.setOpcode(getRetOpcode(Subtarget)); |
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
479 | break; |
480 | } |
  // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump
  // instruction.
483 | case X86::TAILJMPr: |
484 | case X86::TAILJMPr64: |
485 | case X86::TAILJMPr64_REX: |
486 | case X86::TAILJMPd: |
487 | case X86::TAILJMPd64: |
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
502 | break; |
503 | case X86::MASKMOVDQU: |
504 | case X86::VMASKMOVDQU: |
505 | if (In64BitMode) |
506 | OutMI.setFlags(X86::IP_HAS_AD_SIZE); |
507 | break; |
508 | case X86::BSF16rm: |
509 | case X86::BSF16rr: |
510 | case X86::BSF32rm: |
511 | case X86::BSF32rr: |
512 | case X86::BSF64rm: |
513 | case X86::BSF64rr: { |
    // Add a REP prefix to BSF instructions so that newer processors can
    // recognize them as TZCNT, which has better performance than BSF.
    // BSF and TZCNT interpret the ZF bit differently, so make sure the flag
    // result won't be used later.
    const MachineOperand *FlagDef =
        MI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
520 | if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead()) |
521 | OutMI.setFlags(X86::IP_HAS_REPEAT); |
522 | break; |
523 | } |
524 | default: |
525 | break; |
526 | } |
527 | } |
528 | |
529 | void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, |
530 | const MachineInstr &MI) { |
531 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
532 | bool Is64Bits = getSubtarget().is64Bit(); |
533 | bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64(); |
534 | MCContext &Ctx = OutStreamer->getContext(); |
535 | |
536 | MCSymbolRefExpr::VariantKind SRVK; |
537 | switch (MI.getOpcode()) { |
538 | case X86::TLS_addr32: |
539 | case X86::TLS_addr64: |
540 | case X86::TLS_addrX32: |
541 | SRVK = MCSymbolRefExpr::VK_TLSGD; |
542 | break; |
543 | case X86::TLS_base_addr32: |
544 | SRVK = MCSymbolRefExpr::VK_TLSLDM; |
545 | break; |
546 | case X86::TLS_base_addr64: |
547 | case X86::TLS_base_addrX32: |
548 | SRVK = MCSymbolRefExpr::VK_TLSLD; |
549 | break; |
550 | case X86::TLS_desc32: |
551 | case X86::TLS_desc64: |
552 | SRVK = MCSymbolRefExpr::VK_TLSDESC; |
553 | break; |
554 | default: |
555 | llvm_unreachable("unexpected opcode" ); |
556 | } |
557 | |
  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
560 | |
  // Before binutils 2.41, ld reports a bogus TLS relaxation error when it
  // attempts to relax a GD/LD code sequence that uses R_X86_64_GOTPCREL
  // (instead of R_X86_64_GOTPCRELX) to IE/LE (binutils PR24784). Work around
  // the bug by only going through the GOT when GOTPCRELX is enabled.
  // TODO: Delete the workaround when rustc no longer relies on the hack.
566 | bool UseGot = MMI->getModule()->getRtLibUseGOT() && |
567 | Ctx.getTargetOptions()->X86RelaxRelocations; |
568 | |
569 | if (SRVK == MCSymbolRefExpr::VK_TLSDESC) { |
570 | const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create( |
571 | Symbol: MCInstLowering.GetSymbolFromOperand(MO: MI.getOperand(i: 3)), |
572 | Kind: MCSymbolRefExpr::VK_TLSCALL, Ctx); |
573 | EmitAndCountInstruction( |
574 | Inst&: MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r) |
575 | .addReg(Reg: Is64BitsLP64 ? X86::RAX : X86::EAX) |
576 | .addReg(Reg: Is64Bits ? X86::RIP : X86::EBX) |
577 | .addImm(Val: 1) |
578 | .addReg(Reg: 0) |
579 | .addExpr(Val: Sym) |
580 | .addReg(Reg: 0)); |
581 | EmitAndCountInstruction( |
582 | Inst&: MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m) |
583 | .addReg(Reg: Is64BitsLP64 ? X86::RAX : X86::EAX) |
584 | .addImm(Val: 1) |
585 | .addReg(Reg: 0) |
586 | .addExpr(Val: Expr) |
587 | .addReg(Reg: 0)); |
588 | } else if (Is64Bits) { |
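    // For the x86-64 general-dynamic case this emits the canonical,
    // linker-relaxable sequence; e.g. the non-GOT LP64 form is:
    //   .byte 0x66
    //   leaq   sym@tlsgd(%rip), %rdi
    //   .byte 0x66, 0x66
    //   rex64  call __tls_get_addr@PLT
    // The prefix padding keeps the sequence at the fixed size the linker
    // expects when relaxing it to initial-exec or local-exec.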
589 | bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD; |
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
620 | } |
621 | } else { |
622 | if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) { |
623 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::LEA32r) |
624 | .addReg(Reg: X86::EAX) |
625 | .addReg(Reg: 0) |
626 | .addImm(Val: 1) |
627 | .addReg(Reg: X86::EBX) |
628 | .addExpr(Val: Sym) |
629 | .addReg(Reg: 0)); |
630 | } else { |
631 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::LEA32r) |
632 | .addReg(Reg: X86::EAX) |
633 | .addReg(Reg: X86::EBX) |
634 | .addImm(Val: 1) |
635 | .addReg(Reg: 0) |
636 | .addExpr(Val: Sym) |
637 | .addReg(Reg: 0)); |
638 | } |
639 | |
640 | const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol(Name: "___tls_get_addr" ); |
641 | if (UseGot) { |
642 | const MCExpr *Expr = |
643 | MCSymbolRefExpr::create(Symbol: TlsGetAddr, Kind: MCSymbolRefExpr::VK_GOT, Ctx); |
644 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL32m) |
645 | .addReg(Reg: X86::EBX) |
646 | .addImm(Val: 1) |
647 | .addReg(Reg: 0) |
648 | .addExpr(Val: Expr) |
649 | .addReg(Reg: 0)); |
650 | } else { |
651 | EmitAndCountInstruction( |
652 | Inst&: MCInstBuilder(X86::CALLpcrel32) |
653 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: TlsGetAddr, |
654 | Kind: MCSymbolRefExpr::VK_PLT, Ctx))); |
655 | } |
656 | } |
657 | } |
658 | |
659 | /// Emit the largest nop instruction smaller than or equal to \p NumBytes |
660 | /// bytes. Return the size of nop emitted. |
661 | static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, |
662 | const X86Subtarget *Subtarget) { |
  // Determine the longest nop which can be efficiently decoded for the given
  // target cpu. 15 bytes is the longest single NOP instruction, but some
  // platforms can't decode the longest forms efficiently.
  unsigned MaxNopLength = 1;
  if (Subtarget->is64Bit()) {
    // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
    // IndexReg/BaseReg below need to be updated.
    if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
      MaxNopLength = 7;
    else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
      MaxNopLength = 15;
    else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
      MaxNopLength = 11;
    else
      MaxNopLength = 10;
  }
  if (Subtarget->is32Bit())
    MaxNopLength = 2;

  // Cap a single nop emission at the profitable value for the target.
  NumBytes = std::min(NumBytes, MaxNopLength);
683 | |
684 | unsigned NopSize; |
685 | unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; |
686 | IndexReg = Displacement = SegmentReg = 0; |
687 | BaseReg = X86::RAX; |
688 | ScaleVal = 1; |
689 | switch (NumBytes) { |
690 | case 0: |
691 | llvm_unreachable("Zero nops?" ); |
692 | break; |
693 | case 1: |
694 | NopSize = 1; |
695 | Opc = X86::NOOP; |
696 | break; |
697 | case 2: |
698 | NopSize = 2; |
699 | Opc = X86::XCHG16ar; |
700 | break; |
701 | case 3: |
702 | NopSize = 3; |
703 | Opc = X86::NOOPL; |
704 | break; |
705 | case 4: |
706 | NopSize = 4; |
707 | Opc = X86::NOOPL; |
708 | Displacement = 8; |
709 | break; |
710 | case 5: |
711 | NopSize = 5; |
712 | Opc = X86::NOOPL; |
713 | Displacement = 8; |
714 | IndexReg = X86::RAX; |
715 | break; |
716 | case 6: |
717 | NopSize = 6; |
718 | Opc = X86::NOOPW; |
719 | Displacement = 8; |
720 | IndexReg = X86::RAX; |
721 | break; |
722 | case 7: |
723 | NopSize = 7; |
724 | Opc = X86::NOOPL; |
725 | Displacement = 512; |
726 | break; |
727 | case 8: |
728 | NopSize = 8; |
729 | Opc = X86::NOOPL; |
730 | Displacement = 512; |
731 | IndexReg = X86::RAX; |
732 | break; |
733 | case 9: |
734 | NopSize = 9; |
735 | Opc = X86::NOOPW; |
736 | Displacement = 512; |
737 | IndexReg = X86::RAX; |
738 | break; |
739 | default: |
740 | NopSize = 10; |
741 | Opc = X86::NOOPW; |
742 | Displacement = 512; |
743 | IndexReg = X86::RAX; |
744 | SegmentReg = X86::CS; |
745 | break; |
746 | } |
747 | |
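  // Widen the chosen nop with up to five 0x66 operand-size prefixes, which
  // stack harmlessly in front of it; e.g. an 11-byte request becomes one 0x66
  // prefix followed by the 10-byte NOPW form.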
  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.emitBytes("\x66");
752 | |
753 | switch (Opc) { |
754 | default: llvm_unreachable("Unexpected opcode" ); |
755 | case X86::NOOP: |
756 | OS.emitInstruction(Inst: MCInstBuilder(Opc), STI: *Subtarget); |
757 | break; |
758 | case X86::XCHG16ar: |
759 | OS.emitInstruction(Inst: MCInstBuilder(Opc).addReg(Reg: X86::AX).addReg(Reg: X86::AX), |
760 | STI: *Subtarget); |
761 | break; |
762 | case X86::NOOPL: |
763 | case X86::NOOPW: |
764 | OS.emitInstruction(Inst: MCInstBuilder(Opc) |
765 | .addReg(Reg: BaseReg) |
766 | .addImm(Val: ScaleVal) |
767 | .addReg(Reg: IndexReg) |
768 | .addImm(Val: Displacement) |
769 | .addReg(Reg: SegmentReg), |
770 | STI: *Subtarget); |
771 | break; |
772 | } |
773 | assert(NopSize <= NumBytes && "We overemitted?" ); |
774 | return NopSize; |
775 | } |
776 | |
777 | /// Emit the optimal amount of multi-byte nops on X86. |
778 | static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, |
779 | const X86Subtarget *Subtarget) { |
780 | unsigned NopsToEmit = NumBytes; |
781 | (void)NopsToEmit; |
782 | while (NumBytes) { |
783 | NumBytes -= emitNop(OS, NumBytes, Subtarget); |
784 | assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!" ); |
785 | } |
786 | } |
787 | |
788 | void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, |
789 | X86MCInstLower &MCIL) { |
790 | assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64" ); |
791 | |
792 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
793 | |
794 | StatepointOpers SOpers(&MI); |
795 | if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { |
796 | emitX86Nops(OS&: *OutStreamer, NumBytes: PatchBytes, Subtarget); |
797 | } else { |
798 | // Lower call target and choose correct opcode |
799 | const MachineOperand &CallTarget = SOpers.getCallTarget(); |
800 | MCOperand CallTargetMCOp; |
801 | unsigned CallOpcode; |
802 | switch (CallTarget.getType()) { |
803 | case MachineOperand::MO_GlobalAddress: |
804 | case MachineOperand::MO_ExternalSymbol: |
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
812 | break; |
813 | case MachineOperand::MO_Immediate: |
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
820 | break; |
821 | case MachineOperand::MO_Register: |
822 | // FIXME: Add retpoline support and remove this. |
823 | if (Subtarget->useIndirectThunkCalls()) |
824 | report_fatal_error(reason: "Lowering register statepoints with thunks not " |
825 | "yet implemented." ); |
826 | CallTargetMCOp = MCOperand::createReg(Reg: CallTarget.getReg()); |
827 | CallOpcode = X86::CALL64r; |
828 | break; |
829 | default: |
830 | llvm_unreachable("Unsupported operand type in statepoint call target" ); |
831 | break; |
832 | } |
833 | |
834 | // Emit call |
835 | MCInst CallInst; |
836 | CallInst.setOpcode(CallOpcode); |
837 | CallInst.addOperand(Op: CallTargetMCOp); |
838 | OutStreamer->emitInstruction(Inst: CallInst, STI: getSubtargetInfo()); |
839 | } |
840 | |
841 | // Record our statepoint node in the same section used by STACKMAP |
842 | // and PATCHPOINT |
843 | auto &Ctx = OutStreamer->getContext(); |
844 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
845 | OutStreamer->emitLabel(Symbol: MILabel); |
846 | SM.recordStatepoint(L: *MILabel, MI); |
847 | } |
848 | |
849 | void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, |
850 | X86MCInstLower &MCIL) { |
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>
853 | |
854 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
855 | |
856 | Register DefRegister = FaultingMI.getOperand(i: 0).getReg(); |
857 | FaultMaps::FaultKind FK = |
858 | static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(i: 1).getImm()); |
859 | MCSymbol *HandlerLabel = FaultingMI.getOperand(i: 2).getMBB()->getSymbol(); |
860 | unsigned Opcode = FaultingMI.getOperand(i: 3).getImm(); |
861 | unsigned OperandsBeginIdx = 4; |
862 | |
863 | auto &Ctx = OutStreamer->getContext(); |
864 | MCSymbol *FaultingLabel = Ctx.createTempSymbol(); |
865 | OutStreamer->emitLabel(Symbol: FaultingLabel); |
866 | |
867 | assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!" ); |
868 | FM.recordFaultingOp(FaultTy: FK, FaultingLabel, HandlerLabel); |
869 | |
870 | MCInst MI; |
871 | MI.setOpcode(Opcode); |
872 | |
873 | if (DefRegister != X86::NoRegister) |
874 | MI.addOperand(Op: MCOperand::createReg(Reg: DefRegister)); |
875 | |
876 | for (const MachineOperand &MO : |
877 | llvm::drop_begin(RangeOrContainer: FaultingMI.operands(), N: OperandsBeginIdx)) |
878 | if (auto Op = MCIL.LowerMachineOperand(MI: &FaultingMI, MO); Op.isValid()) |
879 | MI.addOperand(Op); |
880 | |
881 | OutStreamer->AddComment(T: "on-fault: " + HandlerLabel->getName()); |
882 | OutStreamer->emitInstruction(Inst: MI, STI: getSubtargetInfo()); |
883 | } |
884 | |
885 | void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, |
886 | X86MCInstLower &MCIL) { |
887 | bool Is64Bits = Subtarget->is64Bit(); |
888 | MCContext &Ctx = OutStreamer->getContext(); |
889 | MCSymbol *fentry = Ctx.getOrCreateSymbol(Name: "__fentry__" ); |
890 | const MCSymbolRefExpr *Op = |
891 | MCSymbolRefExpr::create(Symbol: fentry, Kind: MCSymbolRefExpr::VK_None, Ctx); |
892 | |
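  // This lowers to a bare `call __fentry__` (e.g. for code built with
  // -mfentry); unlike mcount-style instrumentation it is placed at the very
  // start of the function, before the prologue.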
893 | EmitAndCountInstruction( |
894 | Inst&: MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) |
895 | .addExpr(Val: Op)); |
896 | } |
897 | |
898 | void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { |
899 | assert(std::next(MI.getIterator())->isCall() && |
900 | "KCFI_CHECK not followed by a call instruction" ); |
901 | |
902 | // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop() |
903 | // returns a 1-byte X86::NOOP, which means the offset is the same in |
904 | // bytes. This assumes that patchable-function-prefix is the same for all |
905 | // functions. |
906 | const MachineFunction &MF = *MI.getMF(); |
907 | int64_t PrefixNops = 0; |
  (void)MF.getFunction()
      .getFnAttribute("patchable-function-prefix")
      .getValueAsString()
      .getAsInteger(10, PrefixNops);
912 | |
913 | // KCFI allows indirect calls to any location that's preceded by a valid |
914 | // type identifier. To avoid encoding the full constant into an instruction, |
915 | // and thus emitting potential call target gadgets at each indirect call |
916 | // site, load a negated constant to a register and compare that to the |
917 | // expected value at the call target. |
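  // The emitted check is roughly:
  //   movl $<-type>, %r10d            ; %r11d if the call target is in %r10
  //   addl -(PrefixNops + 4)(%target), %r10d
  //   je   .Lpass
  //   ud2                             ; trap location recorded for the runtime
  // .Lpass: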
918 | const Register AddrReg = MI.getOperand(i: 0).getReg(); |
919 | const uint32_t Type = MI.getOperand(i: 1).getImm(); |
920 | // The check is immediately before the call. If the call target is in R10, |
921 | // we can clobber R11 for the check instead. |
922 | unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D; |
923 | EmitAndCountInstruction( |
924 | Inst&: MCInstBuilder(X86::MOV32ri).addReg(Reg: TempReg).addImm(Val: -MaskKCFIType(Value: Type))); |
925 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::ADD32rm) |
926 | .addReg(Reg: X86::NoRegister) |
927 | .addReg(Reg: TempReg) |
928 | .addReg(Reg: AddrReg) |
929 | .addImm(Val: 1) |
930 | .addReg(Reg: X86::NoRegister) |
931 | .addImm(Val: -(PrefixNops + 4)) |
932 | .addReg(Reg: X86::NoRegister)); |
933 | |
934 | MCSymbol *Pass = OutContext.createTempSymbol(); |
935 | EmitAndCountInstruction( |
936 | Inst&: MCInstBuilder(X86::JCC_1) |
937 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: Pass, Ctx&: OutContext)) |
938 | .addImm(Val: X86::COND_E)); |
939 | |
940 | MCSymbol *Trap = OutContext.createTempSymbol(); |
941 | OutStreamer->emitLabel(Symbol: Trap); |
942 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::TRAP)); |
943 | emitKCFITrapEntry(MF, Symbol: Trap); |
944 | OutStreamer->emitLabel(Symbol: Pass); |
945 | } |
946 | |
947 | void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { |
948 | // FIXME: Make this work on non-ELF. |
949 | if (!TM.getTargetTriple().isOSBinFormatELF()) { |
950 | report_fatal_error(reason: "llvm.asan.check.memaccess only supported on ELF" ); |
951 | return; |
952 | } |
953 | |
954 | const auto &Reg = MI.getOperand(i: 0).getReg(); |
955 | ASanAccessInfo AccessInfo(MI.getOperand(i: 1).getImm()); |
956 | |
957 | uint64_t ShadowBase; |
958 | int MappingScale; |
959 | bool OrShadowOffset; |
960 | getAddressSanitizerParams(TargetTriple: Triple(TM.getTargetTriple()), LongSize: 64, |
961 | IsKasan: AccessInfo.CompileKernel, ShadowBase: &ShadowBase, |
962 | MappingScale: &MappingScale, OrShadowOffset: &OrShadowOffset); |
963 | |
964 | StringRef Name = AccessInfo.IsWrite ? "store" : "load" ; |
965 | StringRef Op = OrShadowOffset ? "or" : "add" ; |
966 | std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + |
967 | Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + |
968 | TM.getMCRegisterInfo()->getName(RegNo: Reg.asMCReg())) |
969 | .str(); |
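  // e.g. a 4-byte load checked through %rdi ends up calling the runtime
  // callback __asan_check_load_add_4_RDI.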
970 | if (OrShadowOffset) |
971 | report_fatal_error( |
972 | reason: "OrShadowOffset is not supported with optimized callbacks" ); |
973 | |
974 | EmitAndCountInstruction( |
975 | Inst&: MCInstBuilder(X86::CALL64pcrel32) |
976 | .addExpr(Val: MCSymbolRefExpr::create( |
977 | Symbol: OutContext.getOrCreateSymbol(Name: SymName), Ctx&: OutContext))); |
978 | } |
979 | |
980 | void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, |
981 | X86MCInstLower &MCIL) { |
982 | // PATCHABLE_OP minsize |
983 | |
984 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
985 | |
986 | auto NextMI = std::find_if(first: std::next(x: MI.getIterator()), |
987 | last: MI.getParent()->end().getInstrIterator(), |
988 | pred: [](auto &II) { return !II.isMetaInstruction(); }); |
989 | |
990 | SmallString<256> Code; |
991 | unsigned MinSize = MI.getOperand(i: 0).getImm(); |
992 | |
993 | if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) { |
994 | // Lower the next MachineInstr to find its byte size. |
995 | // If the next instruction is inline assembly, we skip lowering it for now, |
996 | // and assume we should always generate NOPs. |
997 | MCInst MCI; |
998 | MCIL.Lower(MI: &*NextMI, OutMI&: MCI); |
999 | |
1000 | SmallVector<MCFixup, 4> Fixups; |
1001 | CodeEmitter->encodeInstruction(Inst: MCI, CB&: Code, Fixups, STI: getSubtargetInfo()); |
1002 | } |
1003 | |
1004 | if (Code.size() < MinSize) { |
1005 | if (MinSize == 2 && Subtarget->is32Bit() && |
1006 | Subtarget->isTargetWindowsMSVC() && |
1007 | (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3" )) { |
      // For compatibility reasons, when targeting MSVC, it is important to
      // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some
      // tools rely specifically on this pattern to be able to patch a
      // function. This is only for 32-bit targets, when using /arch:IA32 or
      // /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
1015 | } else { |
1016 | unsigned NopSize = emitNop(OS&: *OutStreamer, NumBytes: MinSize, Subtarget); |
1017 | assert(NopSize == MinSize && "Could not implement MinSize!" ); |
1018 | (void)NopSize; |
1019 | } |
1020 | } |
1021 | } |
1022 | |
1023 | // Lower a stackmap of the form: |
1024 | // <id>, <shadowBytes>, ... |
1025 | void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { |
1026 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
1027 | |
1028 | auto &Ctx = OutStreamer->getContext(); |
1029 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
1030 | OutStreamer->emitLabel(Symbol: MILabel); |
1031 | |
1032 | SM.recordStackMap(L: *MILabel, MI); |
1033 | unsigned NumShadowBytes = MI.getOperand(i: 1).getImm(); |
1034 | SMShadowTracker.reset(RequiredSize: NumShadowBytes); |
1035 | } |
1036 | |
1037 | // Lower a patchpoint of the form: |
1038 | // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... |
1039 | void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, |
1040 | X86MCInstLower &MCIL) { |
1041 | assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64" ); |
1042 | |
1043 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
1044 | |
1045 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1046 | |
1047 | auto &Ctx = OutStreamer->getContext(); |
1048 | MCSymbol *MILabel = Ctx.createTempSymbol(); |
1049 | OutStreamer->emitLabel(Symbol: MILabel); |
1050 | SM.recordPatchPoint(L: *MILabel, MI); |
1051 | |
1052 | PatchPointOpers opers(&MI); |
1053 | unsigned ScratchIdx = opers.getNextScratchIdx(); |
1054 | unsigned EncodedBytes = 0; |
1055 | const MachineOperand &CalleeMO = opers.getCallTarget(); |
1056 | |
1057 | // Check for null target. If target is non-null (i.e. is non-zero or is |
1058 | // symbolic) then emit a call. |
1059 | if (!(CalleeMO.isImm() && !CalleeMO.getImm())) { |
1060 | MCOperand CalleeMCOp; |
1061 | switch (CalleeMO.getType()) { |
1062 | default: |
1063 | /// FIXME: Add a verifier check for bad callee types. |
1064 | llvm_unreachable("Unrecognized callee operand type." ); |
1065 | case MachineOperand::MO_Immediate: |
1066 | if (CalleeMO.getImm()) |
1067 | CalleeMCOp = MCOperand::createImm(Val: CalleeMO.getImm()); |
1068 | break; |
1069 | case MachineOperand::MO_ExternalSymbol: |
1070 | case MachineOperand::MO_GlobalAddress: |
1071 | CalleeMCOp = MCIL.LowerSymbolOperand(MO: CalleeMO, |
1072 | Sym: MCIL.GetSymbolFromOperand(MO: CalleeMO)); |
1073 | break; |
1074 | } |
1075 | |
1076 | // Emit MOV to materialize the target address and the CALL to target. |
1077 | // This is encoded with 12-13 bytes, depending on which register is used. |
1078 | Register ScratchReg = MI.getOperand(i: ScratchIdx).getReg(); |
1079 | if (X86II::isX86_64ExtendedReg(RegNo: ScratchReg)) |
1080 | EncodedBytes = 13; |
1081 | else |
1082 | EncodedBytes = 12; |
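    // A movabsq of the target into a low register is 10 bytes and the
    // indirect `call *%reg` is 2 bytes; using an extended register (r8-r15)
    // adds one REX prefix byte to the call, for 13 bytes total.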
1083 | |
1084 | EmitAndCountInstruction( |
1085 | Inst&: MCInstBuilder(X86::MOV64ri).addReg(Reg: ScratchReg).addOperand(Op: CalleeMCOp)); |
1086 | // FIXME: Add retpoline support and remove this. |
1087 | if (Subtarget->useIndirectThunkCalls()) |
1088 | report_fatal_error( |
1089 | reason: "Lowering patchpoint with thunks not yet implemented." ); |
1090 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL64r).addReg(Reg: ScratchReg)); |
1091 | } |
1092 | |
1093 | // Emit padding. |
1094 | unsigned NumBytes = opers.getNumPatchBytes(); |
1095 | assert(NumBytes >= EncodedBytes && |
1096 | "Patchpoint can't request size less than the length of a call." ); |
1097 | |
1098 | emitX86Nops(OS&: *OutStreamer, NumBytes: NumBytes - EncodedBytes, Subtarget); |
1099 | } |
1100 | |
1101 | void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, |
1102 | X86MCInstLower &MCIL) { |
1103 | assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64" ); |
1104 | |
1105 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1106 | |
1107 | // We want to emit the following pattern, which follows the x86 calling |
1108 | // convention to prepare for the trampoline call to be patched in. |
1109 | // |
1110 | // .p2align 1, ... |
1111 | // .Lxray_event_sled_N: |
1112 | // jmp +N // jump across the instrumentation sled |
1113 | // ... // set up arguments in register |
1114 | // callq __xray_CustomEvent@plt // force dependency to symbol |
1115 | // ... |
1116 | // <jump here> |
1117 | // |
1118 | // After patching, it would look something like: |
1119 | // |
1120 | // nopw (2-byte nop) |
1121 | // ... |
1122 | // callq __xrayCustomEvent // already lowered |
1123 | // ... |
1124 | // |
1125 | // --- |
1126 | // First we emit the label and the jump. |
1127 | auto CurSled = OutContext.createTempSymbol(Name: "xray_event_sled_" , AlwaysAddSuffix: true); |
1128 | OutStreamer->AddComment(T: "# XRay Custom Event Log" ); |
1129 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1130 | OutStreamer->emitLabel(Symbol: CurSled); |
1131 | |
1132 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1133 | // an operand (computed as an offset from the jmp instruction). |
  // FIXME: Find another, less hacky way to force the relative jump.
1135 | OutStreamer->emitBinaryData(Data: "\xeb\x0f" ); |
1136 | |
  // The SystemV C calling convention places the first two arguments in %rdi
  // and %rsi -- so those are the registers we populate below.
1139 | const Register DestRegs[] = {X86::RDI, X86::RSI}; |
1140 | bool UsedMask[] = {false, false}; |
1141 | // Filled out in loop. |
1142 | Register SrcRegs[] = {0, 0}; |
1143 | |
  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the registers before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct registers, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
1149 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1150 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I)); |
1151 | Op.isValid()) { |
1152 | assert(Op.isReg() && "Only support arguments in registers" ); |
1153 | SrcRegs[I] = getX86SubSuperRegister(Reg: Op.getReg(), Size: 64); |
1154 | assert(SrcRegs[I].isValid() && "Invalid operand" ); |
1155 | if (SrcRegs[I] != DestRegs[I]) { |
1156 | UsedMask[I] = true; |
1157 | EmitAndCountInstruction( |
1158 | Inst&: MCInstBuilder(X86::PUSH64r).addReg(Reg: DestRegs[I])); |
1159 | } else { |
1160 | emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget); |
1161 | } |
1162 | } |
1163 | |
1164 | // Now that the register values are stashed, mov arguments into place. |
1165 | // FIXME: This doesn't work if one of the later SrcRegs is equal to an |
1166 | // earlier DestReg. We will have already overwritten over the register before |
1167 | // we can copy from it. |
1168 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1169 | if (SrcRegs[I] != DestRegs[I]) |
1170 | EmitAndCountInstruction( |
1171 | Inst&: MCInstBuilder(X86::MOV64rr).addReg(Reg: DestRegs[I]).addReg(Reg: SrcRegs[I])); |
1172 | |
1173 | // We emit a hard dependency on the __xray_CustomEvent symbol, which is the |
1174 | // name of the trampoline to be implemented by the XRay runtime. |
1175 | auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_CustomEvent" ); |
1176 | MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym); |
1177 | if (isPositionIndependent()) |
1178 | TOp.setTargetFlags(X86II::MO_PLT); |
1179 | |
1180 | // Emit the call instruction. |
1181 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL64pcrel32) |
1182 | .addOperand(Op: MCIL.LowerSymbolOperand(MO: TOp, Sym: TSym))); |
1183 | |
1184 | // Restore caller-saved and used registers. |
1185 | for (unsigned I = sizeof UsedMask; I-- > 0;) |
1186 | if (UsedMask[I]) |
1187 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::POP64r).addReg(Reg: DestRegs[I])); |
1188 | else |
1189 | emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget); |
1190 | |
1191 | OutStreamer->AddComment(T: "xray custom event end." ); |
1192 | |
1193 | // Record the sled version. Version 0 of this sled was spelled differently, so |
1194 | // we let the runtime handle the different offsets we're using. Version 2 |
1195 | // changed the absolute address to a PC-relative address. |
1196 | recordSled(Sled: CurSled, MI, Kind: SledKind::CUSTOM_EVENT, Version: 2); |
1197 | } |
1198 | |
1199 | void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, |
1200 | X86MCInstLower &MCIL) { |
1201 | assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64" ); |
1202 | |
1203 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1204 | |
1205 | // We want to emit the following pattern, which follows the x86 calling |
1206 | // convention to prepare for the trampoline call to be patched in. |
1207 | // |
1208 | // .p2align 1, ... |
1209 | // .Lxray_event_sled_N: |
1210 | // jmp +N // jump across the instrumentation sled |
1211 | // ... // set up arguments in register |
1212 | // callq __xray_TypedEvent@plt // force dependency to symbol |
1213 | // ... |
1214 | // <jump here> |
1215 | // |
1216 | // After patching, it would look something like: |
1217 | // |
1218 | // nopw (2-byte nop) |
1219 | // ... |
1220 | // callq __xrayTypedEvent // already lowered |
1221 | // ... |
1222 | // |
1223 | // --- |
1224 | // First we emit the label and the jump. |
1225 | auto CurSled = OutContext.createTempSymbol(Name: "xray_typed_event_sled_" , AlwaysAddSuffix: true); |
1226 | OutStreamer->AddComment(T: "# XRay Typed Event Log" ); |
1227 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1228 | OutStreamer->emitLabel(Symbol: CurSled); |
1229 | |
1230 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1231 | // an operand (computed as an offset from the jmp instruction). |
  // FIXME: Find another, less hacky way to force the relative jump.
1233 | OutStreamer->emitBinaryData(Data: "\xeb\x14" ); |
1234 | |
  // An x86-64 convention may place three arguments into %rcx, %rdx, and %r8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
1238 | const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX}; |
1239 | bool UsedMask[] = {false, false, false}; |
1240 | |
1241 | // Will fill out src regs in the loop. |
1242 | Register SrcRegs[] = {0, 0, 0}; |
1243 | |
1244 | // Then we put the operands in the SystemV registers. We spill the values in |
1245 | // the registers before we clobber them, and mark them as used in UsedMask. |
1246 | // In case the arguments are already in the correct register, we emit nops |
1247 | // appropriately sized to keep the sled the same size in every situation. |
1248 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1249 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO: MI.getOperand(i: I)); |
1250 | Op.isValid()) { |
1251 | // TODO: Is register only support adequate? |
1252 | assert(Op.isReg() && "Only supports arguments in registers" ); |
1253 | SrcRegs[I] = getX86SubSuperRegister(Reg: Op.getReg(), Size: 64); |
1254 | assert(SrcRegs[I].isValid() && "Invalid operand" ); |
1255 | if (SrcRegs[I] != DestRegs[I]) { |
1256 | UsedMask[I] = true; |
1257 | EmitAndCountInstruction( |
1258 | Inst&: MCInstBuilder(X86::PUSH64r).addReg(Reg: DestRegs[I])); |
1259 | } else { |
1260 | emitX86Nops(OS&: *OutStreamer, NumBytes: 4, Subtarget); |
1261 | } |
1262 | } |
1263 | |
  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. The actual moving
  // is postponed until after all the registers are stashed so nothing gets
  // clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry
  // about emitting movs.
1270 | // FIXME: This doesn't work if one of the later SrcRegs is equal to an |
1271 | // earlier DestReg. We will have already overwritten over the register before |
1272 | // we can copy from it. |
1273 | for (unsigned I = 0; I < MI.getNumOperands(); ++I) |
1274 | if (UsedMask[I]) |
1275 | EmitAndCountInstruction( |
1276 | Inst&: MCInstBuilder(X86::MOV64rr).addReg(Reg: DestRegs[I]).addReg(Reg: SrcRegs[I])); |
1277 | |
1278 | // We emit a hard dependency on the __xray_TypedEvent symbol, which is the |
1279 | // name of the trampoline to be implemented by the XRay runtime. |
1280 | auto TSym = OutContext.getOrCreateSymbol(Name: "__xray_TypedEvent" ); |
1281 | MachineOperand TOp = MachineOperand::CreateMCSymbol(Sym: TSym); |
1282 | if (isPositionIndependent()) |
1283 | TOp.setTargetFlags(X86II::MO_PLT); |
1284 | |
1285 | // Emit the call instruction. |
1286 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CALL64pcrel32) |
1287 | .addOperand(Op: MCIL.LowerSymbolOperand(MO: TOp, Sym: TSym))); |
1288 | |
1289 | // Restore caller-saved and used registers. |
1290 | for (unsigned I = sizeof UsedMask; I-- > 0;) |
1291 | if (UsedMask[I]) |
1292 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::POP64r).addReg(Reg: DestRegs[I])); |
1293 | else |
1294 | emitX86Nops(OS&: *OutStreamer, NumBytes: 1, Subtarget); |
1295 | |
1296 | OutStreamer->AddComment(T: "xray typed event end." ); |
1297 | |
1298 | // Record the sled version. |
1299 | recordSled(Sled: CurSled, MI, Kind: SledKind::TYPED_EVENT, Version: 2); |
1300 | } |
1301 | |
1302 | void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, |
1303 | X86MCInstLower &MCIL) { |
1304 | |
1305 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1306 | |
1307 | const Function &F = MF->getFunction(); |
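  // If the function asks for plain patchable-entry padding (e.g. it was built
  // with -fpatchable-function-entry=N), emit N bytes of nops instead of an
  // XRay sled.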
  if (F.hasFnAttribute("patchable-function-entry")) {
    unsigned Num;
    if (F.getFnAttribute("patchable-function-entry")
            .getValueAsString()
            .getAsInteger(10, Num))
      return;
    emitX86Nops(*OutStreamer, Num, Subtarget);
    return;
1316 | } |
1317 | // We want to emit the following pattern: |
1318 | // |
1319 | // .p2align 1, ... |
1320 | // .Lxray_sled_N: |
1321 | // jmp .tmpN |
1322 | // # 9 bytes worth of noops |
1323 | // |
1324 | // We need the 9 bytes because at runtime, we'd be patching over the full 11 |
1325 | // bytes with the following pattern: |
1326 | // |
1327 | // mov %r10, <function id, 32-bit> // 6 bytes |
1328 | // call <relative offset, 32-bits> // 5 bytes |
1329 | // |
1330 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1331 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1332 | OutStreamer->emitLabel(Symbol: CurSled); |
1333 | |
1334 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1335 | // an operand (computed as an offset from the jmp instruction). |
  // FIXME: Find another, less hacky way to force the relative jump.
1337 | OutStreamer->emitBytes(Data: "\xeb\x09" ); |
1338 | emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget); |
1339 | recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_ENTER, Version: 2); |
1340 | } |
1341 | |
1342 | void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, |
1343 | X86MCInstLower &MCIL) { |
1344 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1345 | |
1346 | // Since PATCHABLE_RET takes the opcode of the return statement as an |
1347 | // argument, we use that to emit the correct form of the RET that we want. |
1348 | // i.e. when we see this: |
1349 | // |
1350 | // PATCHABLE_RET X86::RET ... |
1351 | // |
1352 | // We should emit the RET followed by sleds. |
1353 | // |
1354 | // .p2align 1, ... |
1355 | // .Lxray_sled_N: |
1356 | // ret # or equivalent instruction |
1357 | // # 10 bytes worth of noops |
1358 | // |
1359 | // This just makes sure that the alignment for the next instruction is 2. |
1360 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1361 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1362 | OutStreamer->emitLabel(Symbol: CurSled); |
1363 | unsigned OpCode = MI.getOperand(i: 0).getImm(); |
1364 | MCInst Ret; |
1365 | Ret.setOpcode(OpCode); |
1366 | for (auto &MO : drop_begin(RangeOrContainer: MI.operands())) |
1367 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO); Op.isValid()) |
1368 | Ret.addOperand(Op); |
1369 | OutStreamer->emitInstruction(Inst: Ret, STI: getSubtargetInfo()); |
1370 | emitX86Nops(OS&: *OutStreamer, NumBytes: 10, Subtarget); |
1371 | recordSled(Sled: CurSled, MI, Kind: SledKind::FUNCTION_EXIT, Version: 2); |
1372 | } |
1373 | |
1374 | void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, |
1375 | X86MCInstLower &MCIL) { |
1376 | MCInst TC; |
1377 | TC.setOpcode(convertTailJumpOpcode(Opcode: MI.getOperand(i: 0).getImm())); |
1378 | // Drop the tail jump opcode. |
1379 | auto TCOperands = drop_begin(RangeOrContainer: MI.operands()); |
1380 | bool IsConditional = TC.getOpcode() == X86::JCC_1; |
1381 | MCSymbol *FallthroughLabel; |
1382 | if (IsConditional) { |
1383 | // Rewrite: |
1384 | // je target |
1385 | // |
1386 | // To: |
1387 | // jne .fallthrough |
1388 | // .p2align 1, ... |
1389 | // .Lxray_sled_N: |
1390 | // SLED_CODE |
1391 | // jmp target |
1392 | // .fallthrough: |
1393 | FallthroughLabel = OutContext.createTempSymbol(); |
1394 | EmitToStreamer( |
1395 | S&: *OutStreamer, |
1396 | Inst: MCInstBuilder(X86::JCC_1) |
1397 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: FallthroughLabel, Ctx&: OutContext)) |
1398 | .addImm(Val: X86::GetOppositeBranchCondition( |
1399 | CC: static_cast<X86::CondCode>(MI.getOperand(i: 2).getImm())))); |
1400 | TC.setOpcode(X86::JMP_1); |
1401 | // Drop the condition code. |
1402 | TCOperands = drop_end(RangeOrContainer&: TCOperands); |
1403 | } |
1404 | |
1405 | NoAutoPaddingScope NoPadScope(*OutStreamer); |
1406 | |
1407 | // Like PATCHABLE_RET, we have the actual instruction in the operands to this |
1408 | // instruction so we lower that particular instruction and its operands. |
1409 | // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how |
1410 | // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to |
1411 | // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual |
1412 | // tail call much like how we have it in PATCHABLE_RET. |
1413 | auto CurSled = OutContext.createTempSymbol(Name: "xray_sled_" , AlwaysAddSuffix: true); |
1414 | OutStreamer->emitCodeAlignment(Alignment: Align(2), STI: &getSubtargetInfo()); |
1415 | OutStreamer->emitLabel(Symbol: CurSled); |
1416 | auto Target = OutContext.createTempSymbol(); |
1417 | |
1418 | // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as |
1419 | // an operand (computed as an offset from the jmp instruction). |
1420 | // FIXME: Find another less hacky way to force the relative jump.
1421 | OutStreamer->emitBytes(Data: "\xeb\x09" ); |
1422 | emitX86Nops(OS&: *OutStreamer, NumBytes: 9, Subtarget); |
1423 | OutStreamer->emitLabel(Symbol: Target); |
1424 | recordSled(Sled: CurSled, MI, Kind: SledKind::TAIL_CALL, Version: 2); |
1425 | |
1426 | // Before emitting the instruction, add a comment to indicate that this is |
1427 | // indeed a tail call. |
1428 | OutStreamer->AddComment(T: "TAILCALL" ); |
1429 | for (auto &MO : TCOperands) |
1430 | if (auto Op = MCIL.LowerMachineOperand(MI: &MI, MO); Op.isValid()) |
1431 | TC.addOperand(Op); |
1432 | OutStreamer->emitInstruction(Inst: TC, STI: getSubtargetInfo()); |
1433 | |
1434 | if (IsConditional) |
1435 | OutStreamer->emitLabel(Symbol: FallthroughLabel); |
1436 | } |
1437 | |
1438 | // Returns the instruction preceding MBBI in its MachineFunction.
1439 | // If MBBI is the first instruction of the first basic block, returns null. |
1440 | static MachineBasicBlock::const_iterator |
1441 | PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { |
1442 | const MachineBasicBlock *MBB = MBBI->getParent(); |
1443 | while (MBBI == MBB->begin()) { |
1444 | if (MBB == &MBB->getParent()->front()) |
1445 | return MachineBasicBlock::const_iterator(); |
1446 | MBB = MBB->getPrevNode(); |
1447 | MBBI = MBB->end(); |
1448 | } |
1449 | --MBBI; |
1450 | return MBBI; |
1451 | } |
1452 | |
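// Adjust SrcIdx to skip the AVX-512 write-mask operand (and, for merge
// masking, the passthru operand) so that it indexes the intended source
// operand of a possibly-masked instruction.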
1453 | static unsigned getSrcIdx(const MachineInstr *MI, unsigned SrcIdx) {
1454 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1455 | // Skip mask operand. |
1456 | ++SrcIdx; |
1457 | if (X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1458 | // Skip passthru operand. |
1459 | ++SrcIdx; |
1460 | } |
1461 | } |
1462 | return SrcIdx; |
1463 | } |
1464 | |
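// Print the destination register name, with the AVX-512 write-mask
// annotation ("{%kN}", plus "{z}" for zero-masking) when MI is masked.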
1465 | static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI, |
1466 | unsigned SrcOpIdx) { |
1467 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1468 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()); |
1469 | |
1470 | // Handle AVX512 MASK/MASKZ write mask comments.
1471 | // MASK: zmmX {%kY} |
1472 | // MASKZ: zmmX {%kY} {z} |
1473 | if (X86II::isKMasked(TSFlags: MI->getDesc().TSFlags)) { |
1474 | const MachineOperand &WriteMaskOp = MI->getOperand(i: SrcOpIdx - 1); |
1475 | StringRef Mask = X86ATTInstPrinter::getRegisterName(Reg: WriteMaskOp.getReg()); |
1476 | CS << " {%" << Mask << "}" ; |
1477 | if (!X86II::isKMergeMasked(TSFlags: MI->getDesc().TSFlags)) { |
1478 | CS << " {z}" ; |
1479 | } |
1480 | } |
1481 | } |
1482 | |
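// Print a decoded shuffle mask, grouping runs of elements drawn from the same
// source into spans, e.g. "xmm1[0,1],mem[2,3]" (illustrative); zeroed lanes
// print as "zero" and undef lanes as "u".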
1483 | static void printShuffleMask(raw_ostream &CS, StringRef Src1Name, |
1484 | StringRef Src2Name, ArrayRef<int> Mask) { |
1485 | // One source operand, fix the mask to print all elements in one span. |
1486 | SmallVector<int, 8> ShuffleMask(Mask); |
1487 | if (Src1Name == Src2Name) |
1488 | for (int i = 0, e = ShuffleMask.size(); i != e; ++i) |
1489 | if (ShuffleMask[i] >= e) |
1490 | ShuffleMask[i] -= e; |
1491 | |
1492 | for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { |
1493 | if (i != 0) |
1494 | CS << "," ; |
1495 | if (ShuffleMask[i] == SM_SentinelZero) { |
1496 | CS << "zero" ; |
1497 | continue; |
1498 | } |
1499 | |
1500 | // Otherwise, it must come from src1 or src2. Print the span of elements |
1501 | // that comes from this src. |
1502 | bool isSrc1 = ShuffleMask[i] < (int)e; |
1503 | CS << (isSrc1 ? Src1Name : Src2Name) << '['; |
1504 | |
1505 | bool IsFirst = true; |
1506 | while (i != e && ShuffleMask[i] != SM_SentinelZero && |
1507 | (ShuffleMask[i] < (int)e) == isSrc1) { |
1508 | if (!IsFirst) |
1509 | CS << ','; |
1510 | else |
1511 | IsFirst = false; |
1512 | if (ShuffleMask[i] == SM_SentinelUndef) |
1513 | CS << "u" ; |
1514 | else |
1515 | CS << ShuffleMask[i] % (int)e; |
1516 | ++i; |
1517 | } |
1518 | CS << ']'; |
1519 | --i; // For loop increments element #. |
1520 | } |
1521 | } |
1522 | |
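// Build a "<dst> = <shuffle>" comment string describing the shuffle MI
// performs on the two given source operands (register names, or "mem" for a
// memory operand).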
1523 | static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1524 | unsigned SrcOp2Idx, ArrayRef<int> Mask) { |
1525 | std::string Comment;
1526 | |
1527 | const MachineOperand &SrcOp1 = MI->getOperand(i: SrcOp1Idx); |
1528 | const MachineOperand &SrcOp2 = MI->getOperand(i: SrcOp2Idx); |
1529 | StringRef Src1Name = SrcOp1.isReg() |
1530 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp1.getReg()) |
1531 | : "mem" ; |
1532 | StringRef Src2Name = SrcOp2.isReg() |
1533 | ? X86ATTInstPrinter::getRegisterName(Reg: SrcOp2.getReg()) |
1534 | : "mem" ; |
1535 | |
1536 | raw_string_ostream CS(Comment); |
1537 | printDstRegisterName(CS, MI, SrcOpIdx: SrcOp1Idx); |
1538 | CS << " = " ; |
1539 | printShuffleMask(CS, Src1Name, Src2Name, Mask); |
1540 | CS.flush(); |
1541 | |
1542 | return Comment; |
1543 | } |
1544 | |
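// Print an integer constant; values wider than 64 bits are printed word by
// word as "(w0,w1,...)". With PrintZero set, zeros of the same shape are
// printed instead of the actual value.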
1545 | static void printConstant(const APInt &Val, raw_ostream &CS, |
1546 | bool PrintZero = false) { |
1547 | if (Val.getBitWidth() <= 64) { |
1548 | CS << (PrintZero ? 0ULL : Val.getZExtValue()); |
1549 | } else { |
1550 | // print multi-word constant as (w0,w1) |
1551 | CS << "(" ; |
1552 | for (int i = 0, N = Val.getNumWords(); i < N; ++i) { |
1553 | if (i > 0) |
1554 | CS << "," ; |
1555 | CS << (PrintZero ? 0ULL : Val.getRawData()[i]); |
1556 | } |
1557 | CS << ")" ; |
1558 | } |
1559 | } |
1560 | |
1561 | static void printConstant(const APFloat &Flt, raw_ostream &CS, |
1562 | bool PrintZero = false) { |
1563 | SmallString<32> Str; |
1564 | // Force scientific notation to distinguish from integers. |
1565 | if (PrintZero) |
1566 | APFloat::getZero(Sem: Flt.getSemantics()).toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1567 | else |
1568 | Flt.toString(Str, FormatPrecision: 0, FormatMaxPadding: 0); |
1569 | CS << Str; |
1570 | } |
1571 | |
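// Print the contents of a Constant element by element, covering at most
// BitWidth bits; unsupported constant kinds print as "?".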
1572 | static void printConstant(const Constant *COp, unsigned BitWidth, |
1573 | raw_ostream &CS, bool PrintZero = false) { |
1574 | if (isa<UndefValue>(Val: COp)) { |
1575 | CS << "u" ; |
1576 | } else if (auto *CI = dyn_cast<ConstantInt>(Val: COp)) { |
1577 | printConstant(Val: CI->getValue(), CS, PrintZero); |
1578 | } else if (auto *CF = dyn_cast<ConstantFP>(Val: COp)) { |
1579 | printConstant(Flt: CF->getValueAPF(), CS, PrintZero); |
1580 | } else if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: COp)) { |
1581 | Type *EltTy = CDS->getElementType(); |
1582 | bool IsInteger = EltTy->isIntegerTy(); |
1583 | bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); |
1584 | unsigned EltBits = EltTy->getPrimitiveSizeInBits(); |
1585 | unsigned E = std::min(a: BitWidth / EltBits, b: CDS->getNumElements()); |
1586 | assert((BitWidth % EltBits) == 0 && "Element size mismatch" ); |
1587 | for (unsigned I = 0; I != E; ++I) { |
1588 | if (I != 0) |
1589 | CS << "," ; |
1590 | if (IsInteger) |
1591 | printConstant(Val: CDS->getElementAsAPInt(i: I), CS, PrintZero); |
1592 | else if (IsFP) |
1593 | printConstant(Flt: CDS->getElementAsAPFloat(i: I), CS, PrintZero); |
1594 | else |
1595 | CS << "?" ; |
1596 | } |
1597 | } else if (auto *CV = dyn_cast<ConstantVector>(Val: COp)) { |
1598 | unsigned EltBits = CV->getType()->getScalarSizeInBits(); |
1599 | unsigned E = std::min(a: BitWidth / EltBits, b: CV->getNumOperands()); |
1600 | assert((BitWidth % EltBits) == 0 && "Element size mismatch" ); |
1601 | for (unsigned I = 0; I != E; ++I) { |
1602 | if (I != 0) |
1603 | CS << "," ; |
1604 | printConstant(COp: CV->getOperand(i_nocapture: I), BitWidth: EltBits, CS, PrintZero); |
1605 | } |
1606 | } else { |
1607 | CS << "?" ; |
1608 | } |
1609 | } |
1610 | |
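// Comment a scalar load that zeroes the upper vector lanes. If the source is
// a constant-pool load, print the constant followed by zeroed lanes, e.g.
// "xmm0 = [2.5E+0,0,0,0]" (illustrative); otherwise fall back to the given
// shuffle-style comment such as "mem[0],zero,zero,zero".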
1611 | static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, |
1612 | int SclWidth, int VecWidth, |
1613 | const char *ShuffleComment) {
1614 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1615 | |
1616 | std::string Comment;
1617 | raw_string_ostream CS(Comment); |
1618 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1619 | CS << " = " ; |
1620 | |
1621 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1622 | CS << "[" ; |
1623 | printConstant(COp: C, BitWidth: SclWidth, CS); |
1624 | for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) { |
1625 | CS << "," ; |
1626 | printConstant(COp: C, BitWidth: SclWidth, CS, PrintZero: true); |
1627 | } |
1628 | CS << "]" ; |
1629 | OutStreamer.AddComment(T: CS.str()); |
1630 | return; // early-out |
1631 | } |
1632 | |
1633 | // We didn't find a constant load, so fall back to a shuffle mask decode.
1634 | CS << ShuffleComment; |
1635 | OutStreamer.AddComment(T: CS.str()); |
1636 | } |
1637 | |
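// Comment a (broadcast) load from the constant pool by printing the decoded
// constant, repeated Repeats times to cover the destination vector width.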
1638 | static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, |
1639 | int Repeats, int BitWidth) { |
1640 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1641 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx)) { |
1642 | std::string Comment;
1643 | raw_string_ostream CS(Comment); |
1644 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1645 | CS << " = [" ; |
1646 | for (int l = 0; l != Repeats; ++l) { |
1647 | if (l != 0) |
1648 | CS << "," ; |
1649 | printConstant(COp: C, BitWidth, CS); |
1650 | } |
1651 | CS << "]" ; |
1652 | OutStreamer.AddComment(T: CS.str()); |
1653 | } |
1654 | } |
1655 | |
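// Comment a sign- or zero-extending load whose source is a constant-pool
// constant with SrcEltBits-wide elements; returns true if a comment was
// emitted.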
1656 | static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1657 | int SrcEltBits, int DstEltBits, bool IsSext) { |
1658 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1659 | auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx); |
1660 | if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) { |
1661 | if (auto *CDS = dyn_cast<ConstantDataSequential>(Val: C)) { |
1662 | int NumElts = CDS->getNumElements(); |
1663 | std::string Comment;
1664 | raw_string_ostream CS(Comment); |
1665 | printDstRegisterName(CS, MI, SrcOpIdx: SrcIdx); |
1666 | CS << " = [" ; |
1667 | for (int i = 0; i != NumElts; ++i) { |
1668 | if (i != 0) |
1669 | CS << "," ; |
1670 | if (CDS->getElementType()->isIntegerTy()) { |
1671 | APInt Elt = CDS->getElementAsAPInt(i); |
1672 | Elt = IsSext ? Elt.sext(width: DstEltBits) : Elt.zext(width: DstEltBits); |
1673 | printConstant(Val: Elt, CS); |
1674 | } else |
1675 | CS << "?" ; |
1676 | } |
1677 | CS << "]" ; |
1678 | OutStreamer.AddComment(T: CS.str()); |
1679 | return true; |
1680 | } |
1681 | } |
1682 | |
1683 | return false; |
1684 | } |
1685 | static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1686 | int SrcEltBits, int DstEltBits) { |
1687 | printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: true); |
1688 | } |
1689 | static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer, |
1690 | int SrcEltBits, int DstEltBits) { |
1691 | if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, IsSext: false)) |
1692 | return; |
1693 | |
1694 | // We didn't find a constant load, so fall back to a shuffle mask decode.
1695 | std::string Comment;
1696 | raw_string_ostream CS(Comment); |
1697 | printDstRegisterName(CS, MI, SrcOpIdx: getSrcIdx(MI, SrcIdx: 1)); |
1698 | CS << " = " ; |
1699 | |
1700 | SmallVector<int> Mask; |
1701 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1702 | assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 && |
1703 | "Illegal extension ratio" ); |
1704 | DecodeZeroExtendMask(SrcScalarBits: SrcEltBits, DstScalarBits: DstEltBits, NumDstElts: Width / DstEltBits, IsAnyExtend: false, ShuffleMask&: Mask); |
1705 | printShuffleMask(CS, Src1Name: "mem" , Src2Name: "" , Mask); |
1706 | |
1707 | OutStreamer.AddComment(T: CS.str()); |
1708 | } |
1709 | |
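// Lower SEH_* pseudo instructions either to .cv_fpo_* directives (when
// emitting CodeView FPO data on 32-bit x86) or to the corresponding .seh_*
// WinCFI directives.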
1710 | void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { |
1711 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
1712 | assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) && |
1713 | "SEH_ instruction Windows and UEFI only" ); |
1714 | |
1715 | // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. |
1716 | if (EmitFPOData) { |
1717 | X86TargetStreamer *XTS = |
1718 | static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); |
1719 | switch (MI->getOpcode()) { |
1720 | case X86::SEH_PushReg: |
1721 | XTS->emitFPOPushReg(Reg: MI->getOperand(i: 0).getImm()); |
1722 | break; |
1723 | case X86::SEH_StackAlloc: |
1724 | XTS->emitFPOStackAlloc(StackAlloc: MI->getOperand(i: 0).getImm()); |
1725 | break; |
1726 | case X86::SEH_StackAlign: |
1727 | XTS->emitFPOStackAlign(Align: MI->getOperand(i: 0).getImm()); |
1728 | break; |
1729 | case X86::SEH_SetFrame: |
1730 | assert(MI->getOperand(1).getImm() == 0 && |
1731 | ".cv_fpo_setframe takes no offset" ); |
1732 | XTS->emitFPOSetFrame(Reg: MI->getOperand(i: 0).getImm()); |
1733 | break; |
1734 | case X86::SEH_EndPrologue: |
1735 | XTS->emitFPOEndPrologue(); |
1736 | break; |
1737 | case X86::SEH_SaveReg: |
1738 | case X86::SEH_SaveXMM: |
1739 | case X86::SEH_PushFrame: |
1740 | llvm_unreachable("SEH_ directive incompatible with FPO" ); |
1741 | break; |
1742 | default: |
1743 | llvm_unreachable("expected SEH_ instruction" ); |
1744 | } |
1745 | return; |
1746 | } |
1747 | |
1748 | // Otherwise, use the .seh_ directives for all other Windows platforms. |
1749 | switch (MI->getOpcode()) { |
1750 | case X86::SEH_PushReg: |
1751 | OutStreamer->emitWinCFIPushReg(Register: MI->getOperand(i: 0).getImm()); |
1752 | break; |
1753 | |
1754 | case X86::SEH_SaveReg: |
1755 | OutStreamer->emitWinCFISaveReg(Register: MI->getOperand(i: 0).getImm(), |
1756 | Offset: MI->getOperand(i: 1).getImm()); |
1757 | break; |
1758 | |
1759 | case X86::SEH_SaveXMM: |
1760 | OutStreamer->emitWinCFISaveXMM(Register: MI->getOperand(i: 0).getImm(), |
1761 | Offset: MI->getOperand(i: 1).getImm()); |
1762 | break; |
1763 | |
1764 | case X86::SEH_StackAlloc: |
1765 | OutStreamer->emitWinCFIAllocStack(Size: MI->getOperand(i: 0).getImm()); |
1766 | break; |
1767 | |
1768 | case X86::SEH_SetFrame: |
1769 | OutStreamer->emitWinCFISetFrame(Register: MI->getOperand(i: 0).getImm(), |
1770 | Offset: MI->getOperand(i: 1).getImm()); |
1771 | break; |
1772 | |
1773 | case X86::SEH_PushFrame: |
1774 | OutStreamer->emitWinCFIPushFrame(Code: MI->getOperand(i: 0).getImm()); |
1775 | break; |
1776 | |
1777 | case X86::SEH_EndPrologue: |
1778 | OutStreamer->emitWinCFIEndProlog(); |
1779 | break; |
1780 | |
1781 | default: |
1782 | llvm_unreachable("expected SEH_ instruction" ); |
1783 | } |
1784 | } |
1785 | |
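// For instructions that load from the constant pool, add a verbose-asm
// comment showing the loaded constant (shuffle masks, broadcasts, extends,
// etc.), since the value is not visible as an immediate at the MC layer.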
1786 | static void addConstantComments(const MachineInstr *MI,
1787 | MCStreamer &OutStreamer) { |
1788 | switch (MI->getOpcode()) { |
1789 | // Lower PSHUFB and VPERMILP normally but add a comment if we can find |
1790 | // a constant shuffle mask. We won't be able to do this at the MC layer |
1791 | // because the mask isn't an immediate. |
1792 | case X86::PSHUFBrm: |
1793 | case X86::VPSHUFBrm: |
1794 | case X86::VPSHUFBYrm: |
1795 | case X86::VPSHUFBZ128rm: |
1796 | case X86::VPSHUFBZ128rmk: |
1797 | case X86::VPSHUFBZ128rmkz: |
1798 | case X86::VPSHUFBZ256rm: |
1799 | case X86::VPSHUFBZ256rmk: |
1800 | case X86::VPSHUFBZ256rmkz: |
1801 | case X86::VPSHUFBZrm: |
1802 | case X86::VPSHUFBZrmk: |
1803 | case X86::VPSHUFBZrmkz: { |
1804 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1805 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1806 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1807 | SmallVector<int, 64> Mask; |
1808 | DecodePSHUFBMask(C, Width, ShuffleMask&: Mask); |
1809 | if (!Mask.empty()) |
1810 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1811 | } |
1812 | break; |
1813 | } |
1814 | |
1815 | case X86::VPERMILPSrm: |
1816 | case X86::VPERMILPSYrm: |
1817 | case X86::VPERMILPSZ128rm: |
1818 | case X86::VPERMILPSZ128rmk: |
1819 | case X86::VPERMILPSZ128rmkz: |
1820 | case X86::VPERMILPSZ256rm: |
1821 | case X86::VPERMILPSZ256rmk: |
1822 | case X86::VPERMILPSZ256rmkz: |
1823 | case X86::VPERMILPSZrm: |
1824 | case X86::VPERMILPSZrmk: |
1825 | case X86::VPERMILPSZrmkz: { |
1826 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1827 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1828 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1829 | SmallVector<int, 16> Mask; |
1830 | DecodeVPERMILPMask(C, ElSize: 32, Width, ShuffleMask&: Mask); |
1831 | if (!Mask.empty()) |
1832 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1833 | } |
1834 | break; |
1835 | } |
1836 | case X86::VPERMILPDrm: |
1837 | case X86::VPERMILPDYrm: |
1838 | case X86::VPERMILPDZ128rm: |
1839 | case X86::VPERMILPDZ128rmk: |
1840 | case X86::VPERMILPDZ128rmkz: |
1841 | case X86::VPERMILPDZ256rm: |
1842 | case X86::VPERMILPDZ256rmk: |
1843 | case X86::VPERMILPDZ256rmkz: |
1844 | case X86::VPERMILPDZrm: |
1845 | case X86::VPERMILPDZrmk: |
1846 | case X86::VPERMILPDZrmkz: { |
1847 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1848 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1849 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1850 | SmallVector<int, 16> Mask; |
1851 | DecodeVPERMILPMask(C, ElSize: 64, Width, ShuffleMask&: Mask); |
1852 | if (!Mask.empty()) |
1853 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: SrcIdx, SrcOp2Idx: SrcIdx, Mask)); |
1854 | } |
1855 | break; |
1856 | } |
1857 | |
1858 | case X86::VPERMIL2PDrm: |
1859 | case X86::VPERMIL2PSrm: |
1860 | case X86::VPERMIL2PDYrm: |
1861 | case X86::VPERMIL2PSYrm: { |
1862 | assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && |
1863 | "Unexpected number of operands!" ); |
1864 | |
1865 | const MachineOperand &CtrlOp = MI->getOperand(i: MI->getNumOperands() - 1); |
1866 | if (!CtrlOp.isImm()) |
1867 | break; |
1868 | |
1869 | unsigned ElSize; |
1870 | switch (MI->getOpcode()) { |
1871 | default: llvm_unreachable("Invalid opcode" ); |
1872 | case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break; |
1873 | case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; |
1874 | } |
1875 | |
1876 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1877 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1878 | SmallVector<int, 16> Mask; |
1879 | DecodeVPERMIL2PMask(C, M2Z: (unsigned)CtrlOp.getImm(), ElSize, Width, ShuffleMask&: Mask); |
1880 | if (!Mask.empty()) |
1881 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1882 | } |
1883 | break; |
1884 | } |
1885 | |
1886 | case X86::VPPERMrrm: { |
1887 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 3)) { |
1888 | unsigned Width = X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1889 | SmallVector<int, 16> Mask; |
1890 | DecodeVPPERMMask(C, Width, ShuffleMask&: Mask); |
1891 | if (!Mask.empty()) |
1892 | OutStreamer.AddComment(T: getShuffleComment(MI, SrcOp1Idx: 1, SrcOp2Idx: 2, Mask)); |
1893 | } |
1894 | break; |
1895 | } |
1896 | |
1897 | case X86::MMX_MOVQ64rm: { |
1898 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: 1)) { |
1899 | std::string Comment;
1900 | raw_string_ostream CS(Comment); |
1901 | const MachineOperand &DstOp = MI->getOperand(i: 0); |
1902 | CS << X86ATTInstPrinter::getRegisterName(Reg: DstOp.getReg()) << " = " ; |
1903 | if (auto *CF = dyn_cast<ConstantFP>(Val: C)) { |
1904 | CS << "0x" << toString(I: CF->getValueAPF().bitcastToAPInt(), Radix: 16, Signed: false); |
1905 | OutStreamer.AddComment(T: CS.str()); |
1906 | } |
1907 | } |
1908 | break; |
1909 | } |
1910 | |
1911 | #define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \ |
1912 | case X86::Prefix##Instr##Suffix##rm##Postfix: |
1913 | |
1914 | #define CASE_ARITH_RM(Instr) \ |
1915 | INSTR_CASE(, Instr, , ) /* SSE */ \ |
1916 | INSTR_CASE(V, Instr, , ) /* AVX-128 */ \ |
1917 | INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \ |
1918 | INSTR_CASE(V, Instr, Z128, ) \ |
1919 | INSTR_CASE(V, Instr, Z128, k) \ |
1920 | INSTR_CASE(V, Instr, Z128, kz) \ |
1921 | INSTR_CASE(V, Instr, Z256, ) \ |
1922 | INSTR_CASE(V, Instr, Z256, k) \ |
1923 | INSTR_CASE(V, Instr, Z256, kz) \ |
1924 | INSTR_CASE(V, Instr, Z, ) \ |
1925 | INSTR_CASE(V, Instr, Z, k) \ |
1926 | INSTR_CASE(V, Instr, Z, kz) |
1927 | |
1928 | // TODO: Add additional instructions when useful. |
1929 | CASE_ARITH_RM(PMADDUBSW) { |
1930 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1931 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1932 | if (C->getType()->getScalarSizeInBits() == 8) { |
1933 | std::string Comment;
1934 | raw_string_ostream CS(Comment); |
1935 | unsigned VectorWidth = |
1936 | X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1937 | CS << "[" ; |
1938 | printConstant(COp: C, BitWidth: VectorWidth, CS); |
1939 | CS << "]" ; |
1940 | OutStreamer.AddComment(T: CS.str()); |
1941 | } |
1942 | } |
1943 | break; |
1944 | } |
1945 | |
1946 | CASE_ARITH_RM(PMADDWD) |
1947 | CASE_ARITH_RM(PMULLW) |
1948 | CASE_ARITH_RM(PMULHW) |
1949 | CASE_ARITH_RM(PMULHUW) |
1950 | CASE_ARITH_RM(PMULHRSW) { |
1951 | unsigned SrcIdx = getSrcIdx(MI, SrcIdx: 1); |
1952 | if (auto *C = X86::getConstantFromPool(MI: *MI, OpNo: SrcIdx + 1)) { |
1953 | if (C->getType()->getScalarSizeInBits() == 16) { |
1954 | std::string Comment;
1955 | raw_string_ostream CS(Comment); |
1956 | unsigned VectorWidth = |
1957 | X86::getVectorRegisterWidth(Info: MI->getDesc().operands()[0]); |
1958 | CS << "[" ; |
1959 | printConstant(COp: C, BitWidth: VectorWidth, CS); |
1960 | CS << "]" ; |
1961 | OutStreamer.AddComment(T: CS.str()); |
1962 | } |
1963 | } |
1964 | break; |
1965 | } |
1966 | |
1967 | #define MASK_AVX512_CASE(Instr) \ |
1968 | case Instr: \ |
1969 | case Instr##k: \ |
1970 | case Instr##kz: |
1971 | |
1972 | case X86::MOVSDrm: |
1973 | case X86::VMOVSDrm: |
1974 | MASK_AVX512_CASE(X86::VMOVSDZrm) |
1975 | case X86::MOVSDrm_alt: |
1976 | case X86::VMOVSDrm_alt: |
1977 | case X86::VMOVSDZrm_alt: |
1978 | case X86::MOVQI2PQIrm: |
1979 | case X86::VMOVQI2PQIrm: |
1980 | case X86::VMOVQI2PQIZrm: |
1981 | printZeroUpperMove(MI, OutStreamer, SclWidth: 64, VecWidth: 128, ShuffleComment: "mem[0],zero" ); |
1982 | break; |
1983 | |
1984 | MASK_AVX512_CASE(X86::VMOVSHZrm) |
1985 | case X86::VMOVSHZrm_alt: |
1986 | printZeroUpperMove(MI, OutStreamer, SclWidth: 16, VecWidth: 128, |
1987 | ShuffleComment: "mem[0],zero,zero,zero,zero,zero,zero,zero" ); |
1988 | break; |
1989 | |
1990 | case X86::MOVSSrm: |
1991 | case X86::VMOVSSrm: |
1992 | MASK_AVX512_CASE(X86::VMOVSSZrm) |
1993 | case X86::MOVSSrm_alt: |
1994 | case X86::VMOVSSrm_alt: |
1995 | case X86::VMOVSSZrm_alt: |
1996 | case X86::MOVDI2PDIrm: |
1997 | case X86::VMOVDI2PDIrm: |
1998 | case X86::VMOVDI2PDIZrm: |
1999 | printZeroUpperMove(MI, OutStreamer, SclWidth: 32, VecWidth: 128, ShuffleComment: "mem[0],zero,zero,zero" ); |
2000 | break; |
2001 | |
2002 | #define MOV_CASE(Prefix, Suffix) \ |
2003 | case X86::Prefix##MOVAPD##Suffix##rm: \ |
2004 | case X86::Prefix##MOVAPS##Suffix##rm: \ |
2005 | case X86::Prefix##MOVUPD##Suffix##rm: \ |
2006 | case X86::Prefix##MOVUPS##Suffix##rm: \ |
2007 | case X86::Prefix##MOVDQA##Suffix##rm: \ |
2008 | case X86::Prefix##MOVDQU##Suffix##rm: |
2009 | |
2010 | #define MOV_AVX512_CASE(Suffix, Postfix) \ |
2011 | case X86::VMOVDQA64##Suffix##rm##Postfix: \ |
2012 | case X86::VMOVDQA32##Suffix##rm##Postfix: \ |
2013 | case X86::VMOVDQU64##Suffix##rm##Postfix: \ |
2014 | case X86::VMOVDQU32##Suffix##rm##Postfix: \ |
2015 | case X86::VMOVDQU16##Suffix##rm##Postfix: \ |
2016 | case X86::VMOVDQU8##Suffix##rm##Postfix: \ |
2017 | case X86::VMOVAPS##Suffix##rm##Postfix: \ |
2018 | case X86::VMOVAPD##Suffix##rm##Postfix: \ |
2019 | case X86::VMOVUPS##Suffix##rm##Postfix: \ |
2020 | case X86::VMOVUPD##Suffix##rm##Postfix: |
2021 | |
2022 | #define CASE_128_MOV_RM() \ |
2023 | MOV_CASE(, ) /* SSE */ \ |
2024 | MOV_CASE(V, ) /* AVX-128 */ \ |
2025 | MOV_AVX512_CASE(Z128, ) \ |
2026 | MOV_AVX512_CASE(Z128, k) \ |
2027 | MOV_AVX512_CASE(Z128, kz) |
2028 | |
2029 | #define CASE_256_MOV_RM() \ |
2030 | MOV_CASE(V, Y) /* AVX-256 */ \ |
2031 | MOV_AVX512_CASE(Z256, ) \ |
2032 | MOV_AVX512_CASE(Z256, k) \ |
2033 | MOV_AVX512_CASE(Z256, kz) \ |
2034 | |
2035 | #define CASE_512_MOV_RM() \ |
2036 | MOV_AVX512_CASE(Z, ) \ |
2037 | MOV_AVX512_CASE(Z, k) \ |
2038 | MOV_AVX512_CASE(Z, kz) \ |
2039 | |
2040 | // For loads from a constant pool to a vector register, print the constant |
2041 | // loaded. |
2042 | CASE_128_MOV_RM() |
2043 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 128); |
2044 | break; |
2045 | CASE_256_MOV_RM() |
2046 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 256); |
2047 | break; |
2048 | CASE_512_MOV_RM() |
2049 | printBroadcast(MI, OutStreamer, Repeats: 1, BitWidth: 512); |
2050 | break; |
2051 | case X86::VBROADCASTF128rm: |
2052 | case X86::VBROADCASTI128rm: |
2053 | MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm) |
2054 | MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm) |
2055 | MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm) |
2056 | MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm) |
2057 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 128); |
2058 | break; |
2059 | MASK_AVX512_CASE(X86::VBROADCASTF32X4rm) |
2060 | MASK_AVX512_CASE(X86::VBROADCASTF64X2rm) |
2061 | MASK_AVX512_CASE(X86::VBROADCASTI32X4rm) |
2062 | MASK_AVX512_CASE(X86::VBROADCASTI64X2rm) |
2063 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 128); |
2064 | break; |
2065 | MASK_AVX512_CASE(X86::VBROADCASTF32X8rm) |
2066 | MASK_AVX512_CASE(X86::VBROADCASTF64X4rm) |
2067 | MASK_AVX512_CASE(X86::VBROADCASTI32X8rm) |
2068 | MASK_AVX512_CASE(X86::VBROADCASTI64X4rm) |
2069 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 256); |
2070 | break; |
2071 | |
2072 | // For broadcast loads from a constant pool to a vector register, repeatedly |
2073 | // print the constant loaded. |
2074 | case X86::MOVDDUPrm: |
2075 | case X86::VMOVDDUPrm: |
2076 | MASK_AVX512_CASE(X86::VMOVDDUPZ128rm) |
2077 | case X86::VPBROADCASTQrm: |
2078 | MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm) |
2079 | printBroadcast(MI, OutStreamer, Repeats: 2, BitWidth: 64); |
2080 | break; |
2081 | case X86::VBROADCASTSDYrm: |
2082 | MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm) |
2083 | case X86::VPBROADCASTQYrm: |
2084 | MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm) |
2085 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 64); |
2086 | break; |
2087 | MASK_AVX512_CASE(X86::VBROADCASTSDZrm) |
2088 | MASK_AVX512_CASE(X86::VPBROADCASTQZrm) |
2089 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 64); |
2090 | break; |
2091 | case X86::VBROADCASTSSrm: |
2092 | MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm) |
2093 | case X86::VPBROADCASTDrm: |
2094 | MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm) |
2095 | printBroadcast(MI, OutStreamer, Repeats: 4, BitWidth: 32); |
2096 | break; |
2097 | case X86::VBROADCASTSSYrm: |
2098 | MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm) |
2099 | case X86::VPBROADCASTDYrm: |
2100 | MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm) |
2101 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 32); |
2102 | break; |
2103 | MASK_AVX512_CASE(X86::VBROADCASTSSZrm) |
2104 | MASK_AVX512_CASE(X86::VPBROADCASTDZrm) |
2105 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 32); |
2106 | break; |
2107 | case X86::VPBROADCASTWrm: |
2108 | MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm) |
2109 | printBroadcast(MI, OutStreamer, Repeats: 8, BitWidth: 16); |
2110 | break; |
2111 | case X86::VPBROADCASTWYrm: |
2112 | MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm) |
2113 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 16); |
2114 | break; |
2115 | MASK_AVX512_CASE(X86::VPBROADCASTWZrm) |
2116 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 16); |
2117 | break; |
2118 | case X86::VPBROADCASTBrm: |
2119 | MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm) |
2120 | printBroadcast(MI, OutStreamer, Repeats: 16, BitWidth: 8); |
2121 | break; |
2122 | case X86::VPBROADCASTBYrm: |
2123 | MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm) |
2124 | printBroadcast(MI, OutStreamer, Repeats: 32, BitWidth: 8); |
2125 | break; |
2126 | MASK_AVX512_CASE(X86::VPBROADCASTBZrm) |
2127 | printBroadcast(MI, OutStreamer, Repeats: 64, BitWidth: 8); |
2128 | break; |
2129 | |
2130 | #define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \ |
2131 | case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix: |
2132 | |
2133 | #define CASE_MOVX_RM(Ext, Type) \ |
2134 | MOVX_CASE(, Ext, Type, , ) \ |
2135 | MOVX_CASE(V, Ext, Type, , ) \ |
2136 | MOVX_CASE(V, Ext, Type, Y, ) \ |
2137 | MOVX_CASE(V, Ext, Type, Z128, ) \ |
2138 | MOVX_CASE(V, Ext, Type, Z128, k ) \ |
2139 | MOVX_CASE(V, Ext, Type, Z128, kz ) \ |
2140 | MOVX_CASE(V, Ext, Type, Z256, ) \ |
2141 | MOVX_CASE(V, Ext, Type, Z256, k ) \ |
2142 | MOVX_CASE(V, Ext, Type, Z256, kz ) \ |
2143 | MOVX_CASE(V, Ext, Type, Z, ) \ |
2144 | MOVX_CASE(V, Ext, Type, Z, k ) \ |
2145 | MOVX_CASE(V, Ext, Type, Z, kz ) |
2146 | |
2147 | CASE_MOVX_RM(SX, BD) |
2148 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2149 | break; |
2150 | CASE_MOVX_RM(SX, BQ) |
2151 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2152 | break; |
2153 | CASE_MOVX_RM(SX, BW) |
2154 | printSignExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2155 | break; |
2156 | CASE_MOVX_RM(SX, DQ) |
2157 | printSignExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2158 | break; |
2159 | CASE_MOVX_RM(SX, WD) |
2160 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2161 | break; |
2162 | CASE_MOVX_RM(SX, WQ) |
2163 | printSignExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2164 | break; |
2165 | |
2166 | CASE_MOVX_RM(ZX, BD) |
2167 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 32); |
2168 | break; |
2169 | CASE_MOVX_RM(ZX, BQ) |
2170 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 64); |
2171 | break; |
2172 | CASE_MOVX_RM(ZX, BW) |
2173 | printZeroExtend(MI, OutStreamer, SrcEltBits: 8, DstEltBits: 16); |
2174 | break; |
2175 | CASE_MOVX_RM(ZX, DQ) |
2176 | printZeroExtend(MI, OutStreamer, SrcEltBits: 32, DstEltBits: 64); |
2177 | break; |
2178 | CASE_MOVX_RM(ZX, WD) |
2179 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 32); |
2180 | break; |
2181 | CASE_MOVX_RM(ZX, WQ) |
2182 | printZeroExtend(MI, OutStreamer, SrcEltBits: 16, DstEltBits: 64); |
2183 | break; |
2184 | } |
2185 | } |
2186 | |
2187 | void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { |
2188 | // FIXME: Enable feature predicate checks once all the tests pass.
2189 | // X86_MC::verifyInstructionPredicates(MI->getOpcode(), |
2190 | // Subtarget->getFeatureBits()); |
2191 | |
2192 | X86MCInstLower MCInstLowering(*MF, *this); |
2193 | const X86RegisterInfo *RI = |
2194 | MF->getSubtarget<X86Subtarget>().getRegisterInfo(); |
2195 | |
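// Remember whether the function references swift_async_extendedFramePointerFlags
// so that the symbol can later be emitted with weak linkage (see
// ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags).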
2196 | if (MI->getOpcode() == X86::OR64rm) { |
2197 | for (auto &Opd : MI->operands()) { |
2198 | if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) == |
2199 | "swift_async_extendedFramePointerFlags" ) { |
2200 | ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true; |
2201 | } |
2202 | } |
2203 | } |
2204 | |
2205 | // Add comments for values loaded from constant pool. |
2206 | if (OutStreamer->isVerboseAsm()) |
2207 | addConstantComments(MI, OutStreamer&: *OutStreamer); |
2208 | |
2209 | // Add a comment about EVEX compression |
2210 | if (TM.Options.MCOptions.ShowMCEncoding) { |
2211 | if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY) |
2212 | OutStreamer->AddComment(T: "EVEX TO LEGACY Compression " , EOL: false); |
2213 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) |
2214 | OutStreamer->AddComment(T: "EVEX TO VEX Compression " , EOL: false); |
2215 | else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX) |
2216 | OutStreamer->AddComment(T: "EVEX TO EVEX Compression " , EOL: false); |
2217 | } |
2218 | |
2219 | switch (MI->getOpcode()) { |
2220 | case TargetOpcode::DBG_VALUE: |
2221 | llvm_unreachable("Should be handled target independently" ); |
2222 | |
2223 | case X86::EH_RETURN: |
2224 | case X86::EH_RETURN64: { |
2225 | // Lower these as normal, but add some comments. |
2226 | Register Reg = MI->getOperand(i: 0).getReg(); |
2227 | OutStreamer->AddComment(T: StringRef("eh_return, addr: %" ) + |
2228 | X86ATTInstPrinter::getRegisterName(Reg)); |
2229 | break; |
2230 | } |
2231 | case X86::CLEANUPRET: { |
2232 | // Lower these as normal, but add some comments. |
2233 | OutStreamer->AddComment(T: "CLEANUPRET" ); |
2234 | break; |
2235 | } |
2236 | |
2237 | case X86::CATCHRET: { |
2238 | // Lower these as normal, but add some comments. |
2239 | OutStreamer->AddComment(T: "CATCHRET" ); |
2240 | break; |
2241 | } |
2242 | |
2243 | case X86::ENDBR32: |
2244 | case X86::ENDBR64: { |
2245 | // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for |
2246 | // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be |
2247 | // non-empty. If MI is the initial ENDBR, place the |
2248 | // __patchable_function_entries label after ENDBR. |
2249 | if (CurrentPatchableFunctionEntrySym && |
2250 | CurrentPatchableFunctionEntrySym == CurrentFnBegin && |
2251 | MI == &MF->front().front()) { |
2252 | MCInst Inst; |
2253 | MCInstLowering.Lower(MI, OutMI&: Inst); |
2254 | EmitAndCountInstruction(Inst); |
2255 | CurrentPatchableFunctionEntrySym = createTempSymbol(Name: "patch" ); |
2256 | OutStreamer->emitLabel(Symbol: CurrentPatchableFunctionEntrySym); |
2257 | return; |
2258 | } |
2259 | break; |
2260 | } |
2261 | |
2262 | case X86::TAILJMPd64: |
2263 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(Reg: X86::R11)) |
2264 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CS_PREFIX)); |
2265 | [[fallthrough]]; |
2266 | case X86::TAILJMPr: |
2267 | case X86::TAILJMPm: |
2268 | case X86::TAILJMPd: |
2269 | case X86::TAILJMPd_CC: |
2270 | case X86::TAILJMPr64: |
2271 | case X86::TAILJMPm64: |
2272 | case X86::TAILJMPd64_CC: |
2273 | case X86::TAILJMPr64_REX: |
2274 | case X86::TAILJMPm64_REX: |
2275 | // Lower these as normal, but add some comments. |
2276 | OutStreamer->AddComment(T: "TAILCALL" ); |
2277 | break; |
2278 | |
2279 | case X86::TLS_addr32: |
2280 | case X86::TLS_addr64: |
2281 | case X86::TLS_addrX32: |
2282 | case X86::TLS_base_addr32: |
2283 | case X86::TLS_base_addr64: |
2284 | case X86::TLS_base_addrX32: |
2285 | case X86::TLS_desc32: |
2286 | case X86::TLS_desc64: |
2287 | return LowerTlsAddr(MCInstLowering, MI: *MI); |
2288 | |
2289 | case X86::MOVPC32r: { |
2290 | // This is a pseudo op for a two-instruction sequence with a label, which
2291 | // looks like:
2292 | // call "L1$pb" |
2293 | // "L1$pb": |
2294 | // popl %esi |
2295 | |
2296 | // Emit the call. |
2297 | MCSymbol *PICBase = MF->getPICBaseSymbol(); |
2298 | // FIXME: We would like an efficient form for this, so we don't have to do a |
2299 | // lot of extra uniquing. |
2300 | EmitAndCountInstruction( |
2301 | Inst&: MCInstBuilder(X86::CALLpcrel32) |
2302 | .addExpr(Val: MCSymbolRefExpr::create(Symbol: PICBase, Ctx&: OutContext))); |
2303 | |
2304 | const X86FrameLowering *FrameLowering = |
2305 | MF->getSubtarget<X86Subtarget>().getFrameLowering(); |
2306 | bool hasFP = FrameLowering->hasFP(MF: *MF); |
2307 | |
2308 | // TODO: This is needed only if we require precise CFA. |
2309 | bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && |
2310 | !OutStreamer->getDwarfFrameInfos().back().End; |
2311 | |
2312 | int stackGrowth = -RI->getSlotSize(); |
2313 | |
2314 | if (HasActiveDwarfFrame && !hasFP) { |
2315 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: -stackGrowth); |
2316 | MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); |
2317 | } |
2318 | |
2319 | // Emit the label. |
2320 | OutStreamer->emitLabel(Symbol: PICBase); |
2321 | |
2322 | // popl $reg |
2323 | EmitAndCountInstruction( |
2324 | Inst&: MCInstBuilder(X86::POP32r).addReg(Reg: MI->getOperand(i: 0).getReg())); |
2325 | |
2326 | if (HasActiveDwarfFrame && !hasFP) { |
2327 | OutStreamer->emitCFIAdjustCfaOffset(Adjustment: stackGrowth); |
2328 | } |
2329 | return; |
2330 | } |
2331 | |
2332 | case X86::ADD32ri: { |
2333 | // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. |
2334 | if (MI->getOperand(i: 2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) |
2335 | break; |
2336 | |
2337 | // Okay, we have something like: |
2338 | // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) |
2339 | |
2340 | // For this, we want to print something like: |
2341 | // MYGLOBAL + (. - PICBASE) |
2342 | // However, we can't generate a ".", so just emit a new label here and refer |
2343 | // to it. |
2344 | MCSymbol *DotSym = OutContext.createTempSymbol(); |
2345 | OutStreamer->emitLabel(Symbol: DotSym); |
2346 | |
2347 | // Now that we have emitted the label, lower the complex operand expression. |
2348 | MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MO: MI->getOperand(i: 2)); |
2349 | |
2350 | const MCExpr *DotExpr = MCSymbolRefExpr::create(Symbol: DotSym, Ctx&: OutContext); |
2351 | const MCExpr *PICBase = |
2352 | MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx&: OutContext); |
2353 | DotExpr = MCBinaryExpr::createSub(LHS: DotExpr, RHS: PICBase, Ctx&: OutContext); |
2354 | |
2355 | DotExpr = MCBinaryExpr::createAdd( |
2356 | LHS: MCSymbolRefExpr::create(Symbol: OpSym, Ctx&: OutContext), RHS: DotExpr, Ctx&: OutContext); |
2357 | |
2358 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::ADD32ri) |
2359 | .addReg(Reg: MI->getOperand(i: 0).getReg()) |
2360 | .addReg(Reg: MI->getOperand(i: 1).getReg()) |
2361 | .addExpr(Val: DotExpr)); |
2362 | return; |
2363 | } |
2364 | case TargetOpcode::STATEPOINT: |
2365 | return LowerSTATEPOINT(MI: *MI, MCIL&: MCInstLowering); |
2366 | |
2367 | case TargetOpcode::FAULTING_OP: |
2368 | return LowerFAULTING_OP(FaultingMI: *MI, MCIL&: MCInstLowering); |
2369 | |
2370 | case TargetOpcode::FENTRY_CALL: |
2371 | return LowerFENTRY_CALL(MI: *MI, MCIL&: MCInstLowering); |
2372 | |
2373 | case TargetOpcode::PATCHABLE_OP: |
2374 | return LowerPATCHABLE_OP(MI: *MI, MCIL&: MCInstLowering); |
2375 | |
2376 | case TargetOpcode::STACKMAP: |
2377 | return LowerSTACKMAP(MI: *MI); |
2378 | |
2379 | case TargetOpcode::PATCHPOINT: |
2380 | return LowerPATCHPOINT(MI: *MI, MCIL&: MCInstLowering); |
2381 | |
2382 | case TargetOpcode::PATCHABLE_FUNCTION_ENTER: |
2383 | return LowerPATCHABLE_FUNCTION_ENTER(MI: *MI, MCIL&: MCInstLowering); |
2384 | |
2385 | case TargetOpcode::PATCHABLE_RET: |
2386 | return LowerPATCHABLE_RET(MI: *MI, MCIL&: MCInstLowering); |
2387 | |
2388 | case TargetOpcode::PATCHABLE_TAIL_CALL: |
2389 | return LowerPATCHABLE_TAIL_CALL(MI: *MI, MCIL&: MCInstLowering); |
2390 | |
2391 | case TargetOpcode::PATCHABLE_EVENT_CALL: |
2392 | return LowerPATCHABLE_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2393 | |
2394 | case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: |
2395 | return LowerPATCHABLE_TYPED_EVENT_CALL(MI: *MI, MCIL&: MCInstLowering); |
2396 | |
2397 | case X86::MORESTACK_RET: |
2398 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2399 | return; |
2400 | |
2401 | case X86::KCFI_CHECK: |
2402 | return LowerKCFI_CHECK(MI: *MI); |
2403 | |
2404 | case X86::ASAN_CHECK_MEMACCESS: |
2405 | return LowerASAN_CHECK_MEMACCESS(MI: *MI); |
2406 | |
2407 | case X86::MORESTACK_RET_RESTORE_R10: |
2408 | // Return, then restore R10. |
2409 | EmitAndCountInstruction(Inst&: MCInstBuilder(getRetOpcode(Subtarget: *Subtarget))); |
2410 | EmitAndCountInstruction( |
2411 | Inst&: MCInstBuilder(X86::MOV64rr).addReg(Reg: X86::R10).addReg(Reg: X86::RAX)); |
2412 | return; |
2413 | |
2414 | case X86::SEH_PushReg: |
2415 | case X86::SEH_SaveReg: |
2416 | case X86::SEH_SaveXMM: |
2417 | case X86::SEH_StackAlloc: |
2418 | case X86::SEH_StackAlign: |
2419 | case X86::SEH_SetFrame: |
2420 | case X86::SEH_PushFrame: |
2421 | case X86::SEH_EndPrologue: |
2422 | EmitSEHInstruction(MI); |
2423 | return; |
2424 | |
2425 | case X86::SEH_Epilogue: { |
2426 | assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?" ); |
2427 | MachineBasicBlock::const_iterator MBBI(MI); |
2428 | // Check if preceded by a call and emit nop if so. |
2429 | for (MBBI = PrevCrossBBInst(MBBI); |
2430 | MBBI != MachineBasicBlock::const_iterator(); |
2431 | MBBI = PrevCrossBBInst(MBBI)) { |
2432 | // Pseudo instructions that aren't a call are assumed to not emit any
2433 | // code. If they do, the worst case is that we generate unnecessary noops
2434 | // after a call.
2435 | if (MBBI->isCall() || !MBBI->isPseudo()) { |
2436 | if (MBBI->isCall()) |
2437 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::NOOP)); |
2438 | break; |
2439 | } |
2440 | } |
2441 | return; |
2442 | } |
2443 | case X86::UBSAN_UD1: |
2444 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::UD1Lm) |
2445 | .addReg(Reg: X86::EAX) |
2446 | .addReg(Reg: X86::EAX) |
2447 | .addImm(Val: 1) |
2448 | .addReg(Reg: X86::NoRegister) |
2449 | .addImm(Val: MI->getOperand(i: 0).getImm()) |
2450 | .addReg(Reg: X86::NoRegister)); |
2451 | return; |
2452 | case X86::CALL64pcrel32: |
2453 | if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(Reg: X86::R11)) |
2454 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::CS_PREFIX)); |
2455 | break; |
2456 | case X86::JCC_1: |
2457 | // Two instruction prefixes (2EH for branch not-taken and 3EH for branch
2458 | // taken) are used as branch hints. Here we add the branch-taken prefix to
2459 | // a conditional jump whose probability exceeds the threshold.
2460 | if (getSubtarget().hasBranchHint() && EnableBranchHint) { |
2461 | const MachineBranchProbabilityInfo *MBPI = |
2462 | &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); |
2463 | MachineBasicBlock *DestBB = MI->getOperand(i: 0).getMBB(); |
2464 | BranchProbability EdgeProb = |
2465 | MBPI->getEdgeProbability(Src: MI->getParent(), Dst: DestBB); |
2466 | BranchProbability Threshold(BranchHintProbabilityThreshold, 100); |
2467 | if (EdgeProb > Threshold) |
2468 | EmitAndCountInstruction(Inst&: MCInstBuilder(X86::DS_PREFIX)); |
2469 | } |
2470 | break; |
2471 | } |
2472 | |
2473 | MCInst TmpInst; |
2474 | MCInstLowering.Lower(MI, OutMI&: TmpInst); |
2475 | |
2476 | // Stackmap shadows cannot include branch targets, so we can count the bytes |
2477 | // in a call towards the shadow, but must ensure that no thread returns
2478 | // into the stackmap shadow. The only way to achieve this is if the call
2479 | // is at the end of the shadow. |
2480 | if (MI->isCall()) { |
2481 | // Count the size of the call towards the shadow.
2482 | SMShadowTracker.count(Inst&: TmpInst, STI: getSubtargetInfo(), CodeEmitter: CodeEmitter.get()); |
2483 | // Then flush the shadow so that we fill with nops before the call, not |
2484 | // after it. |
2485 | SMShadowTracker.emitShadowPadding(OutStreamer&: *OutStreamer, STI: getSubtargetInfo()); |
2486 | // Then emit the call |
2487 | OutStreamer->emitInstruction(Inst: TmpInst, STI: getSubtargetInfo()); |
2488 | return; |
2489 | } |
2490 | |
2491 | EmitAndCountInstruction(Inst&: TmpInst); |
2492 | } |
2493 | |