//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }

private:
  void expandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandMBB(MachineBasicBlock &MBB);

  /// This function expands pseudos which affect control flow.
  /// It is done in a separate pass to simplify block navigation in the main
  /// pass (which calls expandMBB).
  bool expandPseudosWhichAffectControlFlow(MachineFunction &MF);

  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of XMM copying instructions,
  /// placed in a separate block guarded by a check of the %al register (for
  /// the System V ABI).
  void expandVastartSaveXmmRegs(
      MachineBasicBlock *EntryBlk,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)

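// Expand the ICALL_BRANCH_FUNNEL pseudo, which is produced for the
// llvm.icall.branch.funnel intrinsic (used by whole-program devirtualization).
// Operand 0 is the selector (the callee address), operand 1 is the combined
// jump-table global, and the remaining operands are (offset, target) pairs.
// The expansion performs a binary search over the target addresses and ends
// in a direct tail call to the matching target.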
void X86ExpandPseudo::expandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

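  // Load the address of the jump-table entry for Target into R11 (a scratch
  // register that is not used to pass arguments) and compare it against the
  // selector, setting EFLAGS for the conditional branches emitted below.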
  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

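  // Emit a binary search over the half-open range of targets
  // [FirstTarget, FirstTarget + NumTargets): small ranges (fewer than six
  // targets) are handled with a compare-and-branch ladder, while larger
  // ranges are split in half around a pivot comparison.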
  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
        if (NumTargets == 1) {
          EmitTailCall(FirstTarget);
          return;
        }

        if (NumTargets == 2) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitTailCall(FirstTarget + 1);
          return;
        }

        if (NumTargets < 6) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
          EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
          return;
        }

        auto *ThenMBB = CreateMBB();
        CmpTarget(FirstTarget + (NumTargets / 2));
        EmitCondJump(X86::COND_B, ThenMBB);
        EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
        EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                         NumTargets - (NumTargets / 2) - 1);

        MF->insert(InsPt, ThenMBB);
        MBB = ThenMBB;
        MBBI = MBB->end();
        EmitBranchFunnel(FirstTarget, NumTargets / 2);
      };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand the CALL_RVMARKER pseudo to a call instruction, followed by the
  // special "movq %rax, %rdi" marker.
  MachineInstr &MI = *MBBI;

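  // Operand 0 is the ObjC runtime function to call after the marker (e.g.
  // objc_retainAutoreleasedReturnValue); operand 1 is the actual call target.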
  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = -1;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");

  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }

  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function returns
  // the first argument, so the value of %rax is unchanged after the ObjC
  // runtime call. On Windows targets, the runtime call follows the regular
  // x64 calling convention and expects the first argument in %rcx.
  auto TargetReg = STI->getTargetTriple().isOSWindows() ? X86::RCX : X86::RDI;
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(TargetReg, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);

  // Emit call to ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();

  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
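// When APX extended GPRs are available, select the EVEX-encoded variant of an
// instruction so it can address the extended register file.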
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
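    // Expand a tail-call return: first adjust the stack pointer by the
    // call-site adjustment plus the reserved return-address delta, then
    // replace the TCRETURN pseudo with the matching TAILJMP instruction.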
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust =
        MBBI->getOperand(isMem ? X86::AddrNumOperands : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, /*doMergeWithPrevious=*/true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

304 | |
305 | // Jump to label or value in register. |
306 | bool IsWin64 = STI->isTargetWin64(); |
307 | if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc || |
308 | Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) { |
309 | unsigned Op; |
310 | switch (Opcode) { |
311 | case X86::TCRETURNdi: |
312 | Op = X86::TAILJMPd; |
313 | break; |
314 | case X86::TCRETURNdicc: |
315 | Op = X86::TAILJMPd_CC; |
316 | break; |
317 | case X86::TCRETURNdi64cc: |
318 | assert(!MBB.getParent()->hasWinCFI() && |
319 | "Conditional tail calls confuse " |
320 | "the Win64 unwinder." ); |
321 | Op = X86::TAILJMPd64_CC; |
322 | break; |
323 | default: |
324 | // Note: Win64 uses REX prefixes indirect jumps out of functions, but |
325 | // not direct ones. |
326 | Op = X86::TAILJMPd64; |
327 | break; |
328 | } |
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }

    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }

    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    NewMI.setCFIType(*MBB.getParent(), MI.getCFIType());

    // Update the call site info.
    if (MBBI->isCandidateForCallSiteEntry())
      MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
    // Replace pseudo with machine iret
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except for building kernel)
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one of
  // these needs a 2-byte displacement relative to the specified address (with
  // a 32-bit spill size). Pairs of 1-bit up to 16-bit masks all use the same
  // spill size: they are all stored using MASKPAIR16STORE and loaded using
  // MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualmwaitx
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    expandICallBranchFunnel(&MBB, MBBI);
    return true;
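  // The AMX pseudos below carry extra shape operands (row/column registers)
  // that exist only so the register allocator can configure the tiles; they
  // are stripped here when the pseudos are lowered to the real AMX
  // instructions.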
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
    return true;
  }
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V: {
    for (unsigned i = 2; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc = Opcode == X86::PTILELOADDV
                       ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
                       : GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV: {
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTCMMIMFP16PSV: Opc = X86::TCMMIMFP16PS; break;
    case X86::PTCMMRLFP16PSV: Opc = X86::TCMMRLFP16PS; break;
    case X86::PTDPBSSDV: Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUDV: Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSDV: Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUDV: Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break;
    default: llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
    for (int i = 1; i >= 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
    return true;
  }
#undef GET_EGPR_IF_ENABLED
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  case X86::ADD32mi_ND:
  case X86::ADD64mi32_ND:
  case X86::SUB32mi_ND:
  case X86::SUB64mi32_ND:
  case X86::AND32mi_ND:
  case X86::AND64mi32_ND:
  case X86::OR32mi_ND:
  case X86::OR64mi32_ND:
  case X86::XOR32mi_ND:
  case X86::XOR64mi32_ND:
  case X86::ADC32mi_ND:
  case X86::ADC64mi32_ND:
  case X86::SBB32mi_ND:
  case X86::SBB64mi32_ND: {
    // It's possible for an EVEX-encoded legacy instruction to reach the 15-byte
    // instruction length limit: 4 bytes of EVEX prefix + 1 byte of opcode + 1
    // byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4 bytes of
    // immediate = 15 bytes in total, e.g.
    //
    //  subq $184, %fs:257(%rbx, %rcx), %rax
    //
    // In such a case, no additional (ADSIZE or segment override) prefix can be
    // used. To resolve the issue, we split the "long" instruction into 2
    // instructions:
    //
    //  movq %fs:257(%rbx, %rcx),%rax
    //  subq $184, %rax
    //
    // Therefore we consider the OPmi_ND to be a pseudo instruction to some
    // extent.
    const MachineOperand &ImmOp =
        MI.getOperand(MI.getNumExplicitOperands() - 1);
    // If the immediate is an expression, conservatively estimate 4 bytes.
    if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
      return false;
    int MemOpNo = X86::getFirstAddrOperandIdx(MI);
    const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
    Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
    // If the displacement is an expression, conservatively estimate 4 bytes.
    if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
      return false;
    // The instruction can afford at most one of the following three: a SIB
    // byte, a segment override prefix, an ADSIZE prefix.
    Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
    unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
    if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
      ++Count;
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
      ++Count;
    if (Count < 2)
      return false;
    unsigned Opc, LoadOpc;
    switch (Opcode) {
#define MI_TO_RI(OP)                                                           \
  case X86::OP##32mi_ND:                                                       \
    Opc = X86::OP##32ri;                                                       \
    LoadOpc = X86::MOV32rm;                                                    \
    break;                                                                     \
  case X86::OP##64mi32_ND:                                                     \
    Opc = X86::OP##64ri32;                                                     \
    LoadOpc = X86::MOV64rm;                                                    \
    break;

    default:
      llvm_unreachable("Unexpected Opcode");
      MI_TO_RI(ADD);
      MI_TO_RI(SUB);
      MI_TO_RI(AND);
      MI_TO_RI(OR);
      MI_TO_RI(XOR);
      MI_TO_RI(ADC);
      MI_TO_RI(SBB);
#undef MI_TO_RI
    }
    // Insert OPri.
    Register DestReg = MI.getOperand(0).getReg();
    BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
        .addReg(DestReg)
        .add(ImmOp);
    // Change OPmi_ND to MOVrm.
    for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
      MI.removeOperand(MI.getNumOperands() - 1);
    MI.setDesc(TII->get(LoadOpc));
    return true;
  }
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

// This function creates an additional block for storing the varargs guarded
// registers. It adds a check of %al to the entry block in order to skip
// GuardedRegsBlk when the XMM registers do not need to be stored.
//
//   EntryBlk[VAStartPseudoInstr]     EntryBlk
//        |                              |     .
//        |                              |        .
//        |                              |  GuardedRegsBlk
//        |                      =>      |        .
//        |                              |     .
//        |                           TailBlk
//        |                              |
//        |                              |
//
void X86ExpandPseudo::expandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);

  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();

  // Calculate liveins for newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;

    LiveRegs.stepForward(MI, Clobbers);
  }

  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);

  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);

  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;

  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(VAStartPseudoInstr->getOperand(OpndIdx).getReg().isPhysical());
  }

  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);

  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }

  // Add liveins to the created block.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool X86ExpandPseudo::expandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently, the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS, and it only appears in the entry block,
  // so we do not need to scan other blocks.
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      expandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }

  return false;
}

bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = expandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}