//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setNoVRegs();
  }

  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }

private:
  void expandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandMBB(MachineBasicBlock &MBB);
  /// This function expands pseudos which affect control flow.
  /// It is done in a separate pass to simplify block navigation in the main
  /// pass (which calls expandMBB).
  bool expandPseudosWhichAffectControlFlow(MachineFunction &MF);

  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of XMM register copies,
  /// placed into a separate block guarded by a check of the %al register (for
  /// the SysV ABI).
  void expandVastartSaveXmmRegs(
      MachineBasicBlock *EntryBlk,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)

void X86ExpandPseudo::expandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

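  // CmpTarget materializes the address of entry `Target` in the combined
  // global (using a RIP-relative LEA into R11) and compares the selector
  // against it, setting EFLAGS for the conditional branches emitted below.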
  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

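  // EmitBranchFunnel recursively builds a compare-and-branch tree over the
  // targets: a single target becomes a direct tail call, two targets need one
  // compare, up to five are handled with a linear chain of compares, and
  // larger ranges are split around the midpoint to form a balanced tree.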
  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
        if (NumTargets == 1) {
          EmitTailCall(FirstTarget);
          return;
        }

        if (NumTargets == 2) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitTailCall(FirstTarget + 1);
          return;
        }

        if (NumTargets < 6) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
          EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
          return;
        }

        auto *ThenMBB = CreateMBB();
        CmpTarget(FirstTarget + (NumTargets / 2));
        EmitCondJump(X86::COND_B, ThenMBB);
        EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
        EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                         NumTargets - (NumTargets / 2) - 1);

        MF->insert(InsPt, ThenMBB);
        MBB = ThenMBB;
        MBBI = MBB->end();
        EmitBranchFunnel(FirstTarget, NumTargets / 2);
      };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to call instruction, followed by the special
  // "movq %rax, %rdi" marker.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = -1;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");

  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
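  // Forward all operands of the pseudo except operand 0 (the ObjC runtime
  // function, which is called separately below) to the real call.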
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }

  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function returns
  // the first argument, so the value of %rax is unchanged after the ObjC
  // runtime call. On Windows targets, the runtime call follows the regular
  // x64 calling convention and expects the first argument in %rcx.
  auto TargetReg = STI->getTargetTriple().isOSWindows() ? X86::RCX : X86::RDI;
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(TargetReg, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(&MI, Marker);

  // Emit call to ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();

  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
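// Several of the mask and AMX pseudos below map to either the legacy or the
// EVEX-encoded form of the same instruction; this helper picks the EVEX form
// when the subtarget has extended GPRs (EGPR/APX).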
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNri64_ImpCall:
  case X86::TCRETURNmi64: {
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust =
        MBBI->getOperand(isMem ? X86::AddrNumOperands : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int64_t Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset = X86FL->mergeSPAdd(MBB, MBBI, Offset, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

    // Use this predicate to set REX prefix for X86_64 targets.
    bool IsX64 = STI->isTargetWin64() || STI->isTargetUEFI64();
    // Jump to label or value in register.
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse "
               "the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes for indirect jumps out of functions,
        // but not for direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }

    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsX64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if ((Opcode == X86::TCRETURNri64) ||
               (Opcode == X86::TCRETURNri64_ImpCall)) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsX64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      assert(!IsX64 && "Win64 and UEFI64 require REX for indirect jumps.");
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }

    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    NewMI.setCFIType(*MBB.getParent(), MI.getCFIType());

    // Update the call info.
    if (MBBI->isCandidateForAdditionalCallInfo())
      MBB.getParent()->moveAdditionalCallInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace pseudo with machine iret
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except for building kernel)
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr =
        BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address. If we access a frame variable,
    // we need to replace the RBX base with SaveRbx, as RBX has another value.
    const MachineOperand &Base = MBBI->getOperand(1);
    if (Base.getReg() == X86::RBX || Base.getReg() == X86::EBX)
      NewInstr->addOperand(MachineOperand::CreateReg(
          Base.getReg() == X86::RBX
              ? SaveRbx
              : Register(TRI->getSubReg(SaveRbx, X86::sub_32bit)),
          /*IsDef=*/false));
    else
      NewInstr->addOperand(Base);
    for (unsigned Idx = 1 + 1; Idx < 1 + X86::AddrNumOperands; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one of
  // these needs a 2-byte displacement relative to the specified address (with
  // a 32-bit spill size). Pairs of 1-bit up to 16-bit masks all use the same
  // spill size: they are all stored using MASKPAIR16STORE and loaded using
  // MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

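    // Copy the address operands to both loads, bumping the displacement of
    // the high half by 2 bytes so it reads the second mask of the pair.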
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualmwaitx
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    expandICallBranchFunnel(&MBB, MBBI);
    return true;
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
    return true;
  }
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTCVTROWD2PSrreV:
  case X86::PTCVTROWD2PSrriV:
  case X86::PTCVTROWPS2BF16HrreV:
  case X86::PTCVTROWPS2BF16HrriV:
  case X86::PTCVTROWPS2BF16LrreV:
  case X86::PTCVTROWPS2BF16LrriV:
  case X86::PTCVTROWPS2PHHrreV:
  case X86::PTCVTROWPS2PHHrriV:
  case X86::PTCVTROWPS2PHLrreV:
  case X86::PTCVTROWPS2PHLrriV:
  case X86::PTILEMOVROWrreV:
  case X86::PTILEMOVROWrriV: {
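    // These AMX pseudos carry explicit row/column shape operands so earlier
    // passes can track tile shapes; the real instructions take their shapes
    // from the tile configuration, so drop those operands before switching
    // opcodes.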
    for (unsigned i = 2; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTILELOADDRSV:
      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
      break;
    case X86::PTILELOADDRST1V:
      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
      break;
    case X86::PTILELOADDV:
      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
      break;
    case X86::PTILELOADDT1V:
      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
      break;
    case X86::PTCVTROWD2PSrreV:
      Opc = X86::TCVTROWD2PSrre;
      break;
    case X86::PTCVTROWD2PSrriV:
      Opc = X86::TCVTROWD2PSrri;
      break;
    case X86::PTCVTROWPS2BF16HrreV:
      Opc = X86::TCVTROWPS2BF16Hrre;
      break;
    case X86::PTCVTROWPS2BF16HrriV:
      Opc = X86::TCVTROWPS2BF16Hrri;
      break;
    case X86::PTCVTROWPS2BF16LrreV:
      Opc = X86::TCVTROWPS2BF16Lrre;
      break;
    case X86::PTCVTROWPS2BF16LrriV:
      Opc = X86::TCVTROWPS2BF16Lrri;
      break;
    case X86::PTCVTROWPS2PHHrreV:
      Opc = X86::TCVTROWPS2PHHrre;
      break;
    case X86::PTCVTROWPS2PHHrriV:
      Opc = X86::TCVTROWPS2PHHrri;
      break;
    case X86::PTCVTROWPS2PHLrreV:
      Opc = X86::TCVTROWPS2PHLrre;
      break;
    case X86::PTCVTROWPS2PHLrriV:
      Opc = X86::TCVTROWPS2PHLrri;
      break;
    case X86::PTILEMOVROWrreV:
      Opc = X86::TILEMOVROWrre;
      break;
    case X86::PTILEMOVROWrriV:
      Opc = X86::TILEMOVROWrri;
      break;
    default:
      llvm_unreachable("Unexpected Opcode");
    }
    MI.setDesc(TII->get(Opc));
    return true;
  }
  // TILEPAIRLOAD exists only for TILEPair spills; there is no corresponding
  // AMX instruction for it, so split it into two load instructions:
  // "TILEPAIRLOAD TMM0:TMM1, Base, Scale, Index, Offset, Segment" -->
  // "TILELOAD TMM0, Base, Scale, Index, Offset, Segment" +
  // "TILELOAD TMM1, Base, Scale, Index, Offset + TMM_SIZE, Segment"
  case X86::PTILEPAIRLOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    Register TReg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register TReg0 = TRI->getSubReg(TReg, X86::sub_t0);
    Register TReg1 = TRI->getSubReg(TReg, X86::sub_t1);
    unsigned TmmSize = TRI->getRegSizeInBits(X86::TILERegClass) / 8;

    MachineInstrBuilder MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(X86::TILELOADD))
            .addReg(TReg0, RegState::Define | getDeadRegState(DstIsDead));
    MachineInstrBuilder MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(X86::TILELOADD))
            .addReg(TReg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + TmmSize);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Make sure the first stride reg used in first tileload is alive.
    MachineOperand &Stride =
        MIBLo.getInstr()->getOperand(1 + X86::AddrIndexReg);
    Stride.setIsKill(false);

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, TmmSize);
    MachineMemOperand *MMOHi =
        MF->getMachineMemOperand(OldMMO, TmmSize, TmmSize);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  // Similar to TILEPAIRLOAD, TILEPAIRSTORE exists only for TILEPair spills and
  // has no corresponding AMX instruction, so split it as well:
  // "TILEPAIRSTORE Base, Scale, Index, Offset, Segment, TMM0:TMM1" -->
  // "TILESTORE Base, Scale, Index, Offset, Segment, TMM0" +
  // "TILESTORE Base, Scale, Index, Offset + TMM_SIZE, Segment, TMM1"
  case X86::PTILEPAIRSTORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    Register TReg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register TReg0 = TRI->getSubReg(TReg, X86::sub_t0);
    Register TReg1 = TRI->getSubReg(TReg, X86::sub_t1);
    unsigned TmmSize = TRI->getRegSizeInBits(X86::TILERegClass) / 8;

    MachineInstrBuilder MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(X86::TILESTORED));
    MachineInstrBuilder MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(X86::TILESTORED));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + TmmSize);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(TReg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(TReg1, getKillRegState(SrcIsKill));

    // Make sure the first stride reg used in first tilestore is alive.
    MachineOperand &Stride = MIBLo.getInstr()->getOperand(X86::AddrIndexReg);
    Stride.setIsKill(false);

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, TmmSize);
    MachineMemOperand *MMOHi =
        MF->getMachineMemOperand(OldMMO, TmmSize, TmmSize);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::PT2RPNTLVWZ0V:
  case X86::PT2RPNTLVWZ0T1V:
  case X86::PT2RPNTLVWZ1V:
  case X86::PT2RPNTLVWZ1T1V:
  case X86::PT2RPNTLVWZ0RSV:
  case X86::PT2RPNTLVWZ0RST1V:
  case X86::PT2RPNTLVWZ1RSV:
  case X86::PT2RPNTLVWZ1RST1V: {
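    // As with the other AMX pseudos, drop the explicit shape operands (the
    // row operand and the two column operands of the destination tile pair);
    // the real instructions take their shapes from the tile configuration.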
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PT2RPNTLVWZ0V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
      break;
    case X86::PT2RPNTLVWZ0T1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
      break;
    case X86::PT2RPNTLVWZ1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
      break;
    case X86::PT2RPNTLVWZ1T1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
      break;
    case X86::PT2RPNTLVWZ0RSV:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
      break;
    case X86::PT2RPNTLVWZ0RST1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
      break;
    case X86::PT2RPNTLVWZ1RSV:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
      break;
    case X86::PT2RPNTLVWZ1RST1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
      break;
    default:
      llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTTRANSPOSEDV:
  case X86::PTCONJTFP16V: {
    for (int i = 2; i > 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(Opcode == X86::PTTRANSPOSEDV ? X86::TTRANSPOSED
                                                     : X86::TCONJTFP16));
    return true;
  }
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTTDPBF16PSV:
  case X86::PTTDPFP16PSV:
  case X86::PTTCMMIMFP16PSV:
  case X86::PTTCMMRLFP16PSV:
  case X86::PTCONJTCMMIMFP16PSV:
  case X86::PTMMULTF32PSV:
  case X86::PTTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
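    // These multiply-accumulate pseudos tie the accumulator input (operand 4)
    // to the destination and also carry shape operands. Untie it, strip the
    // shape operands, and re-tie the destination to the accumulator once the
    // operands have shifted down.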
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTCMMIMFP16PSV: Opc = X86::TCMMIMFP16PS; break;
    case X86::PTCMMRLFP16PSV: Opc = X86::TCMMRLFP16PS; break;
    case X86::PTDPBSSDV: Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUDV: Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSDV: Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUDV: Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break;
    case X86::PTTDPBF16PSV:
      Opc = X86::TTDPBF16PS;
      break;
    case X86::PTTDPFP16PSV:
      Opc = X86::TTDPFP16PS;
      break;
    case X86::PTTCMMIMFP16PSV:
      Opc = X86::TTCMMIMFP16PS;
      break;
    case X86::PTTCMMRLFP16PSV:
      Opc = X86::TTCMMRLFP16PS;
      break;
    case X86::PTCONJTCMMIMFP16PSV:
      Opc = X86::TCONJTCMMIMFP16PS;
      break;
    case X86::PTMMULTF32PSV:
      Opc = X86::TMMULTF32PS;
      break;
    case X86::PTTMMULTF32PSV:
      Opc = X86::TTMMULTF32PS;
      break;
    case X86::PTDPBF8PSV:
      Opc = X86::TDPBF8PS;
      break;
    case X86::PTDPBHF8PSV:
      Opc = X86::TDPBHF8PS;
      break;
    case X86::PTDPHBF8PSV:
      Opc = X86::TDPHBF8PS;
      break;
    case X86::PTDPHF8PSV:
      Opc = X86::TDPHF8PS;
      break;

    default:
      llvm_unreachable("Unexpected Opcode");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
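    // The tile-store pseudo's first two operands are the row/column shape;
    // the real TILESTORED starts directly with the memory operands, so drop
    // them before rewriting the opcode.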
    for (int i = 1; i >= 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
    return true;
  }
#undef GET_EGPR_IF_ENABLED
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  case X86::CALL64r_ImpCall:
    MI.setDesc(TII->get(X86::CALL64r));
    return true;
  case X86::ADD32mi_ND:
  case X86::ADD64mi32_ND:
  case X86::SUB32mi_ND:
  case X86::SUB64mi32_ND:
  case X86::AND32mi_ND:
  case X86::AND64mi32_ND:
  case X86::OR32mi_ND:
  case X86::OR64mi32_ND:
  case X86::XOR32mi_ND:
  case X86::XOR64mi32_ND:
  case X86::ADC32mi_ND:
  case X86::ADC64mi32_ND:
  case X86::SBB32mi_ND:
  case X86::SBB64mi32_ND: {
    // It's possible for an EVEX-encoded legacy instruction to reach the
    // 15-byte instruction length limit: 4 bytes of EVEX prefix + 1 byte of
    // opcode + 1 byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4
    // bytes of immediate = 15 bytes in total, e.g.
    //
    //  subq $184, %fs:257(%rbx, %rcx), %rax
    //
    // In such a case, no additional (ADSIZE or segment override) prefix can be
    // used. To resolve the issue, we split the "long" instruction into 2
    // instructions:
    //
    //  movq %fs:257(%rbx, %rcx),%rax
    //  subq $184, %rax
    //
    // Therefore we consider the OPmi_ND to be a pseudo instruction to some
    // extent.
    const MachineOperand &ImmOp =
        MI.getOperand(MI.getNumExplicitOperands() - 1);
    // If the immediate is an expression, conservatively estimate 4 bytes.
    if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
      return false;
    int MemOpNo = X86::getFirstAddrOperandIdx(MI);
    const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
    Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
    // If the displacement is an expression, conservatively estimate 4 bytes.
    if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
      return false;
    // At this point the encoding already uses a 4-byte displacement and a
    // 4-byte immediate, so at most one extra byte still fits in the 15-byte
    // limit: a SIB byte, a segment override prefix, or an ADSIZE prefix
    // (needed for 32-bit base/index registers). Count them.
    Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
    unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
    if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
      ++Count;
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
      ++Count;
    if (Count < 2)
      return false;
    unsigned Opc, LoadOpc;
    switch (Opcode) {
#define MI_TO_RI(OP)                                                           \
  case X86::OP##32mi_ND:                                                       \
    Opc = X86::OP##32ri;                                                       \
    LoadOpc = X86::MOV32rm;                                                    \
    break;                                                                     \
  case X86::OP##64mi32_ND:                                                     \
    Opc = X86::OP##64ri32;                                                     \
    LoadOpc = X86::MOV64rm;                                                    \
    break;

    default:
      llvm_unreachable("Unexpected Opcode");
      MI_TO_RI(ADD);
      MI_TO_RI(SUB);
      MI_TO_RI(AND);
      MI_TO_RI(OR);
      MI_TO_RI(XOR);
      MI_TO_RI(ADC);
      MI_TO_RI(SBB);
#undef MI_TO_RI
    }
    // Insert OPri.
    Register DestReg = MI.getOperand(0).getReg();
    BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
        .addReg(DestReg)
        .add(ImmOp);
    // Change OPmi_ND to MOVrm.
    for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
      MI.removeOperand(MI.getNumOperands() - 1);
    MI.setDesc(TII->get(LoadOpc));
    return true;
  }
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

// This function creates an additional block to store the varargs guarded
// registers. It adds a check of %al to the entry block in order to skip
// GuardedRegsBlk if the XMM registers do not need to be stored.
//
//   EntryBlk[VAStartPseudoInstr]          EntryBlk
//              |                             |     .
//              |                             |        .
//              |                             |  GuardedRegsBlk
//              |                  =>         |        .
//              |                             |     .
//              |                          TailBlk
//              |                             |
//              |                             |
//
void X86ExpandPseudo::expandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);

  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();

  // Calculate liveins for newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
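  // Walk the entry block up to the VASTART pseudo so LiveRegs reflects the
  // registers live at the point where the new blocks are split off.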
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;

    LiveRegs.stepForward(MI, Clobbers);
  }

  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);

  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);

  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;

  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(VAStartPseudoInstr->getOperand(OpndIdx).getReg().isPhysical());
  }

  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);

  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }

  // Add liveins to the created block.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool X86ExpandPseudo::expandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS, and it is always located in the entry block,
  // so we do not need to scan any other blocks.
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      expandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }

  return false;
}

bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = expandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}