1//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands pseudo instructions into target
10// instructions to allow proper scheduling and other late optimizations. This
11// pass should be run after register allocation but before the post-regalloc
12// scheduling pass.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64ExpandImm.h"
17#include "AArch64InstrInfo.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/CodeGen/LivePhysRegs.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineConstantPool.h"
25#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineOperand.h"
30#include "llvm/CodeGen/TargetSubtargetInfo.h"
31#include "llvm/IR/DebugLoc.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/Pass.h"
34#include "llvm/Support/CodeGen.h"
35#include "llvm/Target/TargetMachine.h"
36#include "llvm/TargetParser/Triple.h"
37#include <cassert>
38#include <cstdint>
39#include <iterator>
40
41using namespace llvm;
42
43#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
44
45namespace {
46
47class AArch64ExpandPseudoImpl {
48public:
49 const AArch64InstrInfo *TII;
50
51 bool run(MachineFunction &MF);
52
53private:
54 bool expandMBB(MachineBasicBlock &MBB);
55 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
56 MachineBasicBlock::iterator &NextMBBI);
57 bool expandMultiVecPseudo(MachineBasicBlock &MBB,
58 MachineBasicBlock::iterator MBBI,
59 TargetRegisterClass ContiguousClass,
60 TargetRegisterClass StridedClass,
61 unsigned ContiguousOpc, unsigned StridedOpc);
62 bool expandFormTuplePseudo(MachineBasicBlock &MBB,
63 MachineBasicBlock::iterator MBBI,
64 MachineBasicBlock::iterator &NextMBBI,
65 unsigned Size);
66 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
67 unsigned BitSize);
68
69 bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
70 MachineBasicBlock::iterator MBBI);
71 bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
72 MachineBasicBlock::iterator MBBI);
73 bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
74 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
75 unsigned ExtendImm, unsigned ZeroReg,
76 MachineBasicBlock::iterator &NextMBBI);
77 bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
78 MachineBasicBlock::iterator MBBI,
79 MachineBasicBlock::iterator &NextMBBI);
80 bool expandSetTagLoop(MachineBasicBlock &MBB,
81 MachineBasicBlock::iterator MBBI,
82 MachineBasicBlock::iterator &NextMBBI);
83 bool expandSVESpillFill(MachineBasicBlock &MBB,
84 MachineBasicBlock::iterator MBBI, unsigned Opc,
85 unsigned N);
86 bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
87 MachineBasicBlock::iterator MBBI);
88 bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
89 bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
90 MachineBasicBlock::iterator MBBI);
91 struct ConditionalBlocks {
92 MachineBasicBlock &CondBB;
93 MachineBasicBlock &EndBB;
94 };
95 ConditionalBlocks expandConditionalPseudo(MachineBasicBlock &MBB,
96 MachineBasicBlock::iterator MBBI,
97 DebugLoc DL,
98 MachineInstrBuilder &Branch);
99 MachineBasicBlock *expandRestoreZASave(MachineBasicBlock &MBB,
100 MachineBasicBlock::iterator MBBI);
101 MachineBasicBlock *expandCommitZASave(MachineBasicBlock &MBB,
102 MachineBasicBlock::iterator MBBI);
103 MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
104 MachineBasicBlock::iterator MBBI);
105};
106
107class AArch64ExpandPseudoLegacy : public MachineFunctionPass {
108public:
109 static char ID;
110
111 AArch64ExpandPseudoLegacy() : MachineFunctionPass(ID) {}
112
113 bool runOnMachineFunction(MachineFunction &MF) override;
114
115 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
116};
117
118} // end anonymous namespace
119
120char AArch64ExpandPseudoLegacy::ID = 0;
121
122INITIALIZE_PASS(AArch64ExpandPseudoLegacy, "aarch64-expand-pseudo",
123 AARCH64_EXPAND_PSEUDO_NAME, false, false)
124
125/// Transfer implicit operands on the pseudo instruction to the
126/// instructions created from the expansion.
127static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
128 MachineInstrBuilder &DefMI) {
129 const MCInstrDesc &Desc = OldMI.getDesc();
130 for (const MachineOperand &MO :
131 llvm::drop_begin(RangeOrContainer: OldMI.operands(), N: Desc.getNumOperands())) {
132 assert(MO.isReg() && MO.getReg());
133 if (MO.isUse())
134 UseMI.add(MO);
135 else
136 DefMI.add(MO);
137 }
138}
139
140/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
141/// real move-immediate instructions to synthesize the immediate.
142bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
143 MachineBasicBlock::iterator MBBI,
144 unsigned BitSize) {
145 MachineInstr &MI = *MBBI;
146 Register DstReg = MI.getOperand(i: 0).getReg();
147 RegState RenamableState =
148 getRenamableRegState(B: MI.getOperand(i: 0).isRenamable());
149 uint64_t Imm = MI.getOperand(i: 1).getImm();
150
151 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
152 // Useless def, and we don't want to risk creating an invalid ORR (which
153 // would really write to sp).
154 MI.eraseFromParent();
155 return true;
156 }
157
158 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
159 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
160 assert(Insn.size() != 0);
161
162 SmallVector<MachineInstrBuilder, 4> MIBS;
163 for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
164 bool LastItem = std::next(x: I) == E;
165 switch (I->Opcode)
166 {
167 default: llvm_unreachable("unhandled!"); break;
168
169 case AArch64::ORRWri:
170 case AArch64::ORRXri:
171 case AArch64::ANDXri:
172 case AArch64::EORXri:
173 if (I->Op1 == 0) {
174 MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
175 .add(MO: MI.getOperand(i: 0))
176 .addReg(RegNo: BitSize == 32 ? AArch64::WZR : AArch64::XZR)
177 .addImm(Val: I->Op2));
178 } else {
179 Register DstReg = MI.getOperand(i: 0).getReg();
180 bool DstIsDead = MI.getOperand(i: 0).isDead();
181 MIBS.push_back(
182 Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
183 .addReg(RegNo: DstReg, Flags: RegState::Define |
184 getDeadRegState(B: DstIsDead && LastItem) |
185 RenamableState)
186 .addReg(RegNo: DstReg)
187 .addImm(Val: I->Op2));
188 }
189 break;
190 case AArch64::EONXrs:
191 case AArch64::EORXrs:
192 case AArch64::ORRWrs:
193 case AArch64::ORRXrs: {
194 Register DstReg = MI.getOperand(i: 0).getReg();
195 bool DstIsDead = MI.getOperand(i: 0).isDead();
196 MIBS.push_back(
197 Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
198 .addReg(RegNo: DstReg, Flags: RegState::Define |
199 getDeadRegState(B: DstIsDead && LastItem) |
200 RenamableState)
201 .addReg(RegNo: DstReg)
202 .addReg(RegNo: DstReg)
203 .addImm(Val: I->Op2));
204 } break;
205 case AArch64::MOVNWi:
206 case AArch64::MOVNXi:
207 case AArch64::MOVZWi:
208 case AArch64::MOVZXi: {
209 bool DstIsDead = MI.getOperand(i: 0).isDead();
210 MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
211 .addReg(RegNo: DstReg, Flags: RegState::Define |
212 getDeadRegState(B: DstIsDead && LastItem) |
213 RenamableState)
214 .addImm(Val: I->Op1)
215 .addImm(Val: I->Op2));
216 } break;
217 case AArch64::MOVKWi:
218 case AArch64::MOVKXi: {
219 Register DstReg = MI.getOperand(i: 0).getReg();
220 bool DstIsDead = MI.getOperand(i: 0).isDead();
221 MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
222 .addReg(RegNo: DstReg,
223 Flags: RegState::Define |
224 getDeadRegState(B: DstIsDead && LastItem) |
225 RenamableState)
226 .addReg(RegNo: DstReg)
227 .addImm(Val: I->Op1)
228 .addImm(Val: I->Op2));
229 } break;
230 }
231 }
232 transferImpOps(OldMI&: MI, UseMI&: MIBS.front(), DefMI&: MIBS.back());
233 MI.eraseFromParent();
234 return true;
235}
236
237bool AArch64ExpandPseudoImpl::expandCMP_SWAP(
238 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
239 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
240 MachineBasicBlock::iterator &NextMBBI) {
241 MachineInstr &MI = *MBBI;
242 MIMetadata MIMD(MI);
243 const MachineOperand &Dest = MI.getOperand(i: 0);
244 Register StatusReg = MI.getOperand(i: 1).getReg();
245 bool StatusDead = MI.getOperand(i: 1).isDead();
246 // Duplicating undef operands into 2 instructions does not guarantee the same
247 // value on both; However undef should be replaced by xzr anyway.
248 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
249 Register AddrReg = MI.getOperand(i: 2).getReg();
250 Register DesiredReg = MI.getOperand(i: 3).getReg();
251 Register NewReg = MI.getOperand(i: 4).getReg();
252
253 MachineFunction *MF = MBB.getParent();
254 auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
255 auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
256 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
257
258 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB);
259 MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB);
260 MF->insert(MBBI: ++StoreBB->getIterator(), MBB: DoneBB);
261
262 // .Lloadcmp:
263 // mov wStatus, 0
264 // ldaxr xDest, [xAddr]
265 // cmp xDest, xDesired
266 // b.ne .Ldone
267 if (!StatusDead)
268 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::MOVZWi), DestReg: StatusReg)
269 .addImm(Val: 0).addImm(Val: 0);
270 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: LdarOp), DestReg: Dest.getReg())
271 .addReg(RegNo: AddrReg);
272 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: CmpOp), DestReg: ZeroReg)
273 .addReg(RegNo: Dest.getReg(), Flags: getKillRegState(B: Dest.isDead()))
274 .addReg(RegNo: DesiredReg)
275 .addImm(Val: ExtendImm);
276 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::Bcc))
277 .addImm(Val: AArch64CC::NE)
278 .addMBB(MBB: DoneBB)
279 .addReg(RegNo: AArch64::NZCV, Flags: RegState::Implicit | RegState::Kill);
280 LoadCmpBB->addSuccessor(Succ: DoneBB);
281 LoadCmpBB->addSuccessor(Succ: StoreBB);
282
283 // .Lstore:
284 // stlxr wStatus, xNew, [xAddr]
285 // cbnz wStatus, .Lloadcmp
286 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: StlrOp), DestReg: StatusReg)
287 .addReg(RegNo: NewReg)
288 .addReg(RegNo: AddrReg);
289 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
290 .addReg(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
291 .addMBB(MBB: LoadCmpBB);
292 StoreBB->addSuccessor(Succ: LoadCmpBB);
293 StoreBB->addSuccessor(Succ: DoneBB);
294
295 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
296 DoneBB->transferSuccessors(FromMBB: &MBB);
297
298 MBB.addSuccessor(Succ: LoadCmpBB);
299
300 NextMBBI = MBB.end();
301 MI.eraseFromParent();
302
303 // Recompute livein lists.
304 LivePhysRegs LiveRegs;
305 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
306 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
307 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
308 // Do an extra pass around the loop to get loop carried registers right.
309 StoreBB->clearLiveIns();
310 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
311 LoadCmpBB->clearLiveIns();
312 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
313
314 return true;
315}
316
317bool AArch64ExpandPseudoImpl::expandCMP_SWAP_128(
318 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
319 MachineBasicBlock::iterator &NextMBBI) {
320 MachineInstr &MI = *MBBI;
321 MIMetadata MIMD(MI);
322 MachineOperand &DestLo = MI.getOperand(i: 0);
323 MachineOperand &DestHi = MI.getOperand(i: 1);
324 Register StatusReg = MI.getOperand(i: 2).getReg();
325 bool StatusDead = MI.getOperand(i: 2).isDead();
326 // Duplicating undef operands into 2 instructions does not guarantee the same
327 // value on both; However undef should be replaced by xzr anyway.
328 assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
329 Register AddrReg = MI.getOperand(i: 3).getReg();
330 Register DesiredLoReg = MI.getOperand(i: 4).getReg();
331 Register DesiredHiReg = MI.getOperand(i: 5).getReg();
332 Register NewLoReg = MI.getOperand(i: 6).getReg();
333 Register NewHiReg = MI.getOperand(i: 7).getReg();
334
335 unsigned LdxpOp, StxpOp;
336
337 switch (MI.getOpcode()) {
338 case AArch64::CMP_SWAP_128_MONOTONIC:
339 LdxpOp = AArch64::LDXPX;
340 StxpOp = AArch64::STXPX;
341 break;
342 case AArch64::CMP_SWAP_128_RELEASE:
343 LdxpOp = AArch64::LDXPX;
344 StxpOp = AArch64::STLXPX;
345 break;
346 case AArch64::CMP_SWAP_128_ACQUIRE:
347 LdxpOp = AArch64::LDAXPX;
348 StxpOp = AArch64::STXPX;
349 break;
350 case AArch64::CMP_SWAP_128:
351 LdxpOp = AArch64::LDAXPX;
352 StxpOp = AArch64::STLXPX;
353 break;
354 default:
355 llvm_unreachable("Unexpected opcode");
356 }
357
358 MachineFunction *MF = MBB.getParent();
359 auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
360 auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
361 auto FailBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
362 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
363
364 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB);
365 MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB);
366 MF->insert(MBBI: ++StoreBB->getIterator(), MBB: FailBB);
367 MF->insert(MBBI: ++FailBB->getIterator(), MBB: DoneBB);
368
369 // .Lloadcmp:
370 // ldaxp xDestLo, xDestHi, [xAddr]
371 // cmp xDestLo, xDesiredLo
372 // sbcs xDestHi, xDesiredHi
373 // b.ne .Ldone
374 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: LdxpOp))
375 .addReg(RegNo: DestLo.getReg(), Flags: RegState::Define)
376 .addReg(RegNo: DestHi.getReg(), Flags: RegState::Define)
377 .addReg(RegNo: AddrReg);
378 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::SUBSXrs), DestReg: AArch64::XZR)
379 .addReg(RegNo: DestLo.getReg(), Flags: getKillRegState(B: DestLo.isDead()))
380 .addReg(RegNo: DesiredLoReg)
381 .addImm(Val: 0);
382 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CSINCWr), DestReg: StatusReg)
383 .addUse(RegNo: AArch64::WZR)
384 .addUse(RegNo: AArch64::WZR)
385 .addImm(Val: AArch64CC::EQ);
386 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::SUBSXrs), DestReg: AArch64::XZR)
387 .addReg(RegNo: DestHi.getReg(), Flags: getKillRegState(B: DestHi.isDead()))
388 .addReg(RegNo: DesiredHiReg)
389 .addImm(Val: 0);
390 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CSINCWr), DestReg: StatusReg)
391 .addUse(RegNo: StatusReg, Flags: RegState::Kill)
392 .addUse(RegNo: StatusReg, Flags: RegState::Kill)
393 .addImm(Val: AArch64CC::EQ);
394 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
395 .addUse(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
396 .addMBB(MBB: FailBB);
397 LoadCmpBB->addSuccessor(Succ: FailBB);
398 LoadCmpBB->addSuccessor(Succ: StoreBB);
399
400 // .Lstore:
401 // stlxp wStatus, xNewLo, xNewHi, [xAddr]
402 // cbnz wStatus, .Lloadcmp
403 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: StxpOp), DestReg: StatusReg)
404 .addReg(RegNo: NewLoReg)
405 .addReg(RegNo: NewHiReg)
406 .addReg(RegNo: AddrReg);
407 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
408 .addReg(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
409 .addMBB(MBB: LoadCmpBB);
410 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::B)).addMBB(MBB: DoneBB);
411 StoreBB->addSuccessor(Succ: LoadCmpBB);
412 StoreBB->addSuccessor(Succ: DoneBB);
413
414 // .Lfail:
415 // stlxp wStatus, xDestLo, xDestHi, [xAddr]
416 // cbnz wStatus, .Lloadcmp
417 BuildMI(BB: FailBB, MIMD, MCID: TII->get(Opcode: StxpOp), DestReg: StatusReg)
418 .addReg(RegNo: DestLo.getReg())
419 .addReg(RegNo: DestHi.getReg())
420 .addReg(RegNo: AddrReg);
421 BuildMI(BB: FailBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
422 .addReg(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
423 .addMBB(MBB: LoadCmpBB);
424 FailBB->addSuccessor(Succ: LoadCmpBB);
425 FailBB->addSuccessor(Succ: DoneBB);
426
427 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
428 DoneBB->transferSuccessors(FromMBB: &MBB);
429
430 MBB.addSuccessor(Succ: LoadCmpBB);
431
432 NextMBBI = MBB.end();
433 MI.eraseFromParent();
434
435 // Recompute liveness bottom up.
436 LivePhysRegs LiveRegs;
437 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
438 computeAndAddLiveIns(LiveRegs, MBB&: *FailBB);
439 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
440 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
441
442 // Do an extra pass in the loop to get the loop carried dependencies right.
443 FailBB->clearLiveIns();
444 computeAndAddLiveIns(LiveRegs, MBB&: *FailBB);
445 StoreBB->clearLiveIns();
446 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
447 LoadCmpBB->clearLiveIns();
448 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
449
450 return true;
451}
452
453/// \brief Expand Pseudos to Instructions with destructive operands.
454///
455/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
456/// or for fixing relaxed register allocation conditions to comply with
457/// the instructions register constraints. The latter case may be cheaper
458/// than setting the register constraints in the register allocator,
459/// since that will insert regular MOV instructions rather than MOVPRFX.
460///
461/// Example (after register allocation):
462///
463/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
464///
465/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
466/// * We cannot map directly to FSUB_ZPmZ_B because the register
467/// constraints of the instruction are not met.
468/// * Also the _ZERO specifies the false lanes need to be zeroed.
469///
470/// We first try to see if the destructive operand == result operand,
471/// if not, we try to swap the operands, e.g.
472///
473/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
474///
475/// But because FSUB_ZPmZ is not commutative, this is semantically
476/// different, so we need a reverse instruction:
477///
478/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
479///
480/// Then we implement the zeroing of the false lanes of Z0 by adding
481/// a zeroing MOVPRFX instruction:
482///
483/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
484/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
485///
486/// Note that this can only be done for _ZERO or _UNDEF variants where
487/// we can guarantee the false lanes to be zeroed (by implementing this)
488/// or that they are undef (don't care / not used), otherwise the
489/// swapping of operands is illegal because the operation is not
490/// (or cannot be emulated to be) fully commutative.
491bool AArch64ExpandPseudoImpl::expand_DestructiveOp(
492 MachineInstr &MI, MachineBasicBlock &MBB,
493 MachineBasicBlock::iterator MBBI) {
494 unsigned Opcode = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode());
495 uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
496 uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
497 bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
498 Register DstReg = MI.getOperand(i: 0).getReg();
499 bool DstIsDead = MI.getOperand(i: 0).isDead();
500 bool UseRev = false;
501 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
502
503 switch (DType) {
504 case AArch64::DestructiveBinaryComm:
505 case AArch64::DestructiveBinaryCommWithRev:
506 if (DstReg == MI.getOperand(i: 3).getReg()) {
507 // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
508 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 3, args: 2);
509 UseRev = true;
510 break;
511 }
512 [[fallthrough]];
513 case AArch64::DestructiveBinary:
514 case AArch64::DestructiveBinaryImm:
515 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 2, args: 3);
516 break;
517 case AArch64::DestructiveUnaryPassthru:
518 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 2, args: 3, args: 3);
519 break;
520 case AArch64::DestructiveTernaryCommWithRev:
521 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 2, args: 3, args: 4);
522 if (DstReg == MI.getOperand(i: 3).getReg()) {
523 // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
524 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 3, args: 4, args: 2);
525 UseRev = true;
526 } else if (DstReg == MI.getOperand(i: 4).getReg()) {
527 // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
528 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 4, args: 3, args: 2);
529 UseRev = true;
530 }
531 break;
532 case AArch64::Destructive2xRegImmUnpred:
533 // EXT_ZZI_CONSTRUCTIVE Zd, Zs, Imm
534 // ==> MOVPRFX Zd Zs; EXT_ZZI Zd, Zd, Zs, Imm
535 std::tie(args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 1, args: 2);
536 break;
537 case AArch64::DestructiveBinaryImmUnpred:
538 std::tie(args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 2);
539 break;
540 case AArch64::DestructiveBinaryShImmUnpred:
541 std::tie(args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 2, args: 3);
542 break;
543 default:
544 llvm_unreachable("Unsupported Destructive Operand type");
545 }
546
547 // MOVPRFX can only be used if the destination operand
548 // is the destructive operand, not as any other operand,
549 // so the Destructive Operand must be unique.
550 bool DOPRegIsUnique = false;
551 switch (DType) {
552 case AArch64::DestructiveBinary:
553 DOPRegIsUnique = DstReg != MI.getOperand(i: SrcIdx).getReg();
554 break;
555 case AArch64::DestructiveBinaryComm:
556 case AArch64::DestructiveBinaryCommWithRev:
557 DOPRegIsUnique =
558 DstReg != MI.getOperand(i: DOPIdx).getReg() ||
559 MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg();
560 break;
561 case AArch64::DestructiveUnaryPassthru:
562 case AArch64::DestructiveBinaryImm:
563 case AArch64::DestructiveBinaryImmUnpred:
564 case AArch64::DestructiveBinaryShImmUnpred:
565 case AArch64::Destructive2xRegImmUnpred:
566 DOPRegIsUnique = true;
567 break;
568 case AArch64::DestructiveTernaryCommWithRev:
569 DOPRegIsUnique =
570 DstReg != MI.getOperand(i: DOPIdx).getReg() ||
571 (MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg() &&
572 MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: Src2Idx).getReg());
573 break;
574 }
575
576 // Resolve the reverse opcode
577 if (UseRev) {
578 int NewOpcode;
579 // e.g. DIV -> DIVR
580 if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
581 Opcode = NewOpcode;
582 // e.g. DIVR -> DIV
583 else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
584 Opcode = NewOpcode;
585 }
586
587 // Get the right MOVPRFX
588 uint64_t ElementSize = TII->getElementSizeForOpcode(Opc: Opcode);
589 unsigned MovPrfx, LSLZero, MovPrfxZero;
590 switch (ElementSize) {
591 case AArch64::ElementSizeNone:
592 case AArch64::ElementSizeB:
593 MovPrfx = AArch64::MOVPRFX_ZZ;
594 LSLZero = AArch64::LSL_ZPmI_B;
595 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
596 break;
597 case AArch64::ElementSizeH:
598 MovPrfx = AArch64::MOVPRFX_ZZ;
599 LSLZero = AArch64::LSL_ZPmI_H;
600 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
601 break;
602 case AArch64::ElementSizeS:
603 MovPrfx = AArch64::MOVPRFX_ZZ;
604 LSLZero = AArch64::LSL_ZPmI_S;
605 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
606 break;
607 case AArch64::ElementSizeD:
608 MovPrfx = AArch64::MOVPRFX_ZZ;
609 LSLZero = AArch64::LSL_ZPmI_D;
610 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
611 break;
612 default:
613 llvm_unreachable("Unsupported ElementSize");
614 }
615
616 // Preserve undef state until DOP's reg is defined.
617 RegState DOPRegState = getUndefRegState(B: MI.getOperand(i: DOPIdx).isUndef());
618
619 //
620 // Create the destructive operation (if required)
621 //
622 MachineInstrBuilder PRFX, DOP;
623 if (FalseZero) {
624 // If we cannot prefix the requested instruction we'll instead emit a
625 // prefixed_zeroing_mov for DestructiveBinary.
626 assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
627 DType == AArch64::DestructiveBinaryComm ||
628 DType == AArch64::DestructiveBinaryCommWithRev) &&
629 "The destructive operand should be unique");
630 assert(ElementSize != AArch64::ElementSizeNone &&
631 "This instruction is unpredicated");
632
633 // Merge source operand into destination register
634 PRFX = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: MovPrfxZero))
635 .addReg(RegNo: DstReg, Flags: RegState::Define)
636 .addReg(RegNo: MI.getOperand(i: PredIdx).getReg())
637 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState);
638
639 // After the movprfx, the destructive operand is same as Dst
640 DOPIdx = 0;
641 DOPRegState = {};
642
643 // Create the additional LSL to zero the lanes when the DstReg is not
644 // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
645 // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
646 if ((DType == AArch64::DestructiveBinary ||
647 DType == AArch64::DestructiveBinaryComm ||
648 DType == AArch64::DestructiveBinaryCommWithRev) &&
649 !DOPRegIsUnique) {
650 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LSLZero))
651 .addReg(RegNo: DstReg, Flags: RegState::Define)
652 .add(MO: MI.getOperand(i: PredIdx))
653 .addReg(RegNo: DstReg)
654 .addImm(Val: 0);
655 }
656 } else if (DstReg != MI.getOperand(i: DOPIdx).getReg()) {
657 assert(DOPRegIsUnique && "The destructive operand should be unique");
658 PRFX = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: MovPrfx))
659 .addReg(RegNo: DstReg, Flags: RegState::Define)
660 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState);
661 DOPIdx = 0;
662 DOPRegState = {};
663 }
664
665 //
666 // Create the destructive operation
667 //
668 DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode))
669 .addReg(RegNo: DstReg, Flags: RegState::Define | getDeadRegState(B: DstIsDead));
670 DOPRegState = DOPRegState | RegState::Kill;
671
672 switch (DType) {
673 case AArch64::DestructiveUnaryPassthru:
674 DOP.addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
675 .add(MO: MI.getOperand(i: PredIdx))
676 .add(MO: MI.getOperand(i: SrcIdx));
677 break;
678 case AArch64::DestructiveBinary:
679 case AArch64::DestructiveBinaryImm:
680 case AArch64::DestructiveBinaryComm:
681 case AArch64::DestructiveBinaryCommWithRev:
682 DOP.add(MO: MI.getOperand(i: PredIdx))
683 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
684 .add(MO: MI.getOperand(i: SrcIdx));
685 break;
686 case AArch64::DestructiveTernaryCommWithRev:
687 DOP.add(MO: MI.getOperand(i: PredIdx))
688 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
689 .add(MO: MI.getOperand(i: SrcIdx))
690 .add(MO: MI.getOperand(i: Src2Idx));
691 break;
692 case AArch64::DestructiveBinaryImmUnpred:
693 DOP.addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
694 .add(MO: MI.getOperand(i: SrcIdx));
695 break;
696 case AArch64::DestructiveBinaryShImmUnpred:
697 case AArch64::Destructive2xRegImmUnpred:
698 DOP.addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
699 .add(MO: MI.getOperand(i: SrcIdx))
700 .add(MO: MI.getOperand(i: Src2Idx));
701 break;
702 }
703
704 if (PRFX) {
705 transferImpOps(OldMI&: MI, UseMI&: PRFX, DefMI&: DOP);
706 finalizeBundle(MBB, FirstMI: PRFX->getIterator(), LastMI: MBBI->getIterator());
707 } else
708 transferImpOps(OldMI&: MI, UseMI&: DOP, DefMI&: DOP);
709
710 MI.eraseFromParent();
711 return true;
712}
713
714bool AArch64ExpandPseudoImpl::expandSVEBitwisePseudo(
715 MachineInstr &MI, MachineBasicBlock &MBB,
716 MachineBasicBlock::iterator MBBI) {
717 MachineInstrBuilder PRFX, DOP;
718 const unsigned Opcode = MI.getOpcode();
719 const MachineOperand &Op0 = MI.getOperand(i: 0);
720 const MachineOperand *Op1 = &MI.getOperand(i: 1);
721 const MachineOperand *Op2 = &MI.getOperand(i: 2);
722 const Register DOPReg = Op0.getReg();
723
724 if (DOPReg == Op2->getReg()) {
725 // Commute the operands to allow destroying the second source.
726 std::swap(a&: Op1, b&: Op2);
727 } else if (DOPReg != Op1->getReg()) {
728 // If not in destructive form, emit a MOVPRFX. The input should only be
729 // killed if unused by the subsequent instruction.
730 PRFX = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::MOVPRFX_ZZ))
731 .addDef(RegNo: DOPReg, Flags: getRenamableRegState(B: Op0.isRenamable()))
732 .addReg(RegNo: Op1->getReg(),
733 Flags: getRenamableRegState(B: Op1->isRenamable()) |
734 getUndefRegState(B: Op1->isUndef()) |
735 getKillRegState(B: Op1->isKill() &&
736 Opcode == AArch64::NAND_ZZZ));
737 }
738
739 assert((DOPReg == Op1->getReg() || PRFX) && "invalid expansion");
740
741 const RegState DOPRegState = getRenamableRegState(B: Op0.isRenamable()) |
742 getUndefRegState(B: !PRFX && Op1->isUndef()) |
743 RegState::Kill;
744
745 switch (Opcode) {
746 default:
747 llvm_unreachable("unhandled opcode");
748 case AArch64::EON_ZZZ:
749 DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::BSL2N_ZZZZ))
750 .add(MO: Op0)
751 .addReg(RegNo: DOPReg, Flags: DOPRegState)
752 .add(MO: *Op1)
753 .add(MO: *Op2);
754 break;
755 case AArch64::NAND_ZZZ:
756 DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::NBSL_ZZZZ))
757 .add(MO: Op0)
758 .addReg(RegNo: DOPReg, Flags: DOPRegState)
759 .add(MO: *Op2)
760 .add(MO: *Op2);
761 break;
762 case AArch64::NOR_ZZZ:
763 DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::NBSL_ZZZZ))
764 .add(MO: Op0)
765 .addReg(RegNo: DOPReg, Flags: DOPRegState)
766 .add(MO: *Op2)
767 .add(MO: *Op1);
768 break;
769 }
770
771 if (PRFX) {
772 transferImpOps(OldMI&: MI, UseMI&: PRFX, DefMI&: DOP);
773 finalizeBundle(MBB, FirstMI: PRFX->getIterator(), LastMI: MBBI->getIterator());
774 } else {
775 transferImpOps(OldMI&: MI, UseMI&: DOP, DefMI&: DOP);
776 }
777
778 MI.eraseFromParent();
779 return true;
780}
781
782bool AArch64ExpandPseudoImpl::expandSetTagLoop(
783 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
784 MachineBasicBlock::iterator &NextMBBI) {
785 MachineInstr &MI = *MBBI;
786 DebugLoc DL = MI.getDebugLoc();
787 Register SizeReg = MI.getOperand(i: 0).getReg();
788 Register AddressReg = MI.getOperand(i: 1).getReg();
789
790 MachineFunction *MF = MBB.getParent();
791
792 bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
793 const unsigned OpCode1 =
794 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
795 const unsigned OpCode2 =
796 ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
797
798 unsigned Size = MI.getOperand(i: 2).getImm();
799 assert(Size > 0 && Size % 16 == 0);
800 if (Size % (16 * 2) != 0) {
801 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: OpCode1), DestReg: AddressReg)
802 .addReg(RegNo: AddressReg)
803 .addReg(RegNo: AddressReg)
804 .addImm(Val: 1);
805 Size -= 16;
806 }
807 MachineBasicBlock::iterator I =
808 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::MOVi64imm), DestReg: SizeReg)
809 .addImm(Val: Size);
810 expandMOVImm(MBB, MBBI: I, BitSize: 64);
811
812 auto LoopBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
813 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
814
815 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoopBB);
816 MF->insert(MBBI: ++LoopBB->getIterator(), MBB: DoneBB);
817
818 BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: OpCode2))
819 .addDef(RegNo: AddressReg)
820 .addReg(RegNo: AddressReg)
821 .addReg(RegNo: AddressReg)
822 .addImm(Val: 2)
823 .cloneMemRefs(OtherMI: MI)
824 .setMIFlags(MI.getFlags());
825 BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::SUBSXri))
826 .addDef(RegNo: SizeReg)
827 .addReg(RegNo: SizeReg)
828 .addImm(Val: 16 * 2)
829 .addImm(Val: 0);
830 BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::Bcc))
831 .addImm(Val: AArch64CC::NE)
832 .addMBB(MBB: LoopBB)
833 .addReg(RegNo: AArch64::NZCV, Flags: RegState::Implicit | RegState::Kill);
834
835 LoopBB->addSuccessor(Succ: LoopBB);
836 LoopBB->addSuccessor(Succ: DoneBB);
837
838 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
839 DoneBB->transferSuccessors(FromMBB: &MBB);
840
841 MBB.addSuccessor(Succ: LoopBB);
842
843 NextMBBI = MBB.end();
844 MI.eraseFromParent();
845 // Recompute liveness bottom up.
846 LivePhysRegs LiveRegs;
847 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
848 computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB);
849 // Do an extra pass in the loop to get the loop carried dependencies right.
850 // FIXME: is this necessary?
851 LoopBB->clearLiveIns();
852 computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB);
853 DoneBB->clearLiveIns();
854 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
855
856 return true;
857}
858
859bool AArch64ExpandPseudoImpl::expandSVESpillFill(
860 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opc,
861 unsigned N) {
862 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
863 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
864 "Unexpected opcode");
865 RegState RState =
866 getDefRegState(B: Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI);
867 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
868 ? AArch64::zsub0
869 : AArch64::psub0;
870 const TargetRegisterInfo *TRI =
871 MBB.getParent()->getSubtarget().getRegisterInfo();
872 MachineInstr &MI = *MBBI;
873 for (unsigned Offset = 0; Offset < N; ++Offset) {
874 int ImmOffset = MI.getOperand(i: 2).getImm() + Offset;
875 bool Kill = (Offset + 1 == N) ? MI.getOperand(i: 1).isKill() : false;
876 assert(ImmOffset >= -256 && ImmOffset < 256 &&
877 "Immediate spill offset out of range");
878 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: Opc))
879 .addReg(RegNo: TRI->getSubReg(Reg: MI.getOperand(i: 0).getReg(), Idx: sub0 + Offset),
880 Flags: RState)
881 .addReg(RegNo: MI.getOperand(i: 1).getReg(), Flags: getKillRegState(B: Kill))
882 .addImm(Val: ImmOffset);
883 }
884 MI.eraseFromParent();
885 return true;
886}
887
888// Create a call with the passed opcode and explicit operands, copying over all
889// the implicit operands from *MBBI, starting at the regmask.
890static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
891 MachineBasicBlock::iterator MBBI,
892 const AArch64InstrInfo *TII,
893 unsigned Opcode,
894 ArrayRef<MachineOperand> ExplicitOps,
895 unsigned RegMaskStartIdx) {
896 // Build the MI, with explicit operands first (including the call target).
897 MachineInstr *Call = BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII->get(Opcode))
898 .add(MOs: ExplicitOps)
899 .getInstr();
900
901 // Register arguments are added during ISel, but cannot be added as explicit
902 // operands of the branch as it expects to be B <target> which is only one
903 // operand. Instead they are implicit operands used by the branch.
904 while (!MBBI->getOperand(i: RegMaskStartIdx).isRegMask()) {
905 const MachineOperand &MOP = MBBI->getOperand(i: RegMaskStartIdx);
906 assert(MOP.isReg() && "can only add register operands");
907 Call->addOperand(Op: MachineOperand::CreateReg(
908 Reg: MOP.getReg(), /*Def=*/isDef: false, /*Implicit=*/isImp: true, /*isKill=*/false,
909 /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
910 RegMaskStartIdx++;
911 }
912 for (const MachineOperand &MO :
913 llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: RegMaskStartIdx))
914 Call->addOperand(Op: MO);
915
916 return Call;
917}
918
919// Create a call to CallTarget, copying over all the operands from *MBBI,
920// starting at the regmask.
921static MachineInstr *createCall(MachineBasicBlock &MBB,
922 MachineBasicBlock::iterator MBBI,
923 const AArch64InstrInfo *TII,
924 MachineOperand &CallTarget,
925 unsigned RegMaskStartIdx) {
926 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
927
928 assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
929 "invalid operand for regular call");
930 return createCallWithOps(MBB, MBBI, TII, Opcode: Opc, ExplicitOps: CallTarget, RegMaskStartIdx);
931}
932
933bool AArch64ExpandPseudoImpl::expandCALL_RVMARKER(
934 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
935 // Expand CALL_RVMARKER pseudo to:
936 // - a branch to the call target, followed by
937 // - the special `mov x29, x29` marker, if necessary, and
938 // - another branch, to the runtime function
939 // Mark the sequence as bundle, to avoid passes moving other code in between.
940 MachineInstr &MI = *MBBI;
941 MachineOperand &RVTarget = MI.getOperand(i: 0);
942 bool DoEmitMarker = MI.getOperand(i: 1).getImm();
943 assert(RVTarget.isGlobal() && "invalid operand for attached call");
944
945 MachineInstr *OriginalCall = nullptr;
946
947 if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
948 // ptrauth call.
949 const MachineOperand &CallTarget = MI.getOperand(i: 2);
950 const MachineOperand &Key = MI.getOperand(i: 3);
951 const MachineOperand &IntDisc = MI.getOperand(i: 4);
952 const MachineOperand &AddrDisc = MI.getOperand(i: 5);
953
954 assert((Key.getImm() == AArch64PACKey::IA ||
955 Key.getImm() == AArch64PACKey::IB) &&
956 "Invalid auth call key");
957
958 MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
959
960 OriginalCall = createCallWithOps(MBB, MBBI, TII, Opcode: AArch64::BLRA, ExplicitOps: Ops,
961 /*RegMaskStartIdx=*/6);
962 } else {
963 assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
964 OriginalCall = createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 2),
965 // Regmask starts after the RV and call targets.
966 /*RegMaskStartIdx=*/3);
967 }
968
969 if (DoEmitMarker)
970 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ORRXrs))
971 .addReg(RegNo: AArch64::FP, Flags: RegState::Define)
972 .addReg(RegNo: AArch64::XZR)
973 .addReg(RegNo: AArch64::FP)
974 .addImm(Val: 0);
975
976 auto *RVCall = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::BL))
977 .add(MO: RVTarget)
978 .getInstr();
979
980 if (MI.shouldUpdateAdditionalCallInfo())
981 MBB.getParent()->moveAdditionalCallInfo(Old: &MI, New: OriginalCall);
982
983 MI.eraseFromParent();
984 finalizeBundle(MBB, FirstMI: OriginalCall->getIterator(),
985 LastMI: std::next(x: RVCall->getIterator()));
986 return true;
987}
988
989bool AArch64ExpandPseudoImpl::expandCALL_BTI(MachineBasicBlock &MBB,
990 MachineBasicBlock::iterator MBBI) {
991 // Expand CALL_BTI pseudo to:
992 // - a branch to the call target
993 // - a BTI instruction
994 // Mark the sequence as a bundle, to avoid passes moving other code in
995 // between.
996 MachineInstr &MI = *MBBI;
997 MachineInstr *Call = createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 0),
998 // Regmask starts after the call target.
999 /*RegMaskStartIdx=*/1);
1000
1001 Call->setCFIType(MF&: *MBB.getParent(), Type: MI.getCFIType());
1002
1003 MachineInstr *BTI =
1004 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::HINT))
1005 // BTI J so that setjmp can to BR to this.
1006 .addImm(Val: 36)
1007 .getInstr();
1008
1009 if (MI.shouldUpdateAdditionalCallInfo())
1010 MBB.getParent()->moveAdditionalCallInfo(Old: &MI, New: Call);
1011
1012 MI.eraseFromParent();
1013 finalizeBundle(MBB, FirstMI: Call->getIterator(), LastMI: std::next(x: BTI->getIterator()));
1014 return true;
1015}
1016
1017bool AArch64ExpandPseudoImpl::expandStoreSwiftAsyncContext(
1018 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
1019 Register CtxReg = MBBI->getOperand(i: 0).getReg();
1020 Register BaseReg = MBBI->getOperand(i: 1).getReg();
1021 int Offset = MBBI->getOperand(i: 2).getImm();
1022 DebugLoc DL(MBBI->getDebugLoc());
1023 auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1024
1025 if (STI.getTargetTriple().getArchName() != "arm64e") {
1026 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::STRXui))
1027 .addUse(RegNo: CtxReg)
1028 .addUse(RegNo: BaseReg)
1029 .addImm(Val: Offset / 8)
1030 .setMIFlag(MachineInstr::FrameSetup);
1031 MBBI->eraseFromParent();
1032 return true;
1033 }
1034
1035 // We need to sign the context in an address-discriminated way. 0xc31a is a
1036 // fixed random value, chosen as part of the ABI.
1037 // add x16, xBase, #Offset
1038 // movk x16, #0xc31a, lsl #48
1039 // mov x17, x22/xzr
1040 // pacdb x17, x16
1041 // str x17, [xBase, #Offset]
1042 unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
1043 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc), DestReg: AArch64::X16)
1044 .addUse(RegNo: BaseReg)
1045 .addImm(Val: abs(x: Offset))
1046 .addImm(Val: 0)
1047 .setMIFlag(MachineInstr::FrameSetup);
1048 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::MOVKXi), DestReg: AArch64::X16)
1049 .addUse(RegNo: AArch64::X16)
1050 .addImm(Val: 0xc31a)
1051 .addImm(Val: 48)
1052 .setMIFlag(MachineInstr::FrameSetup);
1053 // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
1054 // move it somewhere before signing.
1055 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::ORRXrs), DestReg: AArch64::X17)
1056 .addUse(RegNo: AArch64::XZR)
1057 .addUse(RegNo: CtxReg)
1058 .addImm(Val: 0)
1059 .setMIFlag(MachineInstr::FrameSetup);
1060 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::PACDB), DestReg: AArch64::X17)
1061 .addUse(RegNo: AArch64::X17)
1062 .addUse(RegNo: AArch64::X16)
1063 .setMIFlag(MachineInstr::FrameSetup);
1064 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::STRXui))
1065 .addUse(RegNo: AArch64::X17)
1066 .addUse(RegNo: BaseReg)
1067 .addImm(Val: Offset / 8)
1068 .setMIFlag(MachineInstr::FrameSetup);
1069
1070 MBBI->eraseFromParent();
1071 return true;
1072}
1073
1074AArch64ExpandPseudoImpl::ConditionalBlocks
1075AArch64ExpandPseudoImpl::expandConditionalPseudo(
1076 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
1077 MachineInstrBuilder &Branch) {
1078 assert((std::next(MBBI) != MBB.end() ||
1079 MBB.successors().begin() != MBB.successors().end()) &&
1080 "Unexpected unreachable in block");
1081
1082 // Split MBB and create two new blocks:
1083 // - MBB now contains all instructions before the conditional pseudo.
1084 // - CondBB contains the conditional pseudo instruction only.
1085 // - EndBB contains all instructions after the conditional pseudo.
1086 MachineInstr &PrevMI = *std::prev(x: MBBI);
1087 MachineBasicBlock *CondBB = MBB.splitAt(SplitInst&: PrevMI, /*UpdateLiveIns*/ true);
1088 MachineBasicBlock *EndBB =
1089 std::next(x: MBBI) == CondBB->end()
1090 ? *CondBB->successors().begin()
1091 : CondBB->splitAt(SplitInst&: *MBBI, /*UpdateLiveIns*/ true);
1092
1093 // Add the SMBB label to the branch instruction & create a branch to EndBB.
1094 Branch.addMBB(MBB: CondBB);
1095 BuildMI(BB: &MBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::B))
1096 .addMBB(MBB: EndBB);
1097 MBB.addSuccessor(Succ: EndBB);
1098
1099 // Create branch from CondBB to EndBB. Users of this helper should insert new
1100 // instructions at CondBB.back() -- i.e. before the branch.
1101 BuildMI(BB: CondBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::B)).addMBB(MBB: EndBB);
1102 return {.CondBB: *CondBB, .EndBB: *EndBB};
1103}
1104
1105MachineBasicBlock *
1106AArch64ExpandPseudoImpl::expandRestoreZASave(MachineBasicBlock &MBB,
1107 MachineBasicBlock::iterator MBBI) {
1108 MachineInstr &MI = *MBBI;
1109 DebugLoc DL = MI.getDebugLoc();
1110
1111 // Compare TPIDR2_EL0 against 0. Restore ZA if TPIDR2_EL0 is zero.
1112 MachineInstrBuilder Branch =
1113 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::CBZX)).add(MO: MI.getOperand(i: 0));
1114
1115 auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
1116 // Replace the pseudo with a call (BL).
1117 MachineInstrBuilder MIB =
1118 BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::BL));
1119 // Copy operands (mainly the regmask) from the pseudo.
1120 for (unsigned I = 2; I < MI.getNumOperands(); ++I)
1121 MIB.add(MO: MI.getOperand(i: I));
1122 // Mark the TPIDR2 block pointer (X0) as an implicit use.
1123 MIB.addReg(RegNo: MI.getOperand(i: 1).getReg(), Flags: RegState::Implicit);
1124
1125 MI.eraseFromParent();
1126 return &EndBB;
1127}
1128
// Immediate mask with all eight low bits set; passed to ZERO_M when the whole
// of ZA has to be zeroed.
static constexpr unsigned ZERO_ALL_ZA_MASK = 0xFF;
1130
1131MachineBasicBlock *
1132AArch64ExpandPseudoImpl::expandCommitZASave(MachineBasicBlock &MBB,
1133 MachineBasicBlock::iterator MBBI) {
1134 MachineInstr &MI = *MBBI;
1135 DebugLoc DL = MI.getDebugLoc();
1136 [[maybe_unused]] auto *RI = MBB.getParent()->getSubtarget().getRegisterInfo();
1137
1138 // Compare TPIDR2_EL0 against 0. Commit ZA if TPIDR2_EL0 is non-zero.
1139 MachineInstrBuilder Branch =
1140 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::CBNZX)).add(MO: MI.getOperand(i: 0));
1141
1142 auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
1143 // Replace the pseudo with a call (BL).
1144 MachineInstrBuilder MIB =
1145 BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::BL));
1146 // Copy operands (mainly the regmask) from the pseudo.
1147 for (unsigned I = 3; I < MI.getNumOperands(); ++I)
1148 MIB.add(MO: MI.getOperand(i: I));
1149 // Clear TPIDR2_EL0.
1150 BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::MSR))
1151 .addImm(Val: AArch64SysReg::TPIDR2_EL0)
1152 .addReg(RegNo: AArch64::XZR);
1153 bool ZeroZA = MI.getOperand(i: 1).getImm() != 0;
1154 bool ZeroZT0 = MI.getOperand(i: 2).getImm() != 0;
1155 if (ZeroZA) {
1156 assert(MI.definesRegister(AArch64::ZAB0, RI) && "should define ZA!");
1157 BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::ZERO_M))
1158 .addImm(Val: ZERO_ALL_ZA_MASK)
1159 .addDef(RegNo: AArch64::ZAB0, Flags: RegState::ImplicitDefine);
1160 }
1161 if (ZeroZT0) {
1162 assert(MI.definesRegister(AArch64::ZT0, RI) && "should define ZT0!");
1163 BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::ZERO_T))
1164 .addDef(RegNo: AArch64::ZT0);
1165 }
1166
1167 MI.eraseFromParent();
1168 return &EndBB;
1169}
1170
1171MachineBasicBlock *
1172AArch64ExpandPseudoImpl::expandCondSMToggle(MachineBasicBlock &MBB,
1173 MachineBasicBlock::iterator MBBI) {
1174 MachineInstr &MI = *MBBI;
1175 // In the case of a smstart/smstop before a unreachable, just remove the pseudo.
1176 // Exception handling code generated by Clang may introduce unreachables and it
1177 // seems unnecessary to restore pstate.sm when that happens. Note that it is
1178 // not just an optimisation, the code below expects a successor instruction/block
1179 // in order to split the block at MBBI.
1180 if (std::next(x: MBBI) == MBB.end() &&
1181 MI.getParent()->successors().begin() ==
1182 MI.getParent()->successors().end()) {
1183 MI.eraseFromParent();
1184 return &MBB;
1185 }
1186
1187 // Expand the pseudo into smstart or smstop instruction. The pseudo has the
1188 // following operands:
1189 //
1190 // MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
1191 //
1192 // The pseudo is expanded into a conditional smstart/smstop, with a
1193 // check if pstate.sm (register) equals the expected value, and if not,
1194 // invokes the smstart/smstop.
1195 //
1196 // As an example, the following block contains a normal call from a
1197 // streaming-compatible function:
1198 //
1199 // OrigBB:
1200 // MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP
1201 // bl @normal_callee
1202 // MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART
1203 //
1204 // ...which will be transformed into:
1205 //
1206 // OrigBB:
1207 // TBNZx %0:gpr64, 0, SMBB
1208 // b EndBB
1209 //
1210 // SMBB:
1211 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP
1212 //
1213 // EndBB:
1214 // bl @normal_callee
1215 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART
1216 //
1217 DebugLoc DL = MI.getDebugLoc();
1218
1219 // Create the conditional branch based on the third operand of the
1220 // instruction, which tells us if we are wrapping a normal or streaming
1221 // function.
1222 // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1223 // expected value for the callee (0 for a normal callee and 1 for a streaming
1224 // callee).
1225 unsigned Opc;
1226 switch (MI.getOperand(i: 2).getImm()) {
1227 case AArch64SME::Always:
1228 llvm_unreachable("Should have matched to instruction directly");
1229 case AArch64SME::IfCallerIsStreaming:
1230 Opc = AArch64::TBNZW;
1231 break;
1232 case AArch64SME::IfCallerIsNonStreaming:
1233 Opc = AArch64::TBZW;
1234 break;
1235 }
1236 auto PStateSM = MI.getOperand(i: 3).getReg();
1237 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1238 unsigned SMReg32 = TRI->getSubReg(Reg: PStateSM, Idx: AArch64::sub_32);
1239 MachineInstrBuilder Tbx =
1240 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc)).addReg(RegNo: SMReg32).addImm(Val: 0);
1241
1242 auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch&: Tbx);
1243 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1244 MachineInstrBuilder MIB = BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: MI.getDebugLoc(),
1245 MCID: TII->get(Opcode: AArch64::MSRpstatesvcrImm1));
1246 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
1247 // these contain the CopyFromReg for the first argument and the flag to
1248 // indicate whether the callee is streaming or normal).
1249 MIB.add(MO: MI.getOperand(i: 0));
1250 MIB.add(MO: MI.getOperand(i: 1));
1251 for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1252 MIB.add(MO: MI.getOperand(i));
1253
1254 MI.eraseFromParent();
1255 return &EndBB;
1256}
1257
1258bool AArch64ExpandPseudoImpl::expandMultiVecPseudo(
1259 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1260 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1261 unsigned ContiguousOp, unsigned StridedOpc) {
1262 MachineInstr &MI = *MBBI;
1263 Register Tuple = MI.getOperand(i: 0).getReg();
1264
1265 auto ContiguousRange = ContiguousClass.getRegisters();
1266 auto StridedRange = StridedClass.getRegisters();
1267 unsigned Opc;
1268 if (llvm::is_contained(Range&: ContiguousRange, Element: Tuple.asMCReg())) {
1269 Opc = ContiguousOp;
1270 } else if (llvm::is_contained(Range&: StridedRange, Element: Tuple.asMCReg())) {
1271 Opc = StridedOpc;
1272 } else
1273 llvm_unreachable("Cannot expand Multi-Vector pseudo");
1274
1275 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: Opc))
1276 .add(MO: MI.getOperand(i: 0))
1277 .add(MO: MI.getOperand(i: 1))
1278 .add(MO: MI.getOperand(i: 2))
1279 .add(MO: MI.getOperand(i: 3));
1280 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1281 MI.eraseFromParent();
1282 return true;
1283}
1284
1285bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
1286 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1287 MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
1288 assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
1289 MachineInstr &MI = *MBBI;
1290 Register ReturnTuple = MI.getOperand(i: 0).getReg();
1291
1292 const TargetRegisterInfo *TRI =
1293 MBB.getParent()->getSubtarget().getRegisterInfo();
1294 for (unsigned I = 0; I < Size; ++I) {
1295 Register FormTupleOpReg = MI.getOperand(i: I + 1).getReg();
1296 Register ReturnTupleSubReg =
1297 TRI->getSubReg(Reg: ReturnTuple, Idx: AArch64::zsub0 + I);
1298 // Add copies to ensure the subregisters remain in the correct order
1299 // for any contigious operation they are used by.
1300 if (FormTupleOpReg != ReturnTupleSubReg)
1301 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ORR_ZZZ))
1302 .addReg(RegNo: ReturnTupleSubReg, Flags: RegState::Define)
1303 .addReg(RegNo: FormTupleOpReg)
1304 .addReg(RegNo: FormTupleOpReg);
1305 }
1306
1307 MI.eraseFromParent();
1308 return true;
1309}
1310
1311/// If MBBI references a pseudo instruction that should be expanded here,
1312/// do the expansion and return true. Otherwise return false.
1313bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
1314 MachineBasicBlock::iterator MBBI,
1315 MachineBasicBlock::iterator &NextMBBI) {
1316 MachineInstr &MI = *MBBI;
1317 unsigned Opcode = MI.getOpcode();
1318
1319 // Check if we can expand the destructive op
1320 int OrigInstr = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode());
1321 if (OrigInstr != -1) {
1322 auto &Orig = TII->get(Opcode: OrigInstr);
1323 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1324 AArch64::NotDestructive) {
1325 return expand_DestructiveOp(MI, MBB, MBBI);
1326 }
1327 }
1328
1329 switch (Opcode) {
1330 default:
1331 break;
1332
1333 case AArch64::BSPv8i8:
1334 case AArch64::BSPv16i8: {
1335 Register DstReg = MI.getOperand(i: 0).getReg();
1336 if (DstReg == MI.getOperand(i: 3).getReg()) {
1337 // Expand to BIT
1338 auto I = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1339 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1340 : AArch64::BITv16i8))
1341 .add(MO: MI.getOperand(i: 0))
1342 .add(MO: MI.getOperand(i: 3))
1343 .add(MO: MI.getOperand(i: 2))
1344 .add(MO: MI.getOperand(i: 1));
1345 transferImpOps(OldMI&: MI, UseMI&: I, DefMI&: I);
1346 } else if (DstReg == MI.getOperand(i: 2).getReg()) {
1347 // Expand to BIF
1348 auto I = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1349 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1350 : AArch64::BIFv16i8))
1351 .add(MO: MI.getOperand(i: 0))
1352 .add(MO: MI.getOperand(i: 2))
1353 .add(MO: MI.getOperand(i: 3))
1354 .add(MO: MI.getOperand(i: 1));
1355 transferImpOps(OldMI&: MI, UseMI&: I, DefMI&: I);
1356 } else {
1357 // Expand to BSL, use additional move if required
1358 if (DstReg == MI.getOperand(i: 1).getReg()) {
1359 auto I =
1360 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1361 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1362 : AArch64::BSLv16i8))
1363 .add(MO: MI.getOperand(i: 0))
1364 .add(MO: MI.getOperand(i: 1))
1365 .add(MO: MI.getOperand(i: 2))
1366 .add(MO: MI.getOperand(i: 3));
1367 transferImpOps(OldMI&: MI, UseMI&: I, DefMI&: I);
1368 } else {
1369 RegState RegState =
1370 getRenamableRegState(B: MI.getOperand(i: 1).isRenamable()) |
1371 getKillRegState(
1372 B: MI.getOperand(i: 1).isKill() &&
1373 MI.getOperand(i: 1).getReg() != MI.getOperand(i: 2).getReg() &&
1374 MI.getOperand(i: 1).getReg() != MI.getOperand(i: 3).getReg());
1375 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1376 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1377 : AArch64::ORRv16i8))
1378 .addReg(RegNo: DstReg,
1379 Flags: RegState::Define |
1380 getRenamableRegState(B: MI.getOperand(i: 0).isRenamable()))
1381 .addReg(RegNo: MI.getOperand(i: 1).getReg(), Flags: RegState)
1382 .addReg(RegNo: MI.getOperand(i: 1).getReg(), Flags: RegState);
1383 auto I2 =
1384 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1385 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1386 : AArch64::BSLv16i8))
1387 .add(MO: MI.getOperand(i: 0))
1388 .addReg(RegNo: DstReg,
1389 Flags: RegState::Kill | getRenamableRegState(
1390 B: MI.getOperand(i: 0).isRenamable()))
1391 .add(MO: MI.getOperand(i: 2))
1392 .add(MO: MI.getOperand(i: 3));
1393 transferImpOps(OldMI&: MI, UseMI&: I2, DefMI&: I2);
1394 }
1395 }
1396 MI.eraseFromParent();
1397 return true;
1398 }
1399
1400 case AArch64::ADDWrr:
1401 case AArch64::SUBWrr:
1402 case AArch64::ADDXrr:
1403 case AArch64::SUBXrr:
1404 case AArch64::ADDSWrr:
1405 case AArch64::SUBSWrr:
1406 case AArch64::ADDSXrr:
1407 case AArch64::SUBSXrr:
1408 case AArch64::ANDWrr:
1409 case AArch64::ANDXrr:
1410 case AArch64::BICWrr:
1411 case AArch64::BICXrr:
1412 case AArch64::ANDSWrr:
1413 case AArch64::ANDSXrr:
1414 case AArch64::BICSWrr:
1415 case AArch64::BICSXrr:
1416 case AArch64::EONWrr:
1417 case AArch64::EONXrr:
1418 case AArch64::EORWrr:
1419 case AArch64::EORXrr:
1420 case AArch64::ORNWrr:
1421 case AArch64::ORNXrr:
1422 case AArch64::ORRWrr:
1423 case AArch64::ORRXrr: {
1424 unsigned Opcode;
1425 switch (MI.getOpcode()) {
1426 default:
1427 return false;
1428 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
1429 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
1430 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
1431 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
1432 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
1433 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
1434 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
1435 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
1436 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
1437 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
1438 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
1439 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
1440 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
1441 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
1442 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
1443 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
1444 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
1445 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
1446 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
1447 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
1448 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
1449 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
1450 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
1451 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
1452 }
1453 MachineFunction &MF = *MBB.getParent();
1454 // Try to create new inst without implicit operands added.
1455 MachineInstr *NewMI = MF.CreateMachineInstr(
1456 MCID: TII->get(Opcode), DL: MI.getDebugLoc(), /*NoImplicit=*/true);
1457 MBB.insert(I: MBBI, MI: NewMI);
1458 MachineInstrBuilder MIB1(MF, NewMI);
1459 MIB1->setPCSections(MF, MD: MI.getPCSections());
1460 MIB1.addReg(RegNo: MI.getOperand(i: 0).getReg(), Flags: RegState::Define)
1461 .add(MO: MI.getOperand(i: 1))
1462 .add(MO: MI.getOperand(i: 2))
1463 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0));
1464 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB1);
1465 if (auto DebugNumber = MI.peekDebugInstrNum())
1466 NewMI->setDebugInstrNum(DebugNumber);
1467 MI.eraseFromParent();
1468 return true;
1469 }
1470
1471 case AArch64::LOADgot: {
1472 MachineFunction *MF = MBB.getParent();
1473 Register DstReg = MI.getOperand(i: 0).getReg();
1474 const MachineOperand &MO1 = MI.getOperand(i: 1);
1475 unsigned Flags = MO1.getTargetFlags();
1476
1477 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1478 // Tiny codemodel expand to LDR
1479 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1480 MCID: TII->get(Opcode: AArch64::LDRXl), DestReg: DstReg);
1481
1482 if (MO1.isGlobal()) {
1483 MIB.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags);
1484 } else if (MO1.isSymbol()) {
1485 MIB.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags);
1486 } else {
1487 assert(MO1.isCPI() &&
1488 "Only expect globals, externalsymbols, or constant pools");
1489 MIB.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), TargetFlags: Flags);
1490 }
1491 } else {
1492 // Small codemodel expand into ADRP + LDR.
1493 MachineFunction &MF = *MI.getParent()->getParent();
1494 DebugLoc DL = MI.getDebugLoc();
1495 MachineInstrBuilder MIB1 =
1496 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP), DestReg: DstReg);
1497
1498 MachineInstrBuilder MIB2;
1499 if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1500 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1501 unsigned Reg32 = TRI->getSubReg(Reg: DstReg, Idx: AArch64::sub_32);
1502 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::LDRWui))
1503 .addDef(RegNo: Reg32)
1504 .addReg(RegNo: DstReg, Flags: RegState::Kill)
1505 .addReg(RegNo: DstReg, Flags: RegState::Implicit);
1506 } else {
1507 Register DstReg = MI.getOperand(i: 0).getReg();
1508 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::LDRXui))
1509 .add(MO: MI.getOperand(i: 0))
1510 .addUse(RegNo: DstReg, Flags: RegState::Kill);
1511 }
1512
1513 if (MO1.isGlobal()) {
1514 MIB1.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags | AArch64II::MO_PAGE);
1515 MIB2.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0,
1516 TargetFlags: Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1517 } else if (MO1.isSymbol()) {
1518 MIB1.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags | AArch64II::MO_PAGE);
1519 MIB2.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags |
1520 AArch64II::MO_PAGEOFF |
1521 AArch64II::MO_NC);
1522 } else {
1523 assert(MO1.isCPI() &&
1524 "Only expect globals, externalsymbols, or constant pools");
1525 MIB1.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(),
1526 TargetFlags: Flags | AArch64II::MO_PAGE);
1527 MIB2.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(),
1528 TargetFlags: Flags | AArch64II::MO_PAGEOFF |
1529 AArch64II::MO_NC);
1530 }
1531
1532 // If the LOADgot instruction has a debug-instr-number, annotate the
1533 // LDRWui instruction that it is expanded to with the same
1534 // debug-instr-number to preserve debug information.
1535 if (MI.peekDebugInstrNum() != 0)
1536 MIB2->setDebugInstrNum(MI.peekDebugInstrNum());
1537 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2);
1538 }
1539 MI.eraseFromParent();
1540 return true;
1541 }
1542 case AArch64::MOVaddrBA:
1543 case AArch64::MOVaddr:
1544 case AArch64::MOVaddrJT:
1545 case AArch64::MOVaddrCP:
1546 case AArch64::MOVaddrTLS:
1547 case AArch64::MOVaddrEXT: {
1548 MachineFunction &MF = *MI.getParent()->getParent();
1549 Register DstReg = MI.getOperand(i: 0).getReg();
1550 assert(DstReg != AArch64::XZR);
1551
1552 bool IsTargetMachO = MF.getSubtarget<AArch64Subtarget>().isTargetMachO();
1553 SmallVector<AArch64_IMM::AddrInsnModel, 3> Insn;
1554 AArch64_IMM::expandMOVAddr(
1555 Opcode: MI.getOpcode(), TargetFlags: MI.getOperand(i: 1).getTargetFlags(), IsTargetMachO, Insn);
1556
1557 // Compute the constant pool index, if any.
1558 std::optional<unsigned> CPIdx;
1559 if (Opcode == AArch64::MOVaddrBA && IsTargetMachO) {
1560 // blockaddress expressions have to come from a constant pool because the
1561 // largest addend (and hence offset within a function) allowed for ADRP is
1562 // only 8MB.
1563 const BlockAddress *BA = MI.getOperand(i: 1).getBlockAddress();
1564 assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1565 MachineConstantPool *MCP = MF.getConstantPool();
1566 CPIdx = MCP->getConstantPoolIndex(C: BA, Alignment: Align(8));
1567 }
1568
1569 MachineInstrBuilder FirstMIB;
1570 MachineInstrBuilder LastMIB;
1571 for (const auto &I : Insn) {
1572 MachineInstrBuilder MIB;
1573 switch (I.Opcode) {
1574 case AArch64::ADRP:
1575 MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP),
1576 DestReg: DstReg);
1577 if (CPIdx)
1578 MIB.addConstantPoolIndex(Idx: *CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE);
1579 else
1580 MIB.add(MO: MI.getOperand(i: 1));
1581 break;
1582 case AArch64::LDRXui:
1583 MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::LDRXui),
1584 DestReg: DstReg)
1585 .addUse(RegNo: DstReg)
1586 .addConstantPoolIndex(
1587 Idx: *CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1588 break;
1589 case AArch64::MOVKXi: {
1590 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1591 // We do so by creating a MOVK that sets bits 48-63 of the register to
1592 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1593 // the small code model so we can assume a binary size of <= 4GB, which
1594 // makes the untagged PC relative offset positive. The binary must also
1595 // be loaded into address range [0, 2^48). Both of these properties need
1596 // to be ensured at runtime when using tagged addresses.
1597 auto Tag = MI.getOperand(i: 1);
1598 Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1599 Tag.setOffset(0x100000000);
1600 MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::MOVKXi),
1601 DestReg: DstReg)
1602 .addReg(RegNo: DstReg)
1603 .add(MO: Tag)
1604 .addImm(Val: 48);
1605 break;
1606 }
1607 case AArch64::ADDXri:
1608 MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADDXri))
1609 .add(MO: MI.getOperand(i: 0))
1610 .addReg(RegNo: DstReg)
1611 .add(MO: MI.getOperand(i: 2))
1612 .addImm(Val: 0);
1613 break;
1614 default:
1615 llvm_unreachable("unexpected opcode in MOVaddr expansion");
1616 }
1617
1618 if (!FirstMIB.getInstr())
1619 FirstMIB = MIB;
1620 LastMIB = MIB;
1621 }
1622
1623 transferImpOps(OldMI&: MI, UseMI&: FirstMIB, DefMI&: LastMIB);
1624 MI.eraseFromParent();
1625 return true;
1626 }
1627 case AArch64::ADDlowTLS:
1628 // Produce a plain ADD
1629 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADDXri))
1630 .add(MO: MI.getOperand(i: 0))
1631 .add(MO: MI.getOperand(i: 1))
1632 .add(MO: MI.getOperand(i: 2))
1633 .addImm(Val: 0);
1634 MI.eraseFromParent();
1635 return true;
1636
1637 case AArch64::MOVbaseTLS: {
1638 Register DstReg = MI.getOperand(i: 0).getReg();
1639 auto SysReg = AArch64SysReg::TPIDR_EL0;
1640 MachineFunction *MF = MBB.getParent();
1641 if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1642 SysReg = AArch64SysReg::TPIDR_EL3;
1643 else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1644 SysReg = AArch64SysReg::TPIDR_EL2;
1645 else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1646 SysReg = AArch64SysReg::TPIDR_EL1;
1647 else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1648 SysReg = AArch64SysReg::TPIDRRO_EL0;
1649 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::MRS), DestReg: DstReg)
1650 .addImm(Val: SysReg);
1651 MI.eraseFromParent();
1652 return true;
1653 }
1654
1655 case AArch64::MOVi32imm:
1656 return expandMOVImm(MBB, MBBI, BitSize: 32);
1657 case AArch64::MOVi64imm:
1658 return expandMOVImm(MBB, MBBI, BitSize: 64);
1659 case AArch64::RET_ReallyLR: {
1660 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1661 // function and missing live-ins. We are fine in practice because callee
1662 // saved register handling ensures the register value is restored before
1663 // RET, but we need the undef flag here to appease the MachineVerifier
1664 // liveness checks.
1665 MachineInstrBuilder MIB =
1666 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::RET))
1667 .addReg(RegNo: AArch64::LR, Flags: RegState::Undef);
1668 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1669 MI.eraseFromParent();
1670 return true;
1671 }
1672 case AArch64::CMP_SWAP_8:
1673 return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRB, StlrOp: AArch64::STLXRB,
1674 CmpOp: AArch64::SUBSWrx,
1675 ExtendImm: AArch64_AM::getArithExtendImm(ET: AArch64_AM::UXTB, Imm: 0),
1676 ZeroReg: AArch64::WZR, NextMBBI);
1677 case AArch64::CMP_SWAP_16:
1678 return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRH, StlrOp: AArch64::STLXRH,
1679 CmpOp: AArch64::SUBSWrx,
1680 ExtendImm: AArch64_AM::getArithExtendImm(ET: AArch64_AM::UXTH, Imm: 0),
1681 ZeroReg: AArch64::WZR, NextMBBI);
1682 case AArch64::CMP_SWAP_32:
1683 return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRW, StlrOp: AArch64::STLXRW,
1684 CmpOp: AArch64::SUBSWrs,
1685 ExtendImm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0),
1686 ZeroReg: AArch64::WZR, NextMBBI);
1687 case AArch64::CMP_SWAP_64:
1688 return expandCMP_SWAP(MBB, MBBI,
1689 LdarOp: AArch64::LDAXRX, StlrOp: AArch64::STLXRX, CmpOp: AArch64::SUBSXrs,
1690 ExtendImm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0),
1691 ZeroReg: AArch64::XZR, NextMBBI);
1692 case AArch64::CMP_SWAP_128:
1693 case AArch64::CMP_SWAP_128_RELEASE:
1694 case AArch64::CMP_SWAP_128_ACQUIRE:
1695 case AArch64::CMP_SWAP_128_MONOTONIC:
1696 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1697
1698 case AArch64::AESMCrrTied:
1699 case AArch64::AESIMCrrTied: {
1700 MachineInstrBuilder MIB =
1701 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1702 MCID: TII->get(Opcode: Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1703 AArch64::AESIMCrr))
1704 .add(MO: MI.getOperand(i: 0))
1705 .add(MO: MI.getOperand(i: 1));
1706 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1707 MI.eraseFromParent();
1708 return true;
1709 }
1710 case AArch64::IRGstack: {
1711 MachineFunction &MF = *MBB.getParent();
1712 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1713 const AArch64FrameLowering *TFI =
1714 MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1715
1716 // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1717 // almost always point to SP-after-prologue; if not, emit a longer
1718 // instruction sequence.
1719 int BaseOffset = -AFI->getTaggedBasePointerOffset();
1720 Register FrameReg;
1721 StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1722 MF, ObjectOffset: BaseOffset, isFixed: false /*isFixed*/, StackID: TargetStackID::Default /*StackID*/,
1723 FrameReg,
1724 /*PreferFP=*/false,
1725 /*ForSimm=*/true);
1726 Register SrcReg = FrameReg;
1727 if (FrameRegOffset) {
1728 // Use output register as temporary.
1729 SrcReg = MI.getOperand(i: 0).getReg();
1730 emitFrameOffset(MBB, MBBI: &MI, DL: MI.getDebugLoc(), DestReg: SrcReg, SrcReg: FrameReg,
1731 Offset: FrameRegOffset, TII);
1732 }
1733 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::IRG))
1734 .add(MO: MI.getOperand(i: 0))
1735 .addUse(RegNo: SrcReg)
1736 .add(MO: MI.getOperand(i: 2));
1737 MI.eraseFromParent();
1738 return true;
1739 }
1740 case AArch64::TAGPstack: {
1741 int64_t Offset = MI.getOperand(i: 2).getImm();
1742 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1743 MCID: TII->get(Opcode: Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1744 .add(MO: MI.getOperand(i: 0))
1745 .add(MO: MI.getOperand(i: 1))
1746 .addImm(Val: std::abs(i: Offset))
1747 .add(MO: MI.getOperand(i: 4));
1748 MI.eraseFromParent();
1749 return true;
1750 }
1751 case AArch64::STGloop_wback:
1752 case AArch64::STZGloop_wback:
1753 return expandSetTagLoop(MBB, MBBI, NextMBBI);
1754 case AArch64::STGloop:
1755 case AArch64::STZGloop:
1756 report_fatal_error(
1757 reason: "Non-writeback variants of STGloop / STZGloop should not "
1758 "survive past PrologEpilogInserter.");
1759 case AArch64::STR_ZZZZXI:
1760 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
1761 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 4);
1762 case AArch64::STR_ZZZXI:
1763 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 3);
1764 case AArch64::STR_ZZXI:
1765 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
1766 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 2);
1767 case AArch64::STR_PPXI:
1768 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_PXI, N: 2);
1769 case AArch64::LDR_ZZZZXI:
1770 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
1771 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 4);
1772 case AArch64::LDR_ZZZXI:
1773 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 3);
1774 case AArch64::LDR_ZZXI:
1775 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
1776 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 2);
1777 case AArch64::LDR_PPXI:
1778 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_PXI, N: 2);
1779 case AArch64::BLR_RVMARKER:
1780 case AArch64::BLRA_RVMARKER:
1781 return expandCALL_RVMARKER(MBB, MBBI);
1782 case AArch64::BLR_BTI:
1783 return expandCALL_BTI(MBB, MBBI);
1784 case AArch64::StoreSwiftAsyncContext:
1785 return expandStoreSwiftAsyncContext(MBB, MBBI);
1786 case AArch64::RestoreZAPseudo:
1787 case AArch64::CommitZASavePseudo:
1788 case AArch64::MSRpstatePseudo: {
1789 auto *NewMBB = [&] {
1790 switch (Opcode) {
1791 case AArch64::RestoreZAPseudo:
1792 return expandRestoreZASave(MBB, MBBI);
1793 case AArch64::CommitZASavePseudo:
1794 return expandCommitZASave(MBB, MBBI);
1795 case AArch64::MSRpstatePseudo:
1796 return expandCondSMToggle(MBB, MBBI);
1797 default:
1798 llvm_unreachable("Unexpected conditional pseudo!");
1799 }
1800 }();
1801 if (NewMBB != &MBB)
1802 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1803 return true;
1804 }
1805 case AArch64::InOutZAUsePseudo:
1806 case AArch64::RequiresZASavePseudo:
1807 case AArch64::RequiresZT0SavePseudo:
1808 case AArch64::SMEStateAllocPseudo:
1809 case AArch64::COALESCER_BARRIER_FPR16:
1810 case AArch64::COALESCER_BARRIER_FPR32:
1811 case AArch64::COALESCER_BARRIER_FPR64:
1812 case AArch64::COALESCER_BARRIER_FPR128:
1813 MI.eraseFromParent();
1814 return true;
1815 case AArch64::LD1B_2Z_IMM_PSEUDO:
1816 return expandMultiVecPseudo(
1817 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1818 ContiguousOp: AArch64::LD1B_2Z_IMM, StridedOpc: AArch64::LD1B_2Z_STRIDED_IMM);
1819 case AArch64::LD1H_2Z_IMM_PSEUDO:
1820 return expandMultiVecPseudo(
1821 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1822 ContiguousOp: AArch64::LD1H_2Z_IMM, StridedOpc: AArch64::LD1H_2Z_STRIDED_IMM);
1823 case AArch64::LD1W_2Z_IMM_PSEUDO:
1824 return expandMultiVecPseudo(
1825 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1826 ContiguousOp: AArch64::LD1W_2Z_IMM, StridedOpc: AArch64::LD1W_2Z_STRIDED_IMM);
1827 case AArch64::LD1D_2Z_IMM_PSEUDO:
1828 return expandMultiVecPseudo(
1829 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1830 ContiguousOp: AArch64::LD1D_2Z_IMM, StridedOpc: AArch64::LD1D_2Z_STRIDED_IMM);
1831 case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1832 return expandMultiVecPseudo(
1833 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1834 ContiguousOp: AArch64::LDNT1B_2Z_IMM, StridedOpc: AArch64::LDNT1B_2Z_STRIDED_IMM);
1835 case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1836 return expandMultiVecPseudo(
1837 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1838 ContiguousOp: AArch64::LDNT1H_2Z_IMM, StridedOpc: AArch64::LDNT1H_2Z_STRIDED_IMM);
1839 case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1840 return expandMultiVecPseudo(
1841 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1842 ContiguousOp: AArch64::LDNT1W_2Z_IMM, StridedOpc: AArch64::LDNT1W_2Z_STRIDED_IMM);
1843 case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1844 return expandMultiVecPseudo(
1845 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1846 ContiguousOp: AArch64::LDNT1D_2Z_IMM, StridedOpc: AArch64::LDNT1D_2Z_STRIDED_IMM);
1847 case AArch64::LD1B_2Z_PSEUDO:
1848 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1849 StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1B_2Z,
1850 StridedOpc: AArch64::LD1B_2Z_STRIDED);
1851 case AArch64::LD1H_2Z_PSEUDO:
1852 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1853 StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1H_2Z,
1854 StridedOpc: AArch64::LD1H_2Z_STRIDED);
1855 case AArch64::LD1W_2Z_PSEUDO:
1856 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1857 StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1W_2Z,
1858 StridedOpc: AArch64::LD1W_2Z_STRIDED);
1859 case AArch64::LD1D_2Z_PSEUDO:
1860 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1861 StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1D_2Z,
1862 StridedOpc: AArch64::LD1D_2Z_STRIDED);
1863 case AArch64::LDNT1B_2Z_PSEUDO:
1864 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1865 StridedClass: AArch64::ZPR2StridedRegClass,
1866 ContiguousOp: AArch64::LDNT1B_2Z, StridedOpc: AArch64::LDNT1B_2Z_STRIDED);
1867 case AArch64::LDNT1H_2Z_PSEUDO:
1868 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1869 StridedClass: AArch64::ZPR2StridedRegClass,
1870 ContiguousOp: AArch64::LDNT1H_2Z, StridedOpc: AArch64::LDNT1H_2Z_STRIDED);
1871 case AArch64::LDNT1W_2Z_PSEUDO:
1872 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1873 StridedClass: AArch64::ZPR2StridedRegClass,
1874 ContiguousOp: AArch64::LDNT1W_2Z, StridedOpc: AArch64::LDNT1W_2Z_STRIDED);
1875 case AArch64::LDNT1D_2Z_PSEUDO:
1876 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1877 StridedClass: AArch64::ZPR2StridedRegClass,
1878 ContiguousOp: AArch64::LDNT1D_2Z, StridedOpc: AArch64::LDNT1D_2Z_STRIDED);
1879 case AArch64::LD1B_4Z_IMM_PSEUDO:
1880 return expandMultiVecPseudo(
1881 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1882 ContiguousOp: AArch64::LD1B_4Z_IMM, StridedOpc: AArch64::LD1B_4Z_STRIDED_IMM);
1883 case AArch64::LD1H_4Z_IMM_PSEUDO:
1884 return expandMultiVecPseudo(
1885 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1886 ContiguousOp: AArch64::LD1H_4Z_IMM, StridedOpc: AArch64::LD1H_4Z_STRIDED_IMM);
1887 case AArch64::LD1W_4Z_IMM_PSEUDO:
1888 return expandMultiVecPseudo(
1889 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1890 ContiguousOp: AArch64::LD1W_4Z_IMM, StridedOpc: AArch64::LD1W_4Z_STRIDED_IMM);
1891 case AArch64::LD1D_4Z_IMM_PSEUDO:
1892 return expandMultiVecPseudo(
1893 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1894 ContiguousOp: AArch64::LD1D_4Z_IMM, StridedOpc: AArch64::LD1D_4Z_STRIDED_IMM);
1895 case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1896 return expandMultiVecPseudo(
1897 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1898 ContiguousOp: AArch64::LDNT1B_4Z_IMM, StridedOpc: AArch64::LDNT1B_4Z_STRIDED_IMM);
1899 case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1900 return expandMultiVecPseudo(
1901 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1902 ContiguousOp: AArch64::LDNT1H_4Z_IMM, StridedOpc: AArch64::LDNT1H_4Z_STRIDED_IMM);
1903 case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1904 return expandMultiVecPseudo(
1905 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1906 ContiguousOp: AArch64::LDNT1W_4Z_IMM, StridedOpc: AArch64::LDNT1W_4Z_STRIDED_IMM);
1907 case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1908 return expandMultiVecPseudo(
1909 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1910 ContiguousOp: AArch64::LDNT1D_4Z_IMM, StridedOpc: AArch64::LDNT1D_4Z_STRIDED_IMM);
1911 case AArch64::LD1B_4Z_PSEUDO:
1912 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1913 StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1B_4Z,
1914 StridedOpc: AArch64::LD1B_4Z_STRIDED);
1915 case AArch64::LD1H_4Z_PSEUDO:
1916 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1917 StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1H_4Z,
1918 StridedOpc: AArch64::LD1H_4Z_STRIDED);
1919 case AArch64::LD1W_4Z_PSEUDO:
1920 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1921 StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1W_4Z,
1922 StridedOpc: AArch64::LD1W_4Z_STRIDED);
1923 case AArch64::LD1D_4Z_PSEUDO:
1924 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1925 StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1D_4Z,
1926 StridedOpc: AArch64::LD1D_4Z_STRIDED);
1927 case AArch64::LDNT1B_4Z_PSEUDO:
1928 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1929 StridedClass: AArch64::ZPR4StridedRegClass,
1930 ContiguousOp: AArch64::LDNT1B_4Z, StridedOpc: AArch64::LDNT1B_4Z_STRIDED);
1931 case AArch64::LDNT1H_4Z_PSEUDO:
1932 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1933 StridedClass: AArch64::ZPR4StridedRegClass,
1934 ContiguousOp: AArch64::LDNT1H_4Z, StridedOpc: AArch64::LDNT1H_4Z_STRIDED);
1935 case AArch64::LDNT1W_4Z_PSEUDO:
1936 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1937 StridedClass: AArch64::ZPR4StridedRegClass,
1938 ContiguousOp: AArch64::LDNT1W_4Z, StridedOpc: AArch64::LDNT1W_4Z_STRIDED);
1939 case AArch64::LDNT1D_4Z_PSEUDO:
1940 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1941 StridedClass: AArch64::ZPR4StridedRegClass,
1942 ContiguousOp: AArch64::LDNT1D_4Z, StridedOpc: AArch64::LDNT1D_4Z_STRIDED);
1943 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
1944 return expandFormTuplePseudo(MBB, MBBI, NextMBBI, Size: 2);
1945 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
1946 return expandFormTuplePseudo(MBB, MBBI, NextMBBI, Size: 4);
1947 case AArch64::EON_ZZZ:
1948 case AArch64::NAND_ZZZ:
1949 case AArch64::NOR_ZZZ:
1950 return expandSVEBitwisePseudo(MI, MBB, MBBI);
1951 }
1952 return false;
1953}
1954
1955/// Iterate over the instructions in basic block MBB and expand any
1956/// pseudo instructions. Return true if anything was modified.
1957bool AArch64ExpandPseudoImpl::expandMBB(MachineBasicBlock &MBB) {
1958 bool Modified = false;
1959
1960 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1961 while (MBBI != E) {
1962 MachineBasicBlock::iterator NMBBI = std::next(x: MBBI);
1963 if (MBBI->isPseudo())
1964 Modified |= expandMI(MBB, MBBI, NextMBBI&: NMBBI);
1965 MBBI = NMBBI;
1966 }
1967
1968 return Modified;
1969}
1970
1971bool AArch64ExpandPseudoImpl::run(MachineFunction &MF) {
1972 TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
1973
1974 bool Modified = false;
1975 for (auto &MBB : MF)
1976 Modified |= expandMBB(MBB);
1977 return Modified;
1978}
1979
1980bool AArch64ExpandPseudoLegacy::runOnMachineFunction(MachineFunction &MF) {
1981 return AArch64ExpandPseudoImpl().run(MF);
1982}
1983
1984/// Returns an instance of the pseudo instruction expansion pass.
1985FunctionPass *llvm::createAArch64ExpandPseudoLegacyPass() {
1986 return new AArch64ExpandPseudoLegacy();
1987}
1988
1989PreservedAnalyses
1990AArch64ExpandPseudoPass::run(MachineFunction &MF,
1991 MachineFunctionAnalysisManager &MFAM) {
1992 const bool Changed = AArch64ExpandPseudoImpl().run(MF);
1993 if (!Changed)
1994 return PreservedAnalyses::all();
1995 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
1996 PA.preserveSet<CFGAnalyses>();
1997 return PA;
1998}
1999