//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMultiVecPseudo(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            TargetRegisterClass ContiguousClass,
                            TargetRegisterClass StridedClass,
                            unsigned ContiguousOpc, unsigned StridedOpc);
  bool expandFormTuplePseudo(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             MachineBasicBlock::iterator &NextMBBI,
                             unsigned Size);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  bool expandSTSHHAtomicStore(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI);
  struct ConditionalBlocks {
    MachineBasicBlock &CondBB;
    MachineBasicBlock &EndBB;
  };
  ConditionalBlocks expandConditionalPseudo(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL,
                                            MachineInstrBuilder &Branch);
  MachineBasicBlock *expandRestoreZASave(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCommitZASave(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
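///
/// For example (illustrative), the 64-bit constant 0x0000ab00cd00ef01 can be
/// synthesized as a MOVZ of the low 16-bit chunk followed by MOVKs of the
/// remaining non-zero chunks:
///
///   movz x0, #0xef01
///   movk x0, #0xcd00, lsl #16
///   movk x0, #0xab00, lsl #32
///
/// The exact sequence (ORR with a logical immediate, MOVN-based forms, etc.)
/// is chosen by AArch64_IMM::expandMOVImm.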
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  RegState RenamableState =
      getRenamableRegState(MI.getOperand(0).isRenamable());
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::EONXrs:
    case AArch64::EORXrs:
    case AArch64::ORRWrs:
    case AArch64::ORRXrs: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
              .addReg(DstReg, RegState::Define |
                                  getDeadRegState(DstIsDead && LastItem) |
                                  RenamableState)
              .addReg(DstReg)
              .addReg(DstReg)
              .addImm(I->Op2));
    } break;
    case AArch64::ANDXri:
    case AArch64::EORXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0)
        .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Lfail
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  //     b .Ldone
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm  ==>  FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd  ==>  FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  case AArch64::Destructive2xRegImmUnpred:
    // EXT_ZZI_CONSTRUCTIVE Zd, Zs, Imm
    //   ==> MOVPRFX Zd, Zs; EXT_ZZI Zd, Zd, Zs, Imm
    std::tie(DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 1, 2);
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
  case AArch64::Destructive2xRegImmUnpred:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode.
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX.
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  // Preserve undef state until DOP's reg is defined.
  RegState DOPRegState = getUndefRegState(MI.getOperand(DOPIdx).isUndef());

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm ||
            DType == AArch64::DestructiveBinaryCommWithRev) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register.
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState);

    // After the movprfx, the destructive operand is same as Dst.
    DOPIdx = 0;
    DOPRegState = {};

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm ||
         DType == AArch64::DestructiveBinaryCommWithRev) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState);
    DOPIdx = 0;
    DOPRegState = {};
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
  DOPRegState = DOPRegState | RegState::Kill;

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  case AArch64::Destructive2xRegImmUnpred:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    transferImpOps(MI, PRFX, DOP);
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

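/// Expand the SVE EON/NAND/NOR pseudos into the SVE2 ternary bitwise
/// instructions BSL2N/NBSL, inserting a MOVPRFX when the destination does not
/// already alias one of the sources. A sketch of the NAND case (registers
/// illustrative):
///
///   NAND_ZZZ z0, z1, z2  ==>  movprfx z0, z1
///                             nbsl z0.d, z0.d, z2.d, z2.d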
bool AArch64ExpandPseudo::expandSVEBitwisePseudo(
    MachineInstr &MI, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  MachineInstrBuilder PRFX, DOP;
  const unsigned Opcode = MI.getOpcode();
  const MachineOperand &Op0 = MI.getOperand(0);
  const MachineOperand *Op1 = &MI.getOperand(1);
  const MachineOperand *Op2 = &MI.getOperand(2);
  const Register DOPReg = Op0.getReg();

  if (DOPReg == Op2->getReg()) {
    // Commute the operands to allow destroying the second source.
    std::swap(Op1, Op2);
  } else if (DOPReg != Op1->getReg()) {
    // If not in destructive form, emit a MOVPRFX. The input should only be
    // killed if unused by the subsequent instruction.
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVPRFX_ZZ))
               .addDef(DOPReg, getRenamableRegState(Op0.isRenamable()))
               .addReg(Op1->getReg(),
                       getRenamableRegState(Op1->isRenamable()) |
                           getUndefRegState(Op1->isUndef()) |
                           getKillRegState(Op1->isKill() &&
                                           Opcode == AArch64::NAND_ZZZ));
  }

  assert((DOPReg == Op1->getReg() || PRFX) && "invalid expansion");

  const RegState DOPRegState = getRenamableRegState(Op0.isRenamable()) |
                               getUndefRegState(!PRFX && Op1->isUndef()) |
                               RegState::Kill;

  switch (Opcode) {
  default:
    llvm_unreachable("unhandled opcode");
  case AArch64::EON_ZZZ:
    DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BSL2N_ZZZZ))
              .add(Op0)
              .addReg(DOPReg, DOPRegState)
              .add(*Op1)
              .add(*Op2);
    break;
  case AArch64::NAND_ZZZ:
    DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::NBSL_ZZZZ))
              .add(Op0)
              .addReg(DOPReg, DOPRegState)
              .add(*Op2)
              .add(*Op2);
    break;
  case AArch64::NOR_ZZZ:
    DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::NBSL_ZZZZ))
              .add(Op0)
              .addReg(DOPReg, DOPRegState)
              .add(*Op2)
              .add(*Op1);
    break;
  }

  if (PRFX) {
    transferImpOps(MI, PRFX, DOP);
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
  } else {
    transferImpOps(MI, DOP, DOP);
  }

  MI.eraseFromParent();
  return true;
}

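// Expand STGloop_wback/STZGloop_wback into a loop that tags (and, for the
// STZG variant, zeroes) memory two MTE granules (32 bytes) per iteration. A
// rough sketch of the emitted sequence:
//
//     stg  Address, [Address], #16    ; only if Size is not a multiple of 32
//     mov  Size, #<remaining size>
// .Lloop:
//     st2g Address, [Address], #32
//     subs Size, Size, #32
//     b.ne .Lloop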
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

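// Expand a spill/fill pseudo of an SVE Z- or P-register tuple into N
// consecutive scaled LDR/STR instructions on the tuple's subregisters, e.g.
// (illustrative) a two-register fill starting at [x0] becomes:
//
//     ldr z0, [x0]
//     ldr z1, [x0, #1, mul vl]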
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
          Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
         "Unexpected opcode");
  RegState RState =
      getDefRegState(Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI);
  unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
                      ? AArch64::zsub0
                      : AArch64::psub0;
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
                RState)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

// Create a call with the passed opcode and explicit operands, copying over all
// the implicit operands from *MBBI, starting at the regmask.
static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const AArch64InstrInfo *TII,
                                       unsigned Opcode,
                                       ArrayRef<MachineOperand> ExplicitOps,
                                       unsigned RegMaskStartIdx) {
  // Build the MI, with explicit operands first (including the call target).
  MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
                           .add(ExplicitOps)
                           .getInstr();

  // Register arguments are added during ISel, but cannot be added as explicit
  // operands of the branch: it expects the single-operand form `B <target>`.
  // Instead they are attached to the branch as implicit operands.
  while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
    const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    Call->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
        /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
    Call->addOperand(MO);

  return Call;
}

// Create a call to CallTarget, copying over all the operands from *MBBI,
// starting at the regmask.
static MachineInstr *createCall(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const AArch64InstrInfo *TII,
                                MachineOperand &CallTarget,
                                unsigned RegMaskStartIdx) {
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;

  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, if necessary, and
  // - another branch, to the runtime function
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
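  //
  // For the common Objective-C case, the resulting bundle looks roughly like
  // (illustrative; the runtime function comes from the RV target operand):
  //
  //     bl _target
  //     mov x29, x29                              ; marker, if requested
  //     bl _objc_retainAutoreleasedReturnValue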
  MachineInstr &MI = *MBBI;
  MachineOperand &RVTarget = MI.getOperand(0);
  bool DoEmitMarker = MI.getOperand(1).getImm();
  assert(RVTarget.isGlobal() && "invalid operand for attached call");

  MachineInstr *OriginalCall = nullptr;

  if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
    // ptrauth call.
    const MachineOperand &CallTarget = MI.getOperand(2);
    const MachineOperand &Key = MI.getOperand(3);
    const MachineOperand &IntDisc = MI.getOperand(4);
    const MachineOperand &AddrDisc = MI.getOperand(5);

    assert((Key.getImm() == AArch64PACKey::IA ||
            Key.getImm() == AArch64PACKey::IB) &&
           "Invalid auth call key");

    MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};

    OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
                                     /*RegMaskStartIdx=*/6);
  } else {
    assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
    OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(2),
                              // Regmask starts after the RV and call targets.
                              /*RegMaskStartIdx=*/3);
  }

  if (DoEmitMarker)
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
        .addReg(AArch64::FP, RegState::Define)
        .addReg(AArch64::XZR)
        .addReg(AArch64::FP)
        .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
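  //
  // The resulting bundle is, in effect:
  //
  //     bl <target>
  //     hint #36      ; bti j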
  MachineInstr &MI = *MBBI;
  MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
                                  // Regmask starts after the call target.
                                  /*RegMaskStartIdx=*/1);

  Call->setCFIType(*MBB.getParent(), MI.getCFIType());

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
  // so move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

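// Expand an STSHH-annotated atomic store pseudo into an STSHH hint followed
// by the matching plain or release store, kept adjacent as a bundle. A rough
// sketch for a 32-bit release store (registers illustrative; the hint operand
// is the retention-policy immediate taken from the pseudo):
//
//     stshh <policy>
//     stlr w0, [x1]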
bool AArch64ExpandPseudo::expandSTSHHAtomicStore(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL(MI.getDebugLoc());

  unsigned Order = MI.getOperand(2).getImm();
  unsigned Policy = MI.getOperand(3).getImm();
  unsigned Size = MI.getOperand(4).getImm();

  bool IsRelaxed = Order == 0;
  unsigned StoreOpc = 0;

  // __ATOMIC_RELAXED uses STR. __ATOMIC_{RELEASE/SEQ_CST} use STLR.
  switch (Size) {
  case 8:
    StoreOpc = IsRelaxed ? AArch64::STRBBui : AArch64::STLRB;
    break;
  case 16:
    StoreOpc = IsRelaxed ? AArch64::STRHHui : AArch64::STLRH;
    break;
  case 32:
    StoreOpc = IsRelaxed ? AArch64::STRWui : AArch64::STLRW;
    break;
  case 64:
    StoreOpc = IsRelaxed ? AArch64::STRXui : AArch64::STLRX;
    break;
  default:
    llvm_unreachable("Unexpected STSHH atomic store size");
  }

  // Emit the hint with the retention policy immediate.
  MachineInstr *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
                           .addImm(Policy)
                           .getInstr();

  // Emit the associated store instruction.
  Register ValReg = MI.getOperand(0).getReg();

  if (Size < 64) {
    const TargetRegisterInfo *TRI =
        MBB.getParent()->getSubtarget().getRegisterInfo();
    Register SubReg = TRI->getSubReg(ValReg, AArch64::sub_32);
    if (SubReg)
      ValReg = SubReg;
  }

  MachineInstrBuilder Store = BuildMI(MBB, MBBI, DL, TII->get(StoreOpc))
                                  .addReg(ValReg)
                                  .add(MI.getOperand(1));

  // Relaxed uses base+imm addressing with a zero offset.
  if (IsRelaxed)
    Store.addImm(0);

  // Preserve memory operands and any implicit uses/defs.
  Store->setMemRefs(*MBB.getParent(), MI.memoperands());
  transferImpOps(MI, Store, Store);

  // Bundle the hint and store so they remain adjacent.
  finalizeBundle(MBB, Hint->getIterator(), std::next(Store->getIterator()));

  MI.eraseFromParent();
  return true;
}

AArch64ExpandPseudo::ConditionalBlocks
AArch64ExpandPseudo::expandConditionalPseudo(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             DebugLoc DL,
                                             MachineInstrBuilder &Branch) {
  assert((std::next(MBBI) != MBB.end() ||
          MBB.successors().begin() != MBB.successors().end()) &&
         "Unexpected unreachable in block");

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before the conditional pseudo.
  //  - CondBB contains the conditional pseudo instruction only.
  //  - EndBB contains all instructions after the conditional pseudo.
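  //
  // The resulting control flow is roughly:
  //
  //   MBB:    ...                     ; <Branch> CondBB ; b EndBB
  //   CondBB: <caller-inserted code>  ; b EndBB
  //   EndBB:  ...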
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *CondBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB =
      std::next(MBBI) == CondBB->end()
          ? *CondBB->successors().begin()
          : CondBB->splitAt(*MBBI, /*UpdateLiveIns*/ true);

  // Add the CondBB label to the branch instruction & create a branch to EndBB.
  Branch.addMBB(CondBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create branch from CondBB to EndBB. Users of this helper should insert new
  // instructions at CondBB.back() -- i.e. before the branch.
  BuildMI(CondBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
  return {*CondBB, *EndBB};
}

MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZASave(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  // Compare TPIDR2_EL0 against 0. Restore ZA if TPIDR2_EL0 is zero.
  MachineInstrBuilder Branch =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX)).add(MI.getOperand(0));

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL));
  // Copy operands (mainly the regmask) from the pseudo.
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  // Mark the TPIDR2 block pointer (X0) as an implicit use.
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);

  MI.eraseFromParent();
  return &EndBB;
}

static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111;

MachineBasicBlock *
AArch64ExpandPseudo::expandCommitZASave(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  [[maybe_unused]] auto *RI = MBB.getParent()->getSubtarget().getRegisterInfo();

  // Compare TPIDR2_EL0 against 0. Commit ZA if TPIDR2_EL0 is non-zero.
  MachineInstrBuilder Branch =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBNZX)).add(MI.getOperand(0));

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL));
  // Copy operands (mainly the regmask) from the pseudo.
  for (unsigned I = 3; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  // Clear TPIDR2_EL0.
  BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(AArch64::XZR);
  bool ZeroZA = MI.getOperand(1).getImm() != 0;
  bool ZeroZT0 = MI.getOperand(2).getImm() != 0;
  if (ZeroZA) {
    assert(MI.definesRegister(AArch64::ZAB0, RI) && "should define ZA!");
    BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::ZERO_M))
        .addImm(ZERO_ALL_ZA_MASK)
        .addDef(AArch64::ZAB0, RegState::ImplicitDefine);
  }
  if (ZeroZT0) {
    assert(MI.definesRegister(AArch64::ZT0, RI) && "should define ZT0!");
    BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::ZERO_T))
        .addDef(AArch64::ZT0);
  }

  MI.eraseFromParent();
  return &EndBB;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens. Note that it is not just an optimisation: the code below expects
  // a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into smstart or smstop instruction. The pseudo has the
  // following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>  <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>  <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not
  // the expected value for the callee (0 for a normal callee and 1 for a
  // streaming callee).
  unsigned Opc;
  switch (MI.getOperand(2).getImm()) {
  case AArch64SME::Always:
    llvm_unreachable("Should have matched to instruction directly");
  case AArch64SME::IfCallerIsStreaming:
    Opc = AArch64::TBNZW;
    break;
  case AArch64SME::IfCallerIsNonStreaming:
    Opc = AArch64::TBZW;
    break;
  }
  auto PStateSM = MI.getOperand(3).getReg();
  auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Tbx);
  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in CondBB.
  MachineInstrBuilder MIB = BuildMI(CondBB, CondBB.back(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  MI.eraseFromParent();
  return &EndBB;
}

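// Expand a multi-vector SME2 load/store pseudo into either the contiguous or
// the strided form of the real instruction, depending on the register class
// that register allocation picked for the tuple operand. Illustratively, a
// four-vector tuple in the contiguous class might be { z0 - z3 }, while the
// strided class might hold { z0, z4, z8, z12 }.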
bool AArch64ExpandPseudo::expandMultiVecPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
    unsigned ContiguousOpc, unsigned StridedOpc) {
  MachineInstr &MI = *MBBI;
  Register Tuple = MI.getOperand(0).getReg();

  auto ContiguousRange = ContiguousClass.getRegisters();
  auto StridedRange = StridedClass.getRegisters();
  unsigned Opc;
  if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
    Opc = ContiguousOpc;
  } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
    Opc = StridedOpc;
  } else
    llvm_unreachable("Cannot expand Multi-Vector pseudo");

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
                                .add(MI.getOperand(0))
                                .add(MI.getOperand(1))
                                .add(MI.getOperand(2))
                                .add(MI.getOperand(3));
  transferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  return true;
}

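// Expand a form-tuple pseudo (Size == 2 or 4) by copying each source vector
// into the corresponding subregister of the result tuple. The copies use
// ORR_ZZZ, for which "mov zd.d, zn.d" is an alias, e.g. (sketch, registers
// illustrative):
//
//     orr z0.d, z5.d, z5.d    ; z0 = zsub0 of the result tuple
//     orr z1.d, z9.d, z9.d    ; z1 = zsub1 of the result tuple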
bool AArch64ExpandPseudo::expandFormTuplePseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
  assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
  MachineInstr &MI = *MBBI;
  Register ReturnTuple = MI.getOperand(0).getReg();

  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  for (unsigned I = 0; I < Size; ++I) {
    Register FormTupleOpReg = MI.getOperand(I + 1).getReg();
    Register ReturnTupleSubReg =
        TRI->getSubReg(ReturnTuple, AArch64::zsub0 + I);
    // Add copies to ensure the subregisters remain in the correct order
    // for any contiguous operation they are used by.
    if (FormTupleOpReg != ReturnTupleSubReg)
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_ZZZ))
          .addReg(ReturnTupleSubReg, RegState::Define)
          .addReg(FormTupleOpReg)
          .addReg(FormTupleOpReg);
  }

  MI.eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op.
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

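  // Vector bitwise-select pseudos with operands (Dst, Mask, TrueVal,
  // FalseVal), computing Dst = (Mask & TrueVal) | (~Mask & FalseVal).
  // Lowered to BIT/BIF/BSL depending on which source operand, if any, the
  // destination register happens to equal.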
1402 case AArch64::BSPv8i8:
1403 case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                       TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                           : AArch64::BITv16i8))
                   .add(MI.getOperand(0))
                   .add(MI.getOperand(3))
                   .add(MI.getOperand(2))
                   .add(MI.getOperand(1));
      transferImpOps(MI, I, I);
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                       TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                           : AArch64::BIFv16i8))
                   .add(MI.getOperand(0))
                   .add(MI.getOperand(2))
                   .add(MI.getOperand(3))
                   .add(MI.getOperand(1));
      transferImpOps(MI, I, I);
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        auto I =
            BuildMI(MBB, MBBI, MI.getDebugLoc(),
                    TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                        : AArch64::BSLv16i8))
                .add(MI.getOperand(0))
                .add(MI.getOperand(1))
                .add(MI.getOperand(2))
                .add(MI.getOperand(3));
        transferImpOps(MI, I, I);
      } else {
        RegState RegState =
            getRenamableRegState(MI.getOperand(1).isRenamable()) |
            getKillRegState(
                MI.getOperand(1).isKill() &&
                MI.getOperand(1).getReg() != MI.getOperand(2).getReg() &&
                MI.getOperand(1).getReg() != MI.getOperand(3).getReg());
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .addReg(MI.getOperand(1).getReg(), RegState)
            .addReg(MI.getOperand(1).getReg(), RegState);
        auto I2 =
            BuildMI(MBB, MBBI, MI.getDebugLoc(),
                    TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                        : AArch64::BSLv16i8))
                .add(MI.getOperand(0))
                .addReg(DstReg,
                        RegState::Kill | getRenamableRegState(
                                             MI.getOperand(0).isRenamable()))
                .add(MI.getOperand(2))
                .add(MI.getOperand(3));
        transferImpOps(MI, I2, I2);
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
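    // These register-register pseudos carry no shift operand; lower each to
    // its shifted-register form with an explicit LSL #0, e.g.
    //   ADDWrr wD, wN, wM  ->  ADDWrs wD, wN, wM, lsl #0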
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Create the new instruction without implicit operands; those are
    // transferred over from MI below.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    if (auto DebugNumber = MI.peekDebugInstrNum())
      NewMI->setDebugInstrNum(DebugNumber);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

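    // Illustrative expansions (ELF spelling; the exact relocation operators
    // depend on the target flags carried on the operand):
    //   tiny code model:  LDR  xD, :got:sym
    //   small code model: ADRP xD, :got:sym
    //                     LDR  xD, [xD, :got_lo12:sym]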
    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny code model: expand to a single literal LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small code model: expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      // If the LOADgot instruction has a debug-instr-number, annotate the
      // LDR it expands to (which defines the final value) with the same
      // debug-instr-number to preserve debug information.
      if (MI.peekDebugInstrNum() != 0)
        MIB2->setDebugInstrNum(MI.peekDebugInstrNum());
      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // Blockaddress expressions have to come from a constant pool because
      // the largest addend (and hence offset within a function) allowed for
      // ADRP is only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
  [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
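    // Illustrative result (small code model):
    //   ADRP xD, sym            ; from operand 1 (page relocation)
    //   ADD  xD, xD, :lo12:sym  ; from operand 2 (page-offset relocation)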
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also
      // be loaded into address range [0, 2^48). Both of these properties need
      // to be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD.
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
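    // Read the thread pointer with MRS. TPIDR_EL0 is the default; subtargets
    // may select the EL1-EL3 variants or the read-only EL0 register instead.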
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
      SysReg = AArch64SysReg::TPIDRRO_EL0;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
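  // The CMP_SWAP_* pseudos become a compare-and-swap loop built on exclusive
  // monitors; roughly (illustrative, for CMP_SWAP_32):
  //   .loop: LDAXR   wOut, [xAddr]
  //          SUBS    wzr, wOut, wDesired
  //          B.NE    .done
  //          STLXR   wScratch, wNew, [xAddr]
  //          CBNZ    wScratch, .loop
  //   .done: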
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
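    // A sketch of why these exist, assuming the usual AES-fusion motivation:
    // the tied pseudo constrains the source and destination to the same
    // register, which some cores want for AESE/AESMC pairing; the final
    // encoding is the plain, untied AESMC/AESIMC.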
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow an immediate offset. getTaggedBasePointerOffset
    // should almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, TargetStackID::Default /*StackID*/,
        FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use the output register as a temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
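    // ADDG/SUBG take an unsigned immediate, so pick the opcode from the sign
    // of the offset and pass its magnitude; e.g. an offset of -32 becomes
    // (illustratively) SUBG xD, xN, #32, #<tag_offset>.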
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::STR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
  case AArch64::LDR_ZZZZXI:
  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::LDR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
  case AArch64::BLR_RVMARKER:
  case AArch64::BLRA_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  case AArch64::STSHH_ATOMIC_STORE_SZ:
    return expandSTSHHAtomicStore(MBB, MBBI);
  case AArch64::RestoreZAPseudo:
  case AArch64::CommitZASavePseudo:
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = [&] {
      switch (Opcode) {
      case AArch64::RestoreZAPseudo:
        return expandRestoreZASave(MBB, MBBI);
      case AArch64::CommitZASavePseudo:
        return expandCommitZASave(MBB, MBBI);
      case AArch64::MSRpstatePseudo:
        return expandCondSMToggle(MBB, MBBI);
      default:
        llvm_unreachable("Unexpected conditional pseudo!");
      }
    }();
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::InOutZAUsePseudo:
  case AArch64::RequiresZASavePseudo:
  case AArch64::RequiresZT0SavePseudo:
  case AArch64::SMEStateAllocPseudo:
  case AArch64::COALESCER_BARRIER_FPR16:
  case AArch64::COALESCER_BARRIER_FPR32:
  case AArch64::COALESCER_BARRIER_FPR64:
  case AArch64::COALESCER_BARRIER_FPR128:
    MI.eraseFromParent();
    return true;
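  // SME2 multi-vector load pseudos: choose the contiguous or the strided
  // encoding according to the register class the destination tuple was
  // allocated to (see expandMultiVecPseudo above).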
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass,
                                AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass,
                                AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass,
                                AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass,
                                AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass,
                                AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass,
                                AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass,
                                AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass,
                                AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
    return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
    return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
  case AArch64::EON_ZZZ:
  case AArch64::NAND_ZZZ:
  case AArch64::NOR_ZZZ:
    return expandSVEBitwisePseudo(MI, MBB, MBBI);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}