//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMultiVecPseudo(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            TargetRegisterClass ContiguousClass,
                            TargetRegisterClass StridedClass,
                            unsigned ContiguousOpc, unsigned StridedOpc);
  bool expandFormTuplePseudo(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             MachineBasicBlock::iterator &NextMBBI,
                             unsigned Size);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  struct ConditionalBlocks {
    MachineBasicBlock &CondBB;
    MachineBasicBlock &EndBB;
  };
  ConditionalBlocks expandConditionalPseudo(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL,
                                            MachineInstrBuilder &Branch);
  MachineBasicBlock *expandRestoreZASave(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCommitZASave(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  RegState RenamableState =
      getRenamableRegState(MI.getOperand(0).isRenamable());
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
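  // Build one real instruction per ImmInsnModel entry below; the dead flag on
  // the destination may only be set on the last instruction of the sequence,
  // once the complete immediate has been materialized.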
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::EONXrs:
    case AArch64::EORXrs:
    case AArch64::ORRWrs:
    case AArch64::ORRXrs: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
              .addReg(DstReg, RegState::Define |
                                  getDeadRegState(DstIsDead && LastItem) |
                                  RenamableState)
              .addReg(DstReg)
              .addReg(DstReg)
              .addImm(I->Op2));
    } break;
    case AArch64::ANDXri:
    case AArch64::EORXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg, RegState::Define |
                                             getDeadRegState(DstIsDead && LastItem) |
                                             RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
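  // There is no 128-bit compare instruction, so the comparison below is
  // materialized as two SUBS/CSINC pairs that leave a non-zero value in
  // StatusReg if either half differs from the desired value.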
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

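  // Work out which operands act as the governing predicate, the destructive
  // (tied) operand and the source(s). UseRev records that the operands had to
  // be swapped, in which case the reversed opcode is selected further down.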
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  case AArch64::Destructive2xRegImmUnpred:
    // EXT_ZZI_CONSTRUCTIVE Zd, Zs, Imm
    // ==> MOVPRFX Zd Zs; EXT_ZZI Zd, Zd, Zs, Imm
    std::tie(DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 1, 2);
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
  case AArch64::Destructive2xRegImmUnpred:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  // Preserve undef state until DOP's reg is defined.
  RegState DOPRegState = getUndefRegState(MI.getOperand(DOPIdx).isUndef());

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm ||
            DType == AArch64::DestructiveBinaryCommWithRev) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState);

    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;
    DOPRegState = {};

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm ||
         DType == AArch64::DestructiveBinaryCommWithRev) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState);
    DOPIdx = 0;
    DOPRegState = {};
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
  DOPRegState = DOPRegState | RegState::Kill;

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  case AArch64::Destructive2xRegImmUnpred:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    transferImpOps(MI, PRFX, DOP);
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

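  // The loop below stores 32 bytes (two granules) per iteration with ST2G; if
  // the total size is not a multiple of 32, emit a single 16-byte store first
  // so the remainder divides evenly.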
  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
          Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
         "Unexpected opcode");
  RegState RState =
      getDefRegState(Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI);
  unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
                      ? AArch64::zsub0
                      : AArch64::psub0;
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
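  // Emit one LDR/STR per sub-register of the tuple, at consecutive immediate
  // offsets from the base pointer; only the final access may kill the base.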
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
                RState)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

// Create a call with the passed opcode and explicit operands, copying over all
// the implicit operands from *MBBI, starting at the regmask.
static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const AArch64InstrInfo *TII,
                                       unsigned Opcode,
                                       ArrayRef<MachineOperand> ExplicitOps,
                                       unsigned RegMaskStartIdx) {
  // Build the MI, with explicit operands first (including the call target).
  MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
                           .add(ExplicitOps)
                           .getInstr();

  // Register arguments are added during ISel, but cannot be added as explicit
  // operands of the branch as it expects to be B <target> which is only one
  // operand. Instead they are implicit operands used by the branch.
  while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
    const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    Call->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
        /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
    Call->addOperand(MO);

  return Call;
}

// Create a call to CallTarget, copying over all the operands from *MBBI,
// starting at the regmask.
static MachineInstr *createCall(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const AArch64InstrInfo *TII,
                                MachineOperand &CallTarget,
                                unsigned RegMaskStartIdx) {
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;

  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, if necessary, and
  // - another branch, to the runtime function
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  MachineOperand &RVTarget = MI.getOperand(0);
  bool DoEmitMarker = MI.getOperand(1).getImm();
  assert(RVTarget.isGlobal() && "invalid operand for attached call");

  MachineInstr *OriginalCall = nullptr;

  if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
    // ptrauth call.
    const MachineOperand &CallTarget = MI.getOperand(2);
    const MachineOperand &Key = MI.getOperand(3);
    const MachineOperand &IntDisc = MI.getOperand(4);
    const MachineOperand &AddrDisc = MI.getOperand(5);

    assert((Key.getImm() == AArch64PACKey::IA ||
            Key.getImm() == AArch64PACKey::IB) &&
           "Invalid auth call key");

    MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};

    OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
                                     /*RegMaskStartIdx=*/6);
  } else {
    assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
    OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(2),
                              // Regmask starts after the RV and call targets.
                              /*RegMaskStartIdx=*/3);
  }

  if (DoEmitMarker)
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
        .addReg(AArch64::FP, RegState::Define)
        .addReg(AArch64::XZR)
        .addReg(AArch64::FP)
        .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
                                  // Regmask starts after the call target.
                                  /*RegMaskStartIdx=*/1);

  Call->setCFIType(*MBB.getParent(), MI.getCFIType());

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
  // move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

AArch64ExpandPseudo::ConditionalBlocks
AArch64ExpandPseudo::expandConditionalPseudo(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             DebugLoc DL,
                                             MachineInstrBuilder &Branch) {
  assert((std::next(MBBI) != MBB.end() ||
          MBB.successors().begin() != MBB.successors().end()) &&
         "Unexpected unreachable in block");

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before the conditional pseudo.
  //  - CondBB contains the conditional pseudo instruction only.
  //  - EndBB contains all instructions after the conditional pseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *CondBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB =
      std::next(MBBI) == CondBB->end()
          ? *CondBB->successors().begin()
          : CondBB->splitAt(*MBBI, /*UpdateLiveIns*/ true);

  // Add the CondBB label to the branch instruction & create a branch to EndBB.
  Branch.addMBB(CondBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create branch from CondBB to EndBB. Users of this helper should insert new
  // instructions at CondBB.back() -- i.e. before the branch.
  BuildMI(CondBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
  return {*CondBB, *EndBB};
}

MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZASave(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  // Compare TPIDR2_EL0 against 0. Restore ZA if TPIDR2_EL0 is zero.
  MachineInstrBuilder Branch =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX)).add(MI.getOperand(0));

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL));
  // Copy operands (mainly the regmask) from the pseudo.
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  // Mark the TPIDR2 block pointer (X0) as an implicit use.
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);

  MI.eraseFromParent();
  return &EndBB;
}

static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111;

MachineBasicBlock *
AArch64ExpandPseudo::expandCommitZASave(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  [[maybe_unused]] auto *RI = MBB.getParent()->getSubtarget().getRegisterInfo();

  // Compare TPIDR2_EL0 against 0. Commit ZA if TPIDR2_EL0 is non-zero.
  MachineInstrBuilder Branch =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBNZX)).add(MI.getOperand(0));

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL));
  // Copy operands (mainly the regmask) from the pseudo.
  for (unsigned I = 3; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  // Clear TPIDR2_EL0.
  BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(AArch64::XZR);
  bool ZeroZA = MI.getOperand(1).getImm() != 0;
  bool ZeroZT0 = MI.getOperand(2).getImm() != 0;
  if (ZeroZA) {
    assert(MI.definesRegister(AArch64::ZAB0, RI) && "should define ZA!");
    BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::ZERO_M))
        .addImm(ZERO_ALL_ZA_MASK)
        .addDef(AArch64::ZAB0, RegState::ImplicitDefine);
  }
  if (ZeroZT0) {
    assert(MI.definesRegister(AArch64::ZT0, RI) && "should define ZT0!");
    BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::ZERO_T))
        .addDef(AArch64::ZT0);
  }

  MI.eraseFromParent();
  return &EndBB;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables, and it seems unnecessary to restore pstate.sm when that
  // happens. Note that this is not just an optimisation: the code below
  // expects a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into smstart or smstop instruction. The pseudo has the
  // following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>   <- Cond SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>   <- Cond SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>   <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>   <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not the
  // expected value for the callee (0 for a normal callee and 1 for a streaming
  // callee).
  unsigned Opc;
  switch (MI.getOperand(2).getImm()) {
  case AArch64SME::Always:
    llvm_unreachable("Should have matched to instruction directly");
  case AArch64SME::IfCallerIsStreaming:
    Opc = AArch64::TBNZW;
    break;
  case AArch64SME::IfCallerIsNonStreaming:
    Opc = AArch64::TBZW;
    break;
  }
  auto PStateSM = MI.getOperand(3).getReg();
  auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Tbx);
  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(CondBB, CondBB.back(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  MI.eraseFromParent();
  return &EndBB;
}

bool AArch64ExpandPseudo::expandMultiVecPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
    unsigned ContiguousOp, unsigned StridedOpc) {
  MachineInstr &MI = *MBBI;
  Register Tuple = MI.getOperand(0).getReg();

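  // The register allocator may have assigned either a contiguous or a strided
  // multi-vector tuple to this pseudo; pick the instruction form that matches
  // the register class the tuple landed in.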
  auto ContiguousRange = ContiguousClass.getRegisters();
  auto StridedRange = StridedClass.getRegisters();
  unsigned Opc;
  if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
    Opc = ContiguousOp;
  } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
    Opc = StridedOpc;
  } else
    llvm_unreachable("Cannot expand Multi-Vector pseudo");

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
                                .add(MI.getOperand(0))
                                .add(MI.getOperand(1))
                                .add(MI.getOperand(2))
                                .add(MI.getOperand(3));
  transferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandFormTuplePseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
  assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
  MachineInstr &MI = *MBBI;
  Register ReturnTuple = MI.getOperand(0).getReg();

  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  for (unsigned I = 0; I < Size; ++I) {
    Register FormTupleOpReg = MI.getOperand(I + 1).getReg();
    Register ReturnTupleSubReg =
        TRI->getSubReg(ReturnTuple, AArch64::zsub0 + I);
    // Add copies to ensure the subregisters remain in the correct order
    // for any contiguous operation they are used by.
    if (FormTupleOpReg != ReturnTupleSubReg)
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_ZZZ))
          .addReg(ReturnTupleSubReg, RegState::Define)
          .addReg(FormTupleOpReg)
          .addReg(FormTupleOpReg);
  }

  MI.eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
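    // BSP is a pseudo for a bitwise select; expand it to BIT, BIF or BSL
    // depending on which source operand the register allocator tied to the
    // destination, inserting an extra ORR copy when none of them matches.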
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                       TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                           : AArch64::BITv16i8))
                   .add(MI.getOperand(0))
                   .add(MI.getOperand(3))
                   .add(MI.getOperand(2))
                   .add(MI.getOperand(1));
      transferImpOps(MI, I, I);
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                       TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                           : AArch64::BIFv16i8))
                   .add(MI.getOperand(0))
                   .add(MI.getOperand(2))
                   .add(MI.getOperand(3))
                   .add(MI.getOperand(1));
      transferImpOps(MI, I, I);
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        auto I =
            BuildMI(MBB, MBBI, MI.getDebugLoc(),
                    TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                        : AArch64::BSLv16i8))
                .add(MI.getOperand(0))
                .add(MI.getOperand(1))
                .add(MI.getOperand(2))
                .add(MI.getOperand(3));
        transferImpOps(MI, I, I);
      } else {
        RegState RegState =
            getRenamableRegState(MI.getOperand(1).isRenamable()) |
            getKillRegState(
                MI.getOperand(1).isKill() &&
                MI.getOperand(1).getReg() != MI.getOperand(2).getReg() &&
                MI.getOperand(1).getReg() != MI.getOperand(3).getReg());
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .addReg(MI.getOperand(1).getReg(), RegState)
            .addReg(MI.getOperand(1).getReg(), RegState);
        auto I2 =
            BuildMI(MBB, MBBI, MI.getDebugLoc(),
                    TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                        : AArch64::BSLv16i8))
                .add(MI.getOperand(0))
                .addReg(DstReg,
                        RegState::Kill | getRenamableRegState(
                                             MI.getOperand(0).isRenamable()))
                .add(MI.getOperand(2))
                .add(MI.getOperand(3));
        transferImpOps(MI, I2, I2);
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
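    // These register-register forms are pseudos; rewrite each one to the
    // corresponding shifted-register instruction with an LSL #0 shift amount.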
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    if (auto DebugNumber = MI.peekDebugInstrNum())
      NewMI->setDebugInstrNum(DebugNumber);
    MI.eraseFromParent();
    return true;
  }
1401
  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // The tiny code model expands to a single literal LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // The small code model expands into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(),
                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      // If the LOADgot instruction has a debug-instr-number, annotate the LDR
      // it is expanded to with the same debug-instr-number to preserve debug
      // information.
      if (MI.peekDebugInstrNum() != 0)
        MIB2->setDebugInstrNum(MI.peekDebugInstrNum());
      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP
      // is only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
  [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also
      // be loaded into address range [0, 2^48). Both of these properties need
      // to be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD.
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

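  // MOVbaseTLS reads the thread pointer. Which TPIDR* system register holds
  // it depends on the exception level the subtarget is configured to use.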
  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
      SysReg = AArch64SysReg::TPIDRRO_EL0;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
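  // The compare-and-swap pseudos are expanded by expandCMP_SWAP into a
  // load-exclusive / store-exclusive retry loop, which splits the current
  // block into new ones; the sub-word variants compare via a zero-extending
  // SUBS.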
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRX, AArch64::STLXRX,
                          AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

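  // The tied AESMC/AESIMC pseudos differ from the real instructions only in
  // their operand constraints; expansion re-emits the plain AESMCrr/AESIMCrr
  // with the same operands.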
  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow an immediate offset. getTaggedBasePointerOffset
    // should almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, TargetStackID::Default /*StackID*/,
        FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use the output register as a temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
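  // TAGPstack becomes ADDG or SUBG depending on the sign of the offset, since
  // both instructions only accept an unsigned immediate.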
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
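  // Spills and fills of SVE register tuples are expanded by expandSVESpillFill
  // into N consecutive single-register STR_ZXI / LDR_ZXI (or STR_PXI /
  // LDR_PXI) instructions.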
  case AArch64::STR_ZZZZXI:
  case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
  case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::STR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
  case AArch64::LDR_ZZZZXI:
  case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
  case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::LDR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
  case AArch64::BLR_RVMARKER:
  case AArch64::BLRA_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  case AArch64::RestoreZAPseudo:
  case AArch64::CommitZASavePseudo:
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = [&] {
      switch (Opcode) {
      case AArch64::RestoreZAPseudo:
        return expandRestoreZASave(MBB, MBBI);
      case AArch64::CommitZASavePseudo:
        return expandCommitZASave(MBB, MBBI);
      case AArch64::MSRpstatePseudo:
        return expandCondSMToggle(MBB, MBBI);
      default:
        llvm_unreachable("Unexpected conditional pseudo!");
      }
    }();
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::InOutZAUsePseudo:
  case AArch64::RequiresZASavePseudo:
  case AArch64::RequiresZT0SavePseudo:
  case AArch64::SMEStateAllocPseudo:
  case AArch64::COALESCER_BARRIER_FPR16:
  case AArch64::COALESCER_BARRIER_FPR32:
  case AArch64::COALESCER_BARRIER_FPR64:
  case AArch64::COALESCER_BARRIER_FPR128:
    MI.eraseFromParent();
    return true;
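  // SME2 multi-vector load pseudos: expandMultiVecPseudo picks the contiguous
  // or the strided opcode depending on which register class the destination
  // tuple was allocated to.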
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
    return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
    return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}