1//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands pseudo instructions into target
10// instructions to allow proper scheduling and other late optimizations. This
11// pass should be run after register allocation but before the post-regalloc
12// scheduling pass.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64ExpandImm.h"
17#include "AArch64InstrInfo.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/CodeGen/LivePhysRegs.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineConstantPool.h"
25#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineOperand.h"
30#include "llvm/CodeGen/TargetSubtargetInfo.h"
31#include "llvm/IR/DebugLoc.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/Pass.h"
34#include "llvm/Support/CodeGen.h"
35#include "llvm/Target/TargetMachine.h"
36#include "llvm/TargetParser/Triple.h"
37#include <cassert>
38#include <cstdint>
39#include <iterator>
40
41using namespace llvm;
42
43#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
44
45namespace {
46
47class AArch64ExpandPseudo : public MachineFunctionPass {
48public:
49 const AArch64InstrInfo *TII;
50
51 static char ID;
52
53 AArch64ExpandPseudo() : MachineFunctionPass(ID) {}
54
55 bool runOnMachineFunction(MachineFunction &Fn) override;
56
57 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
58
59private:
60 bool expandMBB(MachineBasicBlock &MBB);
61 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
62 MachineBasicBlock::iterator &NextMBBI);
63 bool expandMultiVecPseudo(MachineBasicBlock &MBB,
64 MachineBasicBlock::iterator MBBI,
65 TargetRegisterClass ContiguousClass,
66 TargetRegisterClass StridedClass,
67 unsigned ContiguousOpc, unsigned StridedOpc);
68 bool expandFormTuplePseudo(MachineBasicBlock &MBB,
69 MachineBasicBlock::iterator MBBI,
70 MachineBasicBlock::iterator &NextMBBI,
71 unsigned Size);
72 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
73 unsigned BitSize);
74
75 bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
76 MachineBasicBlock::iterator MBBI);
77 bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
78 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
79 unsigned ExtendImm, unsigned ZeroReg,
80 MachineBasicBlock::iterator &NextMBBI);
81 bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
82 MachineBasicBlock::iterator MBBI,
83 MachineBasicBlock::iterator &NextMBBI);
84 bool expandSetTagLoop(MachineBasicBlock &MBB,
85 MachineBasicBlock::iterator MBBI,
86 MachineBasicBlock::iterator &NextMBBI);
87 bool expandSVESpillFill(MachineBasicBlock &MBB,
88 MachineBasicBlock::iterator MBBI, unsigned Opc,
89 unsigned N);
90 bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
91 MachineBasicBlock::iterator MBBI);
92 bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
93 bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
94 MachineBasicBlock::iterator MBBI);
95 MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
96 MachineBasicBlock::iterator MBBI);
97 MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
98 MachineBasicBlock::iterator MBBI);
99};
100
101} // end anonymous namespace
102
103char AArch64ExpandPseudo::ID = 0;
104
105INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
106 AARCH64_EXPAND_PSEUDO_NAME, false, false)
107
108/// Transfer implicit operands on the pseudo instruction to the
109/// instructions created from the expansion.
110static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
111 MachineInstrBuilder &DefMI) {
112 const MCInstrDesc &Desc = OldMI.getDesc();
113 for (const MachineOperand &MO :
114 llvm::drop_begin(RangeOrContainer: OldMI.operands(), N: Desc.getNumOperands())) {
115 assert(MO.isReg() && MO.getReg());
116 if (MO.isUse())
117 UseMI.add(MO);
118 else
119 DefMI.add(MO);
120 }
121}
122
123/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
124/// real move-immediate instructions to synthesize the immediate.
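/// For example (illustrative only; the exact sequence is chosen by
/// AArch64_IMM::expandMOVImm):
///
///   MOVi64imm xD, #0x12345678
///
/// may become
///
///   movz xD, #0x5678
///   movk xD, #0x1234, lsl #16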
125bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
126 MachineBasicBlock::iterator MBBI,
127 unsigned BitSize) {
128 MachineInstr &MI = *MBBI;
129 Register DstReg = MI.getOperand(i: 0).getReg();
130 uint64_t RenamableState =
131 MI.getOperand(i: 0).isRenamable() ? RegState::Renamable : 0;
132 uint64_t Imm = MI.getOperand(i: 1).getImm();
133
134 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
135 // Useless def, and we don't want to risk creating an invalid ORR (which
136 // would really write to sp).
137 MI.eraseFromParent();
138 return true;
139 }
140
141 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
142 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
143 assert(Insn.size() != 0);
144
145 SmallVector<MachineInstrBuilder, 4> MIBS;
146 for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
147 bool LastItem = std::next(x: I) == E;
148 switch (I->Opcode)
149 {
150 default: llvm_unreachable("unhandled!"); break;
151
152 case AArch64::ORRWri:
153 case AArch64::ORRXri:
154 if (I->Op1 == 0) {
155 MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
156 .add(MO: MI.getOperand(i: 0))
157 .addReg(RegNo: BitSize == 32 ? AArch64::WZR : AArch64::XZR)
158 .addImm(Val: I->Op2));
159 } else {
160 Register DstReg = MI.getOperand(i: 0).getReg();
161 bool DstIsDead = MI.getOperand(i: 0).isDead();
162 MIBS.push_back(
163 Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
164 .addReg(RegNo: DstReg, flags: RegState::Define |
165 getDeadRegState(B: DstIsDead && LastItem) |
166 RenamableState)
167 .addReg(RegNo: DstReg)
168 .addImm(Val: I->Op2));
169 }
170 break;
171 case AArch64::ORRWrs:
172 case AArch64::ORRXrs: {
173 Register DstReg = MI.getOperand(i: 0).getReg();
174 bool DstIsDead = MI.getOperand(i: 0).isDead();
175 MIBS.push_back(
176 Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
177 .addReg(RegNo: DstReg, flags: RegState::Define |
178 getDeadRegState(B: DstIsDead && LastItem) |
179 RenamableState)
180 .addReg(RegNo: DstReg)
181 .addReg(RegNo: DstReg)
182 .addImm(Val: I->Op2));
183 } break;
184 case AArch64::ANDXri:
185 case AArch64::EORXri:
186 if (I->Op1 == 0) {
187 MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
188 .add(MO: MI.getOperand(i: 0))
189 .addReg(RegNo: BitSize == 32 ? AArch64::WZR : AArch64::XZR)
190 .addImm(Val: I->Op2));
191 } else {
192 Register DstReg = MI.getOperand(i: 0).getReg();
193 bool DstIsDead = MI.getOperand(i: 0).isDead();
194 MIBS.push_back(
195 Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
196 .addReg(RegNo: DstReg, flags: RegState::Define |
197 getDeadRegState(B: DstIsDead && LastItem) |
198 RenamableState)
199 .addReg(RegNo: DstReg)
200 .addImm(Val: I->Op2));
201 }
202 break;
203 case AArch64::MOVNWi:
204 case AArch64::MOVNXi:
205 case AArch64::MOVZWi:
206 case AArch64::MOVZXi: {
207 bool DstIsDead = MI.getOperand(i: 0).isDead();
208 MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
209 .addReg(RegNo: DstReg, flags: RegState::Define |
210 getDeadRegState(B: DstIsDead && LastItem) |
211 RenamableState)
212 .addImm(Val: I->Op1)
213 .addImm(Val: I->Op2));
214 } break;
215 case AArch64::MOVKWi:
216 case AArch64::MOVKXi: {
217 Register DstReg = MI.getOperand(i: 0).getReg();
218 bool DstIsDead = MI.getOperand(i: 0).isDead();
219 MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
220 .addReg(RegNo: DstReg,
221 flags: RegState::Define |
222 getDeadRegState(B: DstIsDead && LastItem) |
223 RenamableState)
224 .addReg(RegNo: DstReg)
225 .addImm(Val: I->Op1)
226 .addImm(Val: I->Op2));
227 } break;
228 }
229 }
230 transferImpOps(OldMI&: MI, UseMI&: MIBS.front(), DefMI&: MIBS.back());
231 MI.eraseFromParent();
232 return true;
233}
234
235bool AArch64ExpandPseudo::expandCMP_SWAP(
236 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
237 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
238 MachineBasicBlock::iterator &NextMBBI) {
239 MachineInstr &MI = *MBBI;
240 MIMetadata MIMD(MI);
241 const MachineOperand &Dest = MI.getOperand(i: 0);
242 Register StatusReg = MI.getOperand(i: 1).getReg();
243 bool StatusDead = MI.getOperand(i: 1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
246 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
247 Register AddrReg = MI.getOperand(i: 2).getReg();
248 Register DesiredReg = MI.getOperand(i: 3).getReg();
249 Register NewReg = MI.getOperand(i: 4).getReg();
250
251 MachineFunction *MF = MBB.getParent();
252 auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
253 auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
254 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
255
256 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB);
257 MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB);
258 MF->insert(MBBI: ++StoreBB->getIterator(), MBB: DoneBB);
259
260 // .Lloadcmp:
261 // mov wStatus, 0
262 // ldaxr xDest, [xAddr]
263 // cmp xDest, xDesired
264 // b.ne .Ldone
265 if (!StatusDead)
266 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::MOVZWi), DestReg: StatusReg)
267 .addImm(Val: 0).addImm(Val: 0);
268 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: LdarOp), DestReg: Dest.getReg())
269 .addReg(RegNo: AddrReg);
270 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: CmpOp), DestReg: ZeroReg)
271 .addReg(RegNo: Dest.getReg(), flags: getKillRegState(B: Dest.isDead()))
272 .addReg(RegNo: DesiredReg)
273 .addImm(Val: ExtendImm);
274 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::Bcc))
275 .addImm(Val: AArch64CC::NE)
276 .addMBB(MBB: DoneBB)
277 .addReg(RegNo: AArch64::NZCV, flags: RegState::Implicit | RegState::Kill);
278 LoadCmpBB->addSuccessor(Succ: DoneBB);
279 LoadCmpBB->addSuccessor(Succ: StoreBB);
280
281 // .Lstore:
282 // stlxr wStatus, xNew, [xAddr]
283 // cbnz wStatus, .Lloadcmp
284 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: StlrOp), DestReg: StatusReg)
285 .addReg(RegNo: NewReg)
286 .addReg(RegNo: AddrReg);
287 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
288 .addReg(RegNo: StatusReg, flags: getKillRegState(B: StatusDead))
289 .addMBB(MBB: LoadCmpBB);
290 StoreBB->addSuccessor(Succ: LoadCmpBB);
291 StoreBB->addSuccessor(Succ: DoneBB);
292
293 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
294 DoneBB->transferSuccessors(FromMBB: &MBB);
295
296 MBB.addSuccessor(Succ: LoadCmpBB);
297
298 NextMBBI = MBB.end();
299 MI.eraseFromParent();
300
301 // Recompute livein lists.
302 LivePhysRegs LiveRegs;
303 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
304 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
305 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
306 // Do an extra pass around the loop to get loop carried registers right.
307 StoreBB->clearLiveIns();
308 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
309 LoadCmpBB->clearLiveIns();
310 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
311
312 return true;
313}
314
315bool AArch64ExpandPseudo::expandCMP_SWAP_128(
316 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
317 MachineBasicBlock::iterator &NextMBBI) {
318 MachineInstr &MI = *MBBI;
319 MIMetadata MIMD(MI);
320 MachineOperand &DestLo = MI.getOperand(i: 0);
321 MachineOperand &DestHi = MI.getOperand(i: 1);
322 Register StatusReg = MI.getOperand(i: 2).getReg();
323 bool StatusDead = MI.getOperand(i: 2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
326 assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
327 Register AddrReg = MI.getOperand(i: 3).getReg();
328 Register DesiredLoReg = MI.getOperand(i: 4).getReg();
329 Register DesiredHiReg = MI.getOperand(i: 5).getReg();
330 Register NewLoReg = MI.getOperand(i: 6).getReg();
331 Register NewHiReg = MI.getOperand(i: 7).getReg();
332
333 unsigned LdxpOp, StxpOp;
334
335 switch (MI.getOpcode()) {
336 case AArch64::CMP_SWAP_128_MONOTONIC:
337 LdxpOp = AArch64::LDXPX;
338 StxpOp = AArch64::STXPX;
339 break;
340 case AArch64::CMP_SWAP_128_RELEASE:
341 LdxpOp = AArch64::LDXPX;
342 StxpOp = AArch64::STLXPX;
343 break;
344 case AArch64::CMP_SWAP_128_ACQUIRE:
345 LdxpOp = AArch64::LDAXPX;
346 StxpOp = AArch64::STXPX;
347 break;
348 case AArch64::CMP_SWAP_128:
349 LdxpOp = AArch64::LDAXPX;
350 StxpOp = AArch64::STLXPX;
351 break;
352 default:
353 llvm_unreachable("Unexpected opcode");
354 }
355
356 MachineFunction *MF = MBB.getParent();
357 auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
358 auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
359 auto FailBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
360 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
361
362 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB);
363 MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB);
364 MF->insert(MBBI: ++StoreBB->getIterator(), MBB: FailBB);
365 MF->insert(MBBI: ++FailBB->getIterator(), MBB: DoneBB);
366
  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Lfail
372 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: LdxpOp))
373 .addReg(RegNo: DestLo.getReg(), flags: RegState::Define)
374 .addReg(RegNo: DestHi.getReg(), flags: RegState::Define)
375 .addReg(RegNo: AddrReg);
376 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::SUBSXrs), DestReg: AArch64::XZR)
377 .addReg(RegNo: DestLo.getReg(), flags: getKillRegState(B: DestLo.isDead()))
378 .addReg(RegNo: DesiredLoReg)
379 .addImm(Val: 0);
380 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CSINCWr), DestReg: StatusReg)
381 .addUse(RegNo: AArch64::WZR)
382 .addUse(RegNo: AArch64::WZR)
383 .addImm(Val: AArch64CC::EQ);
384 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::SUBSXrs), DestReg: AArch64::XZR)
385 .addReg(RegNo: DestHi.getReg(), flags: getKillRegState(B: DestHi.isDead()))
386 .addReg(RegNo: DesiredHiReg)
387 .addImm(Val: 0);
388 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CSINCWr), DestReg: StatusReg)
389 .addUse(RegNo: StatusReg, Flags: RegState::Kill)
390 .addUse(RegNo: StatusReg, Flags: RegState::Kill)
391 .addImm(Val: AArch64CC::EQ);
392 BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
393 .addUse(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
394 .addMBB(MBB: FailBB);
395 LoadCmpBB->addSuccessor(Succ: FailBB);
396 LoadCmpBB->addSuccessor(Succ: StoreBB);
397
398 // .Lstore:
399 // stlxp wStatus, xNewLo, xNewHi, [xAddr]
400 // cbnz wStatus, .Lloadcmp
401 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: StxpOp), DestReg: StatusReg)
402 .addReg(RegNo: NewLoReg)
403 .addReg(RegNo: NewHiReg)
404 .addReg(RegNo: AddrReg);
405 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
406 .addReg(RegNo: StatusReg, flags: getKillRegState(B: StatusDead))
407 .addMBB(MBB: LoadCmpBB);
408 BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::B)).addMBB(MBB: DoneBB);
409 StoreBB->addSuccessor(Succ: LoadCmpBB);
410 StoreBB->addSuccessor(Succ: DoneBB);
411
412 // .Lfail:
413 // stlxp wStatus, xDestLo, xDestHi, [xAddr]
414 // cbnz wStatus, .Lloadcmp
415 BuildMI(BB: FailBB, MIMD, MCID: TII->get(Opcode: StxpOp), DestReg: StatusReg)
416 .addReg(RegNo: DestLo.getReg())
417 .addReg(RegNo: DestHi.getReg())
418 .addReg(RegNo: AddrReg);
419 BuildMI(BB: FailBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
420 .addReg(RegNo: StatusReg, flags: getKillRegState(B: StatusDead))
421 .addMBB(MBB: LoadCmpBB);
422 FailBB->addSuccessor(Succ: LoadCmpBB);
423 FailBB->addSuccessor(Succ: DoneBB);
424
425 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
426 DoneBB->transferSuccessors(FromMBB: &MBB);
427
428 MBB.addSuccessor(Succ: LoadCmpBB);
429
430 NextMBBI = MBB.end();
431 MI.eraseFromParent();
432
433 // Recompute liveness bottom up.
434 LivePhysRegs LiveRegs;
435 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
436 computeAndAddLiveIns(LiveRegs, MBB&: *FailBB);
437 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
438 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
439
440 // Do an extra pass in the loop to get the loop carried dependencies right.
441 FailBB->clearLiveIns();
442 computeAndAddLiveIns(LiveRegs, MBB&: *FailBB);
443 StoreBB->clearLiveIns();
444 computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
445 LoadCmpBB->clearLiveIns();
446 computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
447
448 return true;
449}
450
451/// \brief Expand Pseudos to Instructions with destructive operands.
452///
453/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
454/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
456/// than setting the register constraints in the register allocator,
457/// since that will insert regular MOV instructions rather than MOVPRFX.
458///
459/// Example (after register allocation):
460///
461/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
462///
463/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
464/// * We cannot map directly to FSUB_ZPmZ_B because the register
465/// constraints of the instruction are not met.
466/// * Also the _ZERO specifies the false lanes need to be zeroed.
467///
/// We first check if the destructive operand == the result operand;
/// if not, we try to swap the operands, e.g.
470///
471/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
472///
473/// But because FSUB_ZPmZ is not commutative, this is semantically
474/// different, so we need a reverse instruction:
475///
476/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
477///
478/// Then we implement the zeroing of the false lanes of Z0 by adding
479/// a zeroing MOVPRFX instruction:
480///
481/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
482/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
483///
484/// Note that this can only be done for _ZERO or _UNDEF variants where
485/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used); otherwise the
487/// swapping of operands is illegal because the operation is not
488/// (or cannot be emulated to be) fully commutative.
489bool AArch64ExpandPseudo::expand_DestructiveOp(
490 MachineInstr &MI,
491 MachineBasicBlock &MBB,
492 MachineBasicBlock::iterator MBBI) {
493 unsigned Opcode = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode());
494 uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
495 uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
496 bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
497 Register DstReg = MI.getOperand(i: 0).getReg();
498 bool DstIsDead = MI.getOperand(i: 0).isDead();
499 bool UseRev = false;
500 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
501
502 switch (DType) {
503 case AArch64::DestructiveBinaryComm:
504 case AArch64::DestructiveBinaryCommWithRev:
505 if (DstReg == MI.getOperand(i: 3).getReg()) {
506 // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
507 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 3, args: 2);
508 UseRev = true;
509 break;
510 }
511 [[fallthrough]];
512 case AArch64::DestructiveBinary:
513 case AArch64::DestructiveBinaryImm:
514 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 2, args: 3);
515 break;
516 case AArch64::DestructiveUnaryPassthru:
517 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 2, args: 3, args: 3);
518 break;
519 case AArch64::DestructiveTernaryCommWithRev:
520 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 2, args: 3, args: 4);
521 if (DstReg == MI.getOperand(i: 3).getReg()) {
522 // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
523 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 3, args: 4, args: 2);
524 UseRev = true;
525 } else if (DstReg == MI.getOperand(i: 4).getReg()) {
526 // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
527 std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 4, args: 3, args: 2);
528 UseRev = true;
529 }
530 break;
531 default:
532 llvm_unreachable("Unsupported Destructive Operand type");
533 }
534
  // MOVPRFX can only be used if the destination operand is also the
  // destructive operand and does not appear as any other operand, so the
  // destructive operand must be unique.
538 bool DOPRegIsUnique = false;
539 switch (DType) {
540 case AArch64::DestructiveBinary:
541 DOPRegIsUnique = DstReg != MI.getOperand(i: SrcIdx).getReg();
542 break;
543 case AArch64::DestructiveBinaryComm:
544 case AArch64::DestructiveBinaryCommWithRev:
545 DOPRegIsUnique =
546 DstReg != MI.getOperand(i: DOPIdx).getReg() ||
547 MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg();
548 break;
549 case AArch64::DestructiveUnaryPassthru:
550 case AArch64::DestructiveBinaryImm:
551 DOPRegIsUnique = true;
552 break;
553 case AArch64::DestructiveTernaryCommWithRev:
554 DOPRegIsUnique =
555 DstReg != MI.getOperand(i: DOPIdx).getReg() ||
556 (MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg() &&
557 MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: Src2Idx).getReg());
558 break;
559 }
560
561 // Resolve the reverse opcode
562 if (UseRev) {
563 int NewOpcode;
564 // e.g. DIV -> DIVR
565 if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
566 Opcode = NewOpcode;
567 // e.g. DIVR -> DIV
568 else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
569 Opcode = NewOpcode;
570 }
571
572 // Get the right MOVPRFX
573 uint64_t ElementSize = TII->getElementSizeForOpcode(Opc: Opcode);
574 unsigned MovPrfx, LSLZero, MovPrfxZero;
575 switch (ElementSize) {
576 case AArch64::ElementSizeNone:
577 case AArch64::ElementSizeB:
578 MovPrfx = AArch64::MOVPRFX_ZZ;
579 LSLZero = AArch64::LSL_ZPmI_B;
580 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
581 break;
582 case AArch64::ElementSizeH:
583 MovPrfx = AArch64::MOVPRFX_ZZ;
584 LSLZero = AArch64::LSL_ZPmI_H;
585 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
586 break;
587 case AArch64::ElementSizeS:
588 MovPrfx = AArch64::MOVPRFX_ZZ;
589 LSLZero = AArch64::LSL_ZPmI_S;
590 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
591 break;
592 case AArch64::ElementSizeD:
593 MovPrfx = AArch64::MOVPRFX_ZZ;
594 LSLZero = AArch64::LSL_ZPmI_D;
595 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
596 break;
597 default:
598 llvm_unreachable("Unsupported ElementSize");
599 }
600
601 //
602 // Create the destructive operation (if required)
603 //
604 MachineInstrBuilder PRFX, DOP;
605 if (FalseZero) {
606 // If we cannot prefix the requested instruction we'll instead emit a
607 // prefixed_zeroing_mov for DestructiveBinary.
608 assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
609 DType == AArch64::DestructiveBinaryComm ||
610 DType == AArch64::DestructiveBinaryCommWithRev) &&
611 "The destructive operand should be unique");
612 assert(ElementSize != AArch64::ElementSizeNone &&
613 "This instruction is unpredicated");
614
615 // Merge source operand into destination register
616 PRFX = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: MovPrfxZero))
617 .addReg(RegNo: DstReg, flags: RegState::Define)
618 .addReg(RegNo: MI.getOperand(i: PredIdx).getReg())
619 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg());
620
  // After the movprfx, the destructive operand is the same as Dst
622 DOPIdx = 0;
623
624 // Create the additional LSL to zero the lanes when the DstReg is not
625 // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
626 // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
627 if ((DType == AArch64::DestructiveBinary ||
628 DType == AArch64::DestructiveBinaryComm ||
629 DType == AArch64::DestructiveBinaryCommWithRev) &&
630 !DOPRegIsUnique) {
631 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LSLZero))
632 .addReg(RegNo: DstReg, flags: RegState::Define)
633 .add(MO: MI.getOperand(i: PredIdx))
634 .addReg(RegNo: DstReg)
635 .addImm(Val: 0);
636 }
637 } else if (DstReg != MI.getOperand(i: DOPIdx).getReg()) {
638 assert(DOPRegIsUnique && "The destructive operand should be unique");
639 PRFX = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: MovPrfx))
640 .addReg(RegNo: DstReg, flags: RegState::Define)
641 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg());
642 DOPIdx = 0;
643 }
644
645 //
646 // Create the destructive operation
647 //
648 DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode))
649 .addReg(RegNo: DstReg, flags: RegState::Define | getDeadRegState(B: DstIsDead));
650
651 switch (DType) {
652 case AArch64::DestructiveUnaryPassthru:
653 DOP.addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill)
654 .add(MO: MI.getOperand(i: PredIdx))
655 .add(MO: MI.getOperand(i: SrcIdx));
656 break;
657 case AArch64::DestructiveBinary:
658 case AArch64::DestructiveBinaryImm:
659 case AArch64::DestructiveBinaryComm:
660 case AArch64::DestructiveBinaryCommWithRev:
661 DOP.add(MO: MI.getOperand(i: PredIdx))
662 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill)
663 .add(MO: MI.getOperand(i: SrcIdx));
664 break;
665 case AArch64::DestructiveTernaryCommWithRev:
666 DOP.add(MO: MI.getOperand(i: PredIdx))
667 .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), flags: RegState::Kill)
668 .add(MO: MI.getOperand(i: SrcIdx))
669 .add(MO: MI.getOperand(i: Src2Idx));
670 break;
671 }
672
673 if (PRFX) {
674 finalizeBundle(MBB, FirstMI: PRFX->getIterator(), LastMI: MBBI->getIterator());
675 transferImpOps(OldMI&: MI, UseMI&: PRFX, DefMI&: DOP);
676 } else
677 transferImpOps(OldMI&: MI, UseMI&: DOP, DefMI&: DOP);
678
679 MI.eraseFromParent();
680 return true;
681}
682
683bool AArch64ExpandPseudo::expandSetTagLoop(
684 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
685 MachineBasicBlock::iterator &NextMBBI) {
686 MachineInstr &MI = *MBBI;
687 DebugLoc DL = MI.getDebugLoc();
688 Register SizeReg = MI.getOperand(i: 0).getReg();
689 Register AddressReg = MI.getOperand(i: 1).getReg();
690
691 MachineFunction *MF = MBB.getParent();
692
693 bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
694 const unsigned OpCode1 =
695 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
696 const unsigned OpCode2 =
697 ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
698
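  // The expansion peels off one 16-byte granule with a single post-indexed
  // STG/STZG when the total size is an odd number of granules, then
  // materializes the remaining size into SizeReg and loops on a post-indexed
  // ST2G/STZ2G that tags 32 bytes per iteration until SizeReg reaches zero.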
699 unsigned Size = MI.getOperand(i: 2).getImm();
700 assert(Size > 0 && Size % 16 == 0);
701 if (Size % (16 * 2) != 0) {
702 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: OpCode1), DestReg: AddressReg)
703 .addReg(RegNo: AddressReg)
704 .addReg(RegNo: AddressReg)
705 .addImm(Val: 1);
706 Size -= 16;
707 }
708 MachineBasicBlock::iterator I =
709 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::MOVi64imm), DestReg: SizeReg)
710 .addImm(Val: Size);
711 expandMOVImm(MBB, MBBI: I, BitSize: 64);
712
713 auto LoopBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
714 auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
715
716 MF->insert(MBBI: ++MBB.getIterator(), MBB: LoopBB);
717 MF->insert(MBBI: ++LoopBB->getIterator(), MBB: DoneBB);
718
719 BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: OpCode2))
720 .addDef(RegNo: AddressReg)
721 .addReg(RegNo: AddressReg)
722 .addReg(RegNo: AddressReg)
723 .addImm(Val: 2)
724 .cloneMemRefs(OtherMI: MI)
725 .setMIFlags(MI.getFlags());
726 BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::SUBSXri))
727 .addDef(RegNo: SizeReg)
728 .addReg(RegNo: SizeReg)
729 .addImm(Val: 16 * 2)
730 .addImm(Val: 0);
731 BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::Bcc))
732 .addImm(Val: AArch64CC::NE)
733 .addMBB(MBB: LoopBB)
734 .addReg(RegNo: AArch64::NZCV, flags: RegState::Implicit | RegState::Kill);
735
736 LoopBB->addSuccessor(Succ: LoopBB);
737 LoopBB->addSuccessor(Succ: DoneBB);
738
739 DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
740 DoneBB->transferSuccessors(FromMBB: &MBB);
741
742 MBB.addSuccessor(Succ: LoopBB);
743
744 NextMBBI = MBB.end();
745 MI.eraseFromParent();
746 // Recompute liveness bottom up.
747 LivePhysRegs LiveRegs;
748 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
749 computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB);
750 // Do an extra pass in the loop to get the loop carried dependencies right.
751 // FIXME: is this necessary?
752 LoopBB->clearLiveIns();
753 computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB);
754 DoneBB->clearLiveIns();
755 computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
756
757 return true;
758}
759
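// Expand a multi-register SVE spill/fill pseudo into N consecutive LDR/STR
// instructions on the individual Z (or P) sub-registers, stepping the
// sub-register index and the immediate offset together for each part.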
760bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
761 MachineBasicBlock::iterator MBBI,
762 unsigned Opc, unsigned N) {
763 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
764 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
765 "Unexpected opcode");
766 unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
767 ? RegState::Define
768 : 0;
769 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
770 ? AArch64::zsub0
771 : AArch64::psub0;
772 const TargetRegisterInfo *TRI =
773 MBB.getParent()->getSubtarget().getRegisterInfo();
774 MachineInstr &MI = *MBBI;
775 for (unsigned Offset = 0; Offset < N; ++Offset) {
776 int ImmOffset = MI.getOperand(i: 2).getImm() + Offset;
777 bool Kill = (Offset + 1 == N) ? MI.getOperand(i: 1).isKill() : false;
778 assert(ImmOffset >= -256 && ImmOffset < 256 &&
779 "Immediate spill offset out of range");
780 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: Opc))
781 .addReg(RegNo: TRI->getSubReg(Reg: MI.getOperand(i: 0).getReg(), Idx: sub0 + Offset),
782 flags: RState)
783 .addReg(RegNo: MI.getOperand(i: 1).getReg(), flags: getKillRegState(B: Kill))
784 .addImm(Val: ImmOffset);
785 }
786 MI.eraseFromParent();
787 return true;
788}
789
790// Create a call with the passed opcode and explicit operands, copying over all
791// the implicit operands from *MBBI, starting at the regmask.
792static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
793 MachineBasicBlock::iterator MBBI,
794 const AArch64InstrInfo *TII,
795 unsigned Opcode,
796 ArrayRef<MachineOperand> ExplicitOps,
797 unsigned RegMaskStartIdx) {
798 // Build the MI, with explicit operands first (including the call target).
799 MachineInstr *Call = BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII->get(Opcode))
800 .add(MOs: ExplicitOps)
801 .getInstr();
802
  // Register arguments are added during ISel, but cannot be added as explicit
  // operands of the branch because it expects to be "B <target>", which takes
  // only one operand. Instead they are implicit operands used by the branch.
806 while (!MBBI->getOperand(i: RegMaskStartIdx).isRegMask()) {
807 const MachineOperand &MOP = MBBI->getOperand(i: RegMaskStartIdx);
808 assert(MOP.isReg() && "can only add register operands");
809 Call->addOperand(Op: MachineOperand::CreateReg(
810 Reg: MOP.getReg(), /*Def=*/isDef: false, /*Implicit=*/isImp: true, /*isKill=*/false,
811 /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
812 RegMaskStartIdx++;
813 }
814 for (const MachineOperand &MO :
815 llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: RegMaskStartIdx))
816 Call->addOperand(Op: MO);
817
818 return Call;
819}
820
821// Create a call to CallTarget, copying over all the operands from *MBBI,
822// starting at the regmask.
823static MachineInstr *createCall(MachineBasicBlock &MBB,
824 MachineBasicBlock::iterator MBBI,
825 const AArch64InstrInfo *TII,
826 MachineOperand &CallTarget,
827 unsigned RegMaskStartIdx) {
828 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
829
830 assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
831 "invalid operand for regular call");
832 return createCallWithOps(MBB, MBBI, TII, Opcode: Opc, ExplicitOps: CallTarget, RegMaskStartIdx);
833}
834
835bool AArch64ExpandPseudo::expandCALL_RVMARKER(
836 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
837 // Expand CALL_RVMARKER pseudo to:
838 // - a branch to the call target, followed by
839 // - the special `mov x29, x29` marker, if necessary, and
840 // - another branch, to the runtime function
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
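  //
  // For example (illustrative), a call with an attached
  // objc_retainAutoreleasedReturnValue handler may expand to the bundle:
  //   bl  _callee
  //   mov x29, x29
  //   bl  _objc_retainAutoreleasedReturnValue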
842 MachineInstr &MI = *MBBI;
843 MachineOperand &RVTarget = MI.getOperand(i: 0);
844 bool DoEmitMarker = MI.getOperand(i: 1).getImm();
845 assert(RVTarget.isGlobal() && "invalid operand for attached call");
846
847 MachineInstr *OriginalCall = nullptr;
848
849 if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
850 // ptrauth call.
851 const MachineOperand &CallTarget = MI.getOperand(i: 2);
852 const MachineOperand &Key = MI.getOperand(i: 3);
853 const MachineOperand &IntDisc = MI.getOperand(i: 4);
854 const MachineOperand &AddrDisc = MI.getOperand(i: 5);
855
856 assert((Key.getImm() == AArch64PACKey::IA ||
857 Key.getImm() == AArch64PACKey::IB) &&
858 "Invalid auth call key");
859
860 MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
861
862 OriginalCall = createCallWithOps(MBB, MBBI, TII, Opcode: AArch64::BLRA, ExplicitOps: Ops,
863 /*RegMaskStartIdx=*/6);
864 } else {
865 assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
866 OriginalCall = createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 2),
867 // Regmask starts after the RV and call targets.
868 /*RegMaskStartIdx=*/3);
869 }
870
871 if (DoEmitMarker)
872 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ORRXrs))
873 .addReg(RegNo: AArch64::FP, flags: RegState::Define)
874 .addReg(RegNo: AArch64::XZR)
875 .addReg(RegNo: AArch64::FP)
876 .addImm(Val: 0);
877
878 auto *RVCall = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::BL))
879 .add(MO: RVTarget)
880 .getInstr();
881
882 if (MI.shouldUpdateAdditionalCallInfo())
883 MBB.getParent()->moveAdditionalCallInfo(Old: &MI, New: OriginalCall);
884
885 MI.eraseFromParent();
886 finalizeBundle(MBB, FirstMI: OriginalCall->getIterator(),
887 LastMI: std::next(x: RVCall->getIterator()));
888 return true;
889}
890
891bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
892 MachineBasicBlock::iterator MBBI) {
893 // Expand CALL_BTI pseudo to:
894 // - a branch to the call target
895 // - a BTI instruction
896 // Mark the sequence as a bundle, to avoid passes moving other code in
897 // between.
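  //
  // For example (illustrative), a setjmp call expands to the bundle:
  //   bl   _setjmp
  //   hint #36   (bti j)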
898 MachineInstr &MI = *MBBI;
899 MachineInstr *Call = createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 0),
900 // Regmask starts after the call target.
901 /*RegMaskStartIdx=*/1);
902
903 Call->setCFIType(MF&: *MBB.getParent(), Type: MI.getCFIType());
904
905 MachineInstr *BTI =
906 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::HINT))
          // BTI J so that setjmp can BR to this.
908 .addImm(Val: 36)
909 .getInstr();
910
911 if (MI.shouldUpdateAdditionalCallInfo())
912 MBB.getParent()->moveAdditionalCallInfo(Old: &MI, New: Call);
913
914 MI.eraseFromParent();
915 finalizeBundle(MBB, FirstMI: Call->getIterator(), LastMI: std::next(x: BTI->getIterator()));
916 return true;
917}
918
919bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
920 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
921 Register CtxReg = MBBI->getOperand(i: 0).getReg();
922 Register BaseReg = MBBI->getOperand(i: 1).getReg();
923 int Offset = MBBI->getOperand(i: 2).getImm();
924 DebugLoc DL(MBBI->getDebugLoc());
925 auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
926
927 if (STI.getTargetTriple().getArchName() != "arm64e") {
928 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::STRXui))
929 .addUse(RegNo: CtxReg)
930 .addUse(RegNo: BaseReg)
931 .addImm(Val: Offset / 8)
932 .setMIFlag(MachineInstr::FrameSetup);
933 MBBI->eraseFromParent();
934 return true;
935 }
936
937 // We need to sign the context in an address-discriminated way. 0xc31a is a
938 // fixed random value, chosen as part of the ABI.
939 // add x16, xBase, #Offset
940 // movk x16, #0xc31a, lsl #48
941 // mov x17, x22/xzr
942 // pacdb x17, x16
943 // str x17, [xBase, #Offset]
944 unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
945 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc), DestReg: AArch64::X16)
946 .addUse(RegNo: BaseReg)
947 .addImm(Val: abs(x: Offset))
948 .addImm(Val: 0)
949 .setMIFlag(MachineInstr::FrameSetup);
950 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::MOVKXi), DestReg: AArch64::X16)
951 .addUse(RegNo: AArch64::X16)
952 .addImm(Val: 0xc31a)
953 .addImm(Val: 48)
954 .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
  // so copy the context into X17 before signing.
957 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::ORRXrs), DestReg: AArch64::X17)
958 .addUse(RegNo: AArch64::XZR)
959 .addUse(RegNo: CtxReg)
960 .addImm(Val: 0)
961 .setMIFlag(MachineInstr::FrameSetup);
962 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::PACDB), DestReg: AArch64::X17)
963 .addUse(RegNo: AArch64::X17)
964 .addUse(RegNo: AArch64::X16)
965 .setMIFlag(MachineInstr::FrameSetup);
966 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::STRXui))
967 .addUse(RegNo: AArch64::X17)
968 .addUse(RegNo: BaseReg)
969 .addImm(Val: Offset / 8)
970 .setMIFlag(MachineInstr::FrameSetup);
971
972 MBBI->eraseFromParent();
973 return true;
974}
975
976MachineBasicBlock *
977AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
978 MachineBasicBlock::iterator MBBI) {
979 MachineInstr &MI = *MBBI;
980 assert((std::next(MBBI) != MBB.end() ||
981 MI.getParent()->successors().begin() !=
982 MI.getParent()->successors().end()) &&
983 "Unexpected unreachable in block that restores ZA");
984
985 // Compare TPIDR2_EL0 value against 0.
986 DebugLoc DL = MI.getDebugLoc();
987 MachineInstrBuilder Cbz = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::CBZX))
988 .add(MO: MI.getOperand(i: 0));
989
990 // Split MBB and create two new blocks:
991 // - MBB now contains all instructions before RestoreZAPseudo.
992 // - SMBB contains the RestoreZAPseudo instruction only.
993 // - EndBB contains all instructions after RestoreZAPseudo.
994 MachineInstr &PrevMI = *std::prev(x: MBBI);
995 MachineBasicBlock *SMBB = MBB.splitAt(SplitInst&: PrevMI, /*UpdateLiveIns*/ true);
996 MachineBasicBlock *EndBB = std::next(x: MI.getIterator()) == SMBB->end()
997 ? *SMBB->successors().begin()
998 : SMBB->splitAt(SplitInst&: MI, /*UpdateLiveIns*/ true);
999
  // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
1001 Cbz.addMBB(MBB: SMBB);
1002 BuildMI(BB: &MBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::B))
1003 .addMBB(MBB: EndBB);
1004 MBB.addSuccessor(Succ: EndBB);
1005
1006 // Replace the pseudo with a call (BL).
1007 MachineInstrBuilder MIB =
1008 BuildMI(BB&: *SMBB, I: SMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::BL));
1009 MIB.addReg(RegNo: MI.getOperand(i: 1).getReg(), flags: RegState::Implicit);
1010 for (unsigned I = 2; I < MI.getNumOperands(); ++I)
1011 MIB.add(MO: MI.getOperand(i: I));
1012 BuildMI(BB: SMBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::B)).addMBB(MBB: EndBB);
1013
1014 MI.eraseFromParent();
1015 return EndBB;
1016}
1017
1018MachineBasicBlock *
1019AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
1020 MachineBasicBlock::iterator MBBI) {
1021 MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens. Note that this is not just an optimisation: the code below
  // expects a successor instruction/block in order to split the block at MBBI.
1027 if (std::next(x: MBBI) == MBB.end() &&
1028 MI.getParent()->successors().begin() ==
1029 MI.getParent()->successors().end()) {
1030 MI.eraseFromParent();
1031 return &MBB;
1032 }
1033
1034 // Expand the pseudo into smstart or smstop instruction. The pseudo has the
1035 // following operands:
1036 //
1037 // MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
1038 //
1039 // The pseudo is expanded into a conditional smstart/smstop, with a
1040 // check if pstate.sm (register) equals the expected value, and if not,
1041 // invokes the smstart/smstop.
1042 //
1043 // As an example, the following block contains a normal call from a
1044 // streaming-compatible function:
1045 //
1046 // OrigBB:
1047 // MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP
1048 // bl @normal_callee
1049 // MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART
1050 //
1051 // ...which will be transformed into:
1052 //
1053 // OrigBB:
1054 // TBNZx %0:gpr64, 0, SMBB
1055 // b EndBB
1056 //
1057 // SMBB:
1058 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP
1059 //
1060 // EndBB:
1061 // bl @normal_callee
1062 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART
1063 //
1064 DebugLoc DL = MI.getDebugLoc();
1065
1066 // Create the conditional branch based on the third operand of the
1067 // instruction, which tells us if we are wrapping a normal or streaming
1068 // function.
1069 // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1070 // expected value for the callee (0 for a normal callee and 1 for a streaming
1071 // callee).
1072 unsigned Opc;
1073 switch (MI.getOperand(i: 2).getImm()) {
1074 case AArch64SME::Always:
1075 llvm_unreachable("Should have matched to instruction directly");
1076 case AArch64SME::IfCallerIsStreaming:
1077 Opc = AArch64::TBNZW;
1078 break;
1079 case AArch64SME::IfCallerIsNonStreaming:
1080 Opc = AArch64::TBZW;
1081 break;
1082 }
1083 auto PStateSM = MI.getOperand(i: 3).getReg();
1084 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1085 unsigned SMReg32 = TRI->getSubReg(Reg: PStateSM, Idx: AArch64::sub_32);
1086 MachineInstrBuilder Tbx =
1087 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc)).addReg(RegNo: SMReg32).addImm(Val: 0);
1088
1089 // Split MBB and create two new blocks:
1090 // - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
1091 // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
1092 // - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
1093 MachineInstr &PrevMI = *std::prev(x: MBBI);
1094 MachineBasicBlock *SMBB = MBB.splitAt(SplitInst&: PrevMI, /*UpdateLiveIns*/ true);
1095 MachineBasicBlock *EndBB = std::next(x: MI.getIterator()) == SMBB->end()
1096 ? *SMBB->successors().begin()
1097 : SMBB->splitAt(SplitInst&: MI, /*UpdateLiveIns*/ true);
1098
1099 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1100 Tbx.addMBB(MBB: SMBB);
1101 BuildMI(BB: &MBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::B))
1102 .addMBB(MBB: EndBB);
1103 MBB.addSuccessor(Succ: EndBB);
1104
1105 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1106 MachineInstrBuilder MIB = BuildMI(BB&: *SMBB, I: SMBB->begin(), MIMD: MI.getDebugLoc(),
1107 MCID: TII->get(Opcode: AArch64::MSRpstatesvcrImm1));
1108 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
1109 // these contain the CopyFromReg for the first argument and the flag to
1110 // indicate whether the callee is streaming or normal).
1111 MIB.add(MO: MI.getOperand(i: 0));
1112 MIB.add(MO: MI.getOperand(i: 1));
1113 for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1114 MIB.add(MO: MI.getOperand(i));
1115
1116 BuildMI(BB: SMBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::B)).addMBB(MBB: EndBB);
1117
1118 MI.eraseFromParent();
1119 return EndBB;
1120}
1121
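// Expand a multi-vector pseudo into either the contiguous-register or the
// strided-register form of the instruction, depending on which of the two
// register classes the destination tuple was allocated to.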
1122bool AArch64ExpandPseudo::expandMultiVecPseudo(
1123 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1124 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1125 unsigned ContiguousOp, unsigned StridedOpc) {
1126 MachineInstr &MI = *MBBI;
1127 Register Tuple = MI.getOperand(i: 0).getReg();
1128
1129 auto ContiguousRange = ContiguousClass.getRegisters();
1130 auto StridedRange = StridedClass.getRegisters();
1131 unsigned Opc;
1132 if (llvm::is_contained(Range&: ContiguousRange, Element: Tuple.asMCReg())) {
1133 Opc = ContiguousOp;
1134 } else if (llvm::is_contained(Range&: StridedRange, Element: Tuple.asMCReg())) {
1135 Opc = StridedOpc;
1136 } else
1137 llvm_unreachable("Cannot expand Multi-Vector pseudo");
1138
1139 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: Opc))
1140 .add(MO: MI.getOperand(i: 0))
1141 .add(MO: MI.getOperand(i: 1))
1142 .add(MO: MI.getOperand(i: 2))
1143 .add(MO: MI.getOperand(i: 3));
1144 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1145 MI.eraseFromParent();
1146 return true;
1147}
1148
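// Expand a tuple-forming pseudo by copying each source vector into the
// corresponding zsub slice of the destination tuple (using ORR_ZZZ as a
// vector move), skipping copies whose source is already in place.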
1149bool AArch64ExpandPseudo::expandFormTuplePseudo(
1150 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1151 MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
1152 assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
1153 MachineInstr &MI = *MBBI;
1154 Register ReturnTuple = MI.getOperand(i: 0).getReg();
1155
1156 const TargetRegisterInfo *TRI =
1157 MBB.getParent()->getSubtarget().getRegisterInfo();
1158 for (unsigned I = 0; I < Size; ++I) {
1159 Register FormTupleOpReg = MI.getOperand(i: I + 1).getReg();
1160 Register ReturnTupleSubReg =
1161 TRI->getSubReg(Reg: ReturnTuple, Idx: AArch64::zsub0 + I);
1162 // Add copies to ensure the subregisters remain in the correct order
    // for any contiguous operation they are used by.
1164 if (FormTupleOpReg != ReturnTupleSubReg)
1165 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ORR_ZZZ))
1166 .addReg(RegNo: ReturnTupleSubReg, flags: RegState::Define)
1167 .addReg(RegNo: FormTupleOpReg)
1168 .addReg(RegNo: FormTupleOpReg);
1169 }
1170
1171 MI.eraseFromParent();
1172 return true;
1173}
1174
1175/// If MBBI references a pseudo instruction that should be expanded here,
1176/// do the expansion and return true. Otherwise return false.
1177bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1178 MachineBasicBlock::iterator MBBI,
1179 MachineBasicBlock::iterator &NextMBBI) {
1180 MachineInstr &MI = *MBBI;
1181 unsigned Opcode = MI.getOpcode();
1182
1183 // Check if we can expand the destructive op
1184 int OrigInstr = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode());
1185 if (OrigInstr != -1) {
1186 auto &Orig = TII->get(Opcode: OrigInstr);
1187 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1188 AArch64::NotDestructive) {
1189 return expand_DestructiveOp(MI, MBB, MBBI);
1190 }
1191 }
1192
1193 switch (Opcode) {
1194 default:
1195 break;
1196
1197 case AArch64::BSPv8i8:
1198 case AArch64::BSPv16i8: {
1199 Register DstReg = MI.getOperand(i: 0).getReg();
1200 if (DstReg == MI.getOperand(i: 3).getReg()) {
1201 // Expand to BIT
1202 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1203 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1204 : AArch64::BITv16i8))
1205 .add(MO: MI.getOperand(i: 0))
1206 .add(MO: MI.getOperand(i: 3))
1207 .add(MO: MI.getOperand(i: 2))
1208 .add(MO: MI.getOperand(i: 1));
1209 } else if (DstReg == MI.getOperand(i: 2).getReg()) {
1210 // Expand to BIF
1211 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1212 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1213 : AArch64::BIFv16i8))
1214 .add(MO: MI.getOperand(i: 0))
1215 .add(MO: MI.getOperand(i: 2))
1216 .add(MO: MI.getOperand(i: 3))
1217 .add(MO: MI.getOperand(i: 1));
1218 } else {
1219 // Expand to BSL, use additional move if required
1220 if (DstReg == MI.getOperand(i: 1).getReg()) {
1221 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1222 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1223 : AArch64::BSLv16i8))
1224 .add(MO: MI.getOperand(i: 0))
1225 .add(MO: MI.getOperand(i: 1))
1226 .add(MO: MI.getOperand(i: 2))
1227 .add(MO: MI.getOperand(i: 3));
1228 } else {
1229 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1230 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1231 : AArch64::ORRv16i8))
1232 .addReg(RegNo: DstReg,
1233 flags: RegState::Define |
1234 getRenamableRegState(B: MI.getOperand(i: 0).isRenamable()))
1235 .add(MO: MI.getOperand(i: 1))
1236 .add(MO: MI.getOperand(i: 1));
1237 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1238 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1239 : AArch64::BSLv16i8))
1240 .add(MO: MI.getOperand(i: 0))
1241 .addReg(RegNo: DstReg,
1242 flags: RegState::Kill |
1243 getRenamableRegState(B: MI.getOperand(i: 0).isRenamable()))
1244 .add(MO: MI.getOperand(i: 2))
1245 .add(MO: MI.getOperand(i: 3));
1246 }
1247 }
1248 MI.eraseFromParent();
1249 return true;
1250 }
1251
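  // The plain register-register forms below are expanded to the equivalent
  // shifted-register instructions with an explicit LSL #0 shift amount, which
  // is the same architectural operation.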
1252 case AArch64::ADDWrr:
1253 case AArch64::SUBWrr:
1254 case AArch64::ADDXrr:
1255 case AArch64::SUBXrr:
1256 case AArch64::ADDSWrr:
1257 case AArch64::SUBSWrr:
1258 case AArch64::ADDSXrr:
1259 case AArch64::SUBSXrr:
1260 case AArch64::ANDWrr:
1261 case AArch64::ANDXrr:
1262 case AArch64::BICWrr:
1263 case AArch64::BICXrr:
1264 case AArch64::ANDSWrr:
1265 case AArch64::ANDSXrr:
1266 case AArch64::BICSWrr:
1267 case AArch64::BICSXrr:
1268 case AArch64::EONWrr:
1269 case AArch64::EONXrr:
1270 case AArch64::EORWrr:
1271 case AArch64::EORXrr:
1272 case AArch64::ORNWrr:
1273 case AArch64::ORNXrr:
1274 case AArch64::ORRWrr:
1275 case AArch64::ORRXrr: {
1276 unsigned Opcode;
1277 switch (MI.getOpcode()) {
1278 default:
1279 return false;
1280 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
1281 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
1282 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
1283 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
1284 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
1285 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
1286 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
1287 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
1288 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
1289 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
1290 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
1291 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
1292 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
1293 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
1294 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
1295 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
1296 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
1297 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
1298 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
1299 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
1300 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
1301 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
1302 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
1303 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
1304 }
1305 MachineFunction &MF = *MBB.getParent();
1306 // Try to create new inst without implicit operands added.
1307 MachineInstr *NewMI = MF.CreateMachineInstr(
1308 MCID: TII->get(Opcode), DL: MI.getDebugLoc(), /*NoImplicit=*/true);
1309 MBB.insert(I: MBBI, MI: NewMI);
1310 MachineInstrBuilder MIB1(MF, NewMI);
1311 MIB1->setPCSections(MF, MD: MI.getPCSections());
1312 MIB1.addReg(RegNo: MI.getOperand(i: 0).getReg(), flags: RegState::Define)
1313 .add(MO: MI.getOperand(i: 1))
1314 .add(MO: MI.getOperand(i: 2))
1315 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0));
1316 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB1);
1317 if (auto DebugNumber = MI.peekDebugInstrNum())
1318 NewMI->setDebugInstrNum(DebugNumber);
1319 MI.eraseFromParent();
1320 return true;
1321 }
1322
1323 case AArch64::LOADgot: {
1324 MachineFunction *MF = MBB.getParent();
1325 Register DstReg = MI.getOperand(i: 0).getReg();
1326 const MachineOperand &MO1 = MI.getOperand(i: 1);
1327 unsigned Flags = MO1.getTargetFlags();
1328
1329 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // The tiny code model expands to a single LDR.
1331 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1332 MCID: TII->get(Opcode: AArch64::LDRXl), DestReg: DstReg);
1333
1334 if (MO1.isGlobal()) {
1335 MIB.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags);
1336 } else if (MO1.isSymbol()) {
1337 MIB.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags);
1338 } else {
1339 assert(MO1.isCPI() &&
1340 "Only expect globals, externalsymbols, or constant pools");
1341 MIB.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), TargetFlags: Flags);
1342 }
1343 } else {
      // The small code model expands into ADRP + LDR.
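      // For example (illustrative, ELF assembly syntax), loading the address
      // of a global G through the GOT becomes:
      //   adrp x0, :got:G
      //   ldr  x0, [x0, :got_lo12:G]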
1345 MachineFunction &MF = *MI.getParent()->getParent();
1346 DebugLoc DL = MI.getDebugLoc();
1347 MachineInstrBuilder MIB1 =
1348 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP), DestReg: DstReg);
1349
1350 MachineInstrBuilder MIB2;
1351 if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1352 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1353 unsigned Reg32 = TRI->getSubReg(Reg: DstReg, Idx: AArch64::sub_32);
1354 unsigned DstFlags = MI.getOperand(i: 0).getTargetFlags();
1355 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::LDRWui))
1356 .addDef(RegNo: Reg32)
1357 .addReg(RegNo: DstReg, flags: RegState::Kill)
1358 .addReg(RegNo: DstReg, flags: DstFlags | RegState::Implicit);
1359 } else {
1360 Register DstReg = MI.getOperand(i: 0).getReg();
1361 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::LDRXui))
1362 .add(MO: MI.getOperand(i: 0))
1363 .addUse(RegNo: DstReg, Flags: RegState::Kill);
1364 }
1365
1366 if (MO1.isGlobal()) {
1367 MIB1.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags | AArch64II::MO_PAGE);
1368 MIB2.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0,
1369 TargetFlags: Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1370 } else if (MO1.isSymbol()) {
1371 MIB1.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags | AArch64II::MO_PAGE);
1372 MIB2.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags |
1373 AArch64II::MO_PAGEOFF |
1374 AArch64II::MO_NC);
1375 } else {
1376 assert(MO1.isCPI() &&
1377 "Only expect globals, externalsymbols, or constant pools");
1378 MIB1.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(),
1379 TargetFlags: Flags | AArch64II::MO_PAGE);
1380 MIB2.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(),
1381 TargetFlags: Flags | AArch64II::MO_PAGEOFF |
1382 AArch64II::MO_NC);
1383 }
1384
      // If the LOADgot instruction has a debug-instr-number, annotate the
      // LDR instruction it is expanded to with the same debug-instr-number
      // to preserve debug information.
1388 if (MI.peekDebugInstrNum() != 0)
1389 MIB2->setDebugInstrNum(MI.peekDebugInstrNum());
1390 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2);
1391 }
1392 MI.eraseFromParent();
1393 return true;
1394 }
1395 case AArch64::MOVaddrBA: {
1396 MachineFunction &MF = *MI.getParent()->getParent();
1397 if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1398 // blockaddress expressions have to come from a constant pool because the
1399 // largest addend (and hence offset within a function) allowed for ADRP is
1400 // only 8MB.
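      // The result is roughly: a constant-pool entry holding the blockaddress,
      // plus
      //   adrp xD, <cpi>@PAGE
      //   ldr  xD, [xD, <cpi>@PAGEOFF]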
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
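    // For a symbol S this typically becomes:
    //   adrp xD, S
    //   add  xD, xD, :lo12:S
    // (plus an optional MOVK for tagged globals, see below).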
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
      SysReg = AArch64SysReg::TPIDRRO_EL0;
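    // Read the thread pointer from the selected system register, e.g.
    //   mrs xD, TPIDR_EL0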
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
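  // The CMP_SWAP_* pseudos become a load-exclusive / store-exclusive loop in
  // expandCMP_SWAP; roughly:
  //   1: ldaxr  wDest, [xAddr]
  //      cmp    wDest, wDesired     // uxtb/uxth extend for the 8/16-bit forms
  //      b.ne   2f
  //      stlxr  wStatus, wNew, [xAddr]
  //      cbnz   wStatus, 1b
  //   2: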
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
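    // These tied pseudos model the AESE/AESMC (and AESD/AESIMC) register
    // dependency so the pair can be fused; here they simply lower to the
    // plain AESMC/AESIMC instructions.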
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
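    // i.e. materialise the tagged base (optionally via an ADD/SUB of the
    // frame offset into the destination register) and then, roughly:
    //   irg xDst, xSrc[, xExcluded]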
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::STR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::LDR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
  case AArch64::BLR_RVMARKER:
  case AArch64::BLRA_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  case AArch64::RestoreZAPseudo: {
    auto *NewMBB = expandRestoreZA(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = expandCondSMToggle(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::COALESCER_BARRIER_FPR16:
  case AArch64::COALESCER_BARRIER_FPR32:
  case AArch64::COALESCER_BARRIER_FPR64:
  case AArch64::COALESCER_BARRIER_FPR128:
    MI.eraseFromParent();
    return true;
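  // For the multi-vector load pseudos below, expandMultiVecPseudo picks the
  // contiguous or the strided opcode depending on whether the destination
  // tuple was allocated to the contiguous or the strided register class.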
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
    return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
    return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}