1//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands pseudo instructions into target
10// instructions to allow proper scheduling and other late optimizations. This
11// pass should be run after register allocation but before the post-regalloc
12// scheduling pass.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64ExpandImm.h"
17#include "AArch64InstrInfo.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/CodeGen/LivePhysRegs.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineConstantPool.h"
25#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineInstrBundle.h"
30#include "llvm/CodeGen/MachineOperand.h"
31#include "llvm/CodeGen/TargetSubtargetInfo.h"
32#include "llvm/IR/DebugLoc.h"
33#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/Pass.h"
35#include "llvm/Support/CodeGen.h"
36#include "llvm/Target/TargetMachine.h"
37#include "llvm/TargetParser/Triple.h"
38#include <cassert>
39#include <cstdint>
40#include <iterator>
41
42using namespace llvm;
43
44#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
45
46namespace {
47
/// Core implementation of the AArch64 pseudo-instruction expansion. The
/// pass-manager wrappers delegate to run(), which walks every basic block of
/// the function and rewrites pseudos into real target instructions.
class AArch64ExpandPseudoImpl {
public:
  // Target instruction info used by every expansion helper below.
  // NOTE(review): presumably initialized by run() before expansion — the
  // definition of run() is outside this chunk; confirm there.
  const AArch64InstrInfo *TII;

  bool run(MachineFunction &MF);

private:
  // Expand all pseudos in one block; true if anything changed.
  bool expandMBB(MachineBasicBlock &MBB);
  // Expand a single instruction. NextMBBI is updated when the expansion
  // splits the block, so the caller can resume iteration safely.
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  // Pick the contiguous or strided real opcode for a multi-vector pseudo
  // depending on which register class the operands landed in.
  bool expandMultiVecPseudo(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            TargetRegisterClass ContiguousClass,
                            TargetRegisterClass StridedClass,
                            unsigned ContiguousOpc, unsigned StridedOpc);
  bool expandFormTuplePseudo(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             MachineBasicBlock::iterator &NextMBBI,
                             unsigned Size);
  // Materialize a 32- or 64-bit immediate with MOVZ/MOVN/MOVK etc.
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  // SVE pseudos with a destructive (tied) operand; may emit MOVPRFX.
  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI);
  // Compare-and-swap loops built from load-/store-exclusive instructions.
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  // MTE tag-setting loop (STGloop/STZGloop with writeback).
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  // Spill/fill of N consecutive SVE Z/P registers as N single ops.
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  bool expandSTSHHAtomicStore(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI);
  // Result of splitting a block around a conditionally-executed region.
  struct ConditionalBlocks {
    MachineBasicBlock &CondBB;
    MachineBasicBlock &EndBB;
  };
  ConditionalBlocks expandConditionalPseudo(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL,
                                            MachineInstrBuilder &Branch);
  MachineBasicBlock *expandRestoreZASave(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCommitZASave(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};
109
/// Legacy (old pass manager) wrapper around AArch64ExpandPseudoImpl.
class AArch64ExpandPseudoLegacy : public MachineFunctionPass {
public:
  // Pass identification, replacement for typeid.
  static char ID;

  AArch64ExpandPseudoLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
};
120
121} // end anonymous namespace
122
// Out-of-line definition of the legacy pass ID; its address identifies the
// pass to the legacy pass manager.
char AArch64ExpandPseudoLegacy::ID = 0;

// Register the pass under the command-line name "aarch64-expand-pseudo".
INITIALIZE_PASS(AArch64ExpandPseudoLegacy, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)
127
128/// Transfer implicit operands on the pseudo instruction to the
129/// instructions created from the expansion.
130static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
131 MachineInstrBuilder &DefMI) {
132 const MCInstrDesc &Desc = OldMI.getDesc();
133 for (const MachineOperand &MO :
134 llvm::drop_begin(RangeOrContainer: OldMI.operands(), N: Desc.getNumOperands())) {
135 assert(MO.isReg() && MO.getReg());
136 if (MO.isUse())
137 UseMI.add(MO);
138 else
139 DefMI.add(MO);
140 }
141}
142
/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI,
                                           unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(i: 0).getReg();
  // Propagate the renamable bit of the pseudo's def onto every expanded def.
  RegState RenamableState =
      getRenamableRegState(B: MI.getOperand(i: 0).isRenamable());
  uint64_t Imm = MI.getOperand(i: 1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  // Ask the shared immediate-materialization helper for the instruction
  // sequence (opcode + two payload fields per step) that synthesizes Imm.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    // The destination may only be marked dead on the final instruction of the
    // sequence; earlier steps feed their partial result to the next one.
    bool LastItem = std::next(x: I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
    case AArch64::ANDXri:
    case AArch64::EORXri:
      if (I->Op1 == 0) {
        // First step of the sequence: the source is the zero register.
        MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
                           .add(MO: MI.getOperand(i: 0))
                           .addReg(RegNo: BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(Val: I->Op2));
      } else {
        // Later step: combine the logical immediate with the partial result
        // already in DstReg.
        Register DstReg = MI.getOperand(i: 0).getReg();
        bool DstIsDead = MI.getOperand(i: 0).isDead();
        MIBS.push_back(
            Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
                .addReg(RegNo: DstReg, Flags: RegState::Define |
                                    getDeadRegState(B: DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(RegNo: DstReg)
                .addImm(Val: I->Op2));
      }
      break;
    case AArch64::EONXrs:
    case AArch64::EORXrs:
    case AArch64::ORRWrs:
    case AArch64::ORRXrs: {
      // Shifted-register form combining DstReg with itself; Op2 encodes the
      // shift.
      Register DstReg = MI.getOperand(i: 0).getReg();
      bool DstIsDead = MI.getOperand(i: 0).isDead();
      MIBS.push_back(
          Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
              .addReg(RegNo: DstReg, Flags: RegState::Define |
                                  getDeadRegState(B: DstIsDead && LastItem) |
                                  RenamableState)
              .addReg(RegNo: DstReg)
              .addReg(RegNo: DstReg)
              .addImm(Val: I->Op2));
    } break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      // Wide-move start of sequence: Op1 is the 16-bit chunk, Op2 the shift.
      bool DstIsDead = MI.getOperand(i: 0).isDead();
      MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
                         .addReg(RegNo: DstReg, Flags: RegState::Define |
                                             getDeadRegState(B: DstIsDead && LastItem) |
                                             RenamableState)
                         .addImm(Val: I->Op1)
                         .addImm(Val: I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      // MOVK keeps the other bits of DstReg, so it both reads and writes it.
      Register DstReg = MI.getOperand(i: 0).getReg();
      bool DstIsDead = MI.getOperand(i: 0).isDead();
      MIBS.push_back(Elt: BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: I->Opcode))
                         .addReg(RegNo: DstReg,
                                 Flags: RegState::Define |
                                     getDeadRegState(B: DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(RegNo: DstReg)
                         .addImm(Val: I->Op1)
                         .addImm(Val: I->Op2));
    } break;
    }
  }
  // Implicit uses go on the first expanded instruction, implicit defs on the
  // last, mirroring where the pseudo's reads/writes now happen.
  transferImpOps(OldMI&: MI, UseMI&: MIBS.front(), DefMI&: MIBS.back());
  MI.eraseFromParent();
  return true;
}
239
/// Expand a CMP_SWAP pseudo into an explicit load-exclusive/store-exclusive
/// retry loop. The caller supplies the ordering-specific load (\p LdarOp),
/// store (\p StlrOp) and compare (\p CmpOp) opcodes, the extend immediate for
/// the compare, and the zero register of the matching width.
bool AArch64ExpandPseudoImpl::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(i: 0);
  Register StatusReg = MI.getOperand(i: 1).getReg();
  bool StatusDead = MI.getOperand(i: 1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; However undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(i: 2).getReg();
  Register DesiredReg = MI.getOperand(i: 3).getReg();
  Register NewReg = MI.getOperand(i: 4).getReg();

  // Build the loop CFG: MBB -> LoadCmpBB <-> StoreBB -> DoneBB.
  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());

  MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB);
  MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB);
  MF->insert(MBBI: ++StoreBB->getIterator(), MBB: DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  // Status must read as 0 ("success") when we leave via the compare-failed
  // path, but only materialize the zero if the value is actually used.
  if (!StatusDead)
    BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::MOVZWi), DestReg: StatusReg)
        .addImm(Val: 0).addImm(Val: 0);
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: LdarOp), DestReg: Dest.getReg())
      .addReg(RegNo: AddrReg);
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: CmpOp), DestReg: ZeroReg)
      .addReg(RegNo: Dest.getReg(), Flags: getKillRegState(B: Dest.isDead()))
      .addReg(RegNo: DesiredReg)
      .addImm(Val: ExtendImm);
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::Bcc))
      .addImm(Val: AArch64CC::NE)
      .addMBB(MBB: DoneBB)
      .addReg(RegNo: AArch64::NZCV, Flags: RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(Succ: DoneBB);
  LoadCmpBB->addSuccessor(Succ: StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: StlrOp), DestReg: StatusReg)
      .addReg(RegNo: NewReg)
      .addReg(RegNo: AddrReg);
  BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
      .addReg(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
      .addMBB(MBB: LoadCmpBB);
  StoreBB->addSuccessor(Succ: LoadCmpBB);
  StoreBB->addSuccessor(Succ: DoneBB);

  // Everything after the pseudo moves to DoneBB, which also inherits the
  // original block's successors.
  DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
  DoneBB->transferSuccessors(FromMBB: &MBB);

  MBB.addSuccessor(Succ: LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
  computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
  computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);

  return true;
}
319
/// Expand a 128-bit CMP_SWAP pseudo into a load-exclusive-pair /
/// store-exclusive-pair loop. The acquire/release variants of the load and
/// store are selected from the pseudo's opcode below.
bool AArch64ExpandPseudoImpl::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(i: 0);
  MachineOperand &DestHi = MI.getOperand(i: 1);
  Register StatusReg = MI.getOperand(i: 2).getReg();
  bool StatusDead = MI.getOperand(i: 2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; However undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(i: 3).getReg();
  Register DesiredLoReg = MI.getOperand(i: 4).getReg();
  Register DesiredHiReg = MI.getOperand(i: 5).getReg();
  Register NewLoReg = MI.getOperand(i: 6).getReg();
  Register NewHiReg = MI.getOperand(i: 7).getReg();

  unsigned LdxpOp, StxpOp;

  // Pick acquire/release flavors of the exclusive pair ops to match the
  // memory ordering encoded in the pseudo opcode.
  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  // CFG: MBB -> LoadCmpBB -> {StoreBB, FailBB} -> DoneBB (with back-edges to
  // LoadCmpBB on a failed store-exclusive).
  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());

  MF->insert(MBBI: ++MBB.getIterator(), MBB: LoadCmpBB);
  MF->insert(MBBI: ++LoadCmpBB->getIterator(), MBB: StoreBB);
  MF->insert(MBBI: ++StoreBB->getIterator(), MBB: FailBB);
  MF->insert(MBBI: ++FailBB->getIterator(), MBB: DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     subs xzr, xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     subs xzr, xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Lfail
  // Each CSINC folds one 64-bit comparison into wStatus (0 = equal so far),
  // so both halves must match for the store path to be taken.
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: LdxpOp))
      .addReg(RegNo: DestLo.getReg(), Flags: RegState::Define)
      .addReg(RegNo: DestHi.getReg(), Flags: RegState::Define)
      .addReg(RegNo: AddrReg);
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::SUBSXrs), DestReg: AArch64::XZR)
      .addReg(RegNo: DestLo.getReg(), Flags: getKillRegState(B: DestLo.isDead()))
      .addReg(RegNo: DesiredLoReg)
      .addImm(Val: 0);
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CSINCWr), DestReg: StatusReg)
      .addUse(RegNo: AArch64::WZR)
      .addUse(RegNo: AArch64::WZR)
      .addImm(Val: AArch64CC::EQ);
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::SUBSXrs), DestReg: AArch64::XZR)
      .addReg(RegNo: DestHi.getReg(), Flags: getKillRegState(B: DestHi.isDead()))
      .addReg(RegNo: DesiredHiReg)
      .addImm(Val: 0);
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CSINCWr), DestReg: StatusReg)
      .addUse(RegNo: StatusReg, Flags: RegState::Kill)
      .addUse(RegNo: StatusReg, Flags: RegState::Kill)
      .addImm(Val: AArch64CC::EQ);
  BuildMI(BB: LoadCmpBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
      .addUse(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
      .addMBB(MBB: FailBB);
  LoadCmpBB->addSuccessor(Succ: FailBB);
  LoadCmpBB->addSuccessor(Succ: StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  //     b .Ldone
  BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: StxpOp), DestReg: StatusReg)
      .addReg(RegNo: NewLoReg)
      .addReg(RegNo: NewHiReg)
      .addReg(RegNo: AddrReg);
  BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
      .addReg(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
      .addMBB(MBB: LoadCmpBB);
  BuildMI(BB: StoreBB, MIMD, MCID: TII->get(Opcode: AArch64::B)).addMBB(MBB: DoneBB);
  StoreBB->addSuccessor(Succ: LoadCmpBB);
  StoreBB->addSuccessor(Succ: DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  // On mismatch, store the just-loaded value back: memory is unchanged but
  // the exclusive sequence still completes with the required ordering; retry
  // if the store-exclusive itself fails.
  BuildMI(BB: FailBB, MIMD, MCID: TII->get(Opcode: StxpOp), DestReg: StatusReg)
      .addReg(RegNo: DestLo.getReg())
      .addReg(RegNo: DestHi.getReg())
      .addReg(RegNo: AddrReg);
  BuildMI(BB: FailBB, MIMD, MCID: TII->get(Opcode: AArch64::CBNZW))
      .addReg(RegNo: StatusReg, Flags: getKillRegState(B: StatusDead))
      .addMBB(MBB: LoadCmpBB);
  FailBB->addSuccessor(Succ: LoadCmpBB);
  FailBB->addSuccessor(Succ: DoneBB);

  // Everything after the pseudo moves to DoneBB, which also inherits the
  // original block's successors.
  DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
  DoneBB->transferSuccessors(FromMBB: &MBB);

  MBB.addSuccessor(Succ: LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
  computeAndAddLiveIns(LiveRegs, MBB&: *FailBB);
  computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
  computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, MBB&: *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, MBB&: *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, MBB&: *LoadCmpBB);

  return true;
}
455
456/// \brief Expand Pseudos to Instructions with destructive operands.
457///
458/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
459/// or for fixing relaxed register allocation conditions to comply with
460/// the instructions register constraints. The latter case may be cheaper
461/// than setting the register constraints in the register allocator,
462/// since that will insert regular MOV instructions rather than MOVPRFX.
463///
464/// Example (after register allocation):
465///
466/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
467///
468/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
469/// * We cannot map directly to FSUB_ZPmZ_B because the register
470/// constraints of the instruction are not met.
471/// * Also the _ZERO specifies the false lanes need to be zeroed.
472///
473/// We first try to see if the destructive operand == result operand,
474/// if not, we try to swap the operands, e.g.
475///
476/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
477///
478/// But because FSUB_ZPmZ is not commutative, this is semantically
479/// different, so we need a reverse instruction:
480///
481/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
482///
483/// Then we implement the zeroing of the false lanes of Z0 by adding
484/// a zeroing MOVPRFX instruction:
485///
486/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
487/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
488///
489/// Note that this can only be done for _ZERO or _UNDEF variants where
490/// we can guarantee the false lanes to be zeroed (by implementing this)
491/// or that they are undef (don't care / not used), otherwise the
492/// swapping of operands is illegal because the operation is not
493/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudoImpl::expand_DestructiveOp(
    MachineInstr &MI, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  // Map the pseudo onto the real SVE instruction and read its destructive
  // operand classification and false-lane policy from the TSFlags.
  unsigned Opcode = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(i: 0).getReg();
  bool DstIsDead = MI.getOperand(i: 0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  // Decide which pseudo operand plays which role (predicate, destructive
  // input, source(s)). Where the destination already matches a source we may
  // swap the roles and use the reversed instruction instead of a MOVPRFX.
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(i: 3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR   Zd, Pg/m, Zd, Zs1
      std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 3, args: 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 1, args: 2, args: 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx) = std::make_tuple(args: 2, args: 3, args: 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 2, args: 3, args: 4);
    if (DstReg == MI.getOperand(i: 3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 3, args: 4, args: 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(i: 4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(args&: PredIdx, args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 4, args: 3, args: 2);
      UseRev = true;
    }
    break;
  case AArch64::Destructive2xRegImmUnpred:
    // EXT_ZZI_CONSTRUCTIVE Zd, Zs, Imm
    //   ==> MOVPRFX Zd Zs; EXT_ZZI Zd, Zd, Zs, Imm
    std::tie(args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 1, args: 2);
    break;
  case AArch64::DestructiveBinaryShImmUnpred:
    std::tie(args&: DOPIdx, args&: SrcIdx, args&: Src2Idx) = std::make_tuple(args: 1, args: 2, args: 3);
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(i: SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(i: DOPIdx).getReg() ||
        MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryShImmUnpred:
  case AArch64::Destructive2xRegImmUnpred:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(i: DOPIdx).getReg() ||
        (MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: SrcIdx).getReg() &&
         MI.getOperand(i: DOPIdx).getReg() != MI.getOperand(i: Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opc: Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  // Preserve undef state until DOP's reg is defined.
  RegState DOPRegState = getUndefRegState(B: MI.getOperand(i: DOPIdx).isUndef());

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm ||
            DType == AArch64::DestructiveBinaryCommWithRev) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: MovPrfxZero))
               .addReg(RegNo: DstReg, Flags: RegState::Define)
               .addReg(RegNo: MI.getOperand(i: PredIdx).getReg())
               .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState);

    // After the movprfx, the destructive operand is same as Dst
    DOPIdx = 0;
    DOPRegState = {};

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm ||
         DType == AArch64::DestructiveBinaryCommWithRev) &&
        !DOPRegIsUnique) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LSLZero))
          .addReg(RegNo: DstReg, Flags: RegState::Define)
          .add(MO: MI.getOperand(i: PredIdx))
          .addReg(RegNo: DstReg)
          .addImm(Val: 0);
    }
  } else if (DstReg != MI.getOperand(i: DOPIdx).getReg()) {
    // No zeroing requested, but the destination differs from the destructive
    // input: copy it in with a plain (unpredicated) MOVPRFX.
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: MovPrfx))
               .addReg(RegNo: DstReg, Flags: RegState::Define)
               .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState);
    DOPIdx = 0;
    DOPRegState = {};
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode))
            .addReg(RegNo: DstReg, Flags: RegState::Define | getDeadRegState(B: DstIsDead));
  // The destructive input's last use is on the real instruction itself.
  DOPRegState = DOPRegState | RegState::Kill;

  // Append the remaining operands in the order the real instruction expects
  // for each destructive-operand class.
  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
        .add(MO: MI.getOperand(i: PredIdx))
        .add(MO: MI.getOperand(i: SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MO: MI.getOperand(i: PredIdx))
        .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
        .add(MO: MI.getOperand(i: SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MO: MI.getOperand(i: PredIdx))
        .addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
        .add(MO: MI.getOperand(i: SrcIdx))
        .add(MO: MI.getOperand(i: Src2Idx));
    break;
  case AArch64::DestructiveBinaryShImmUnpred:
  case AArch64::Destructive2xRegImmUnpred:
    DOP.addReg(RegNo: MI.getOperand(i: DOPIdx).getReg(), Flags: DOPRegState)
        .add(MO: MI.getOperand(i: SrcIdx))
        .add(MO: MI.getOperand(i: Src2Idx));
    break;
  }

  // If a MOVPRFX was emitted, it must stay glued to the instruction it
  // prefixes, so bundle the pair together.
  if (PRFX) {
    transferImpOps(OldMI&: MI, UseMI&: PRFX, DefMI&: DOP);
    finalizeBundle(MBB, FirstMI: PRFX->getIterator(), LastMI: MBBI->getIterator());
  } else
    transferImpOps(OldMI&: MI, UseMI&: DOP, DefMI&: DOP);

  MI.eraseFromParent();
  return true;
}
708
/// Expand the unpredicated SVE bitwise pseudos EON_ZZZ, NAND_ZZZ and NOR_ZZZ
/// onto the SVE2 ternary bitwise instructions BSL2N/NBSL, which tie their
/// first source to the destination. A MOVPRFX is emitted (and bundled) when
/// the destination register does not already hold one of the sources.
bool AArch64ExpandPseudoImpl::expandSVEBitwisePseudo(
    MachineInstr &MI, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  MachineInstrBuilder PRFX, DOP;
  const unsigned Opcode = MI.getOpcode();
  const MachineOperand &Op0 = MI.getOperand(i: 0);
  const MachineOperand *Op1 = &MI.getOperand(i: 1);
  const MachineOperand *Op2 = &MI.getOperand(i: 2);
  const Register DOPReg = Op0.getReg();

  if (DOPReg == Op2->getReg()) {
    // Commute the operands to allow destroying the second source.
    std::swap(a&: Op1, b&: Op2);
  } else if (DOPReg != Op1->getReg()) {
    // If not in destructive form, emit a MOVPRFX. The input should only be
    // killed if unused by the subsequent instruction.
    PRFX = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::MOVPRFX_ZZ))
               .addDef(RegNo: DOPReg, Flags: getRenamableRegState(B: Op0.isRenamable()))
               .addReg(RegNo: Op1->getReg(),
                       Flags: getRenamableRegState(B: Op1->isRenamable()) |
                           getUndefRegState(B: Op1->isUndef()) |
                           getKillRegState(B: Op1->isKill() &&
                                           Opcode == AArch64::NAND_ZZZ));
  }

  // At this point DOPReg holds (a copy of) the first source.
  assert((DOPReg == Op1->getReg() || PRFX) && "invalid expansion");

  // Flags for the tied destructive use of DOPReg on the real instruction.
  const RegState DOPRegState = getRenamableRegState(B: Op0.isRenamable()) |
                               getUndefRegState(B: !PRFX && Op1->isUndef()) |
                               RegState::Kill;

  switch (Opcode) {
  default:
    llvm_unreachable("unhandled opcode");
  case AArch64::EON_ZZZ:
    DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::BSL2N_ZZZZ))
              .add(MO: Op0)
              .addReg(RegNo: DOPReg, Flags: DOPRegState)
              .add(MO: *Op1)
              .add(MO: *Op2);
    break;
  case AArch64::NAND_ZZZ:
    DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::NBSL_ZZZZ))
              .add(MO: Op0)
              .addReg(RegNo: DOPReg, Flags: DOPRegState)
              .add(MO: *Op2)
              .add(MO: *Op2);
    break;
  case AArch64::NOR_ZZZ:
    DOP = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::NBSL_ZZZZ))
              .add(MO: Op0)
              .addReg(RegNo: DOPReg, Flags: DOPRegState)
              .add(MO: *Op2)
              .add(MO: *Op1);
    break;
  }

  // Keep a MOVPRFX glued to the instruction it prefixes.
  if (PRFX) {
    transferImpOps(OldMI&: MI, UseMI&: PRFX, DefMI&: DOP);
    finalizeBundle(MBB, FirstMI: PRFX->getIterator(), LastMI: MBBI->getIterator());
  } else {
    transferImpOps(OldMI&: MI, UseMI&: DOP, DefMI&: DOP);
  }

  MI.eraseFromParent();
  return true;
}
776
/// Expand an STGloop_wback/STZGloop_wback pseudo into a loop that sets (and,
/// for the STZG variant, zeroes) memory tags over the region. The loop writes
/// two tag granules (32 bytes) per iteration with a post-indexed st2g/stz2g;
/// an odd leading granule is peeled off with a single stg/stzg first.
bool AArch64ExpandPseudoImpl::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(i: 0).getReg();
  Register AddressReg = MI.getOperand(i: 1).getReg();

  MachineFunction *MF = MBB.getParent();

  // The STZG variant also zeroes the tagged memory.
  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(i: 2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    // Peel one 16-byte granule so the loop below can work in 32-byte steps.
    // The post-index immediate is in granule units.
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: OpCode1), DestReg: AddressReg)
        .addReg(RegNo: AddressReg)
        .addReg(RegNo: AddressReg)
        .addImm(Val: 1);
    Size -= 16;
  }
  // Materialize the (remaining) byte count into SizeReg; expand the
  // MOVi64imm pseudo we just created straight away.
  MachineBasicBlock::iterator I =
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::MOVi64imm), DestReg: SizeReg)
          .addImm(Val: Size);
  expandMOVImm(MBB, MBBI: I, BitSize: 64);

  auto LoopBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock());

  MF->insert(MBBI: ++MBB.getIterator(), MBB: LoopBB);
  MF->insert(MBBI: ++LoopBB->getIterator(), MBB: DoneBB);

  // Loop body: store two granules with writeback, count Size down by 32,
  // and loop while it is non-zero.
  BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: OpCode2))
      .addDef(RegNo: AddressReg)
      .addReg(RegNo: AddressReg)
      .addReg(RegNo: AddressReg)
      .addImm(Val: 2)
      .cloneMemRefs(OtherMI: MI)
      .setMIFlags(MI.getFlags());
  BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::SUBSXri))
      .addDef(RegNo: SizeReg)
      .addReg(RegNo: SizeReg)
      .addImm(Val: 16 * 2)
      .addImm(Val: 0);
  BuildMI(BB: LoopBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::Bcc))
      .addImm(Val: AArch64CC::NE)
      .addMBB(MBB: LoopBB)
      .addReg(RegNo: AArch64::NZCV, Flags: RegState::Implicit | RegState::Kill);

  LoopBB->addSuccessor(Succ: LoopBB);
  LoopBB->addSuccessor(Succ: DoneBB);

  // Everything after the pseudo moves to DoneBB, which also inherits the
  // original block's successors.
  DoneBB->splice(Where: DoneBB->end(), Other: &MBB, From: MI, To: MBB.end());
  DoneBB->transferSuccessors(FromMBB: &MBB);

  MBB.addSuccessor(Succ: LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);
  computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, MBB&: *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, MBB&: *DoneBB);

  return true;
}
853
854bool AArch64ExpandPseudoImpl::expandSVESpillFill(
855 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opc,
856 unsigned N) {
857 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
858 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
859 "Unexpected opcode");
860 RegState RState =
861 getDefRegState(B: Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI);
862 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
863 ? AArch64::zsub0
864 : AArch64::psub0;
865 const TargetRegisterInfo *TRI =
866 MBB.getParent()->getSubtarget().getRegisterInfo();
867 MachineInstr &MI = *MBBI;
868 for (unsigned Offset = 0; Offset < N; ++Offset) {
869 int ImmOffset = MI.getOperand(i: 2).getImm() + Offset;
870 bool Kill = (Offset + 1 == N) ? MI.getOperand(i: 1).isKill() : false;
871 assert(ImmOffset >= -256 && ImmOffset < 256 &&
872 "Immediate spill offset out of range");
873 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: Opc))
874 .addReg(RegNo: TRI->getSubReg(Reg: MI.getOperand(i: 0).getReg(), Idx: sub0 + Offset),
875 Flags: RState)
876 .addReg(RegNo: MI.getOperand(i: 1).getReg(), Flags: getKillRegState(B: Kill))
877 .addImm(Val: ImmOffset);
878 }
879 MI.eraseFromParent();
880 return true;
881}
882
883// Create a call with the passed opcode and explicit operands, copying over all
884// the implicit operands from *MBBI, starting at the regmask.
885static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
886 MachineBasicBlock::iterator MBBI,
887 const AArch64InstrInfo *TII,
888 unsigned Opcode,
889 ArrayRef<MachineOperand> ExplicitOps,
890 unsigned RegMaskStartIdx) {
891 // Build the MI, with explicit operands first (including the call target).
892 MachineInstr *Call = BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII->get(Opcode))
893 .add(MOs: ExplicitOps)
894 .getInstr();
895
896 // Register arguments are added during ISel, but cannot be added as explicit
897 // operands of the branch as it expects to be B <target> which is only one
898 // operand. Instead they are implicit operands used by the branch.
899 while (!MBBI->getOperand(i: RegMaskStartIdx).isRegMask()) {
900 const MachineOperand &MOP = MBBI->getOperand(i: RegMaskStartIdx);
901 assert(MOP.isReg() && "can only add register operands");
902 Call->addOperand(Op: MachineOperand::CreateReg(
903 Reg: MOP.getReg(), /*Def=*/isDef: false, /*Implicit=*/isImp: true, /*isKill=*/false,
904 /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
905 RegMaskStartIdx++;
906 }
907 for (const MachineOperand &MO :
908 llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: RegMaskStartIdx))
909 Call->addOperand(Op: MO);
910
911 return Call;
912}
913
914// Create a call to CallTarget, copying over all the operands from *MBBI,
915// starting at the regmask.
916static MachineInstr *createCall(MachineBasicBlock &MBB,
917 MachineBasicBlock::iterator MBBI,
918 const AArch64InstrInfo *TII,
919 MachineOperand &CallTarget,
920 unsigned RegMaskStartIdx) {
921 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
922
923 assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
924 "invalid operand for regular call");
925 return createCallWithOps(MBB, MBBI, TII, Opcode: Opc, ExplicitOps: CallTarget, RegMaskStartIdx);
926}
927
bool AArch64ExpandPseudoImpl::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, if necessary, and
  // - another branch, to the runtime function
  // Mark the sequence as bundle, to avoid passes moving other code in between.
  MachineInstr &MI = *MBBI;
  // Operand 0 is the function called after the original call (RVTarget);
  // operand 1 says whether the `mov x29, x29` marker must be emitted.
  MachineOperand &RVTarget = MI.getOperand(i: 0);
  bool DoEmitMarker = MI.getOperand(i: 1).getImm();
  assert(RVTarget.isGlobal() && "invalid operand for attached call");

  MachineInstr *OriginalCall = nullptr;

  if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
    // ptrauth call: rebuild as BLRA with the target, key, and integer/address
    // discriminator operands explicit; register arguments start at operand 6.
    const MachineOperand &CallTarget = MI.getOperand(i: 2);
    const MachineOperand &Key = MI.getOperand(i: 3);
    const MachineOperand &IntDisc = MI.getOperand(i: 4);
    const MachineOperand &AddrDisc = MI.getOperand(i: 5);

    assert((Key.getImm() == AArch64PACKey::IA ||
            Key.getImm() == AArch64PACKey::IB) &&
           "Invalid auth call key");

    MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};

    OriginalCall = createCallWithOps(MBB, MBBI, TII, Opcode: AArch64::BLRA, ExplicitOps: Ops,
                                     /*RegMaskStartIdx=*/6);
  } else {
    assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
    OriginalCall = createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 2),
                              // Regmask starts after the RV and call targets.
                              /*RegMaskStartIdx=*/3);
  }

  // The marker is `mov x29, x29`, emitted as ORR x29, xzr, x29, LSL #0.
  if (DoEmitMarker)
    BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ORRXrs))
        .addReg(RegNo: AArch64::FP, Flags: RegState::Define)
        .addReg(RegNo: AArch64::XZR)
        .addReg(RegNo: AArch64::FP)
        .addImm(Val: 0);

  // The follow-up branch to the runtime function.
  auto *RVCall = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::BL))
                     .add(MO: RVTarget)
                     .getInstr();

  // Additional call info tracked by the MachineFunction must follow the real
  // call, not the erased pseudo.
  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(Old: &MI, New: OriginalCall);

  MI.eraseFromParent();
  // Bundle everything from the original call through the runtime call so
  // later passes cannot insert code in between.
  finalizeBundle(MBB, FirstMI: OriginalCall->getIterator(),
                 LastMI: std::next(x: RVCall->getIterator()));
  return true;
}
983
bool AArch64ExpandPseudoImpl::expandCALL_BTI(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  // Operand 0 is the call target; the regmask and implicit register
  // arguments follow it.
  MachineInstr *Call = createCall(MBB, MBBI, TII, CallTarget&: MI.getOperand(i: 0),
                                  // Regmask starts after the call target.
                                  /*RegMaskStartIdx=*/1);

  // Carry the pseudo's CFI type over to the real call instruction.
  Call->setCFIType(MF&: *MBB.getParent(), Type: MI.getCFIType());

  MachineInstr *BTI =
      BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::HINT))
          // BTI J so that setjmp can to BR to this.
          .addImm(Val: 36)
          .getInstr();

  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(Old: &MI, New: Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, FirstMI: Call->getIterator(), LastMI: std::next(x: BTI->getIterator()));
  return true;
}
1011
/// Expand a StoreSwiftAsyncContext pseudo: store the async context register
/// CtxReg at [BaseReg, #Offset]. On arm64e the context pointer is first
/// signed with PACDB using an address-discriminated key before being stored;
/// on all other targets a plain store suffices.
bool AArch64ExpandPseudoImpl::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(i: 0).getReg();
  Register BaseReg = MBBI->getOperand(i: 1).getReg();
  int Offset = MBBI->getOperand(i: 2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    // No pointer authentication: a single store is enough. The STRXui
    // immediate is scaled, hence Offset / 8.
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::STRXui))
        .addUse(RegNo: CtxReg)
        .addUse(RegNo: BaseReg)
        .addImm(Val: Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  // ADDXri/SUBXri take an unsigned immediate, so pick the opcode by the sign
  // of Offset and pass its absolute value.
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc), DestReg: AArch64::X16)
      .addUse(RegNo: BaseReg)
      .addImm(Val: abs(x: Offset))
      .addImm(Val: 0)
      .setMIFlag(MachineInstr::FrameSetup);
  // Blend the fixed discriminator into the top 16 bits of the address in X16.
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::MOVKXi), DestReg: AArch64::X16)
      .addUse(RegNo: AArch64::X16)
      .addImm(Val: 0xc31a)
      .addImm(Val: 48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
  // move it somewhere before signing.
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::ORRXrs), DestReg: AArch64::X17)
      .addUse(RegNo: AArch64::XZR)
      .addUse(RegNo: CtxReg)
      .addImm(Val: 0)
      .setMIFlag(MachineInstr::FrameSetup);
  // Sign the context pointer (X17) with the blended discriminator (X16).
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::PACDB), DestReg: AArch64::X17)
      .addUse(RegNo: AArch64::X17)
      .addUse(RegNo: AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  // Store the signed context pointer.
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::STRXui))
      .addUse(RegNo: AArch64::X17)
      .addUse(RegNo: BaseReg)
      .addImm(Val: Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}
1068
/// Expand an STSHH-annotated atomic-store pseudo into an STSHH hint carrying
/// the retention policy immediate, immediately followed by the actual store
/// (STR for relaxed ordering, STLR otherwise), with both bundled so they stay
/// adjacent. Pseudo operands: 0 = value, 1 = address, 2 = ordering (0 means
/// relaxed), 3 = retention policy, 4 = access size in bits.
bool AArch64ExpandPseudoImpl::expandSTSHHAtomicStore(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL(MI.getDebugLoc());

  unsigned Order = MI.getOperand(i: 2).getImm();
  unsigned Policy = MI.getOperand(i: 3).getImm();
  unsigned Size = MI.getOperand(i: 4).getImm();

  bool IsRelaxed = Order == 0;
  unsigned StoreOpc = 0;

  // __ATOMIC_RELAXED uses STR. __ATOMIC_{RELEASE/SEQ_CST} use STLR.
  switch (Size) {
  case 8:
    StoreOpc = IsRelaxed ? AArch64::STRBBui : AArch64::STLRB;
    break;
  case 16:
    StoreOpc = IsRelaxed ? AArch64::STRHHui : AArch64::STLRH;
    break;
  case 32:
    StoreOpc = IsRelaxed ? AArch64::STRWui : AArch64::STLRW;
    break;
  case 64:
    StoreOpc = IsRelaxed ? AArch64::STRXui : AArch64::STLRX;
    break;
  default:
    llvm_unreachable("Unexpected STSHH atomic store size");
  }

  // Emit the hint with the retention policy immediate.
  MachineInstr *Hint = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::STSHH))
                           .addImm(Val: Policy)
                           .getInstr();

  // Emit the associated store instruction.
  Register ValReg = MI.getOperand(i: 0).getReg();

  // Sub-64-bit stores use W-register opcodes, so switch to the 32-bit
  // sub-register of the value register when one exists.
  if (Size < 64) {
    const TargetRegisterInfo *TRI =
        MBB.getParent()->getSubtarget().getRegisterInfo();
    Register SubReg = TRI->getSubReg(Reg: ValReg, Idx: AArch64::sub_32);
    if (SubReg)
      ValReg = SubReg;
  }

  MachineInstrBuilder Store = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: StoreOpc))
                                  .addReg(RegNo: ValReg)
                                  .add(MO: MI.getOperand(i: 1));

  // Relaxed uses base+imm addressing with a zero offset.
  if (IsRelaxed)
    Store.addImm(Val: 0);

  // Preserve memory operands and any implicit uses/defs.
  Store->setMemRefs(MF&: *MBB.getParent(), MemRefs: MI.memoperands());
  transferImpOps(OldMI&: MI, UseMI&: Store, DefMI&: Store);

  // Bundle the hint and store so they remain adjacent.
  finalizeBundle(MBB, FirstMI: Hint->getIterator(), LastMI: std::next(x: Store->getIterator()));

  MI.eraseFromParent();
  return true;
}
1133
/// Split the block around the conditional pseudo at MBBI, creating a diamond:
/// MBB ends with \p Branch (condition operands already attached) targeting
/// CondBB, plus an unconditional fall-through branch to EndBB; CondBB -- which
/// will hold the expanded conditional code -- ends with a branch to EndBB.
/// Callers insert their expansion before CondBB's terminator.
AArch64ExpandPseudoImpl::ConditionalBlocks
AArch64ExpandPseudoImpl::expandConditionalPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
    MachineInstrBuilder &Branch) {
  assert((std::next(MBBI) != MBB.end() ||
          MBB.successors().begin() != MBB.successors().end()) &&
         "Unexpected unreachable in block");

  // Split MBB and create two new blocks:
  // - MBB now contains all instructions before the conditional pseudo.
  // - CondBB contains the conditional pseudo instruction only.
  // - EndBB contains all instructions after the conditional pseudo.
  MachineInstr &PrevMI = *std::prev(x: MBBI);
  MachineBasicBlock *CondBB = MBB.splitAt(SplitInst&: PrevMI, /*UpdateLiveIns*/ true);
  // If the pseudo is the last instruction of CondBB, reuse its (unique)
  // layout successor as EndBB instead of splitting again.
  MachineBasicBlock *EndBB =
      std::next(x: MBBI) == CondBB->end()
          ? *CondBB->successors().begin()
          : CondBB->splitAt(SplitInst&: *MBBI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the branch instruction & create a branch to EndBB.
  Branch.addMBB(MBB: CondBB);
  BuildMI(BB: &MBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::B))
      .addMBB(MBB: EndBB);
  MBB.addSuccessor(Succ: EndBB);

  // Create branch from CondBB to EndBB. Users of this helper should insert new
  // instructions at CondBB.back() -- i.e. before the branch.
  BuildMI(BB: CondBB, MIMD: DL, MCID: TII->get(Opcode: AArch64::B)).addMBB(MBB: EndBB);
  return {.CondBB: *CondBB, .EndBB: *EndBB};
}
1164
/// Expand a restore-ZA pseudo into "if the TPIDR2_EL0 value (operand 0) is
/// zero, call the restore routine". Operand 1 holds the TPIDR2 block pointer
/// (in X0), added to the call as an implicit use; the remaining operands
/// (mainly the regmask) are copied onto the call. Returns the join block so
/// the caller can resume expansion there.
MachineBasicBlock *
AArch64ExpandPseudoImpl::expandRestoreZASave(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  // Compare TPIDR2_EL0 against 0. Restore ZA if TPIDR2_EL0 is zero.
  MachineInstrBuilder Branch =
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::CBZX)).add(MO: MI.getOperand(i: 0));

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
  // Replace the pseudo with a call (BL), inserted before CondBB's terminator.
  MachineInstrBuilder MIB =
      BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::BL));
  // Copy operands (mainly the regmask) from the pseudo.
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MO: MI.getOperand(i: I));
  // Mark the TPIDR2 block pointer (X0) as an implicit use.
  MIB.addReg(RegNo: MI.getOperand(i: 1).getReg(), Flags: RegState::Implicit);

  MI.eraseFromParent();
  return &EndBB;
}
1188
// Immediate mask for the SME ZERO (ZERO_M) instruction with all eight mask
// bits set, i.e. zero the entire ZA array.
static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111;
1190
/// Expand a commit-ZA-save pseudo into "if the TPIDR2_EL0 value (operand 0)
/// is non-zero, call the commit routine, clear TPIDR2_EL0, and optionally
/// zero ZA (operand 1) and ZT0 (operand 2)". Returns the join block so the
/// caller can resume expansion there.
MachineBasicBlock *
AArch64ExpandPseudoImpl::expandCommitZASave(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  [[maybe_unused]] auto *RI = MBB.getParent()->getSubtarget().getRegisterInfo();

  // Compare TPIDR2_EL0 against 0. Commit ZA if TPIDR2_EL0 is non-zero.
  MachineInstrBuilder Branch =
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::CBNZX)).add(MO: MI.getOperand(i: 0));

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
  // Replace the pseudo with a call (BL), inserted before CondBB's terminator.
  MachineInstrBuilder MIB =
      BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::BL));
  // Copy operands (mainly the regmask) from the pseudo.
  for (unsigned I = 3; I < MI.getNumOperands(); ++I)
    MIB.add(MO: MI.getOperand(i: I));
  // Clear TPIDR2_EL0.
  BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::MSR))
      .addImm(Val: AArch64SysReg::TPIDR2_EL0)
      .addReg(RegNo: AArch64::XZR);
  bool ZeroZA = MI.getOperand(i: 1).getImm() != 0;
  bool ZeroZT0 = MI.getOperand(i: 2).getImm() != 0;
  if (ZeroZA) {
    // Zero the whole ZA array; ZAB0 stands in for the full ZA register.
    assert(MI.definesRegister(AArch64::ZAB0, RI) && "should define ZA!");
    BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::ZERO_M))
        .addImm(Val: ZERO_ALL_ZA_MASK)
        .addDef(RegNo: AArch64::ZAB0, Flags: RegState::ImplicitDefine);
  }
  if (ZeroZT0) {
    assert(MI.definesRegister(AArch64::ZT0, RI) && "should define ZT0!");
    BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: DL, MCID: TII->get(Opcode: AArch64::ZERO_T))
        .addDef(RegNo: AArch64::ZT0);
  }

  MI.eraseFromParent();
  return &EndBB;
}
1230
/// Expand an MSRpstatePseudo (conditional smstart/smstop) into a test of the
/// live pstate.sm value and a conditionally-executed MSRpstatesvcrImm1.
/// Returns the join block so the caller can resume expansion there.
MachineBasicBlock *
AArch64ExpandPseudoImpl::expandCondSMToggle(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before a unreachable, just remove the pseudo.
  // Exception handling code generated by Clang may introduce unreachables and it
  // seems unnecessary to restore pstate.sm when that happens. Note that it is
  // not just an optimisation, the code below expects a successor instruction/block
  // in order to split the block at MBBI.
  if (std::next(x: MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into smstart or smstop instruction. The pseudo has the
  // following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>                        <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>                   <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not the
  // expected value for the callee (0 for a normal callee and 1 for a streaming
  // callee).
  unsigned Opc;
  switch (MI.getOperand(i: 2).getImm()) {
  case AArch64SME::Always:
    llvm_unreachable("Should have matched to instruction directly");
  case AArch64SME::IfCallerIsStreaming:
    Opc = AArch64::TBNZW;
    break;
  case AArch64SME::IfCallerIsNonStreaming:
    Opc = AArch64::TBZW;
    break;
  }
  auto PStateSM = MI.getOperand(i: 3).getReg();
  auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  // TB(N)ZW tests a W register, so take the 32-bit sub-register of the
  // pstate.sm copy and test its lowest bit.
  unsigned SMReg32 = TRI->getSubReg(Reg: PStateSM, Idx: AArch64::sub_32);
  MachineInstrBuilder Tbx =
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc)).addReg(RegNo: SMReg32).addImm(Val: 0);

  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch&: Tbx);
  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(BB&: CondBB, I&: CondBB.back(), MIMD: MI.getDebugLoc(),
                                    MCID: TII->get(Opcode: AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MO: MI.getOperand(i: 0));
  MIB.add(MO: MI.getOperand(i: 1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MO: MI.getOperand(i));

  MI.eraseFromParent();
  return &EndBB;
}
1317
1318bool AArch64ExpandPseudoImpl::expandMultiVecPseudo(
1319 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1320 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1321 unsigned ContiguousOp, unsigned StridedOpc) {
1322 MachineInstr &MI = *MBBI;
1323 Register Tuple = MI.getOperand(i: 0).getReg();
1324
1325 auto ContiguousRange = ContiguousClass.getRegisters();
1326 auto StridedRange = StridedClass.getRegisters();
1327 unsigned Opc;
1328 if (llvm::is_contained(Range&: ContiguousRange, Element: Tuple.asMCReg())) {
1329 Opc = ContiguousOp;
1330 } else if (llvm::is_contained(Range&: StridedRange, Element: Tuple.asMCReg())) {
1331 Opc = StridedOpc;
1332 } else
1333 llvm_unreachable("Cannot expand Multi-Vector pseudo");
1334
1335 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: Opc))
1336 .add(MO: MI.getOperand(i: 0))
1337 .add(MO: MI.getOperand(i: 1))
1338 .add(MO: MI.getOperand(i: 2))
1339 .add(MO: MI.getOperand(i: 3));
1340 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1341 MI.eraseFromParent();
1342 return true;
1343}
1344
1345bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
1346 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1347 MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
1348 assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
1349 MachineInstr &MI = *MBBI;
1350 Register ReturnTuple = MI.getOperand(i: 0).getReg();
1351
1352 const TargetRegisterInfo *TRI =
1353 MBB.getParent()->getSubtarget().getRegisterInfo();
1354 for (unsigned I = 0; I < Size; ++I) {
1355 Register FormTupleOpReg = MI.getOperand(i: I + 1).getReg();
1356 Register ReturnTupleSubReg =
1357 TRI->getSubReg(Reg: ReturnTuple, Idx: AArch64::zsub0 + I);
1358 // Add copies to ensure the subregisters remain in the correct order
1359 // for any contigious operation they are used by.
1360 if (FormTupleOpReg != ReturnTupleSubReg)
1361 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ORR_ZZZ))
1362 .addReg(RegNo: ReturnTupleSubReg, Flags: RegState::Define)
1363 .addReg(RegNo: FormTupleOpReg)
1364 .addReg(RegNo: FormTupleOpReg);
1365 }
1366
1367 MI.eraseFromParent();
1368 return true;
1369}
1370
1371/// If MBBI references a pseudo instruction that should be expanded here,
1372/// do the expansion and return true. Otherwise return false.
1373bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
1374 MachineBasicBlock::iterator MBBI,
1375 MachineBasicBlock::iterator &NextMBBI) {
1376 MachineInstr &MI = *MBBI;
1377 unsigned Opcode = MI.getOpcode();
1378
1379 // Check if we can expand the destructive op
1380 int OrigInstr = AArch64::getSVEPseudoMap(Opcode: MI.getOpcode());
1381 if (OrigInstr != -1) {
1382 auto &Orig = TII->get(Opcode: OrigInstr);
1383 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1384 AArch64::NotDestructive) {
1385 return expand_DestructiveOp(MI, MBB, MBBI);
1386 }
1387 }
1388
1389 switch (Opcode) {
1390 default:
1391 break;
1392
1393 case AArch64::BSPv8i8:
1394 case AArch64::BSPv16i8: {
1395 Register DstReg = MI.getOperand(i: 0).getReg();
1396 if (DstReg == MI.getOperand(i: 3).getReg()) {
1397 // Expand to BIT
1398 auto I = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1399 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1400 : AArch64::BITv16i8))
1401 .add(MO: MI.getOperand(i: 0))
1402 .add(MO: MI.getOperand(i: 3))
1403 .add(MO: MI.getOperand(i: 2))
1404 .add(MO: MI.getOperand(i: 1));
1405 transferImpOps(OldMI&: MI, UseMI&: I, DefMI&: I);
1406 } else if (DstReg == MI.getOperand(i: 2).getReg()) {
1407 // Expand to BIF
1408 auto I = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1409 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1410 : AArch64::BIFv16i8))
1411 .add(MO: MI.getOperand(i: 0))
1412 .add(MO: MI.getOperand(i: 2))
1413 .add(MO: MI.getOperand(i: 3))
1414 .add(MO: MI.getOperand(i: 1));
1415 transferImpOps(OldMI&: MI, UseMI&: I, DefMI&: I);
1416 } else {
1417 // Expand to BSL, use additional move if required
1418 if (DstReg == MI.getOperand(i: 1).getReg()) {
1419 auto I =
1420 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1421 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1422 : AArch64::BSLv16i8))
1423 .add(MO: MI.getOperand(i: 0))
1424 .add(MO: MI.getOperand(i: 1))
1425 .add(MO: MI.getOperand(i: 2))
1426 .add(MO: MI.getOperand(i: 3));
1427 transferImpOps(OldMI&: MI, UseMI&: I, DefMI&: I);
1428 } else {
1429 RegState RegState =
1430 getRenamableRegState(B: MI.getOperand(i: 1).isRenamable()) |
1431 getKillRegState(
1432 B: MI.getOperand(i: 1).isKill() &&
1433 MI.getOperand(i: 1).getReg() != MI.getOperand(i: 2).getReg() &&
1434 MI.getOperand(i: 1).getReg() != MI.getOperand(i: 3).getReg());
1435 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1436 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1437 : AArch64::ORRv16i8))
1438 .addReg(RegNo: DstReg,
1439 Flags: RegState::Define |
1440 getRenamableRegState(B: MI.getOperand(i: 0).isRenamable()))
1441 .addReg(RegNo: MI.getOperand(i: 1).getReg(), Flags: RegState)
1442 .addReg(RegNo: MI.getOperand(i: 1).getReg(), Flags: RegState);
1443 auto I2 =
1444 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1445 MCID: TII->get(Opcode: Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1446 : AArch64::BSLv16i8))
1447 .add(MO: MI.getOperand(i: 0))
1448 .addReg(RegNo: DstReg,
1449 Flags: RegState::Kill | getRenamableRegState(
1450 B: MI.getOperand(i: 0).isRenamable()))
1451 .add(MO: MI.getOperand(i: 2))
1452 .add(MO: MI.getOperand(i: 3));
1453 transferImpOps(OldMI&: MI, UseMI&: I2, DefMI&: I2);
1454 }
1455 }
1456 MI.eraseFromParent();
1457 return true;
1458 }
1459
1460 case AArch64::ADDWrr:
1461 case AArch64::SUBWrr:
1462 case AArch64::ADDXrr:
1463 case AArch64::SUBXrr:
1464 case AArch64::ADDSWrr:
1465 case AArch64::SUBSWrr:
1466 case AArch64::ADDSXrr:
1467 case AArch64::SUBSXrr:
1468 case AArch64::ANDWrr:
1469 case AArch64::ANDXrr:
1470 case AArch64::BICWrr:
1471 case AArch64::BICXrr:
1472 case AArch64::ANDSWrr:
1473 case AArch64::ANDSXrr:
1474 case AArch64::BICSWrr:
1475 case AArch64::BICSXrr:
1476 case AArch64::EONWrr:
1477 case AArch64::EONXrr:
1478 case AArch64::EORWrr:
1479 case AArch64::EORXrr:
1480 case AArch64::ORNWrr:
1481 case AArch64::ORNXrr:
1482 case AArch64::ORRWrr:
1483 case AArch64::ORRXrr: {
1484 unsigned Opcode;
1485 switch (MI.getOpcode()) {
1486 default:
1487 return false;
1488 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
1489 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
1490 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
1491 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
1492 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
1493 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
1494 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
1495 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
1496 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
1497 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
1498 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
1499 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
1500 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
1501 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
1502 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
1503 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
1504 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
1505 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
1506 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
1507 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
1508 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
1509 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
1510 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
1511 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
1512 }
1513 MachineFunction &MF = *MBB.getParent();
1514 // Try to create new inst without implicit operands added.
1515 MachineInstr *NewMI = MF.CreateMachineInstr(
1516 MCID: TII->get(Opcode), DL: MI.getDebugLoc(), /*NoImplicit=*/true);
1517 MBB.insert(I: MBBI, MI: NewMI);
1518 MachineInstrBuilder MIB1(MF, NewMI);
1519 MIB1->setPCSections(MF, MD: MI.getPCSections());
1520 MIB1.addReg(RegNo: MI.getOperand(i: 0).getReg(), Flags: RegState::Define)
1521 .add(MO: MI.getOperand(i: 1))
1522 .add(MO: MI.getOperand(i: 2))
1523 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0));
1524 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB1);
1525 if (auto DebugNumber = MI.peekDebugInstrNum())
1526 NewMI->setDebugInstrNum(DebugNumber);
1527 MI.eraseFromParent();
1528 return true;
1529 }
1530
1531 case AArch64::LOADgot: {
1532 MachineFunction *MF = MBB.getParent();
1533 Register DstReg = MI.getOperand(i: 0).getReg();
1534 const MachineOperand &MO1 = MI.getOperand(i: 1);
1535 unsigned Flags = MO1.getTargetFlags();
1536
1537 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1538 // Tiny codemodel expand to LDR
1539 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1540 MCID: TII->get(Opcode: AArch64::LDRXl), DestReg: DstReg);
1541
1542 if (MO1.isGlobal()) {
1543 MIB.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags);
1544 } else if (MO1.isSymbol()) {
1545 MIB.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags);
1546 } else {
1547 assert(MO1.isCPI() &&
1548 "Only expect globals, externalsymbols, or constant pools");
1549 MIB.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), TargetFlags: Flags);
1550 }
1551 } else {
1552 // Small codemodel expand into ADRP + LDR.
1553 MachineFunction &MF = *MI.getParent()->getParent();
1554 DebugLoc DL = MI.getDebugLoc();
1555 MachineInstrBuilder MIB1 =
1556 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP), DestReg: DstReg);
1557
1558 MachineInstrBuilder MIB2;
1559 if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1560 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1561 unsigned Reg32 = TRI->getSubReg(Reg: DstReg, Idx: AArch64::sub_32);
1562 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::LDRWui))
1563 .addDef(RegNo: Reg32)
1564 .addReg(RegNo: DstReg, Flags: RegState::Kill)
1565 .addReg(RegNo: DstReg, Flags: RegState::Implicit);
1566 } else {
1567 Register DstReg = MI.getOperand(i: 0).getReg();
1568 MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::LDRXui))
1569 .add(MO: MI.getOperand(i: 0))
1570 .addUse(RegNo: DstReg, Flags: RegState::Kill);
1571 }
1572
1573 if (MO1.isGlobal()) {
1574 MIB1.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags | AArch64II::MO_PAGE);
1575 MIB2.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0,
1576 TargetFlags: Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1577 } else if (MO1.isSymbol()) {
1578 MIB1.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags | AArch64II::MO_PAGE);
1579 MIB2.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags |
1580 AArch64II::MO_PAGEOFF |
1581 AArch64II::MO_NC);
1582 } else {
1583 assert(MO1.isCPI() &&
1584 "Only expect globals, externalsymbols, or constant pools");
1585 MIB1.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(),
1586 TargetFlags: Flags | AArch64II::MO_PAGE);
1587 MIB2.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(),
1588 TargetFlags: Flags | AArch64II::MO_PAGEOFF |
1589 AArch64II::MO_NC);
1590 }
1591
1592 // If the LOADgot instruction has a debug-instr-number, annotate the
1593 // LDRWui instruction that it is expanded to with the same
1594 // debug-instr-number to preserve debug information.
1595 if (MI.peekDebugInstrNum() != 0)
1596 MIB2->setDebugInstrNum(MI.peekDebugInstrNum());
1597 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2);
1598 }
1599 MI.eraseFromParent();
1600 return true;
1601 }
1602 case AArch64::MOVaddrBA: {
1603 MachineFunction &MF = *MI.getParent()->getParent();
1604 if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1605 // blockaddress expressions have to come from a constant pool because the
1606 // largest addend (and hence offset within a function) allowed for ADRP is
1607 // only 8MB.
1608 const BlockAddress *BA = MI.getOperand(i: 1).getBlockAddress();
1609 assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1610
1611 MachineConstantPool *MCP = MF.getConstantPool();
1612 unsigned CPIdx = MCP->getConstantPoolIndex(C: BA, Alignment: Align(8));
1613
1614 Register DstReg = MI.getOperand(i: 0).getReg();
1615 auto MIB1 =
1616 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP), DestReg: DstReg)
1617 .addConstantPoolIndex(Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE);
1618 auto MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1619 MCID: TII->get(Opcode: AArch64::LDRXui), DestReg: DstReg)
1620 .addUse(RegNo: DstReg)
1621 .addConstantPoolIndex(
1622 Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1623 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2);
1624 MI.eraseFromParent();
1625 return true;
1626 }
1627 }
1628 [[fallthrough]];
1629 case AArch64::MOVaddr:
1630 case AArch64::MOVaddrJT:
1631 case AArch64::MOVaddrCP:
1632 case AArch64::MOVaddrTLS:
1633 case AArch64::MOVaddrEXT: {
1634 // Expand into ADRP + ADD.
1635 Register DstReg = MI.getOperand(i: 0).getReg();
1636 assert(DstReg != AArch64::XZR);
1637 MachineInstrBuilder MIB1 =
1638 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP), DestReg: DstReg)
1639 .add(MO: MI.getOperand(i: 1));
1640
1641 if (MI.getOperand(i: 1).getTargetFlags() & AArch64II::MO_TAGGED) {
1642 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1643 // We do so by creating a MOVK that sets bits 48-63 of the register to
1644 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1645 // the small code model so we can assume a binary size of <= 4GB, which
1646 // makes the untagged PC relative offset positive. The binary must also be
1647 // loaded into address range [0, 2^48). Both of these properties need to
1648 // be ensured at runtime when using tagged addresses.
1649 auto Tag = MI.getOperand(i: 1);
1650 Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1651 Tag.setOffset(0x100000000);
1652 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::MOVKXi), DestReg: DstReg)
1653 .addReg(RegNo: DstReg)
1654 .add(MO: Tag)
1655 .addImm(Val: 48);
1656 }
1657
1658 MachineInstrBuilder MIB2 =
1659 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADDXri))
1660 .add(MO: MI.getOperand(i: 0))
1661 .addReg(RegNo: DstReg)
1662 .add(MO: MI.getOperand(i: 2))
1663 .addImm(Val: 0);
1664
1665 transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2);
1666 MI.eraseFromParent();
1667 return true;
1668 }
1669 case AArch64::ADDlowTLS:
1670 // Produce a plain ADD
1671 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADDXri))
1672 .add(MO: MI.getOperand(i: 0))
1673 .add(MO: MI.getOperand(i: 1))
1674 .add(MO: MI.getOperand(i: 2))
1675 .addImm(Val: 0);
1676 MI.eraseFromParent();
1677 return true;
1678
1679 case AArch64::MOVbaseTLS: {
1680 Register DstReg = MI.getOperand(i: 0).getReg();
1681 auto SysReg = AArch64SysReg::TPIDR_EL0;
1682 MachineFunction *MF = MBB.getParent();
1683 if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1684 SysReg = AArch64SysReg::TPIDR_EL3;
1685 else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1686 SysReg = AArch64SysReg::TPIDR_EL2;
1687 else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1688 SysReg = AArch64SysReg::TPIDR_EL1;
1689 else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1690 SysReg = AArch64SysReg::TPIDRRO_EL0;
1691 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::MRS), DestReg: DstReg)
1692 .addImm(Val: SysReg);
1693 MI.eraseFromParent();
1694 return true;
1695 }
1696
1697 case AArch64::MOVi32imm:
1698 return expandMOVImm(MBB, MBBI, BitSize: 32);
1699 case AArch64::MOVi64imm:
1700 return expandMOVImm(MBB, MBBI, BitSize: 64);
1701 case AArch64::RET_ReallyLR: {
1702 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1703 // function and missing live-ins. We are fine in practice because callee
1704 // saved register handling ensures the register value is restored before
1705 // RET, but we need the undef flag here to appease the MachineVerifier
1706 // liveness checks.
1707 MachineInstrBuilder MIB =
1708 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::RET))
1709 .addReg(RegNo: AArch64::LR, Flags: RegState::Undef);
1710 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1711 MI.eraseFromParent();
1712 return true;
1713 }
1714 case AArch64::CMP_SWAP_8:
1715 return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRB, StlrOp: AArch64::STLXRB,
1716 CmpOp: AArch64::SUBSWrx,
1717 ExtendImm: AArch64_AM::getArithExtendImm(ET: AArch64_AM::UXTB, Imm: 0),
1718 ZeroReg: AArch64::WZR, NextMBBI);
1719 case AArch64::CMP_SWAP_16:
1720 return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRH, StlrOp: AArch64::STLXRH,
1721 CmpOp: AArch64::SUBSWrx,
1722 ExtendImm: AArch64_AM::getArithExtendImm(ET: AArch64_AM::UXTH, Imm: 0),
1723 ZeroReg: AArch64::WZR, NextMBBI);
1724 case AArch64::CMP_SWAP_32:
1725 return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRW, StlrOp: AArch64::STLXRW,
1726 CmpOp: AArch64::SUBSWrs,
1727 ExtendImm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0),
1728 ZeroReg: AArch64::WZR, NextMBBI);
1729 case AArch64::CMP_SWAP_64:
1730 return expandCMP_SWAP(MBB, MBBI,
1731 LdarOp: AArch64::LDAXRX, StlrOp: AArch64::STLXRX, CmpOp: AArch64::SUBSXrs,
1732 ExtendImm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0),
1733 ZeroReg: AArch64::XZR, NextMBBI);
1734 case AArch64::CMP_SWAP_128:
1735 case AArch64::CMP_SWAP_128_RELEASE:
1736 case AArch64::CMP_SWAP_128_ACQUIRE:
1737 case AArch64::CMP_SWAP_128_MONOTONIC:
1738 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1739
1740 case AArch64::AESMCrrTied:
1741 case AArch64::AESIMCrrTied: {
1742 MachineInstrBuilder MIB =
1743 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1744 MCID: TII->get(Opcode: Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1745 AArch64::AESIMCrr))
1746 .add(MO: MI.getOperand(i: 0))
1747 .add(MO: MI.getOperand(i: 1));
1748 transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB);
1749 MI.eraseFromParent();
1750 return true;
1751 }
1752 case AArch64::IRGstack: {
1753 MachineFunction &MF = *MBB.getParent();
1754 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1755 const AArch64FrameLowering *TFI =
1756 MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1757
1758 // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1759 // almost always point to SP-after-prologue; if not, emit a longer
1760 // instruction sequence.
1761 int BaseOffset = -AFI->getTaggedBasePointerOffset();
1762 Register FrameReg;
1763 StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1764 MF, ObjectOffset: BaseOffset, isFixed: false /*isFixed*/, StackID: TargetStackID::Default /*StackID*/,
1765 FrameReg,
1766 /*PreferFP=*/false,
1767 /*ForSimm=*/true);
1768 Register SrcReg = FrameReg;
1769 if (FrameRegOffset) {
1770 // Use output register as temporary.
1771 SrcReg = MI.getOperand(i: 0).getReg();
1772 emitFrameOffset(MBB, MBBI: &MI, DL: MI.getDebugLoc(), DestReg: SrcReg, SrcReg: FrameReg,
1773 Offset: FrameRegOffset, TII);
1774 }
1775 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::IRG))
1776 .add(MO: MI.getOperand(i: 0))
1777 .addUse(RegNo: SrcReg)
1778 .add(MO: MI.getOperand(i: 2));
1779 MI.eraseFromParent();
1780 return true;
1781 }
1782 case AArch64::TAGPstack: {
1783 int64_t Offset = MI.getOperand(i: 2).getImm();
1784 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
1785 MCID: TII->get(Opcode: Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1786 .add(MO: MI.getOperand(i: 0))
1787 .add(MO: MI.getOperand(i: 1))
1788 .addImm(Val: std::abs(i: Offset))
1789 .add(MO: MI.getOperand(i: 4));
1790 MI.eraseFromParent();
1791 return true;
1792 }
1793 case AArch64::STGloop_wback:
1794 case AArch64::STZGloop_wback:
1795 return expandSetTagLoop(MBB, MBBI, NextMBBI);
1796 case AArch64::STGloop:
1797 case AArch64::STZGloop:
1798 report_fatal_error(
1799 reason: "Non-writeback variants of STGloop / STZGloop should not "
1800 "survive past PrologEpilogInserter.");
1801 case AArch64::STR_ZZZZXI:
1802 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
1803 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 4);
1804 case AArch64::STR_ZZZXI:
1805 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 3);
1806 case AArch64::STR_ZZXI:
1807 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
1808 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 2);
1809 case AArch64::STR_PPXI:
1810 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_PXI, N: 2);
1811 case AArch64::LDR_ZZZZXI:
1812 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
1813 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 4);
1814 case AArch64::LDR_ZZZXI:
1815 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 3);
1816 case AArch64::LDR_ZZXI:
1817 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
1818 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 2);
1819 case AArch64::LDR_PPXI:
1820 return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_PXI, N: 2);
1821 case AArch64::BLR_RVMARKER:
1822 case AArch64::BLRA_RVMARKER:
1823 return expandCALL_RVMARKER(MBB, MBBI);
1824 case AArch64::BLR_BTI:
1825 return expandCALL_BTI(MBB, MBBI);
1826 case AArch64::StoreSwiftAsyncContext:
1827 return expandStoreSwiftAsyncContext(MBB, MBBI);
1828 case AArch64::STSHH_ATOMIC_STORE_SZ:
1829 return expandSTSHHAtomicStore(MBB, MBBI);
1830 case AArch64::RestoreZAPseudo:
1831 case AArch64::CommitZASavePseudo:
1832 case AArch64::MSRpstatePseudo: {
1833 auto *NewMBB = [&] {
1834 switch (Opcode) {
1835 case AArch64::RestoreZAPseudo:
1836 return expandRestoreZASave(MBB, MBBI);
1837 case AArch64::CommitZASavePseudo:
1838 return expandCommitZASave(MBB, MBBI);
1839 case AArch64::MSRpstatePseudo:
1840 return expandCondSMToggle(MBB, MBBI);
1841 default:
1842 llvm_unreachable("Unexpected conditional pseudo!");
1843 }
1844 }();
1845 if (NewMBB != &MBB)
1846 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1847 return true;
1848 }
1849 case AArch64::InOutZAUsePseudo:
1850 case AArch64::RequiresZASavePseudo:
1851 case AArch64::RequiresZT0SavePseudo:
1852 case AArch64::SMEStateAllocPseudo:
1853 case AArch64::COALESCER_BARRIER_FPR16:
1854 case AArch64::COALESCER_BARRIER_FPR32:
1855 case AArch64::COALESCER_BARRIER_FPR64:
1856 case AArch64::COALESCER_BARRIER_FPR128:
1857 MI.eraseFromParent();
1858 return true;
1859 case AArch64::LD1B_2Z_IMM_PSEUDO:
1860 return expandMultiVecPseudo(
1861 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1862 ContiguousOp: AArch64::LD1B_2Z_IMM, StridedOpc: AArch64::LD1B_2Z_STRIDED_IMM);
1863 case AArch64::LD1H_2Z_IMM_PSEUDO:
1864 return expandMultiVecPseudo(
1865 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1866 ContiguousOp: AArch64::LD1H_2Z_IMM, StridedOpc: AArch64::LD1H_2Z_STRIDED_IMM);
1867 case AArch64::LD1W_2Z_IMM_PSEUDO:
1868 return expandMultiVecPseudo(
1869 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1870 ContiguousOp: AArch64::LD1W_2Z_IMM, StridedOpc: AArch64::LD1W_2Z_STRIDED_IMM);
1871 case AArch64::LD1D_2Z_IMM_PSEUDO:
1872 return expandMultiVecPseudo(
1873 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1874 ContiguousOp: AArch64::LD1D_2Z_IMM, StridedOpc: AArch64::LD1D_2Z_STRIDED_IMM);
1875 case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1876 return expandMultiVecPseudo(
1877 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1878 ContiguousOp: AArch64::LDNT1B_2Z_IMM, StridedOpc: AArch64::LDNT1B_2Z_STRIDED_IMM);
1879 case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1880 return expandMultiVecPseudo(
1881 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1882 ContiguousOp: AArch64::LDNT1H_2Z_IMM, StridedOpc: AArch64::LDNT1H_2Z_STRIDED_IMM);
1883 case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1884 return expandMultiVecPseudo(
1885 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1886 ContiguousOp: AArch64::LDNT1W_2Z_IMM, StridedOpc: AArch64::LDNT1W_2Z_STRIDED_IMM);
1887 case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1888 return expandMultiVecPseudo(
1889 MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass,
1890 ContiguousOp: AArch64::LDNT1D_2Z_IMM, StridedOpc: AArch64::LDNT1D_2Z_STRIDED_IMM);
1891 case AArch64::LD1B_2Z_PSEUDO:
1892 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1893 StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1B_2Z,
1894 StridedOpc: AArch64::LD1B_2Z_STRIDED);
1895 case AArch64::LD1H_2Z_PSEUDO:
1896 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1897 StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1H_2Z,
1898 StridedOpc: AArch64::LD1H_2Z_STRIDED);
1899 case AArch64::LD1W_2Z_PSEUDO:
1900 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1901 StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1W_2Z,
1902 StridedOpc: AArch64::LD1W_2Z_STRIDED);
1903 case AArch64::LD1D_2Z_PSEUDO:
1904 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1905 StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1D_2Z,
1906 StridedOpc: AArch64::LD1D_2Z_STRIDED);
1907 case AArch64::LDNT1B_2Z_PSEUDO:
1908 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1909 StridedClass: AArch64::ZPR2StridedRegClass,
1910 ContiguousOp: AArch64::LDNT1B_2Z, StridedOpc: AArch64::LDNT1B_2Z_STRIDED);
1911 case AArch64::LDNT1H_2Z_PSEUDO:
1912 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1913 StridedClass: AArch64::ZPR2StridedRegClass,
1914 ContiguousOp: AArch64::LDNT1H_2Z, StridedOpc: AArch64::LDNT1H_2Z_STRIDED);
1915 case AArch64::LDNT1W_2Z_PSEUDO:
1916 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1917 StridedClass: AArch64::ZPR2StridedRegClass,
1918 ContiguousOp: AArch64::LDNT1W_2Z, StridedOpc: AArch64::LDNT1W_2Z_STRIDED);
1919 case AArch64::LDNT1D_2Z_PSEUDO:
1920 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass,
1921 StridedClass: AArch64::ZPR2StridedRegClass,
1922 ContiguousOp: AArch64::LDNT1D_2Z, StridedOpc: AArch64::LDNT1D_2Z_STRIDED);
1923 case AArch64::LD1B_4Z_IMM_PSEUDO:
1924 return expandMultiVecPseudo(
1925 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1926 ContiguousOp: AArch64::LD1B_4Z_IMM, StridedOpc: AArch64::LD1B_4Z_STRIDED_IMM);
1927 case AArch64::LD1H_4Z_IMM_PSEUDO:
1928 return expandMultiVecPseudo(
1929 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1930 ContiguousOp: AArch64::LD1H_4Z_IMM, StridedOpc: AArch64::LD1H_4Z_STRIDED_IMM);
1931 case AArch64::LD1W_4Z_IMM_PSEUDO:
1932 return expandMultiVecPseudo(
1933 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1934 ContiguousOp: AArch64::LD1W_4Z_IMM, StridedOpc: AArch64::LD1W_4Z_STRIDED_IMM);
1935 case AArch64::LD1D_4Z_IMM_PSEUDO:
1936 return expandMultiVecPseudo(
1937 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1938 ContiguousOp: AArch64::LD1D_4Z_IMM, StridedOpc: AArch64::LD1D_4Z_STRIDED_IMM);
1939 case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1940 return expandMultiVecPseudo(
1941 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1942 ContiguousOp: AArch64::LDNT1B_4Z_IMM, StridedOpc: AArch64::LDNT1B_4Z_STRIDED_IMM);
1943 case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1944 return expandMultiVecPseudo(
1945 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1946 ContiguousOp: AArch64::LDNT1H_4Z_IMM, StridedOpc: AArch64::LDNT1H_4Z_STRIDED_IMM);
1947 case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1948 return expandMultiVecPseudo(
1949 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1950 ContiguousOp: AArch64::LDNT1W_4Z_IMM, StridedOpc: AArch64::LDNT1W_4Z_STRIDED_IMM);
1951 case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1952 return expandMultiVecPseudo(
1953 MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass,
1954 ContiguousOp: AArch64::LDNT1D_4Z_IMM, StridedOpc: AArch64::LDNT1D_4Z_STRIDED_IMM);
1955 case AArch64::LD1B_4Z_PSEUDO:
1956 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1957 StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1B_4Z,
1958 StridedOpc: AArch64::LD1B_4Z_STRIDED);
1959 case AArch64::LD1H_4Z_PSEUDO:
1960 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1961 StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1H_4Z,
1962 StridedOpc: AArch64::LD1H_4Z_STRIDED);
1963 case AArch64::LD1W_4Z_PSEUDO:
1964 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1965 StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1W_4Z,
1966 StridedOpc: AArch64::LD1W_4Z_STRIDED);
1967 case AArch64::LD1D_4Z_PSEUDO:
1968 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1969 StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1D_4Z,
1970 StridedOpc: AArch64::LD1D_4Z_STRIDED);
1971 case AArch64::LDNT1B_4Z_PSEUDO:
1972 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1973 StridedClass: AArch64::ZPR4StridedRegClass,
1974 ContiguousOp: AArch64::LDNT1B_4Z, StridedOpc: AArch64::LDNT1B_4Z_STRIDED);
1975 case AArch64::LDNT1H_4Z_PSEUDO:
1976 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1977 StridedClass: AArch64::ZPR4StridedRegClass,
1978 ContiguousOp: AArch64::LDNT1H_4Z, StridedOpc: AArch64::LDNT1H_4Z_STRIDED);
1979 case AArch64::LDNT1W_4Z_PSEUDO:
1980 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1981 StridedClass: AArch64::ZPR4StridedRegClass,
1982 ContiguousOp: AArch64::LDNT1W_4Z, StridedOpc: AArch64::LDNT1W_4Z_STRIDED);
1983 case AArch64::LDNT1D_4Z_PSEUDO:
1984 return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass,
1985 StridedClass: AArch64::ZPR4StridedRegClass,
1986 ContiguousOp: AArch64::LDNT1D_4Z, StridedOpc: AArch64::LDNT1D_4Z_STRIDED);
1987 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
1988 return expandFormTuplePseudo(MBB, MBBI, NextMBBI, Size: 2);
1989 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
1990 return expandFormTuplePseudo(MBB, MBBI, NextMBBI, Size: 4);
1991 case AArch64::EON_ZZZ:
1992 case AArch64::NAND_ZZZ:
1993 case AArch64::NOR_ZZZ:
1994 return expandSVEBitwisePseudo(MI, MBB, MBBI);
1995 }
1996 return false;
1997}
1998
1999/// Iterate over the instructions in basic block MBB and expand any
2000/// pseudo instructions. Return true if anything was modified.
2001bool AArch64ExpandPseudoImpl::expandMBB(MachineBasicBlock &MBB) {
2002 bool Modified = false;
2003
2004 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2005 while (MBBI != E) {
2006 MachineBasicBlock::iterator NMBBI = std::next(x: MBBI);
2007 Modified |= expandMI(MBB, MBBI, NextMBBI&: NMBBI);
2008 MBBI = NMBBI;
2009 }
2010
2011 return Modified;
2012}
2013
2014bool AArch64ExpandPseudoImpl::run(MachineFunction &MF) {
2015 TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
2016
2017 bool Modified = false;
2018 for (auto &MBB : MF)
2019 Modified |= expandMBB(MBB);
2020 return Modified;
2021}
2022
2023bool AArch64ExpandPseudoLegacy::runOnMachineFunction(MachineFunction &MF) {
2024 return AArch64ExpandPseudoImpl().run(MF);
2025}
2026
2027/// Returns an instance of the pseudo instruction expansion pass.
2028FunctionPass *llvm::createAArch64ExpandPseudoLegacyPass() {
2029 return new AArch64ExpandPseudoLegacy();
2030}
2031
2032PreservedAnalyses
2033AArch64ExpandPseudoPass::run(MachineFunction &MF,
2034 MachineFunctionAnalysisManager &MFAM) {
2035 const bool Changed = AArch64ExpandPseudoImpl().run(MF);
2036 if (!Changed)
2037 return PreservedAnalyses::all();
2038 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
2039 PA.preserveSet<CFGAnalyses>();
2040 return PA;
2041}
2042