//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMultiVecPseudo(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            TargetRegisterClass ContiguousClass,
                            TargetRegisterClass StridedClass,
                            unsigned ContiguousOpc, unsigned StridedOpc);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::ORRWrs:
    case AArch64::ORRXrs: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
              .addReg(DstReg, RegState::Define |
                                  getDeadRegState(DstIsDead && LastItem) |
                                  RenamableState)
              .addReg(DstReg)
              .addReg(DstReg)
              .addImm(I->Op2));
    } break;
    case AArch64::ANDXri:
    case AArch64::EORXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

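/// Expand a CMP_SWAP pseudo into a load-exclusive/store-exclusive loop:
/// load the current value exclusively, compare it against the desired value,
/// and conditionally store the new value exclusively, retrying until the
/// store-exclusive succeeds.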
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

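/// Expand a 128-bit CMP_SWAP pseudo into an LDXP/STXP loop: the low and high
/// halves are compared separately and the results combined with CSINC, while
/// a dedicated failure block stores the loaded value back so that a
/// store-exclusive still executes on the failure path.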
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand;
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode.
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX.
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm ||
            DType == AArch64::DestructiveBinaryCommWithRev) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register.
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is same as Dst.
    DOPIdx = 0;

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm ||
         DType == AArch64::DestructiveBinaryCommWithRev) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

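/// Expand the STGloop_wback/STZGloop_wback pseudos into a loop of post-indexed
/// ST2G/STZ2G instructions that tag (and optionally zero) 32 bytes per
/// iteration, preceded by a single 16-byte store when the total size is not a
/// multiple of 32 bytes.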
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

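/// Expand a spill or fill of an SVE Z- or P-register tuple into N single
/// LDR/STR instructions at consecutive immediate offsets.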
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
          Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
         "Unexpected opcode");
  unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
                        ? RegState::Define
                        : 0;
  unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
                      ? AArch64::zsub0
                      : AArch64::psub0;
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
                RState)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

// Create a call with the passed opcode and explicit operands, copying over all
// the implicit operands from *MBBI, starting at the regmask.
static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const AArch64InstrInfo *TII,
                                       unsigned Opcode,
                                       ArrayRef<MachineOperand> ExplicitOps,
                                       unsigned RegMaskStartIdx) {
  // Build the MI, with explicit operands first (including the call target).
  MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
                           .add(ExplicitOps)
                           .getInstr();

  // Register arguments are added during ISel, but cannot be added as explicit
  // operands of the branch, as it expects to be "B <target>", which has only
  // one operand. Instead they are implicit operands used by the branch.
  while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
    const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    Call->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
        /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
    Call->addOperand(MO);

  return Call;
}

// Create a call to CallTarget, copying over all the operands from *MBBI,
// starting at the regmask.
static MachineInstr *createCall(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const AArch64InstrInfo *TII,
                                MachineOperand &CallTarget,
                                unsigned RegMaskStartIdx) {
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;

  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  MachineOperand &RVTarget = MI.getOperand(0);
  assert(RVTarget.isGlobal() && "invalid operand for attached call");

  MachineInstr *OriginalCall = nullptr;

  if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
    // ptrauth call.
    const MachineOperand &CallTarget = MI.getOperand(1);
    const MachineOperand &Key = MI.getOperand(2);
    const MachineOperand &IntDisc = MI.getOperand(3);
    const MachineOperand &AddrDisc = MI.getOperand(4);

    assert((Key.getImm() == AArch64PACKey::IA ||
            Key.getImm() == AArch64PACKey::IB) &&
           "Invalid auth call key");

    MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};

    OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
                                     /*RegMaskStartIdx=*/5);
  } else {
    assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
    OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
                              // Regmask starts after the RV and call targets.
                              /*RegMaskStartIdx=*/2);
  }

  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
      .addReg(AArch64::FP, RegState::Define)
      .addReg(AArch64::XZR)
      .addReg(AArch64::FP)
      .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
                                  // Regmask starts after the call target.
                                  /*RegMaskStartIdx=*/1);

  Call->setCFIType(*MBB.getParent(), MI.getCFIType());

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

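/// Expand the StoreSwiftAsyncContext pseudo. Outside of arm64e this is a
/// plain store of the context register; on arm64e the context is first signed
/// with PACDB using an address-discriminated discriminator before the store.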
bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
  // so move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

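/// Expand the RestoreZA pseudo by splitting the block: a CBZ on the saved
/// TPIDR2_EL0 value branches to a new block holding the call (BL) to the
/// restore routine, which then rejoins the remainder of the original block.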
MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  assert((std::next(MBBI) != MBB.end() ||
          MI.getParent()->successors().begin() !=
              MI.getParent()->successors().end()) &&
         "Unexpected unreachable in block that restores ZA");

  // Compare TPIDR2_EL0 value against 0.
  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
                                .add(MI.getOperand(0));

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before RestoreZAPseudo.
  //  - SMBB contains the RestoreZAPseudo instruction only.
  //  - EndBB contains all instructions after RestoreZAPseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
  Cbz.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

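/// Expand the conditional streaming-mode toggle pseudo (MSRpstatePseudo):
/// test the current pstate.sm value with TBZ/TBNZ and branch around an
/// SMSTART/SMSTOP that only executes when the mode actually needs to change.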
MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens. Note that it is not just an optimisation: the code below expects
  // a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into an smstart or smstop instruction. The pseudo has
  // the following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>  <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>  <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not
  // the expected value for the callee (0 for a normal callee and 1 for a
  // streaming callee).
  unsigned Opc;
  switch (MI.getOperand(2).getImm()) {
  case AArch64SME::Always:
    llvm_unreachable("Should have matched to instruction directly");
  case AArch64SME::IfCallerIsStreaming:
    Opc = AArch64::TBNZW;
    break;
  case AArch64SME::IfCallerIsNonStreaming:
    Opc = AArch64::TBZW;
    break;
  }
  auto PStateSM = MI.getOperand(3).getReg();
  auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
  //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
  //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Tbx.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

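/// Expand a multi-vector pseudo by choosing between its contiguous and
/// strided forms, depending on which register class the allocated destination
/// tuple belongs to.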
bool AArch64ExpandPseudo::expandMultiVecPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
    unsigned ContiguousOp, unsigned StridedOpc) {
  MachineInstr &MI = *MBBI;
  Register Tuple = MI.getOperand(0).getReg();

  auto ContiguousRange = ContiguousClass.getRegisters();
  auto StridedRange = StridedClass.getRegisters();
  unsigned Opc;
  if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
    Opc = ContiguousOp;
  } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
    Opc = StridedOpc;
  } else
    llvm_unreachable("Cannot expand Multi-Vector pseudo");

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
                                .add(MI.getOperand(0))
                                .add(MI.getOperand(1))
                                .add(MI.getOperand(2))
                                .add(MI.getOperand(3));
  transferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

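  // A BSP pseudo is lowered to BIT, BIF, or BSL depending on which source
  // register was allocated to the destination, with an extra move when the
  // destination matches none of the sources.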
  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

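  // Expand the plain register-register ALU forms into their shifted-register
  // equivalents with an LSL #0 shift amount.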
1224 | case AArch64::ADDWrr: |
1225 | case AArch64::SUBWrr: |
1226 | case AArch64::ADDXrr: |
1227 | case AArch64::SUBXrr: |
1228 | case AArch64::ADDSWrr: |
1229 | case AArch64::SUBSWrr: |
1230 | case AArch64::ADDSXrr: |
1231 | case AArch64::SUBSXrr: |
1232 | case AArch64::ANDWrr: |
1233 | case AArch64::ANDXrr: |
1234 | case AArch64::BICWrr: |
1235 | case AArch64::BICXrr: |
1236 | case AArch64::ANDSWrr: |
1237 | case AArch64::ANDSXrr: |
1238 | case AArch64::BICSWrr: |
1239 | case AArch64::BICSXrr: |
1240 | case AArch64::EONWrr: |
1241 | case AArch64::EONXrr: |
1242 | case AArch64::EORWrr: |
1243 | case AArch64::EORXrr: |
1244 | case AArch64::ORNWrr: |
1245 | case AArch64::ORNXrr: |
1246 | case AArch64::ORRWrr: |
1247 | case AArch64::ORRXrr: { |
1248 | unsigned Opcode; |
1249 | switch (MI.getOpcode()) { |
1250 | default: |
1251 | return false; |
1252 | case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break; |
1253 | case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break; |
1254 | case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break; |
1255 | case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break; |
1256 | case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break; |
1257 | case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break; |
1258 | case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break; |
1259 | case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break; |
1260 | case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break; |
1261 | case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break; |
1262 | case AArch64::BICWrr: Opcode = AArch64::BICWrs; break; |
1263 | case AArch64::BICXrr: Opcode = AArch64::BICXrs; break; |
1264 | case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break; |
1265 | case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break; |
1266 | case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break; |
1267 | case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break; |
1268 | case AArch64::EONWrr: Opcode = AArch64::EONWrs; break; |
1269 | case AArch64::EONXrr: Opcode = AArch64::EONXrs; break; |
1270 | case AArch64::EORWrr: Opcode = AArch64::EORWrs; break; |
1271 | case AArch64::EORXrr: Opcode = AArch64::EORXrs; break; |
1272 | case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break; |
1273 | case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break; |
1274 | case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break; |
1275 | case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break; |
1276 | } |
1277 | MachineFunction &MF = *MBB.getParent(); |
1278 | // Try to create new inst without implicit operands added. |
1279 | MachineInstr *NewMI = MF.CreateMachineInstr( |
1280 | MCID: TII->get(Opcode), DL: MI.getDebugLoc(), /*NoImplicit=*/true); |
1281 | MBB.insert(I: MBBI, MI: NewMI); |
1282 | MachineInstrBuilder MIB1(MF, NewMI); |
1283 | MIB1->setPCSections(MF, MD: MI.getPCSections()); |
1284 | MIB1.addReg(RegNo: MI.getOperand(i: 0).getReg(), flags: RegState::Define) |
1285 | .add(MO: MI.getOperand(i: 1)) |
1286 | .add(MO: MI.getOperand(i: 2)) |
1287 | .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0)); |
1288 | transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB1); |
1289 | if (auto DebugNumber = MI.peekDebugInstrNum()) |
1290 | NewMI->setDebugInstrNum(DebugNumber); |
1291 | MI.eraseFromParent(); |
1292 | return true; |
1293 | } |
1294 | |
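     |   // Load a symbol's address out of the GOT; the expansion depends on the
     |   // code model.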
1295 | case AArch64::LOADgot: { |
1296 | MachineFunction *MF = MBB.getParent(); |
1297 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1298 | const MachineOperand &MO1 = MI.getOperand(i: 1); |
1299 | unsigned Flags = MO1.getTargetFlags(); |
1300 | |
1301 | if (MF->getTarget().getCodeModel() == CodeModel::Tiny) { |
1302 |       // Tiny code model: expand to a single PC-relative literal LDR.
1303 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), |
1304 | MCID: TII->get(Opcode: AArch64::LDRXl), DestReg: DstReg); |
1305 | |
1306 | if (MO1.isGlobal()) { |
1307 | MIB.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags); |
1308 | } else if (MO1.isSymbol()) { |
1309 | MIB.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags); |
1310 | } else { |
1311 | assert(MO1.isCPI() && |
1312 | "Only expect globals, externalsymbols, or constant pools" ); |
1313 | MIB.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), TargetFlags: Flags); |
1314 | } |
1315 | } else { |
1316 |       // Small code model: expand into ADRP + LDR.
1317 | MachineFunction &MF = *MI.getParent()->getParent(); |
1318 | DebugLoc DL = MI.getDebugLoc(); |
1319 | MachineInstrBuilder MIB1 = |
1320 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP), DestReg: DstReg); |
1321 | |
1322 | MachineInstrBuilder MIB2; |
1323 | if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) { |
1324 | auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); |
1325 | unsigned Reg32 = TRI->getSubReg(Reg: DstReg, Idx: AArch64::sub_32); |
1326 | unsigned DstFlags = MI.getOperand(i: 0).getTargetFlags(); |
1327 | MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::LDRWui)) |
1328 | .addDef(RegNo: Reg32) |
1329 | .addReg(RegNo: DstReg, flags: RegState::Kill) |
1330 | .addReg(RegNo: DstReg, flags: DstFlags | RegState::Implicit); |
1331 | } else { |
1332 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1333 | MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::LDRXui)) |
1334 | .add(MO: MI.getOperand(i: 0)) |
1335 | .addUse(RegNo: DstReg, Flags: RegState::Kill); |
1336 | } |
1337 | |
1338 | if (MO1.isGlobal()) { |
1339 | MIB1.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, TargetFlags: Flags | AArch64II::MO_PAGE); |
1340 | MIB2.addGlobalAddress(GV: MO1.getGlobal(), Offset: 0, |
1341 | TargetFlags: Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
1342 | } else if (MO1.isSymbol()) { |
1343 | MIB1.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags | AArch64II::MO_PAGE); |
1344 | MIB2.addExternalSymbol(FnName: MO1.getSymbolName(), TargetFlags: Flags | |
1345 | AArch64II::MO_PAGEOFF | |
1346 | AArch64II::MO_NC); |
1347 | } else { |
1348 | assert(MO1.isCPI() && |
1349 | "Only expect globals, externalsymbols, or constant pools" ); |
1350 | MIB1.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), |
1351 | TargetFlags: Flags | AArch64II::MO_PAGE); |
1352 | MIB2.addConstantPoolIndex(Idx: MO1.getIndex(), Offset: MO1.getOffset(), |
1353 | TargetFlags: Flags | AArch64II::MO_PAGEOFF | |
1354 | AArch64II::MO_NC); |
1355 | } |
1356 | |
1357 | transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2); |
1358 | } |
1359 | MI.eraseFromParent(); |
1360 | return true; |
1361 | } |
1362 | case AArch64::MOVaddrBA: { |
1363 | MachineFunction &MF = *MI.getParent()->getParent(); |
1364 | if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) { |
1365 | // blockaddress expressions have to come from a constant pool because the |
1366 | // largest addend (and hence offset within a function) allowed for ADRP is |
1367 | // only 8MB. |
1368 | const BlockAddress *BA = MI.getOperand(i: 1).getBlockAddress(); |
1369 | assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset" ); |
1370 | |
1371 | MachineConstantPool *MCP = MF.getConstantPool(); |
1372 | unsigned CPIdx = MCP->getConstantPoolIndex(C: BA, Alignment: Align(8)); |
1373 | |
1374 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1375 | auto MIB1 = |
1376 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP), DestReg: DstReg) |
1377 | .addConstantPoolIndex(Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE); |
1378 | auto MIB2 = BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), |
1379 | MCID: TII->get(Opcode: AArch64::LDRXui), DestReg: DstReg) |
1380 | .addUse(RegNo: DstReg) |
1381 | .addConstantPoolIndex( |
1382 | Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
1383 | transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2); |
1384 | MI.eraseFromParent(); |
1385 | return true; |
1386 | } |
1387 | } |
1388 | [[fallthrough]]; |
1389 | case AArch64::MOVaddr: |
1390 | case AArch64::MOVaddrJT: |
1391 | case AArch64::MOVaddrCP: |
1392 | case AArch64::MOVaddrTLS: |
1393 | case AArch64::MOVaddrEXT: { |
1394 | // Expand into ADRP + ADD. |
1395 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1396 | assert(DstReg != AArch64::XZR); |
1397 | MachineInstrBuilder MIB1 = |
1398 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADRP), DestReg: DstReg) |
1399 | .add(MO: MI.getOperand(i: 1)); |
1400 | |
1401 | if (MI.getOperand(i: 1).getTargetFlags() & AArch64II::MO_TAGGED) { |
1402 | // MO_TAGGED on the page indicates a tagged address. Set the tag now. |
1403 | // We do so by creating a MOVK that sets bits 48-63 of the register to |
1404 | // (global address + 0x100000000 - PC) >> 48. This assumes that we're in |
1405 | // the small code model so we can assume a binary size of <= 4GB, which |
1406 |       // makes the untagged PC-relative offset positive. The binary must also be
1407 |       // loaded into the address range [0, 2^48). Both of these properties need to
1408 | // be ensured at runtime when using tagged addresses. |
1409 | auto Tag = MI.getOperand(i: 1); |
1410 | Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3); |
1411 | Tag.setOffset(0x100000000); |
1412 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::MOVKXi), DestReg: DstReg) |
1413 | .addReg(RegNo: DstReg) |
1414 | .add(MO: Tag) |
1415 | .addImm(Val: 48); |
1416 | } |
1417 | |
1418 | MachineInstrBuilder MIB2 = |
1419 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADDXri)) |
1420 | .add(MO: MI.getOperand(i: 0)) |
1421 | .addReg(RegNo: DstReg) |
1422 | .add(MO: MI.getOperand(i: 2)) |
1423 | .addImm(Val: 0); |
1424 | |
1425 | transferImpOps(OldMI&: MI, UseMI&: MIB1, DefMI&: MIB2); |
1426 | MI.eraseFromParent(); |
1427 | return true; |
1428 | } |
1429 | case AArch64::ADDlowTLS: |
1430 |     // Produce a plain ADD.
1431 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::ADDXri)) |
1432 | .add(MO: MI.getOperand(i: 0)) |
1433 | .add(MO: MI.getOperand(i: 1)) |
1434 | .add(MO: MI.getOperand(i: 2)) |
1435 | .addImm(Val: 0); |
1436 | MI.eraseFromParent(); |
1437 | return true; |
1438 | |
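     |   // Read the thread pointer from the TPIDR system register selected by the
     |   // subtarget (TPIDR_EL0 unless a higher exception level or the read-only
     |   // EL0 register is requested).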
1439 | case AArch64::MOVbaseTLS: { |
1440 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1441 | auto SysReg = AArch64SysReg::TPIDR_EL0; |
1442 | MachineFunction *MF = MBB.getParent(); |
1443 | if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP()) |
1444 | SysReg = AArch64SysReg::TPIDR_EL3; |
1445 | else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP()) |
1446 | SysReg = AArch64SysReg::TPIDR_EL2; |
1447 | else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP()) |
1448 | SysReg = AArch64SysReg::TPIDR_EL1; |
1449 | else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP()) |
1450 | SysReg = AArch64SysReg::TPIDRRO_EL0; |
1451 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::MRS), DestReg: DstReg) |
1452 | .addImm(Val: SysReg); |
1453 | MI.eraseFromParent(); |
1454 | return true; |
1455 | } |
1456 | |
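     |   // Materialize wide immediates via the shared immediate expansion helper
     |   // (typically MOVZ/MOVN plus MOVK sequences, or a logical-immediate ORR).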
1457 | case AArch64::MOVi32imm: |
1458 | return expandMOVImm(MBB, MBBI, BitSize: 32); |
1459 | case AArch64::MOVi64imm: |
1460 | return expandMOVImm(MBB, MBBI, BitSize: 64); |
1461 | case AArch64::RET_ReallyLR: { |
1462 | // Hiding the LR use with RET_ReallyLR may lead to extra kills in the |
1463 |     // function and missing live-ins. We are fine in practice because
1464 |     // callee-saved register handling ensures the register value is restored
1465 |     // before RET, but we need the undef flag here to appease the MachineVerifier
1466 | // liveness checks. |
1467 | MachineInstrBuilder MIB = |
1468 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::RET)) |
1469 | .addReg(RegNo: AArch64::LR, flags: RegState::Undef); |
1470 | transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB); |
1471 | MI.eraseFromParent(); |
1472 | return true; |
1473 | } |
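     |   // Compare-and-swap pseudos become load-acquire-exclusive /
     |   // store-release-exclusive retry loops; the sub-word variants compare with
     |   // a SUBS that UXTB/UXTH-extends one operand.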
1474 | case AArch64::CMP_SWAP_8: |
1475 | return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRB, StlrOp: AArch64::STLXRB, |
1476 | CmpOp: AArch64::SUBSWrx, |
1477 | ExtendImm: AArch64_AM::getArithExtendImm(ET: AArch64_AM::UXTB, Imm: 0), |
1478 | ZeroReg: AArch64::WZR, NextMBBI); |
1479 | case AArch64::CMP_SWAP_16: |
1480 | return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRH, StlrOp: AArch64::STLXRH, |
1481 | CmpOp: AArch64::SUBSWrx, |
1482 | ExtendImm: AArch64_AM::getArithExtendImm(ET: AArch64_AM::UXTH, Imm: 0), |
1483 | ZeroReg: AArch64::WZR, NextMBBI); |
1484 | case AArch64::CMP_SWAP_32: |
1485 | return expandCMP_SWAP(MBB, MBBI, LdarOp: AArch64::LDAXRW, StlrOp: AArch64::STLXRW, |
1486 | CmpOp: AArch64::SUBSWrs, |
1487 | ExtendImm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0), |
1488 | ZeroReg: AArch64::WZR, NextMBBI); |
1489 | case AArch64::CMP_SWAP_64: |
1490 | return expandCMP_SWAP(MBB, MBBI, |
1491 | LdarOp: AArch64::LDAXRX, StlrOp: AArch64::STLXRX, CmpOp: AArch64::SUBSXrs, |
1492 | ExtendImm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0), |
1493 | ZeroReg: AArch64::XZR, NextMBBI); |
1494 | case AArch64::CMP_SWAP_128: |
1495 | case AArch64::CMP_SWAP_128_RELEASE: |
1496 | case AArch64::CMP_SWAP_128_ACQUIRE: |
1497 | case AArch64::CMP_SWAP_128_MONOTONIC: |
1498 | return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); |
1499 | |
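     |   // The tied-operand AES pseudos lower to the plain AESMC/AESIMC
     |   // instructions.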
1500 | case AArch64::AESMCrrTied: |
1501 | case AArch64::AESIMCrrTied: { |
1502 | MachineInstrBuilder MIB = |
1503 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), |
1504 | MCID: TII->get(Opcode: Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr : |
1505 | AArch64::AESIMCrr)) |
1506 | .add(MO: MI.getOperand(i: 0)) |
1507 | .add(MO: MI.getOperand(i: 1)); |
1508 | transferImpOps(OldMI&: MI, UseMI&: MIB, DefMI&: MIB); |
1509 | MI.eraseFromParent(); |
1510 | return true; |
1511 | } |
1512 | case AArch64::IRGstack: { |
1513 | MachineFunction &MF = *MBB.getParent(); |
1514 | const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); |
1515 | const AArch64FrameLowering *TFI = |
1516 | MF.getSubtarget<AArch64Subtarget>().getFrameLowering(); |
1517 | |
1518 |     // IRG does not allow an immediate offset. getTaggedBasePointerOffset should
1519 | // almost always point to SP-after-prologue; if not, emit a longer |
1520 | // instruction sequence. |
1521 | int BaseOffset = -AFI->getTaggedBasePointerOffset(); |
1522 | Register FrameReg; |
1523 | StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference( |
1524 | MF, ObjectOffset: BaseOffset, isFixed: false /*isFixed*/, isSVE: false /*isSVE*/, FrameReg, |
1525 | /*PreferFP=*/false, |
1526 | /*ForSimm=*/true); |
1527 | Register SrcReg = FrameReg; |
1528 | if (FrameRegOffset) { |
1529 | // Use output register as temporary. |
1530 | SrcReg = MI.getOperand(i: 0).getReg(); |
1531 | emitFrameOffset(MBB, MBBI: &MI, DL: MI.getDebugLoc(), DestReg: SrcReg, SrcReg: FrameReg, |
1532 | Offset: FrameRegOffset, TII); |
1533 | } |
1534 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::IRG)) |
1535 | .add(MO: MI.getOperand(i: 0)) |
1536 | .addUse(RegNo: SrcReg) |
1537 | .add(MO: MI.getOperand(i: 2)); |
1538 | MI.eraseFromParent(); |
1539 | return true; |
1540 | } |
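     |   // Compute a tagged stack address: ADDG for non-negative offsets, SUBG for
     |   // negative ones, with the absolute offset value as the immediate.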
1541 | case AArch64::TAGPstack: { |
1542 | int64_t Offset = MI.getOperand(i: 2).getImm(); |
1543 | BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(), |
1544 | MCID: TII->get(Opcode: Offset >= 0 ? AArch64::ADDG : AArch64::SUBG)) |
1545 | .add(MO: MI.getOperand(i: 0)) |
1546 | .add(MO: MI.getOperand(i: 1)) |
1547 | .addImm(Val: std::abs(i: Offset)) |
1548 | .add(MO: MI.getOperand(i: 4)); |
1549 | MI.eraseFromParent(); |
1550 | return true; |
1551 | } |
1552 | case AArch64::STGloop_wback: |
1553 | case AArch64::STZGloop_wback: |
1554 | return expandSetTagLoop(MBB, MBBI, NextMBBI); |
1555 | case AArch64::STGloop: |
1556 | case AArch64::STZGloop: |
1557 | report_fatal_error( |
1558 | reason: "Non-writeback variants of STGloop / STZGloop should not " |
1559 | "survive past PrologEpilogInserter." ); |
1560 | case AArch64::STR_ZZZZXI: |
1561 | return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 4); |
1562 | case AArch64::STR_ZZZXI: |
1563 | return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 3); |
1564 | case AArch64::STR_ZZXI: |
1565 | return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_ZXI, N: 2); |
1566 | case AArch64::STR_PPXI: |
1567 | return expandSVESpillFill(MBB, MBBI, Opc: AArch64::STR_PXI, N: 2); |
1568 | case AArch64::LDR_ZZZZXI: |
1569 | return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 4); |
1570 | case AArch64::LDR_ZZZXI: |
1571 | return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 3); |
1572 | case AArch64::LDR_ZZXI: |
1573 | return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_ZXI, N: 2); |
1574 | case AArch64::LDR_PPXI: |
1575 | return expandSVESpillFill(MBB, MBBI, Opc: AArch64::LDR_PXI, N: 2); |
1576 | case AArch64::BLR_RVMARKER: |
1577 | case AArch64::BLRA_RVMARKER: |
1578 | return expandCALL_RVMARKER(MBB, MBBI); |
1579 | case AArch64::BLR_BTI: |
1580 | return expandCALL_BTI(MBB, MBBI); |
1581 | case AArch64::StoreSwiftAsyncContext: |
1582 | return expandStoreSwiftAsyncContext(MBB, MBBI); |
1583 | case AArch64::RestoreZAPseudo: { |
1584 | auto *NewMBB = expandRestoreZA(MBB, MBBI); |
1585 | if (NewMBB != &MBB) |
1586 | NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. |
1587 | return true; |
1588 | } |
1589 | case AArch64::MSRpstatePseudo: { |
1590 | auto *NewMBB = expandCondSMToggle(MBB, MBBI); |
1591 | if (NewMBB != &MBB) |
1592 | NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. |
1593 | return true; |
1594 | } |
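     |   // Coalescer barriers are pure markers for the register coalescer; they
     |   // expand to nothing.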
1595 | case AArch64::COALESCER_BARRIER_FPR16: |
1596 | case AArch64::COALESCER_BARRIER_FPR32: |
1597 | case AArch64::COALESCER_BARRIER_FPR64: |
1598 | case AArch64::COALESCER_BARRIER_FPR128: |
1599 | MI.eraseFromParent(); |
1600 | return true; |
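     |   // Multi-vector load pseudos pick the contiguous or strided opcode based
     |   // on whether the destination tuple was allocated to a contiguous or a
     |   // strided register class.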
1601 | case AArch64::LD1B_2Z_IMM_PSEUDO: |
1602 | return expandMultiVecPseudo( |
1603 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1604 | ContiguousOp: AArch64::LD1B_2Z_IMM, StridedOpc: AArch64::LD1B_2Z_STRIDED_IMM); |
1605 | case AArch64::LD1H_2Z_IMM_PSEUDO: |
1606 | return expandMultiVecPseudo( |
1607 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1608 | ContiguousOp: AArch64::LD1H_2Z_IMM, StridedOpc: AArch64::LD1H_2Z_STRIDED_IMM); |
1609 | case AArch64::LD1W_2Z_IMM_PSEUDO: |
1610 | return expandMultiVecPseudo( |
1611 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1612 | ContiguousOp: AArch64::LD1W_2Z_IMM, StridedOpc: AArch64::LD1W_2Z_STRIDED_IMM); |
1613 | case AArch64::LD1D_2Z_IMM_PSEUDO: |
1614 | return expandMultiVecPseudo( |
1615 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1616 | ContiguousOp: AArch64::LD1D_2Z_IMM, StridedOpc: AArch64::LD1D_2Z_STRIDED_IMM); |
1617 | case AArch64::LDNT1B_2Z_IMM_PSEUDO: |
1618 | return expandMultiVecPseudo( |
1619 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1620 | ContiguousOp: AArch64::LDNT1B_2Z_IMM, StridedOpc: AArch64::LDNT1B_2Z_STRIDED_IMM); |
1621 | case AArch64::LDNT1H_2Z_IMM_PSEUDO: |
1622 | return expandMultiVecPseudo( |
1623 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1624 | ContiguousOp: AArch64::LDNT1H_2Z_IMM, StridedOpc: AArch64::LDNT1H_2Z_STRIDED_IMM); |
1625 | case AArch64::LDNT1W_2Z_IMM_PSEUDO: |
1626 | return expandMultiVecPseudo( |
1627 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1628 | ContiguousOp: AArch64::LDNT1W_2Z_IMM, StridedOpc: AArch64::LDNT1W_2Z_STRIDED_IMM); |
1629 | case AArch64::LDNT1D_2Z_IMM_PSEUDO: |
1630 | return expandMultiVecPseudo( |
1631 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1632 | ContiguousOp: AArch64::LDNT1D_2Z_IMM, StridedOpc: AArch64::LDNT1D_2Z_STRIDED_IMM); |
1633 | case AArch64::LD1B_2Z_PSEUDO: |
1634 | return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, |
1635 | StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1B_2Z, |
1636 | StridedOpc: AArch64::LD1B_2Z_STRIDED); |
1637 | case AArch64::LD1H_2Z_PSEUDO: |
1638 | return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, |
1639 | StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1H_2Z, |
1640 | StridedOpc: AArch64::LD1H_2Z_STRIDED); |
1641 | case AArch64::LD1W_2Z_PSEUDO: |
1642 | return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, |
1643 | StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1W_2Z, |
1644 | StridedOpc: AArch64::LD1W_2Z_STRIDED); |
1645 | case AArch64::LD1D_2Z_PSEUDO: |
1646 | return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, |
1647 | StridedClass: AArch64::ZPR2StridedRegClass, ContiguousOp: AArch64::LD1D_2Z, |
1648 | StridedOpc: AArch64::LD1D_2Z_STRIDED); |
1649 | case AArch64::LDNT1B_2Z_PSEUDO: |
1650 | return expandMultiVecPseudo( |
1651 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1652 | ContiguousOp: AArch64::LDNT1B_2Z, StridedOpc: AArch64::LDNT1B_2Z_STRIDED); |
1653 | case AArch64::LDNT1H_2Z_PSEUDO: |
1654 | return expandMultiVecPseudo( |
1655 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1656 | ContiguousOp: AArch64::LDNT1H_2Z, StridedOpc: AArch64::LDNT1H_2Z_STRIDED); |
1657 | case AArch64::LDNT1W_2Z_PSEUDO: |
1658 | return expandMultiVecPseudo( |
1659 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1660 | ContiguousOp: AArch64::LDNT1W_2Z, StridedOpc: AArch64::LDNT1W_2Z_STRIDED); |
1661 | case AArch64::LDNT1D_2Z_PSEUDO: |
1662 | return expandMultiVecPseudo( |
1663 | MBB, MBBI, ContiguousClass: AArch64::ZPR2RegClass, StridedClass: AArch64::ZPR2StridedRegClass, |
1664 | ContiguousOp: AArch64::LDNT1D_2Z, StridedOpc: AArch64::LDNT1D_2Z_STRIDED); |
1665 | case AArch64::LD1B_4Z_IMM_PSEUDO: |
1666 | return expandMultiVecPseudo( |
1667 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1668 | ContiguousOp: AArch64::LD1B_4Z_IMM, StridedOpc: AArch64::LD1B_4Z_STRIDED_IMM); |
1669 | case AArch64::LD1H_4Z_IMM_PSEUDO: |
1670 | return expandMultiVecPseudo( |
1671 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1672 | ContiguousOp: AArch64::LD1H_4Z_IMM, StridedOpc: AArch64::LD1H_4Z_STRIDED_IMM); |
1673 | case AArch64::LD1W_4Z_IMM_PSEUDO: |
1674 | return expandMultiVecPseudo( |
1675 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1676 | ContiguousOp: AArch64::LD1W_4Z_IMM, StridedOpc: AArch64::LD1W_4Z_STRIDED_IMM); |
1677 | case AArch64::LD1D_4Z_IMM_PSEUDO: |
1678 | return expandMultiVecPseudo( |
1679 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1680 | ContiguousOp: AArch64::LD1D_4Z_IMM, StridedOpc: AArch64::LD1D_4Z_STRIDED_IMM); |
1681 | case AArch64::LDNT1B_4Z_IMM_PSEUDO: |
1682 | return expandMultiVecPseudo( |
1683 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1684 | ContiguousOp: AArch64::LDNT1B_4Z_IMM, StridedOpc: AArch64::LDNT1B_4Z_STRIDED_IMM); |
1685 | case AArch64::LDNT1H_4Z_IMM_PSEUDO: |
1686 | return expandMultiVecPseudo( |
1687 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1688 | ContiguousOp: AArch64::LDNT1H_4Z_IMM, StridedOpc: AArch64::LDNT1H_4Z_STRIDED_IMM); |
1689 | case AArch64::LDNT1W_4Z_IMM_PSEUDO: |
1690 | return expandMultiVecPseudo( |
1691 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1692 | ContiguousOp: AArch64::LDNT1W_4Z_IMM, StridedOpc: AArch64::LDNT1W_4Z_STRIDED_IMM); |
1693 | case AArch64::LDNT1D_4Z_IMM_PSEUDO: |
1694 | return expandMultiVecPseudo( |
1695 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1696 | ContiguousOp: AArch64::LDNT1D_4Z_IMM, StridedOpc: AArch64::LDNT1D_4Z_STRIDED_IMM); |
1697 | case AArch64::LD1B_4Z_PSEUDO: |
1698 | return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, |
1699 | StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1B_4Z, |
1700 | StridedOpc: AArch64::LD1B_4Z_STRIDED); |
1701 | case AArch64::LD1H_4Z_PSEUDO: |
1702 | return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, |
1703 | StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1H_4Z, |
1704 | StridedOpc: AArch64::LD1H_4Z_STRIDED); |
1705 | case AArch64::LD1W_4Z_PSEUDO: |
1706 | return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, |
1707 | StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1W_4Z, |
1708 | StridedOpc: AArch64::LD1W_4Z_STRIDED); |
1709 | case AArch64::LD1D_4Z_PSEUDO: |
1710 | return expandMultiVecPseudo(MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, |
1711 | StridedClass: AArch64::ZPR4StridedRegClass, ContiguousOp: AArch64::LD1D_4Z, |
1712 | StridedOpc: AArch64::LD1D_4Z_STRIDED); |
1713 | case AArch64::LDNT1B_4Z_PSEUDO: |
1714 | return expandMultiVecPseudo( |
1715 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1716 | ContiguousOp: AArch64::LDNT1B_4Z, StridedOpc: AArch64::LDNT1B_4Z_STRIDED); |
1717 | case AArch64::LDNT1H_4Z_PSEUDO: |
1718 | return expandMultiVecPseudo( |
1719 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1720 | ContiguousOp: AArch64::LDNT1H_4Z, StridedOpc: AArch64::LDNT1H_4Z_STRIDED); |
1721 | case AArch64::LDNT1W_4Z_PSEUDO: |
1722 | return expandMultiVecPseudo( |
1723 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1724 | ContiguousOp: AArch64::LDNT1W_4Z, StridedOpc: AArch64::LDNT1W_4Z_STRIDED); |
1725 | case AArch64::LDNT1D_4Z_PSEUDO: |
1726 | return expandMultiVecPseudo( |
1727 | MBB, MBBI, ContiguousClass: AArch64::ZPR4RegClass, StridedClass: AArch64::ZPR4StridedRegClass, |
1728 | ContiguousOp: AArch64::LDNT1D_4Z, StridedOpc: AArch64::LDNT1D_4Z_STRIDED); |
1729 | } |
1730 | return false; |
1731 | } |
1732 | |
1733 | /// Iterate over the instructions in basic block MBB and expand any |
1734 | /// pseudo instructions. Return true if anything was modified. |
1735 | bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { |
1736 | bool Modified = false; |
1737 | |
1738 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1739 | while (MBBI != E) { |
1740 | MachineBasicBlock::iterator NMBBI = std::next(x: MBBI); |
1741 | Modified |= expandMI(MBB, MBBI, NextMBBI&: NMBBI); |
1742 | MBBI = NMBBI; |
1743 | } |
1744 | |
1745 | return Modified; |
1746 | } |
1747 | |
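     | /// Expand pseudo instructions in every basic block of the given machine
     | /// function. Return true if anything was modified.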
1748 | bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { |
1749 | TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); |
1750 | |
1751 | bool Modified = false; |
1752 | for (auto &MBB : MF) |
1753 | Modified |= expandMBB(MBB); |
1754 | return Modified; |
1755 | } |
1756 | |
1757 | /// Returns an instance of the pseudo instruction expansion pass. |
1758 | FunctionPass *llvm::createAArch64ExpandPseudoPass() { |
1759 | return new AArch64ExpandPseudo(); |
1760 | } |
1761 | |