1//===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands atomic pseudo instructions into
10// target instructions post RA. This way, each LL/SC loop is treated as a
11// single unit, which makes spilling inside the LL/SC loop unlikely.
12//
13//===----------------------------------------------------------------------===//
14
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCInstrInfo.h"
18#include "PPCTargetMachine.h"
19
20#include "llvm/CodeGen/LivePhysRegs.h"
21#include "llvm/CodeGen/MachineFunctionPass.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23
24using namespace llvm;
25
26#define DEBUG_TYPE "ppc-atomic-expand"
27
28namespace {
29
30class PPCExpandAtomicPseudo : public MachineFunctionPass {
31public:
32 const PPCInstrInfo *TII;
33 const PPCRegisterInfo *TRI;
34 static char ID;
35
36 PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {
37 initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
38 }
39
40 bool runOnMachineFunction(MachineFunction &MF) override;
41
42private:
43 bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
44 MachineBasicBlock::iterator &NMBBI);
45 bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI,
46 MachineBasicBlock::iterator &NMBBI);
47 bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI,
48 MachineBasicBlock::iterator &NMBBI);
49};
50
51static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB,
52 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
53 Register Dest0, Register Dest1, Register Src0,
54 Register Src1) {
55 const MCInstrDesc &OR = TII->get(Opcode: PPC::OR8);
56 const MCInstrDesc &XOR = TII->get(Opcode: PPC::XOR8);
57 if (Dest0 == Src1 && Dest1 == Src0) {
58 // The most tricky case, swapping values.
59 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest0).addReg(RegNo: Dest0).addReg(RegNo: Dest1);
60 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest1).addReg(RegNo: Dest0).addReg(RegNo: Dest1);
61 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest0).addReg(RegNo: Dest0).addReg(RegNo: Dest1);
62 } else if (Dest0 != Src0 || Dest1 != Src1) {
63 if (Dest0 == Src1 || Dest1 != Src0) {
64 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest1).addReg(RegNo: Src1).addReg(RegNo: Src1);
65 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest0).addReg(RegNo: Src0).addReg(RegNo: Src0);
66 } else {
67 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest0).addReg(RegNo: Src0).addReg(RegNo: Src0);
68 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest1).addReg(RegNo: Src1).addReg(RegNo: Src1);
69 }
70 }
71}
72
73bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
74 bool Changed = false;
75 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
76 TRI = &TII->getRegisterInfo();
77 for (MachineBasicBlock &MBB : MF) {
78 for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
79 MBBI != MBBE;) {
80 MachineInstr &MI = *MBBI;
81 MachineBasicBlock::iterator NMBBI = std::next(x: MBBI);
82 Changed |= expandMI(MBB, MI, NMBBI);
83 MBBI = NMBBI;
84 }
85 }
86 if (Changed)
87 MF.RenumberBlocks();
88 return Changed;
89}
90
91bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
92 MachineBasicBlock::iterator &NMBBI) {
93 switch (MI.getOpcode()) {
94 case PPC::ATOMIC_SWAP_I128:
95 case PPC::ATOMIC_LOAD_ADD_I128:
96 case PPC::ATOMIC_LOAD_SUB_I128:
97 case PPC::ATOMIC_LOAD_XOR_I128:
98 case PPC::ATOMIC_LOAD_NAND_I128:
99 case PPC::ATOMIC_LOAD_AND_I128:
100 case PPC::ATOMIC_LOAD_OR_I128:
101 return expandAtomicRMW128(MBB, MI, NMBBI);
102 case PPC::ATOMIC_CMP_SWAP_I128:
103 return expandAtomicCmpSwap128(MBB, MI, NMBBI);
104 case PPC::BUILD_QUADWORD: {
105 Register Dst = MI.getOperand(i: 0).getReg();
106 Register DstHi = TRI->getSubReg(Reg: Dst, Idx: PPC::sub_gp8_x0);
107 Register DstLo = TRI->getSubReg(Reg: Dst, Idx: PPC::sub_gp8_x1);
108 Register Lo = MI.getOperand(i: 1).getReg();
109 Register Hi = MI.getOperand(i: 2).getReg();
110 PairedCopy(TII, MBB, MBBI: MI, DL: MI.getDebugLoc(), Dest0: DstHi, Dest1: DstLo, Src0: Hi, Src1: Lo);
111 MI.eraseFromParent();
112 return true;
113 }
114 default:
115 return false;
116 }
117}
118
119bool PPCExpandAtomicPseudo::expandAtomicRMW128(
120 MachineBasicBlock &MBB, MachineInstr &MI,
121 MachineBasicBlock::iterator &NMBBI) {
122 const MCInstrDesc &LL = TII->get(Opcode: PPC::LQARX);
123 const MCInstrDesc &SC = TII->get(Opcode: PPC::STQCX);
124 DebugLoc DL = MI.getDebugLoc();
125 MachineFunction *MF = MBB.getParent();
126 const BasicBlock *BB = MBB.getBasicBlock();
127 // Create layout of control flow.
128 MachineFunction::iterator MFI = ++MBB.getIterator();
129 MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB);
130 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
131 MF->insert(MBBI: MFI, MBB: LoopMBB);
132 MF->insert(MBBI: MFI, MBB: ExitMBB);
133 ExitMBB->splice(Where: ExitMBB->begin(), Other: &MBB, From: std::next(x: MI.getIterator()),
134 To: MBB.end());
135 ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);
136 MBB.addSuccessor(Succ: LoopMBB);
137
138 // For non-min/max operations, control flow is kinda like:
139 // MBB:
140 // ...
141 // LoopMBB:
142 // lqarx in, ptr
143 // addc out.sub_x1, in.sub_x1, op.sub_x1
144 // adde out.sub_x0, in.sub_x0, op.sub_x0
145 // stqcx out, ptr
146 // bne- LoopMBB
147 // ExitMBB:
148 // ...
149 Register Old = MI.getOperand(i: 0).getReg();
150 Register OldHi = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x0);
151 Register OldLo = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x1);
152 Register Scratch = MI.getOperand(i: 1).getReg();
153 Register ScratchHi = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x0);
154 Register ScratchLo = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x1);
155 Register RA = MI.getOperand(i: 2).getReg();
156 Register RB = MI.getOperand(i: 3).getReg();
157 Register IncrLo = MI.getOperand(i: 4).getReg();
158 Register IncrHi = MI.getOperand(i: 5).getReg();
159 unsigned RMWOpcode = MI.getOpcode();
160
161 MachineBasicBlock *CurrentMBB = LoopMBB;
162 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: LL, DestReg: Old).addReg(RegNo: RA).addReg(RegNo: RB);
163
164 switch (RMWOpcode) {
165 case PPC::ATOMIC_SWAP_I128:
166 PairedCopy(TII, MBB&: *CurrentMBB, MBBI: CurrentMBB->end(), DL, Dest0: ScratchHi, Dest1: ScratchLo,
167 Src0: IncrHi, Src1: IncrLo);
168 break;
169 case PPC::ATOMIC_LOAD_ADD_I128:
170 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::ADDC8), DestReg: ScratchLo)
171 .addReg(RegNo: IncrLo)
172 .addReg(RegNo: OldLo);
173 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::ADDE8), DestReg: ScratchHi)
174 .addReg(RegNo: IncrHi)
175 .addReg(RegNo: OldHi);
176 break;
177 case PPC::ATOMIC_LOAD_SUB_I128:
178 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::SUBFC8), DestReg: ScratchLo)
179 .addReg(RegNo: IncrLo)
180 .addReg(RegNo: OldLo);
181 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::SUBFE8), DestReg: ScratchHi)
182 .addReg(RegNo: IncrHi)
183 .addReg(RegNo: OldHi);
184 break;
185
186#define TRIVIAL_ATOMICRMW(Opcode, Instr) \
187 case Opcode: \
188 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \
189 .addReg(IncrLo) \
190 .addReg(OldLo); \
191 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \
192 .addReg(IncrHi) \
193 .addReg(OldHi); \
194 break
195
196 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8);
197 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8);
198 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8);
199 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8);
200#undef TRIVIAL_ATOMICRMW
201 default:
202 llvm_unreachable("Unhandled atomic RMW operation");
203 }
204 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: SC).addReg(RegNo: Scratch).addReg(RegNo: RA).addReg(RegNo: RB);
205 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC))
206 .addImm(Val: PPC::PRED_NE)
207 .addReg(RegNo: PPC::CR0)
208 .addMBB(MBB: LoopMBB);
209 CurrentMBB->addSuccessor(Succ: LoopMBB);
210 CurrentMBB->addSuccessor(Succ: ExitMBB);
211 fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopMBB});
212 NMBBI = MBB.end();
213 MI.eraseFromParent();
214 return true;
215}
216
217bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
218 MachineBasicBlock &MBB, MachineInstr &MI,
219 MachineBasicBlock::iterator &NMBBI) {
220 const MCInstrDesc &LL = TII->get(Opcode: PPC::LQARX);
221 const MCInstrDesc &SC = TII->get(Opcode: PPC::STQCX);
222 DebugLoc DL = MI.getDebugLoc();
223 MachineFunction *MF = MBB.getParent();
224 const BasicBlock *BB = MBB.getBasicBlock();
225 Register Old = MI.getOperand(i: 0).getReg();
226 Register OldHi = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x0);
227 Register OldLo = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x1);
228 Register Scratch = MI.getOperand(i: 1).getReg();
229 Register ScratchHi = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x0);
230 Register ScratchLo = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x1);
231 Register RA = MI.getOperand(i: 2).getReg();
232 Register RB = MI.getOperand(i: 3).getReg();
233 Register CmpLo = MI.getOperand(i: 4).getReg();
234 Register CmpHi = MI.getOperand(i: 5).getReg();
235 Register NewLo = MI.getOperand(i: 6).getReg();
236 Register NewHi = MI.getOperand(i: 7).getReg();
237 // Create layout of control flow.
238 // loop:
239 // old = lqarx ptr
240 // <compare old, cmp>
241 // bne 0, exit
242 // succ:
243 // stqcx new ptr
244 // bne 0, loop
245 // exit:
246 // ....
247 MachineFunction::iterator MFI = ++MBB.getIterator();
248 MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
249 MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
250 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
251 MF->insert(MBBI: MFI, MBB: LoopCmpMBB);
252 MF->insert(MBBI: MFI, MBB: CmpSuccMBB);
253 MF->insert(MBBI: MFI, MBB: ExitMBB);
254 ExitMBB->splice(Where: ExitMBB->begin(), Other: &MBB, From: std::next(x: MI.getIterator()),
255 To: MBB.end());
256 ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);
257 MBB.addSuccessor(Succ: LoopCmpMBB);
258 // Build loop.
259 MachineBasicBlock *CurrentMBB = LoopCmpMBB;
260 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: LL, DestReg: Old).addReg(RegNo: RA).addReg(RegNo: RB);
261 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::XOR8), DestReg: ScratchLo)
262 .addReg(RegNo: OldLo)
263 .addReg(RegNo: CmpLo);
264 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::XOR8), DestReg: ScratchHi)
265 .addReg(RegNo: OldHi)
266 .addReg(RegNo: CmpHi);
267 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::OR8_rec), DestReg: ScratchLo)
268 .addReg(RegNo: ScratchLo)
269 .addReg(RegNo: ScratchHi);
270 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC))
271 .addImm(Val: PPC::PRED_NE)
272 .addReg(RegNo: PPC::CR0)
273 .addMBB(MBB: ExitMBB);
274 CurrentMBB->addSuccessor(Succ: CmpSuccMBB);
275 CurrentMBB->addSuccessor(Succ: ExitMBB);
276 // Build succ.
277 CurrentMBB = CmpSuccMBB;
278 PairedCopy(TII, MBB&: *CurrentMBB, MBBI: CurrentMBB->end(), DL, Dest0: ScratchHi, Dest1: ScratchLo,
279 Src0: NewHi, Src1: NewLo);
280 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: SC).addReg(RegNo: Scratch).addReg(RegNo: RA).addReg(RegNo: RB);
281 BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC))
282 .addImm(Val: PPC::PRED_NE)
283 .addReg(RegNo: PPC::CR0)
284 .addMBB(MBB: LoopCmpMBB);
285 CurrentMBB->addSuccessor(Succ: LoopCmpMBB);
286 CurrentMBB->addSuccessor(Succ: ExitMBB);
287
288 fullyRecomputeLiveIns(MBBs: {ExitMBB, CmpSuccMBB, LoopCmpMBB});
289 NMBBI = MBB.end();
290 MI.eraseFromParent();
291 return true;
292}
293
294} // namespace
295
// Pass registration: DEBUG_TYPE ("ppc-atomic-expand") is passed as the pass
// argument and "PowerPC Expand Atomic" as its descriptive name. This macro
// provides the initializePPCExpandAtomicPseudoPass() function that the pass
// constructor calls.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS - confirm against
// llvm/PassSupport.h.
296INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic",
297 false, false)
298
// Definition of the static pass identifier declared in the class; the
// constructor hands it to MachineFunctionPass.
299char PPCExpandAtomicPseudo::ID = 0;
// Factory for the pass: returns a newly heap-allocated PPCExpandAtomicPseudo
// as a FunctionPass pointer. The class itself lives in an anonymous
// namespace, so this function is how code outside this file obtains an
// instance.
// NOTE(review): the caller (presumably the pass manager) is expected to take
// ownership of the returned object - confirm at the call site.
300FunctionPass *llvm::createPPCExpandAtomicPseudoPass() {
301 return new PPCExpandAtomicPseudo();
302}
303