1 | //===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file contains a pass that expands atomic pseudo instructions into
// target instructions post RA. Because each LL/SC loop stays a single opaque
// pseudo instruction until registers have been assigned, spill code is
// unlikely to be placed inside the LL/SC loop.
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "MCTargetDesc/PPCPredicates.h" |
16 | #include "PPC.h" |
17 | #include "PPCInstrInfo.h" |
18 | |
19 | #include "llvm/CodeGen/LivePhysRegs.h" |
20 | #include "llvm/CodeGen/MachineFunctionPass.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | |
23 | using namespace llvm; |
24 | |
25 | #define DEBUG_TYPE "ppc-atomic-expand" |
26 | |
27 | namespace { |
28 | |
29 | class PPCExpandAtomicPseudo : public MachineFunctionPass { |
30 | public: |
31 | const PPCInstrInfo *TII; |
32 | const PPCRegisterInfo *TRI; |
33 | static char ID; |
34 | |
35 | PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {} |
36 | |
37 | bool runOnMachineFunction(MachineFunction &MF) override; |
38 | |
39 | private: |
40 | bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI, |
41 | MachineBasicBlock::iterator &NMBBI); |
42 | bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI, |
43 | MachineBasicBlock::iterator &NMBBI); |
44 | bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI, |
45 | MachineBasicBlock::iterator &NMBBI); |
46 | }; |
47 | |
48 | static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB, |
49 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, |
50 | Register Dest0, Register Dest1, Register Src0, |
51 | Register Src1) { |
52 | const MCInstrDesc &OR = TII->get(Opcode: PPC::OR8); |
53 | const MCInstrDesc &XOR = TII->get(Opcode: PPC::XOR8); |
54 | if (Dest0 == Src1 && Dest1 == Src0) { |
55 | // The most tricky case, swapping values. |
56 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest0).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
57 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest1).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
58 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest0).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
59 | } else if (Dest0 != Src0 || Dest1 != Src1) { |
60 | if (Dest0 == Src1 || Dest1 != Src0) { |
61 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest1).addReg(RegNo: Src1).addReg(RegNo: Src1); |
62 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest0).addReg(RegNo: Src0).addReg(RegNo: Src0); |
63 | } else { |
64 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest0).addReg(RegNo: Src0).addReg(RegNo: Src0); |
65 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest1).addReg(RegNo: Src1).addReg(RegNo: Src1); |
66 | } |
67 | } |
68 | } |
69 | |
70 | bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { |
71 | bool Changed = false; |
72 | TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
73 | TRI = &TII->getRegisterInfo(); |
74 | for (MachineBasicBlock &MBB : MF) { |
75 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); |
76 | MBBI != MBBE;) { |
77 | MachineInstr &MI = *MBBI; |
78 | MachineBasicBlock::iterator NMBBI = std::next(x: MBBI); |
79 | Changed |= expandMI(MBB, MI, NMBBI); |
80 | MBBI = NMBBI; |
81 | } |
82 | } |
83 | if (Changed) |
84 | MF.RenumberBlocks(); |
85 | return Changed; |
86 | } |
87 | |
88 | bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI, |
89 | MachineBasicBlock::iterator &NMBBI) { |
90 | switch (MI.getOpcode()) { |
91 | case PPC::ATOMIC_SWAP_I128: |
92 | case PPC::ATOMIC_LOAD_ADD_I128: |
93 | case PPC::ATOMIC_LOAD_SUB_I128: |
94 | case PPC::ATOMIC_LOAD_XOR_I128: |
95 | case PPC::ATOMIC_LOAD_NAND_I128: |
96 | case PPC::ATOMIC_LOAD_AND_I128: |
97 | case PPC::ATOMIC_LOAD_OR_I128: |
98 | return expandAtomicRMW128(MBB, MI, NMBBI); |
99 | case PPC::ATOMIC_CMP_SWAP_I128: |
100 | return expandAtomicCmpSwap128(MBB, MI, NMBBI); |
101 | case PPC::BUILD_QUADWORD: { |
102 | Register Dst = MI.getOperand(i: 0).getReg(); |
103 | Register DstHi = TRI->getSubReg(Reg: Dst, Idx: PPC::sub_gp8_x0); |
104 | Register DstLo = TRI->getSubReg(Reg: Dst, Idx: PPC::sub_gp8_x1); |
105 | Register Lo = MI.getOperand(i: 1).getReg(); |
106 | Register Hi = MI.getOperand(i: 2).getReg(); |
107 | PairedCopy(TII, MBB, MBBI: MI, DL: MI.getDebugLoc(), Dest0: DstHi, Dest1: DstLo, Src0: Hi, Src1: Lo); |
108 | MI.eraseFromParent(); |
109 | return true; |
110 | } |
111 | default: |
112 | return false; |
113 | } |
114 | } |
115 | |
116 | bool PPCExpandAtomicPseudo::expandAtomicRMW128( |
117 | MachineBasicBlock &MBB, MachineInstr &MI, |
118 | MachineBasicBlock::iterator &NMBBI) { |
119 | const MCInstrDesc &LL = TII->get(Opcode: PPC::LQARX); |
120 | const MCInstrDesc &SC = TII->get(Opcode: PPC::STQCX); |
121 | DebugLoc DL = MI.getDebugLoc(); |
122 | MachineFunction *MF = MBB.getParent(); |
123 | const BasicBlock *BB = MBB.getBasicBlock(); |
124 | // Create layout of control flow. |
125 | MachineFunction::iterator MFI = ++MBB.getIterator(); |
126 | MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB); |
127 | MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); |
128 | MF->insert(MBBI: MFI, MBB: LoopMBB); |
129 | MF->insert(MBBI: MFI, MBB: ExitMBB); |
130 | ExitMBB->splice(Where: ExitMBB->begin(), Other: &MBB, From: std::next(x: MI.getIterator()), |
131 | To: MBB.end()); |
132 | ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
133 | MBB.addSuccessor(Succ: LoopMBB); |
134 | |
135 | // For non-min/max operations, control flow is kinda like: |
136 | // MBB: |
137 | // ... |
138 | // LoopMBB: |
139 | // lqarx in, ptr |
140 | // addc out.sub_x1, in.sub_x1, op.sub_x1 |
141 | // adde out.sub_x0, in.sub_x0, op.sub_x0 |
142 | // stqcx out, ptr |
143 | // bne- LoopMBB |
144 | // ExitMBB: |
145 | // ... |
146 | Register Old = MI.getOperand(i: 0).getReg(); |
147 | Register OldHi = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x0); |
148 | Register OldLo = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x1); |
149 | Register Scratch = MI.getOperand(i: 1).getReg(); |
150 | Register ScratchHi = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x0); |
151 | Register ScratchLo = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x1); |
152 | Register RA = MI.getOperand(i: 2).getReg(); |
153 | Register RB = MI.getOperand(i: 3).getReg(); |
154 | Register IncrLo = MI.getOperand(i: 4).getReg(); |
155 | Register IncrHi = MI.getOperand(i: 5).getReg(); |
156 | unsigned RMWOpcode = MI.getOpcode(); |
157 | |
158 | MachineBasicBlock *CurrentMBB = LoopMBB; |
159 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: LL, DestReg: Old).addReg(RegNo: RA).addReg(RegNo: RB); |
160 | |
161 | switch (RMWOpcode) { |
162 | case PPC::ATOMIC_SWAP_I128: |
163 | PairedCopy(TII, MBB&: *CurrentMBB, MBBI: CurrentMBB->end(), DL, Dest0: ScratchHi, Dest1: ScratchLo, |
164 | Src0: IncrHi, Src1: IncrLo); |
165 | break; |
166 | case PPC::ATOMIC_LOAD_ADD_I128: |
167 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::ADDC8), DestReg: ScratchLo) |
168 | .addReg(RegNo: IncrLo) |
169 | .addReg(RegNo: OldLo); |
170 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::ADDE8), DestReg: ScratchHi) |
171 | .addReg(RegNo: IncrHi) |
172 | .addReg(RegNo: OldHi); |
173 | break; |
174 | case PPC::ATOMIC_LOAD_SUB_I128: |
175 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::SUBFC8), DestReg: ScratchLo) |
176 | .addReg(RegNo: IncrLo) |
177 | .addReg(RegNo: OldLo); |
178 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::SUBFE8), DestReg: ScratchHi) |
179 | .addReg(RegNo: IncrHi) |
180 | .addReg(RegNo: OldHi); |
181 | break; |
182 | |
183 | #define TRIVIAL_ATOMICRMW(Opcode, Instr) \ |
184 | case Opcode: \ |
185 | BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \ |
186 | .addReg(IncrLo) \ |
187 | .addReg(OldLo); \ |
188 | BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \ |
189 | .addReg(IncrHi) \ |
190 | .addReg(OldHi); \ |
191 | break |
192 | |
193 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8); |
194 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8); |
195 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8); |
196 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8); |
197 | #undef TRIVIAL_ATOMICRMW |
198 | default: |
199 | llvm_unreachable("Unhandled atomic RMW operation" ); |
200 | } |
201 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: SC).addReg(RegNo: Scratch).addReg(RegNo: RA).addReg(RegNo: RB); |
202 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
203 | .addImm(Val: PPC::PRED_NE) |
204 | .addReg(RegNo: PPC::CR0) |
205 | .addMBB(MBB: LoopMBB); |
206 | CurrentMBB->addSuccessor(Succ: LoopMBB); |
207 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
208 | fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopMBB}); |
209 | NMBBI = MBB.end(); |
210 | MI.eraseFromParent(); |
211 | return true; |
212 | } |
213 | |
214 | bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( |
215 | MachineBasicBlock &MBB, MachineInstr &MI, |
216 | MachineBasicBlock::iterator &NMBBI) { |
217 | const MCInstrDesc &LL = TII->get(Opcode: PPC::LQARX); |
218 | const MCInstrDesc &SC = TII->get(Opcode: PPC::STQCX); |
219 | DebugLoc DL = MI.getDebugLoc(); |
220 | MachineFunction *MF = MBB.getParent(); |
221 | const BasicBlock *BB = MBB.getBasicBlock(); |
222 | Register Old = MI.getOperand(i: 0).getReg(); |
223 | Register OldHi = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x0); |
224 | Register OldLo = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x1); |
225 | Register Scratch = MI.getOperand(i: 1).getReg(); |
226 | Register ScratchHi = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x0); |
227 | Register ScratchLo = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x1); |
228 | Register RA = MI.getOperand(i: 2).getReg(); |
229 | Register RB = MI.getOperand(i: 3).getReg(); |
230 | Register CmpLo = MI.getOperand(i: 4).getReg(); |
231 | Register CmpHi = MI.getOperand(i: 5).getReg(); |
232 | Register NewLo = MI.getOperand(i: 6).getReg(); |
233 | Register NewHi = MI.getOperand(i: 7).getReg(); |
234 | // Create layout of control flow. |
235 | // loop: |
236 | // old = lqarx ptr |
237 | // <compare old, cmp> |
238 | // bne 0, exit |
239 | // succ: |
240 | // stqcx new ptr |
241 | // bne 0, loop |
242 | // exit: |
243 | // .... |
244 | MachineFunction::iterator MFI = ++MBB.getIterator(); |
245 | MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); |
246 | MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); |
247 | MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); |
248 | MF->insert(MBBI: MFI, MBB: LoopCmpMBB); |
249 | MF->insert(MBBI: MFI, MBB: CmpSuccMBB); |
250 | MF->insert(MBBI: MFI, MBB: ExitMBB); |
251 | ExitMBB->splice(Where: ExitMBB->begin(), Other: &MBB, From: std::next(x: MI.getIterator()), |
252 | To: MBB.end()); |
253 | ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
254 | MBB.addSuccessor(Succ: LoopCmpMBB); |
255 | // Build loop. |
256 | MachineBasicBlock *CurrentMBB = LoopCmpMBB; |
257 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: LL, DestReg: Old).addReg(RegNo: RA).addReg(RegNo: RB); |
258 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::XOR8), DestReg: ScratchLo) |
259 | .addReg(RegNo: OldLo) |
260 | .addReg(RegNo: CmpLo); |
261 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::XOR8), DestReg: ScratchHi) |
262 | .addReg(RegNo: OldHi) |
263 | .addReg(RegNo: CmpHi); |
264 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::OR8_rec), DestReg: ScratchLo) |
265 | .addReg(RegNo: ScratchLo) |
266 | .addReg(RegNo: ScratchHi); |
267 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
268 | .addImm(Val: PPC::PRED_NE) |
269 | .addReg(RegNo: PPC::CR0) |
270 | .addMBB(MBB: ExitMBB); |
271 | CurrentMBB->addSuccessor(Succ: CmpSuccMBB); |
272 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
273 | // Build succ. |
274 | CurrentMBB = CmpSuccMBB; |
275 | PairedCopy(TII, MBB&: *CurrentMBB, MBBI: CurrentMBB->end(), DL, Dest0: ScratchHi, Dest1: ScratchLo, |
276 | Src0: NewHi, Src1: NewLo); |
277 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: SC).addReg(RegNo: Scratch).addReg(RegNo: RA).addReg(RegNo: RB); |
278 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
279 | .addImm(Val: PPC::PRED_NE) |
280 | .addReg(RegNo: PPC::CR0) |
281 | .addMBB(MBB: LoopCmpMBB); |
282 | CurrentMBB->addSuccessor(Succ: LoopCmpMBB); |
283 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
284 | |
285 | fullyRecomputeLiveIns(MBBs: {ExitMBB, CmpSuccMBB, LoopCmpMBB}); |
286 | NMBBI = MBB.end(); |
287 | MI.eraseFromParent(); |
288 | return true; |
289 | } |
290 | |
291 | } // namespace |
292 | |
293 | INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic" , |
294 | false, false) |
295 | |
296 | char PPCExpandAtomicPseudo::ID = 0; |
297 | FunctionPass *llvm::createPPCExpandAtomicPseudoPass() { |
298 | return new PPCExpandAtomicPseudo(); |
299 | } |
300 | |