//===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions after register allocation. Expanding this late means
// each LL/SC loop is treated as a single unit up to that point, which makes
// spills inside the LL/SC loop unlikely.
//
//===----------------------------------------------------------------------===//
14 | |
15 | #include "MCTargetDesc/PPCPredicates.h" |
16 | #include "PPC.h" |
17 | #include "PPCInstrInfo.h" |
18 | #include "PPCTargetMachine.h" |
19 | |
20 | #include "llvm/CodeGen/LivePhysRegs.h" |
21 | #include "llvm/CodeGen/MachineFunctionPass.h" |
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
23 | |
24 | using namespace llvm; |
25 | |
26 | #define DEBUG_TYPE "ppc-atomic-expand" |
27 | |
28 | namespace { |
29 | |
30 | class PPCExpandAtomicPseudo : public MachineFunctionPass { |
31 | public: |
32 | const PPCInstrInfo *TII; |
33 | const PPCRegisterInfo *TRI; |
34 | static char ID; |
35 | |
36 | PPCExpandAtomicPseudo() : MachineFunctionPass(ID) { |
37 | initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry()); |
38 | } |
39 | |
40 | bool runOnMachineFunction(MachineFunction &MF) override; |
41 | |
42 | private: |
43 | bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI, |
44 | MachineBasicBlock::iterator &NMBBI); |
45 | bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI, |
46 | MachineBasicBlock::iterator &NMBBI); |
47 | bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI, |
48 | MachineBasicBlock::iterator &NMBBI); |
49 | }; |
50 | |
51 | static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB, |
52 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, |
53 | Register Dest0, Register Dest1, Register Src0, |
54 | Register Src1) { |
55 | const MCInstrDesc &OR = TII->get(Opcode: PPC::OR8); |
56 | const MCInstrDesc &XOR = TII->get(Opcode: PPC::XOR8); |
57 | if (Dest0 == Src1 && Dest1 == Src0) { |
58 | // The most tricky case, swapping values. |
59 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest0).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
60 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest1).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
61 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest0).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
62 | } else if (Dest0 != Src0 || Dest1 != Src1) { |
63 | if (Dest0 == Src1 || Dest1 != Src0) { |
64 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest1).addReg(RegNo: Src1).addReg(RegNo: Src1); |
65 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest0).addReg(RegNo: Src0).addReg(RegNo: Src0); |
66 | } else { |
67 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest0).addReg(RegNo: Src0).addReg(RegNo: Src0); |
68 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest1).addReg(RegNo: Src1).addReg(RegNo: Src1); |
69 | } |
70 | } |
71 | } |
72 | |
73 | bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { |
74 | bool Changed = false; |
75 | TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
76 | TRI = &TII->getRegisterInfo(); |
77 | for (MachineBasicBlock &MBB : MF) { |
78 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); |
79 | MBBI != MBBE;) { |
80 | MachineInstr &MI = *MBBI; |
81 | MachineBasicBlock::iterator NMBBI = std::next(x: MBBI); |
82 | Changed |= expandMI(MBB, MI, NMBBI); |
83 | MBBI = NMBBI; |
84 | } |
85 | } |
86 | if (Changed) |
87 | MF.RenumberBlocks(); |
88 | return Changed; |
89 | } |
90 | |
91 | bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI, |
92 | MachineBasicBlock::iterator &NMBBI) { |
93 | switch (MI.getOpcode()) { |
94 | case PPC::ATOMIC_SWAP_I128: |
95 | case PPC::ATOMIC_LOAD_ADD_I128: |
96 | case PPC::ATOMIC_LOAD_SUB_I128: |
97 | case PPC::ATOMIC_LOAD_XOR_I128: |
98 | case PPC::ATOMIC_LOAD_NAND_I128: |
99 | case PPC::ATOMIC_LOAD_AND_I128: |
100 | case PPC::ATOMIC_LOAD_OR_I128: |
101 | return expandAtomicRMW128(MBB, MI, NMBBI); |
102 | case PPC::ATOMIC_CMP_SWAP_I128: |
103 | return expandAtomicCmpSwap128(MBB, MI, NMBBI); |
104 | case PPC::BUILD_QUADWORD: { |
105 | Register Dst = MI.getOperand(i: 0).getReg(); |
106 | Register DstHi = TRI->getSubReg(Reg: Dst, Idx: PPC::sub_gp8_x0); |
107 | Register DstLo = TRI->getSubReg(Reg: Dst, Idx: PPC::sub_gp8_x1); |
108 | Register Lo = MI.getOperand(i: 1).getReg(); |
109 | Register Hi = MI.getOperand(i: 2).getReg(); |
110 | PairedCopy(TII, MBB, MBBI: MI, DL: MI.getDebugLoc(), Dest0: DstHi, Dest1: DstLo, Src0: Hi, Src1: Lo); |
111 | MI.eraseFromParent(); |
112 | return true; |
113 | } |
114 | default: |
115 | return false; |
116 | } |
117 | } |
118 | |
119 | bool PPCExpandAtomicPseudo::expandAtomicRMW128( |
120 | MachineBasicBlock &MBB, MachineInstr &MI, |
121 | MachineBasicBlock::iterator &NMBBI) { |
122 | const MCInstrDesc &LL = TII->get(Opcode: PPC::LQARX); |
123 | const MCInstrDesc &SC = TII->get(Opcode: PPC::STQCX); |
124 | DebugLoc DL = MI.getDebugLoc(); |
125 | MachineFunction *MF = MBB.getParent(); |
126 | const BasicBlock *BB = MBB.getBasicBlock(); |
127 | // Create layout of control flow. |
128 | MachineFunction::iterator MFI = ++MBB.getIterator(); |
129 | MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB); |
130 | MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); |
131 | MF->insert(MBBI: MFI, MBB: LoopMBB); |
132 | MF->insert(MBBI: MFI, MBB: ExitMBB); |
133 | ExitMBB->splice(Where: ExitMBB->begin(), Other: &MBB, From: std::next(x: MI.getIterator()), |
134 | To: MBB.end()); |
135 | ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
136 | MBB.addSuccessor(Succ: LoopMBB); |
137 | |
138 | // For non-min/max operations, control flow is kinda like: |
139 | // MBB: |
140 | // ... |
141 | // LoopMBB: |
142 | // lqarx in, ptr |
143 | // addc out.sub_x1, in.sub_x1, op.sub_x1 |
144 | // adde out.sub_x0, in.sub_x0, op.sub_x0 |
145 | // stqcx out, ptr |
146 | // bne- LoopMBB |
147 | // ExitMBB: |
148 | // ... |
149 | Register Old = MI.getOperand(i: 0).getReg(); |
150 | Register OldHi = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x0); |
151 | Register OldLo = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x1); |
152 | Register Scratch = MI.getOperand(i: 1).getReg(); |
153 | Register ScratchHi = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x0); |
154 | Register ScratchLo = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x1); |
155 | Register RA = MI.getOperand(i: 2).getReg(); |
156 | Register RB = MI.getOperand(i: 3).getReg(); |
157 | Register IncrLo = MI.getOperand(i: 4).getReg(); |
158 | Register IncrHi = MI.getOperand(i: 5).getReg(); |
159 | unsigned RMWOpcode = MI.getOpcode(); |
160 | |
161 | MachineBasicBlock *CurrentMBB = LoopMBB; |
162 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: LL, DestReg: Old).addReg(RegNo: RA).addReg(RegNo: RB); |
163 | |
164 | switch (RMWOpcode) { |
165 | case PPC::ATOMIC_SWAP_I128: |
166 | PairedCopy(TII, MBB&: *CurrentMBB, MBBI: CurrentMBB->end(), DL, Dest0: ScratchHi, Dest1: ScratchLo, |
167 | Src0: IncrHi, Src1: IncrLo); |
168 | break; |
169 | case PPC::ATOMIC_LOAD_ADD_I128: |
170 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::ADDC8), DestReg: ScratchLo) |
171 | .addReg(RegNo: IncrLo) |
172 | .addReg(RegNo: OldLo); |
173 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::ADDE8), DestReg: ScratchHi) |
174 | .addReg(RegNo: IncrHi) |
175 | .addReg(RegNo: OldHi); |
176 | break; |
177 | case PPC::ATOMIC_LOAD_SUB_I128: |
178 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::SUBFC8), DestReg: ScratchLo) |
179 | .addReg(RegNo: IncrLo) |
180 | .addReg(RegNo: OldLo); |
181 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::SUBFE8), DestReg: ScratchHi) |
182 | .addReg(RegNo: IncrHi) |
183 | .addReg(RegNo: OldHi); |
184 | break; |
185 | |
186 | #define TRIVIAL_ATOMICRMW(Opcode, Instr) \ |
187 | case Opcode: \ |
188 | BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \ |
189 | .addReg(IncrLo) \ |
190 | .addReg(OldLo); \ |
191 | BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \ |
192 | .addReg(IncrHi) \ |
193 | .addReg(OldHi); \ |
194 | break |
195 | |
196 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8); |
197 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8); |
198 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8); |
199 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8); |
200 | #undef TRIVIAL_ATOMICRMW |
201 | default: |
202 | llvm_unreachable("Unhandled atomic RMW operation" ); |
203 | } |
204 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: SC).addReg(RegNo: Scratch).addReg(RegNo: RA).addReg(RegNo: RB); |
205 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
206 | .addImm(Val: PPC::PRED_NE) |
207 | .addReg(RegNo: PPC::CR0) |
208 | .addMBB(MBB: LoopMBB); |
209 | CurrentMBB->addSuccessor(Succ: LoopMBB); |
210 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
211 | fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopMBB}); |
212 | NMBBI = MBB.end(); |
213 | MI.eraseFromParent(); |
214 | return true; |
215 | } |
216 | |
217 | bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( |
218 | MachineBasicBlock &MBB, MachineInstr &MI, |
219 | MachineBasicBlock::iterator &NMBBI) { |
220 | const MCInstrDesc &LL = TII->get(Opcode: PPC::LQARX); |
221 | const MCInstrDesc &SC = TII->get(Opcode: PPC::STQCX); |
222 | DebugLoc DL = MI.getDebugLoc(); |
223 | MachineFunction *MF = MBB.getParent(); |
224 | const BasicBlock *BB = MBB.getBasicBlock(); |
225 | Register Old = MI.getOperand(i: 0).getReg(); |
226 | Register OldHi = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x0); |
227 | Register OldLo = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x1); |
228 | Register Scratch = MI.getOperand(i: 1).getReg(); |
229 | Register ScratchHi = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x0); |
230 | Register ScratchLo = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x1); |
231 | Register RA = MI.getOperand(i: 2).getReg(); |
232 | Register RB = MI.getOperand(i: 3).getReg(); |
233 | Register CmpLo = MI.getOperand(i: 4).getReg(); |
234 | Register CmpHi = MI.getOperand(i: 5).getReg(); |
235 | Register NewLo = MI.getOperand(i: 6).getReg(); |
236 | Register NewHi = MI.getOperand(i: 7).getReg(); |
237 | // Create layout of control flow. |
238 | // loop: |
239 | // old = lqarx ptr |
240 | // <compare old, cmp> |
241 | // bne 0, exit |
242 | // succ: |
243 | // stqcx new ptr |
244 | // bne 0, loop |
245 | // exit: |
246 | // .... |
247 | MachineFunction::iterator MFI = ++MBB.getIterator(); |
248 | MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); |
249 | MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); |
250 | MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); |
251 | MF->insert(MBBI: MFI, MBB: LoopCmpMBB); |
252 | MF->insert(MBBI: MFI, MBB: CmpSuccMBB); |
253 | MF->insert(MBBI: MFI, MBB: ExitMBB); |
254 | ExitMBB->splice(Where: ExitMBB->begin(), Other: &MBB, From: std::next(x: MI.getIterator()), |
255 | To: MBB.end()); |
256 | ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
257 | MBB.addSuccessor(Succ: LoopCmpMBB); |
258 | // Build loop. |
259 | MachineBasicBlock *CurrentMBB = LoopCmpMBB; |
260 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: LL, DestReg: Old).addReg(RegNo: RA).addReg(RegNo: RB); |
261 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::XOR8), DestReg: ScratchLo) |
262 | .addReg(RegNo: OldLo) |
263 | .addReg(RegNo: CmpLo); |
264 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::XOR8), DestReg: ScratchHi) |
265 | .addReg(RegNo: OldHi) |
266 | .addReg(RegNo: CmpHi); |
267 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::OR8_rec), DestReg: ScratchLo) |
268 | .addReg(RegNo: ScratchLo) |
269 | .addReg(RegNo: ScratchHi); |
270 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
271 | .addImm(Val: PPC::PRED_NE) |
272 | .addReg(RegNo: PPC::CR0) |
273 | .addMBB(MBB: ExitMBB); |
274 | CurrentMBB->addSuccessor(Succ: CmpSuccMBB); |
275 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
276 | // Build succ. |
277 | CurrentMBB = CmpSuccMBB; |
278 | PairedCopy(TII, MBB&: *CurrentMBB, MBBI: CurrentMBB->end(), DL, Dest0: ScratchHi, Dest1: ScratchLo, |
279 | Src0: NewHi, Src1: NewLo); |
280 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: SC).addReg(RegNo: Scratch).addReg(RegNo: RA).addReg(RegNo: RB); |
281 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
282 | .addImm(Val: PPC::PRED_NE) |
283 | .addReg(RegNo: PPC::CR0) |
284 | .addMBB(MBB: LoopCmpMBB); |
285 | CurrentMBB->addSuccessor(Succ: LoopCmpMBB); |
286 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
287 | |
288 | fullyRecomputeLiveIns(MBBs: {ExitMBB, CmpSuccMBB, LoopCmpMBB}); |
289 | NMBBI = MBB.end(); |
290 | MI.eraseFromParent(); |
291 | return true; |
292 | } |
293 | |
294 | } // namespace |
295 | |
296 | INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic" , |
297 | false, false) |
298 | |
299 | char PPCExpandAtomicPseudo::ID = 0; |
300 | FunctionPass *llvm::createPPCExpandAtomicPseudoPass() { |
301 | return new PPCExpandAtomicPseudo(); |
302 | } |
303 | |