//===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions after register allocation. Because each LL/SC loop is
// kept as a single pseudo instruction until then, spilling is unlikely to
// happen inside the LL/SC loop.
//
//===----------------------------------------------------------------------===//
| 14 | |
| 15 | #include "MCTargetDesc/PPCPredicates.h" |
| 16 | #include "PPC.h" |
| 17 | #include "PPCInstrInfo.h" |
| 18 | |
| 19 | #include "llvm/CodeGen/LivePhysRegs.h" |
| 20 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 22 | |
| 23 | using namespace llvm; |
| 24 | |
| 25 | #define DEBUG_TYPE "ppc-atomic-expand" |
| 26 | |
| 27 | namespace { |
| 28 | |
| 29 | class PPCExpandAtomicPseudo : public MachineFunctionPass { |
| 30 | public: |
| 31 | const PPCInstrInfo *TII; |
| 32 | const PPCRegisterInfo *TRI; |
| 33 | static char ID; |
| 34 | |
| 35 | PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {} |
| 36 | |
| 37 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 38 | |
| 39 | private: |
| 40 | bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI, |
| 41 | MachineBasicBlock::iterator &NMBBI); |
| 42 | bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI, |
| 43 | MachineBasicBlock::iterator &NMBBI); |
| 44 | bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI, |
| 45 | MachineBasicBlock::iterator &NMBBI); |
| 46 | }; |
| 47 | |
| 48 | static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB, |
| 49 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, |
| 50 | Register Dest0, Register Dest1, Register Src0, |
| 51 | Register Src1) { |
| 52 | const MCInstrDesc &OR = TII->get(Opcode: PPC::OR8); |
| 53 | const MCInstrDesc &XOR = TII->get(Opcode: PPC::XOR8); |
| 54 | if (Dest0 == Src1 && Dest1 == Src0) { |
| 55 | // The most tricky case, swapping values. |
| 56 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest0).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
| 57 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest1).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
| 58 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: XOR, DestReg: Dest0).addReg(RegNo: Dest0).addReg(RegNo: Dest1); |
| 59 | } else if (Dest0 != Src0 || Dest1 != Src1) { |
| 60 | if (Dest0 == Src1 || Dest1 != Src0) { |
| 61 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest1).addReg(RegNo: Src1).addReg(RegNo: Src1); |
| 62 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest0).addReg(RegNo: Src0).addReg(RegNo: Src0); |
| 63 | } else { |
| 64 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest0).addReg(RegNo: Src0).addReg(RegNo: Src0); |
| 65 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: OR, DestReg: Dest1).addReg(RegNo: Src1).addReg(RegNo: Src1); |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { |
| 71 | bool Changed = false; |
| 72 | TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
| 73 | TRI = &TII->getRegisterInfo(); |
| 74 | for (MachineBasicBlock &MBB : MF) { |
| 75 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); |
| 76 | MBBI != MBBE;) { |
| 77 | MachineInstr &MI = *MBBI; |
| 78 | MachineBasicBlock::iterator NMBBI = std::next(x: MBBI); |
| 79 | Changed |= expandMI(MBB, MI, NMBBI); |
| 80 | MBBI = NMBBI; |
| 81 | } |
| 82 | } |
| 83 | if (Changed) |
| 84 | MF.RenumberBlocks(); |
| 85 | return Changed; |
| 86 | } |
| 87 | |
| 88 | bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI, |
| 89 | MachineBasicBlock::iterator &NMBBI) { |
| 90 | switch (MI.getOpcode()) { |
| 91 | case PPC::ATOMIC_SWAP_I128: |
| 92 | case PPC::ATOMIC_LOAD_ADD_I128: |
| 93 | case PPC::ATOMIC_LOAD_SUB_I128: |
| 94 | case PPC::ATOMIC_LOAD_XOR_I128: |
| 95 | case PPC::ATOMIC_LOAD_NAND_I128: |
| 96 | case PPC::ATOMIC_LOAD_AND_I128: |
| 97 | case PPC::ATOMIC_LOAD_OR_I128: |
| 98 | return expandAtomicRMW128(MBB, MI, NMBBI); |
| 99 | case PPC::ATOMIC_CMP_SWAP_I128: |
| 100 | return expandAtomicCmpSwap128(MBB, MI, NMBBI); |
| 101 | case PPC::BUILD_QUADWORD: { |
| 102 | Register Dst = MI.getOperand(i: 0).getReg(); |
| 103 | Register DstHi = TRI->getSubReg(Reg: Dst, Idx: PPC::sub_gp8_x0); |
| 104 | Register DstLo = TRI->getSubReg(Reg: Dst, Idx: PPC::sub_gp8_x1); |
| 105 | Register Lo = MI.getOperand(i: 1).getReg(); |
| 106 | Register Hi = MI.getOperand(i: 2).getReg(); |
| 107 | PairedCopy(TII, MBB, MBBI: MI, DL: MI.getDebugLoc(), Dest0: DstHi, Dest1: DstLo, Src0: Hi, Src1: Lo); |
| 108 | MI.eraseFromParent(); |
| 109 | return true; |
| 110 | } |
| 111 | default: |
| 112 | return false; |
| 113 | } |
| 114 | } |
| 115 | |
| 116 | bool PPCExpandAtomicPseudo::expandAtomicRMW128( |
| 117 | MachineBasicBlock &MBB, MachineInstr &MI, |
| 118 | MachineBasicBlock::iterator &NMBBI) { |
| 119 | const MCInstrDesc &LL = TII->get(Opcode: PPC::LQARX); |
| 120 | const MCInstrDesc &SC = TII->get(Opcode: PPC::STQCX); |
| 121 | DebugLoc DL = MI.getDebugLoc(); |
| 122 | MachineFunction *MF = MBB.getParent(); |
| 123 | const BasicBlock *BB = MBB.getBasicBlock(); |
| 124 | // Create layout of control flow. |
| 125 | MachineFunction::iterator MFI = ++MBB.getIterator(); |
| 126 | MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB); |
| 127 | MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); |
| 128 | MF->insert(MBBI: MFI, MBB: LoopMBB); |
| 129 | MF->insert(MBBI: MFI, MBB: ExitMBB); |
| 130 | ExitMBB->splice(Where: ExitMBB->begin(), Other: &MBB, From: std::next(x: MI.getIterator()), |
| 131 | To: MBB.end()); |
| 132 | ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
| 133 | MBB.addSuccessor(Succ: LoopMBB); |
| 134 | |
| 135 | // For non-min/max operations, control flow is kinda like: |
| 136 | // MBB: |
| 137 | // ... |
| 138 | // LoopMBB: |
| 139 | // lqarx in, ptr |
| 140 | // addc out.sub_x1, in.sub_x1, op.sub_x1 |
| 141 | // adde out.sub_x0, in.sub_x0, op.sub_x0 |
| 142 | // stqcx out, ptr |
| 143 | // bne- LoopMBB |
| 144 | // ExitMBB: |
| 145 | // ... |
| 146 | Register Old = MI.getOperand(i: 0).getReg(); |
| 147 | Register OldHi = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x0); |
| 148 | Register OldLo = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x1); |
| 149 | Register Scratch = MI.getOperand(i: 1).getReg(); |
| 150 | Register ScratchHi = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x0); |
| 151 | Register ScratchLo = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x1); |
| 152 | Register RA = MI.getOperand(i: 2).getReg(); |
| 153 | Register RB = MI.getOperand(i: 3).getReg(); |
| 154 | Register IncrLo = MI.getOperand(i: 4).getReg(); |
| 155 | Register IncrHi = MI.getOperand(i: 5).getReg(); |
| 156 | unsigned RMWOpcode = MI.getOpcode(); |
| 157 | |
| 158 | MachineBasicBlock *CurrentMBB = LoopMBB; |
| 159 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: LL, DestReg: Old).addReg(RegNo: RA).addReg(RegNo: RB); |
| 160 | |
| 161 | switch (RMWOpcode) { |
| 162 | case PPC::ATOMIC_SWAP_I128: |
| 163 | PairedCopy(TII, MBB&: *CurrentMBB, MBBI: CurrentMBB->end(), DL, Dest0: ScratchHi, Dest1: ScratchLo, |
| 164 | Src0: IncrHi, Src1: IncrLo); |
| 165 | break; |
| 166 | case PPC::ATOMIC_LOAD_ADD_I128: |
| 167 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::ADDC8), DestReg: ScratchLo) |
| 168 | .addReg(RegNo: IncrLo) |
| 169 | .addReg(RegNo: OldLo); |
| 170 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::ADDE8), DestReg: ScratchHi) |
| 171 | .addReg(RegNo: IncrHi) |
| 172 | .addReg(RegNo: OldHi); |
| 173 | break; |
| 174 | case PPC::ATOMIC_LOAD_SUB_I128: |
| 175 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::SUBFC8), DestReg: ScratchLo) |
| 176 | .addReg(RegNo: IncrLo) |
| 177 | .addReg(RegNo: OldLo); |
| 178 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::SUBFE8), DestReg: ScratchHi) |
| 179 | .addReg(RegNo: IncrHi) |
| 180 | .addReg(RegNo: OldHi); |
| 181 | break; |
| 182 | |
| 183 | #define TRIVIAL_ATOMICRMW(Opcode, Instr) \ |
| 184 | case Opcode: \ |
| 185 | BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \ |
| 186 | .addReg(IncrLo) \ |
| 187 | .addReg(OldLo); \ |
| 188 | BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \ |
| 189 | .addReg(IncrHi) \ |
| 190 | .addReg(OldHi); \ |
| 191 | break |
| 192 | |
| 193 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8); |
| 194 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8); |
| 195 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8); |
| 196 | TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8); |
| 197 | #undef TRIVIAL_ATOMICRMW |
| 198 | default: |
| 199 | llvm_unreachable("Unhandled atomic RMW operation" ); |
| 200 | } |
| 201 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: SC).addReg(RegNo: Scratch).addReg(RegNo: RA).addReg(RegNo: RB); |
| 202 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
| 203 | .addImm(Val: PPC::PRED_NE) |
| 204 | .addReg(RegNo: PPC::CR0) |
| 205 | .addMBB(MBB: LoopMBB); |
| 206 | CurrentMBB->addSuccessor(Succ: LoopMBB); |
| 207 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
| 208 | fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopMBB}); |
| 209 | NMBBI = MBB.end(); |
| 210 | MI.eraseFromParent(); |
| 211 | return true; |
| 212 | } |
| 213 | |
| 214 | bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( |
| 215 | MachineBasicBlock &MBB, MachineInstr &MI, |
| 216 | MachineBasicBlock::iterator &NMBBI) { |
| 217 | const MCInstrDesc &LL = TII->get(Opcode: PPC::LQARX); |
| 218 | const MCInstrDesc &SC = TII->get(Opcode: PPC::STQCX); |
| 219 | DebugLoc DL = MI.getDebugLoc(); |
| 220 | MachineFunction *MF = MBB.getParent(); |
| 221 | const BasicBlock *BB = MBB.getBasicBlock(); |
| 222 | Register Old = MI.getOperand(i: 0).getReg(); |
| 223 | Register OldHi = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x0); |
| 224 | Register OldLo = TRI->getSubReg(Reg: Old, Idx: PPC::sub_gp8_x1); |
| 225 | Register Scratch = MI.getOperand(i: 1).getReg(); |
| 226 | Register ScratchHi = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x0); |
| 227 | Register ScratchLo = TRI->getSubReg(Reg: Scratch, Idx: PPC::sub_gp8_x1); |
| 228 | Register RA = MI.getOperand(i: 2).getReg(); |
| 229 | Register RB = MI.getOperand(i: 3).getReg(); |
| 230 | Register CmpLo = MI.getOperand(i: 4).getReg(); |
| 231 | Register CmpHi = MI.getOperand(i: 5).getReg(); |
| 232 | Register NewLo = MI.getOperand(i: 6).getReg(); |
| 233 | Register NewHi = MI.getOperand(i: 7).getReg(); |
| 234 | // Create layout of control flow. |
| 235 | // loop: |
| 236 | // old = lqarx ptr |
| 237 | // <compare old, cmp> |
| 238 | // bne 0, exit |
| 239 | // succ: |
| 240 | // stqcx new ptr |
| 241 | // bne 0, loop |
| 242 | // exit: |
| 243 | // .... |
| 244 | MachineFunction::iterator MFI = ++MBB.getIterator(); |
| 245 | MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); |
| 246 | MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); |
| 247 | MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); |
| 248 | MF->insert(MBBI: MFI, MBB: LoopCmpMBB); |
| 249 | MF->insert(MBBI: MFI, MBB: CmpSuccMBB); |
| 250 | MF->insert(MBBI: MFI, MBB: ExitMBB); |
| 251 | ExitMBB->splice(Where: ExitMBB->begin(), Other: &MBB, From: std::next(x: MI.getIterator()), |
| 252 | To: MBB.end()); |
| 253 | ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
| 254 | MBB.addSuccessor(Succ: LoopCmpMBB); |
| 255 | // Build loop. |
| 256 | MachineBasicBlock *CurrentMBB = LoopCmpMBB; |
| 257 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: LL, DestReg: Old).addReg(RegNo: RA).addReg(RegNo: RB); |
| 258 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::XOR8), DestReg: ScratchLo) |
| 259 | .addReg(RegNo: OldLo) |
| 260 | .addReg(RegNo: CmpLo); |
| 261 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::XOR8), DestReg: ScratchHi) |
| 262 | .addReg(RegNo: OldHi) |
| 263 | .addReg(RegNo: CmpHi); |
| 264 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::OR8_rec), DestReg: ScratchLo) |
| 265 | .addReg(RegNo: ScratchLo) |
| 266 | .addReg(RegNo: ScratchHi); |
| 267 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
| 268 | .addImm(Val: PPC::PRED_NE) |
| 269 | .addReg(RegNo: PPC::CR0) |
| 270 | .addMBB(MBB: ExitMBB); |
| 271 | CurrentMBB->addSuccessor(Succ: CmpSuccMBB); |
| 272 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
| 273 | // Build succ. |
| 274 | CurrentMBB = CmpSuccMBB; |
| 275 | PairedCopy(TII, MBB&: *CurrentMBB, MBBI: CurrentMBB->end(), DL, Dest0: ScratchHi, Dest1: ScratchLo, |
| 276 | Src0: NewHi, Src1: NewLo); |
| 277 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: SC).addReg(RegNo: Scratch).addReg(RegNo: RA).addReg(RegNo: RB); |
| 278 | BuildMI(BB: CurrentMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC)) |
| 279 | .addImm(Val: PPC::PRED_NE) |
| 280 | .addReg(RegNo: PPC::CR0) |
| 281 | .addMBB(MBB: LoopCmpMBB); |
| 282 | CurrentMBB->addSuccessor(Succ: LoopCmpMBB); |
| 283 | CurrentMBB->addSuccessor(Succ: ExitMBB); |
| 284 | |
| 285 | fullyRecomputeLiveIns(MBBs: {ExitMBB, CmpSuccMBB, LoopCmpMBB}); |
| 286 | NMBBI = MBB.end(); |
| 287 | MI.eraseFromParent(); |
| 288 | return true; |
| 289 | } |
| 290 | |
| 291 | } // namespace |
| 292 | |
| 293 | INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic" , |
| 294 | false, false) |
| 295 | |
| 296 | char PPCExpandAtomicPseudo::ID = 0; |
| 297 | FunctionPass *llvm::createPPCExpandAtomicPseudoPass() { |
| 298 | return new PPCExpandAtomicPseudo(); |
| 299 | } |
| 300 | |