//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISC-V atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
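  // Debug-only helper: total size in bytes of all instructions in MF. Used in
  // runOnMachineFunction to assert that expansion never produces more code
  // than the Size recorded on each pseudo promised.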
  unsigned getInstSizeInBytes(const MachineFunction &MF) const {
    unsigned Size = 0;
    for (auto &MBB : MF)
      for (auto &MI : MBB)
        Size += TII->getInstSizeInBytes(MI);
    return Size;
  }
#endif
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<RISCVSubtarget>();
  TII = STI->getInstrInfo();

#ifndef NDEBUG
  const unsigned OldSize = getInstSizeInBytes(MF);
#endif

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);

#ifndef NDEBUG
  const unsigned NewSize = getInstSizeInBytes(MF);
  assert(OldSize >= NewSize);
#endif
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

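// The helpers below map an IR AtomicOrdering onto the LR.W/LR.D and SC.W/SC.D
// variant to use, i.e. whether the .aq and/or .rl bits are set. When the Ztso
// extension is available, regular loads and stores are already totally
// ordered, so the extra ordering bits are only retained for the sequentially
// consistent case.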
static unsigned getLRForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQ_RL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQ_RL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getLRForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getLRForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getSCForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getSCForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected SC width\n");
}

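// Emit the LR/SC retry loop for an unmasked atomicrmw on a full 32- or 64-bit
// value. The pseudo's operands are (dest, scratch, addr, incr, ordering).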
static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width,
                                   const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

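// Emit the LR/SC retry loop for a part-word atomicrmw. The loop operates on
// the naturally aligned 32-bit word containing the target bytes; MaskReg
// selects the bits that actually belong to the operation, and only those bits
// are merged back into the word before the SC.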
static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                                         MachineInstr &MI, DebugLoc DL,
                                         MachineBasicBlock *ThisMBB,
                                         MachineBasicBlock *LoopMBB,
                                         MachineBasicBlock *DoneMBB,
                                         AtomicRMWInst::BinOp BinOp, int Width,
                                         const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
                           STI);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

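// Sign-extend the field held in ValReg in place: shift it left so its sign
// bit reaches the MSB, then arithmetic-shift it back down. ShamtReg holds the
// shift amount, which the masked min/max pseudos pass as an extra operand.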
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loophead
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
      .addReg(AddrReg)
      .addReg(Scratch1Reg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by modifying the
// branch within 'LoopHead' (which performs the same comparison). This is a
// valid transformation because after altering the LoopHead's BNE destination,
// the BNE following the cmpxchg becomes redundant and can be deleted. In the
// case of a masked cmpxchg, an appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;

  // Make sure the branch is the only user of the AND.
  if (MaskReg.isValid()) {
    if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
      return false;
    if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
      return false;
  }

  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}

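// Expand a (possibly masked) cmpxchg pseudo into a loophead block that loads
// and compares the current value, a looptail block that attempts the SC, and
// a done block. If a following BNE on the result was folded away above, the
// loophead branches directly to that BNE's target instead of to done.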
bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();

  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg,
                              MaskReg, LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

FunctionPass *llvm::createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}