//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISC-V atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
    initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
  unsigned getInstSizeInBytes(const MachineFunction &MF) const {
    unsigned Size = 0;
    for (auto &MBB : MF)
      for (auto &MI : MBB)
        Size += TII->getInstSizeInBytes(MI);
    return Size;
  }
#endif
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<RISCVSubtarget>();
  TII = STI->getInstrInfo();

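  // Record the pre-expansion code size so the assertion below can check that
  // expanding a pseudo never produces more bytes than the Size its tablegen
  // definition already accounts for.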
#ifndef NDEBUG
  const unsigned OldSize = getInstSizeInBytes(MF);
#endif

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);

#ifndef NDEBUG
  const unsigned NewSize = getInstSizeInBytes(MF);
  assert(OldSize >= NewSize);
#endif
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

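// Map an IR AtomicOrdering onto the LR.W variant carrying the required
// acquire/release annotations. With the Ztso extension, memory accesses
// already execute under total store ordering, so the plain LR.W suffices for
// acquire and acquire-release orderings; sequentially consistent accesses
// conservatively keep the .aqrl form.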
static unsigned getLRForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQ_RL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQ_RL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_RL;
  }
}

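// Dispatch on the access width (32 or 64 bits) of the pseudo being expanded
// to pick the LR/SC opcode for the given ordering.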
static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getLRForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getLRForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getSCForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getSCForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected SC width\n");
}

static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width,
                                   const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

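// Emit DestReg = OldValReg ^ ((OldValReg ^ NewValReg) & MaskReg): take the
// bits selected by MaskReg from NewValReg and all remaining bits from
// OldValReg. With (hypothetical) values OldValReg = 0xAABBCCDD,
// NewValReg = 0x000000EE and MaskReg = 0x000000FF, the result is 0xAABBCCEE.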
static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

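// Expand a masked atomic binop. The pseudo operates on the naturally aligned
// 32-bit word containing the 8- or 16-bit field; AddrReg, IncrReg and MaskReg
// are expected to have been set up (aligned and shifted into position) by the
// IR-level atomic expansion.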
static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                                         MachineInstr &MI, DebugLoc DL,
                                         MachineBasicBlock *ThisMBB,
                                         MachineBasicBlock *LoopMBB,
                                         MachineBasicBlock *DoneMBB,
                                         AtomicRMWInst::BinOp BinOp, int Width,
                                         const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
                           STI);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

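  // This pass runs late, on physical registers, so the newly created blocks
  // need explicit live-in lists; recompute them from the surrounding code.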
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

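// Sign-extend the masked field held in ValReg in place by shifting it left
// and then arithmetically right by ShamtReg. ShamtReg is expected to carry
// the shift amount precomputed by the IR-level lowering so that the field's
// sign bit reaches the most significant bit before the arithmetic shift back.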
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

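// Expand a masked atomic min/max. Unlike the plain binops, min/max cannot be
// computed with a single arithmetic instruction, so the loop is split into a
// head that compares the current value against the increment and an if-body
// that is skipped when no update is needed.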
bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
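  // The signed variants carry an extra shift-amount operand (operand 6) used
  // below to sign-extend the masked field, which is why their ordering
  // immediate is operand 7 rather than operand 6.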

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
      .addReg(AddrReg)
      .addReg(Scratch1Reg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by
// modifying the branch within 'LoopHead' (which performs the same
// comparison). This is a valid transformation because after altering the
// LoopHead's BNE destination, the BNE following the cmpxchg becomes
// redundant and can be deleted. In the case of a masked cmpxchg, an
// appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;

  // Make sure the branch is the only user of the AND.
  if (MaskReg.isValid()) {
    if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
      return false;
    if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
      return false;
  }

  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}

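// Expand a (possibly masked) cmpxchg pseudo into a loophead/looptail/done
// block structure. If a following BNE on the comparison result was folded
// above, the loophead's failure branch targets that BNE's destination rather
// than DoneMBB.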
bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();

  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
                              LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

namespace llvm {

FunctionPass *createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}

} // end of namespace llvm