//==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LL/SC block.
//
//===----------------------------------------------------------------------===//

#include "LoongArch.h"
#include "LoongArchInstrInfo.h"
#include "LoongArchTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME                                    \
  "LoongArch atomic pseudo instruction expansion pass"

namespace {

class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
public:
  const LoongArchInstrInfo *TII;
  static char ID;

  LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg128(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator &NextMBBI);
};

char LoongArchExpandAtomicPseudo::ID = 0;

bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII =
      static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

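// Expand a single atomic pseudo instruction. Each expansion helper splits the
// current block, so NextMBBI is updated to let expandMBB resume scanning after
// the newly created blocks.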
bool LoongArchExpandAtomicPseudo::expandMI(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  switch (MBBI->getOpcode()) {
  case LoongArch::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
                             NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case LoongArch::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadAnd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadOr32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
  case LoongArch::PseudoAtomicLoadXor32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
                                NextMBBI);
  case LoongArch::PseudoAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
                                NextMBBI);
  case LoongArch::PseudoAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
                                NextMBBI);
  case LoongArch::PseudoAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
                                NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case LoongArch::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case LoongArch::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case LoongArch::PseudoCmpXchg128:
  case LoongArch::PseudoCmpXchg128Acquire:
    return expandAtomicCmpXchg128(MBB, MBBI, NextMBBI);
  case LoongArch::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  }
  return false;
}

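// Emit the body of a plain (unmasked) LL/SC read-modify-write loop into
// LoopMBB: load the current value with ll.[w|d], compute the binop result into
// the scratch register, then attempt the store with sc.[w|d] and branch back
// to the start of the loop while the store-conditional fails (scratch == 0).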
static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
                                   MachineInstr &MI, DebugLoc DL,
                                   MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();

  // .loop:
  //   ll.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   beqz scratch, loop
  BuildMI(LoopMBB, DL,
          TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
      .addReg(AddrReg)
      .addImm(0);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(IncrReg)
        .addReg(LoongArch::R0);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
        .addReg(ScratchReg)
        .addReg(LoongArch::R0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::And:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Or:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Xor:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::XOR), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  }
  BuildMI(LoopMBB, DL,
          TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopMBB);
}

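// Merge NewValReg into OldValReg under MaskReg and write the result to
// DestReg: bits selected by the mask come from the new value, all other bits
// are preserved from the old value. ScratchReg is clobbered.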
static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // res = oldval ^ ((oldval ^ newval) & masktargetdata);
  BuildMI(MBB, DL, TII->get(LoongArch::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(LoongArch::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(LoongArch::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

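// Emit the LL/SC loop for a masked (sub-word) atomic binop. The pseudo
// operates on the aligned 32-bit word containing the element: the binop result
// is merged back into that word under the mask before the store-conditional,
// so bits outside the element are left untouched.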
static void doMaskedAtomicBinOpExpansion(
    const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();

  // .loop:
  //   ll.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   beqz scratch, loop
  BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
      .addReg(AddrReg)
      .addImm(0);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::ADDI_W), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
        .addReg(ScratchReg)
        .addReg(LoongArch::R0);
    // TODO: support other AtomicRMWInst.
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopMBB);
}

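// Split the block at the pseudo: everything after it is moved into a new
// DoneMBB, a single LoopMBB holds the LL/SC sequence (with a back-edge to
// itself), and the pseudo is erased once the expansion has been emitted.
// Live-in lists are recomputed for the new blocks, as required for blocks
// created this late in the pipeline.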
bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (IsMasked)
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width);
  else
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

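// Sign-extend the field held in ValReg in place: shift it left by ShamtReg so
// its sign bit lands in bit 31, then arithmetic-shift it back down. Used by
// the masked signed min/max expansions before the comparison.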
static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

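// Min/max cannot be expressed as a single ALU operation, so the loop needs a
// branch: .loophead loads the current value and jumps straight to .looptail
// when that value already satisfies the min/max condition; otherwise
// .loopifbody installs the incoming value (merged under the mask if needed)
// before the sc.w retry in .looptail.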
bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(IsMasked ? 3 : 2).getReg();
  Register IncrReg = MI.getOperand(IsMasked ? 4 : 3).getReg();
  Register CmprReg = DestReg;

  // .loophead:
  //   ll.w destreg, (alignedaddr)
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
      .addReg(AddrReg)
      .addImm(0);
  //   and cmpr, destreg, mask
  if (IsMasked) {
    Register MaskReg = MI.getOperand(5).getReg();
    CmprReg = MI.getOperand(2).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), CmprReg)
        .addReg(DestReg)
        .addReg(MaskReg);
  }
  //   move scratch, destreg
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), ScratchReg)
      .addReg(DestReg)
      .addReg(LoongArch::R0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  //   bgeu cmpr, incr, .looptail
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
        .addReg(CmprReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  //   bgeu incr, cmpr, .looptail
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
        .addReg(IncrReg)
        .addReg(CmprReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::Max:
    if (IsMasked)
      insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
    //   bge cmpr, incr, .looptail
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
        .addReg(CmprReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::Min:
    if (IsMasked)
      insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
    //   bge incr, cmpr, .looptail
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
        .addReg(IncrReg)
        .addReg(CmprReg)
        .addMBB(LoopTailMBB);
    break;
    // TODO: support other AtomicRMWInst.
  }

  // .loopifbody:
  if (IsMasked) {
    Register MaskReg = MI.getOperand(5).getReg();
    //   xor scratch, destreg, incr
    //   and scratch, scratch, mask
    //   xor scratch, destreg, scratch
    insertMaskedMerge(TII, DL, LoopIfBodyMBB, ScratchReg, DestReg, IncrReg,
                      MaskReg, ScratchReg);
  } else {
    //   move scratch, incr
    BuildMI(LoopIfBodyMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(IncrReg)
        .addReg(LoongArch::R0);
  }

  // .looptail:
  //   sc.w scratch, scratch, (addr)
  //   beqz scratch, loop
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

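// Expand a (possibly masked) 32/64-bit compare-and-swap: .loophead performs
// the LL and the comparison, .looptail attempts the SC and retries on failure,
// and .tail is the compare-failure path, which may need a barrier depending on
// the failure ordering encoded in the pseudo.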
bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
  MF->insert(++TailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(TailMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  TailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();

  if (!IsMasked) {
    // .loophead:
    //   ll.[w|d] dest, (addr)
    //   bne dest, cmpval, tail
    BuildMI(LoopHeadMBB, DL,
            TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(TailMBB);
    // .looptail:
    //   move scratch, newval
    //   sc.[w|d] scratch, scratch, (addr)
    //   beqz scratch, loophead
    //   b done
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(NewValReg)
        .addReg(LoongArch::R0);
    BuildMI(LoopTailMBB, DL,
            TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
            ScratchReg)
        .addReg(ScratchReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
        .addReg(ScratchReg)
        .addReg(LoongArch::R0)
        .addMBB(LoopHeadMBB);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
  } else {
    // .loophead:
    //   ll.[w|d] dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, tail
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL,
            TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(TailMBB);

    // .looptail:
    //   andn scratch, dest, mask
    //   or scratch, scratch, newval
    //   sc.[w|d] scratch, scratch, (addr)
    //   beqz scratch, loophead
    //   b done
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(ScratchReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL,
            TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
            ScratchReg)
        .addReg(ScratchReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
        .addReg(ScratchReg)
        .addReg(LoongArch::R0)
        .addMBB(LoopHeadMBB);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
  }

  AtomicOrdering FailureOrdering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
  int hint;

  switch (FailureOrdering) {
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    // acquire
    hint = 0b10100;
    break;
  default:
    hint = 0x700;
  }

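  // The failed-compare path only needs a real barrier when the failure
  // ordering asks for acquire semantics (hint 0b10100). Otherwise the weaker
  // dbar 0x700 hint is used, and even that can be dropped on subtargets with
  // the LD_SEQ_SA feature, which guarantees ordering of same-address loads
  // without an explicit barrier.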
  // .tail:
  //   dbar 0x700 | acquire
  if (!(hint == 0x700 && MF->getSubtarget<LoongArchSubtarget>().hasLD_SEQ_SA()))
    BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *TailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

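// Expand a 128-bit compare-and-swap using an ll.d/sc.q pair: ll.d loads the
// low 64 bits and establishes the reservation, ld.d loads the high 64 bits,
// and sc.q stores both new halves (the scratch register carries the low word,
// its second register operand the high word) only if the reservation still
// holds.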
bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
  MF->insert(++TailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(TailMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  TailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestLoReg = MI.getOperand(0).getReg();
  Register DestHiReg = MI.getOperand(1).getReg();
  Register ScratchReg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register CmpValLoReg = MI.getOperand(4).getReg();
  Register CmpValHiReg = MI.getOperand(5).getReg();
  Register NewValLoReg = MI.getOperand(6).getReg();
  Register NewValHiReg = MI.getOperand(7).getReg();

  // .loophead:
  //   ll.d res_lo, (addr)
  //   dbar acquire
  //   ld.d res_hi, (addr), 8
  //   bne dest_lo, cmpval_lo, tail
  //   bne dest_hi, cmpval_hi, tail
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_D), DestLoReg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0b10100);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LD_D), DestHiReg)
      .addReg(AddrReg)
      .addImm(8);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
      .addReg(DestLoReg)
      .addReg(CmpValLoReg)
      .addMBB(TailMBB);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
      .addReg(DestHiReg)
      .addReg(CmpValHiReg)
      .addMBB(TailMBB);
  // .looptail:
  //   move scratch, newval_lo
  //   sc.q scratch, newval_hi, (addr)
  //   beqz scratch, loophead
  //   b done
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
      .addReg(NewValLoReg)
      .addReg(LoongArch::R0);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_Q), ScratchReg)
      .addReg(ScratchReg)
      .addReg(NewValHiReg)
      .addReg(AddrReg);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopHeadMBB);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);

  int hint;
  switch (MI.getOpcode()) {
  case LoongArch::PseudoCmpXchg128Acquire:
    // acquire acqrel seqcst
    hint = 0b10100;
    break;
  case LoongArch::PseudoCmpXchg128:
    hint = 0x700;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  // .tail:
  //   dbar 0x700 | acquire
  if (!(hint == 0x700 && MF->getSubtarget<LoongArchSubtarget>().hasLD_SEQ_SA()))
    BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *TailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end namespace

INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
                LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

namespace llvm {

FunctionPass *createLoongArchExpandAtomicPseudoPass() {
  return new LoongArchExpandAtomicPseudo();
}

} // end namespace llvm