//==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LL/SC block.
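//
// For example, a 32-bit atomic NAND pseudo is rewritten into an LL/SC retry
// loop of roughly this shape (see doAtomicBinOpExpansion below):
//
//   .loop:
//     ll.w  dest, (addr)
//     and   scratch, dest, incr
//     nor   scratch, scratch, zero
//     sc.w  scratch, scratch, (addr)
//     beqz  scratch, .loop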
//
//===----------------------------------------------------------------------===//

#include "LoongArch.h"
#include "LoongArchInstrInfo.h"
#include "LoongArchTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME                                    \
  "LoongArch atomic pseudo instruction expansion pass"

namespace {

class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
public:
  const LoongArchInstrInfo *TII;
  static char ID;

  LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg128(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator &NextMBBI);
};

char LoongArchExpandAtomicPseudo::ID = 0;

bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII =
      static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool LoongArchExpandAtomicPseudo::expandMI(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  switch (MBBI->getOpcode()) {
  case LoongArch::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
                             NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case LoongArch::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadAnd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadOr32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
  case LoongArch::PseudoAtomicLoadXor32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
                             NextMBBI);
  case LoongArch::PseudoAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
                                NextMBBI);
  case LoongArch::PseudoAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
                                NextMBBI);
  case LoongArch::PseudoAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
                                NextMBBI);
  case LoongArch::PseudoAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
                                NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case LoongArch::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case LoongArch::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case LoongArch::PseudoCmpXchg128:
  case LoongArch::PseudoCmpXchg128Acquire:
    return expandAtomicCmpXchg128(MBB, MBBI, NextMBBI);
  case LoongArch::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case LoongArch::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  }
  return false;
}

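// Expand an unmasked AtomicRMW pseudo into a single LL/SC retry loop. The
// pseudo's operands are (dest, scratch, addr, incr); the loop is emitted into
// LoopMBB and branches back to itself until the SC succeeds.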
static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
                                   MachineInstr &MI, DebugLoc DL,
                                   MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();

  // .loop:
  // ll.[w|d] dest, (addr)
  // binop scratch, dest, val
  // sc.[w|d] scratch, scratch, (addr)
  // beqz scratch, loop
  BuildMI(LoopMBB, DL,
          TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
      .addReg(AddrReg)
      .addImm(0);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(IncrReg)
        .addReg(LoongArch::R0);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
        .addReg(ScratchReg)
        .addReg(LoongArch::R0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::And:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Or:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Xor:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::XOR), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  }
  BuildMI(LoopMBB, DL,
          TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopMBB);
}

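// Emit code that writes to DestReg the value of OldValReg with the bits
// selected by MaskReg replaced by the corresponding bits of NewValReg.
// ScratchReg is clobbered as a temporary.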
static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // res = oldval ^ ((oldval ^ newval) & masktargetdata);
  BuildMI(MBB, DL, TII->get(LoongArch::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(LoongArch::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(LoongArch::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

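// Expand a masked (sub-word) AtomicRMW pseudo into an LL/SC loop on the
// containing aligned 32-bit word. The pseudo's operands are (dest, scratch,
// addr, incr, mask); only the bits selected by mask are changed in memory.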
static void doMaskedAtomicBinOpExpansion(
    const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();

  // .loop:
  // ll.w destreg, (alignedaddr)
  // binop scratch, destreg, incr
  // xor scratch, destreg, scratch
  // and scratch, scratch, masktargetdata
  // xor scratch, destreg, scratch
  // sc.w scratch, scratch, (alignedaddr)
  // beqz scratch, loop
  BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
      .addReg(AddrReg)
      .addImm(0);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::ADDI_W), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
        .addReg(ScratchReg)
        .addReg(LoongArch::R0);
    // TODO: support other AtomicRMWInst.
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopMBB);
}

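// Split the block at the pseudo, create the LL/SC loop block and the done
// block, move the remaining instructions into the done block, and delegate to
// the masked or unmasked expansion helper above.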
bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (IsMasked)
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width);
  else
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

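// Sign-extend the sub-word value in ValReg in place: shift it left and then
// arithmetically right by ShamtReg so its sign bit is propagated before a
// signed comparison.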
static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

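// Expand a (possibly masked) atomic min/max pseudo. Unmasked pseudos carry
// (dest, scratch, addr, incr); masked pseudos additionally carry a compare
// scratch register at operand 2, the mask at operand 5 and, for the signed
// variants, a sign-extension shift amount at operand 6. The loop head
// branches straight to the loop tail when the old value should be kept,
// otherwise it falls through to the if-body that computes the replacement.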
bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(IsMasked ? 3 : 2).getReg();
  Register IncrReg = MI.getOperand(IsMasked ? 4 : 3).getReg();
  Register CmprReg = DestReg;

  //
  // .loophead:
  // ll.w destreg, (alignedaddr)
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
      .addReg(AddrReg)
      .addImm(0);
  // and cmpr, destreg, mask
  if (IsMasked) {
    Register MaskReg = MI.getOperand(5).getReg();
    CmprReg = MI.getOperand(2).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), CmprReg)
        .addReg(DestReg)
        .addReg(MaskReg);
  }
  // move scratch, destreg
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), ScratchReg)
      .addReg(DestReg)
      .addReg(LoongArch::R0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  // bgeu cmpr, incr, .looptail
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
        .addReg(CmprReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  // bgeu incr, cmpr, .looptail
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
        .addReg(IncrReg)
        .addReg(CmprReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::Max:
    if (IsMasked)
      insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
    // bge cmpr, incr, .looptail
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
        .addReg(CmprReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::Min:
    if (IsMasked)
      insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
    // bge incr, cmpr, .looptail
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
        .addReg(IncrReg)
        .addReg(CmprReg)
        .addMBB(LoopTailMBB);
    break;
    // TODO: support other AtomicRMWInst.
  }

  // .loopifbody:
  if (IsMasked) {
    Register MaskReg = MI.getOperand(5).getReg();
    // xor scratch, destreg, incr
    // and scratch, scratch, mask
    // xor scratch, destreg, scratch
    insertMaskedMerge(TII, DL, LoopIfBodyMBB, ScratchReg, DestReg, IncrReg,
                      MaskReg, ScratchReg);
  } else {
    // move scratch, incr
    BuildMI(LoopIfBodyMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(IncrReg)
        .addReg(LoongArch::R0);
  }

  // .looptail:
  // sc.w scratch, scratch, (addr)
  // beqz scratch, loop
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

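// Expand a 32/64-bit (possibly masked) compare-and-swap pseudo. Operands are
// (dest, scratch, addr, cmpval, newval[, mask]) followed by the failure
// ordering immediate, which selects the barrier emitted on the failure path.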
bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
  MF->insert(++TailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(TailMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  TailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();

  if (!IsMasked) {
    // .loophead:
    // ll.[w|d] dest, (addr)
    // bne dest, cmpval, tail
    BuildMI(LoopHeadMBB, DL,
            TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(TailMBB);
    // .looptail:
    // move scratch, newval
    // sc.[w|d] scratch, scratch, (addr)
    // beqz scratch, loophead
    // b done
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(NewValReg)
        .addReg(LoongArch::R0);
    BuildMI(LoopTailMBB, DL,
            TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
            ScratchReg)
        .addReg(ScratchReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
        .addReg(ScratchReg)
        .addReg(LoongArch::R0)
        .addMBB(LoopHeadMBB);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
  } else {
    // .loophead:
    // ll.[w|d] dest, (addr)
    // and scratch, dest, mask
    // bne scratch, cmpval, tail
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL,
            TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(TailMBB);

    // .looptail:
    // andn scratch, dest, mask
    // or scratch, scratch, newval
    // sc.[w|d] scratch, scratch, (addr)
    // beqz scratch, loophead
    // b done
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(ScratchReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL,
            TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
            ScratchReg)
        .addReg(ScratchReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
        .addReg(ScratchReg)
        .addReg(LoongArch::R0)
        .addMBB(LoopHeadMBB);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
  }

  AtomicOrdering FailureOrdering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
  int hint;

  switch (FailureOrdering) {
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    // acquire
    hint = 0b10100;
    break;
  default:
    hint = 0x700;
  }

  // .tail:
  // dbar 0x700 | acquire

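  // When only the load-load barrier (hint 0x700) is required and the
  // subtarget already orders same-address loads (LD_SEQ_SA), the barrier can
  // be omitted.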
  if (!(hint == 0x700 && MF->getSubtarget<LoongArchSubtarget>().hasLD_SEQ_SA()))
    BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *TailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

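// Expand a 128-bit compare-and-swap pseudo using an LL.D/SC.Q pair. Operands
// are (dest_lo, dest_hi, scratch, addr, cmpval_lo, cmpval_hi, newval_lo,
// newval_hi); the acquire variant uses the acquire DBAR hint on the failure
// path.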
bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
  MF->insert(++TailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(TailMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  TailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestLoReg = MI.getOperand(0).getReg();
  Register DestHiReg = MI.getOperand(1).getReg();
  Register ScratchReg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register CmpValLoReg = MI.getOperand(4).getReg();
  Register CmpValHiReg = MI.getOperand(5).getReg();
  Register NewValLoReg = MI.getOperand(6).getReg();
  Register NewValHiReg = MI.getOperand(7).getReg();

  // .loophead:
  // ll.d res_lo, (addr)
  // dbar acquire
  // ld.d res_hi, (addr), 8
  // bne dest_lo, cmpval_lo, tail
  // bne dest_hi, cmpval_hi, tail
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_D), DestLoReg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0b10100);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LD_D), DestHiReg)
      .addReg(AddrReg)
      .addImm(8);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
      .addReg(DestLoReg)
      .addReg(CmpValLoReg)
      .addMBB(TailMBB);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
      .addReg(DestHiReg)
      .addReg(CmpValHiReg)
      .addMBB(TailMBB);
  // .looptail:
  // move scratch, newval_lo
  // sc.q scratch, newval_hi, (addr)
  // beqz scratch, loophead
  // b done
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
      .addReg(NewValLoReg)
      .addReg(LoongArch::R0);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_Q), ScratchReg)
      .addReg(ScratchReg)
      .addReg(NewValHiReg)
      .addReg(AddrReg);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopHeadMBB);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
  int hint;

  switch (MI.getOpcode()) {
  case LoongArch::PseudoCmpXchg128Acquire:
    // acquire acqrel seqcst
    hint = 0b10100;
    break;
  case LoongArch::PseudoCmpXchg128:
    hint = 0x700;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  // .tail:
  // dbar 0x700 | acquire
  if (!(hint == 0x700 && MF->getSubtarget<LoongArchSubtarget>().hasLD_SEQ_SA()))
    BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *TailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end namespace

INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
                LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

namespace llvm {

FunctionPass *createLoongArchExpandAtomicPseudoPass() {
  return new LoongArchExpandAtomicPseudo();
}

} // end namespace llvm