1 | //==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that expands atomic pseudo instructions into |
10 | // target instructions. This pass should be run at the last possible moment, |
11 | // avoiding the possibility for other passes to break the requirements for |
12 | // forward progress in the LL/SC block. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "LoongArch.h" |
17 | #include "LoongArchInstrInfo.h" |
18 | #include "LoongArchTargetMachine.h" |
19 | |
20 | #include "llvm/CodeGen/LivePhysRegs.h" |
21 | #include "llvm/CodeGen/MachineFunctionPass.h" |
22 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
23 | |
24 | using namespace llvm; |
25 | |
26 | #define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME \ |
27 | "LoongArch atomic pseudo instruction expansion pass" |
28 | |
29 | namespace { |
30 | |
// Late machine-function pass that lowers the LoongArch atomic pseudo
// instructions (PseudoAtomic*, PseudoMaskedAtomic*, PseudoCmpXchg*) into
// real LL/SC instruction sequences.
class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
public:
  // Target instruction info, cached in runOnMachineFunction.
  const LoongArchInstrInfo *TII;
  static char ID;

  LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) {
    initializeLoongArchExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  // Expand all atomic pseudos in a single basic block. Returns true if the
  // block was modified.
  bool expandMBB(MachineBasicBlock &MBB);
  // Expand the pseudo at MBBI, if it is one we handle. NextMBBI is updated to
  // the iterator at which scanning should resume (expansion splits MBB).
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  // Expand a (possibly masked) atomic read-modify-write binary operation.
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  // Expand a masked atomic min/max/umin/umax operation.
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  // Expand a (possibly masked) compare-and-exchange.
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
};
62 | |
63 | char LoongArchExpandAtomicPseudo::ID = 0; |
64 | |
65 | bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { |
66 | TII = |
67 | static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
68 | bool Modified = false; |
69 | for (auto &MBB : MF) |
70 | Modified |= expandMBB(MBB); |
71 | return Modified; |
72 | } |
73 | |
74 | bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) { |
75 | bool Modified = false; |
76 | |
77 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
78 | while (MBBI != E) { |
79 | MachineBasicBlock::iterator NMBBI = std::next(x: MBBI); |
80 | Modified |= expandMI(MBB, MBBI, NextMBBI&: NMBBI); |
81 | MBBI = NMBBI; |
82 | } |
83 | |
84 | return Modified; |
85 | } |
86 | |
87 | bool LoongArchExpandAtomicPseudo::expandMI( |
88 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
89 | MachineBasicBlock::iterator &NextMBBI) { |
90 | switch (MBBI->getOpcode()) { |
91 | case LoongArch::PseudoMaskedAtomicSwap32: |
92 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, IsMasked: true, Width: 32, |
93 | NextMBBI); |
94 | case LoongArch::PseudoAtomicSwap32: |
95 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, IsMasked: false, Width: 32, |
96 | NextMBBI); |
97 | case LoongArch::PseudoMaskedAtomicLoadAdd32: |
98 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, IsMasked: true, Width: 32, NextMBBI); |
99 | case LoongArch::PseudoMaskedAtomicLoadSub32: |
100 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, IsMasked: true, Width: 32, NextMBBI); |
101 | case LoongArch::PseudoAtomicLoadNand32: |
102 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, IsMasked: false, Width: 32, |
103 | NextMBBI); |
104 | case LoongArch::PseudoAtomicLoadNand64: |
105 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, IsMasked: false, Width: 64, |
106 | NextMBBI); |
107 | case LoongArch::PseudoMaskedAtomicLoadNand32: |
108 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, IsMasked: true, Width: 32, |
109 | NextMBBI); |
110 | case LoongArch::PseudoAtomicLoadAdd32: |
111 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, IsMasked: false, Width: 32, |
112 | NextMBBI); |
113 | case LoongArch::PseudoAtomicLoadSub32: |
114 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, IsMasked: false, Width: 32, |
115 | NextMBBI); |
116 | case LoongArch::PseudoAtomicLoadAnd32: |
117 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, IsMasked: false, Width: 32, |
118 | NextMBBI); |
119 | case LoongArch::PseudoAtomicLoadOr32: |
120 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, IsMasked: false, Width: 32, NextMBBI); |
121 | case LoongArch::PseudoAtomicLoadXor32: |
122 | return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, IsMasked: false, Width: 32, |
123 | NextMBBI); |
124 | case LoongArch::PseudoMaskedAtomicLoadUMax32: |
125 | return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, IsMasked: true, Width: 32, |
126 | NextMBBI); |
127 | case LoongArch::PseudoMaskedAtomicLoadUMin32: |
128 | return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, IsMasked: true, Width: 32, |
129 | NextMBBI); |
130 | case LoongArch::PseudoCmpXchg32: |
131 | return expandAtomicCmpXchg(MBB, MBBI, IsMasked: false, Width: 32, NextMBBI); |
132 | case LoongArch::PseudoCmpXchg64: |
133 | return expandAtomicCmpXchg(MBB, MBBI, IsMasked: false, Width: 64, NextMBBI); |
134 | case LoongArch::PseudoMaskedCmpXchg32: |
135 | return expandAtomicCmpXchg(MBB, MBBI, IsMasked: true, Width: 32, NextMBBI); |
136 | case LoongArch::PseudoMaskedAtomicLoadMax32: |
137 | return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, IsMasked: true, Width: 32, |
138 | NextMBBI); |
139 | case LoongArch::PseudoMaskedAtomicLoadMin32: |
140 | return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, IsMasked: true, Width: 32, |
141 | NextMBBI); |
142 | } |
143 | return false; |
144 | } |
145 | |
146 | static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII, |
147 | MachineInstr &MI, DebugLoc DL, |
148 | MachineBasicBlock *ThisMBB, |
149 | MachineBasicBlock *LoopMBB, |
150 | MachineBasicBlock *DoneMBB, |
151 | AtomicRMWInst::BinOp BinOp, int Width) { |
152 | Register DestReg = MI.getOperand(i: 0).getReg(); |
153 | Register ScratchReg = MI.getOperand(i: 1).getReg(); |
154 | Register AddrReg = MI.getOperand(i: 2).getReg(); |
155 | Register IncrReg = MI.getOperand(i: 3).getReg(); |
156 | |
157 | // .loop: |
158 | // ll.[w|d] dest, (addr) |
159 | // binop scratch, dest, val |
160 | // sc.[w|d] scratch, scratch, (addr) |
161 | // beqz scratch, loop |
162 | BuildMI(BB: LoopMBB, MIMD: DL, |
163 | MCID: TII->get(Opcode: Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg) |
164 | .addReg(RegNo: AddrReg) |
165 | .addImm(Val: 0); |
166 | switch (BinOp) { |
167 | default: |
168 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
169 | case AtomicRMWInst::Xchg: |
170 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::OR), DestReg: ScratchReg) |
171 | .addReg(RegNo: IncrReg) |
172 | .addReg(RegNo: LoongArch::R0); |
173 | break; |
174 | case AtomicRMWInst::Nand: |
175 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::AND), DestReg: ScratchReg) |
176 | .addReg(RegNo: DestReg) |
177 | .addReg(RegNo: IncrReg); |
178 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::NOR), DestReg: ScratchReg) |
179 | .addReg(RegNo: ScratchReg) |
180 | .addReg(RegNo: LoongArch::R0); |
181 | break; |
182 | case AtomicRMWInst::Add: |
183 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADD_W), DestReg: ScratchReg) |
184 | .addReg(RegNo: DestReg) |
185 | .addReg(RegNo: IncrReg); |
186 | break; |
187 | case AtomicRMWInst::Sub: |
188 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUB_W), DestReg: ScratchReg) |
189 | .addReg(RegNo: DestReg) |
190 | .addReg(RegNo: IncrReg); |
191 | break; |
192 | case AtomicRMWInst::And: |
193 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::AND), DestReg: ScratchReg) |
194 | .addReg(RegNo: DestReg) |
195 | .addReg(RegNo: IncrReg); |
196 | break; |
197 | case AtomicRMWInst::Or: |
198 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::OR), DestReg: ScratchReg) |
199 | .addReg(RegNo: DestReg) |
200 | .addReg(RegNo: IncrReg); |
201 | break; |
202 | case AtomicRMWInst::Xor: |
203 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XOR), DestReg: ScratchReg) |
204 | .addReg(RegNo: DestReg) |
205 | .addReg(RegNo: IncrReg); |
206 | break; |
207 | } |
208 | BuildMI(BB: LoopMBB, MIMD: DL, |
209 | MCID: TII->get(Opcode: Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), DestReg: ScratchReg) |
210 | .addReg(RegNo: ScratchReg) |
211 | .addReg(RegNo: AddrReg) |
212 | .addImm(Val: 0); |
213 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BEQZ)) |
214 | .addReg(RegNo: ScratchReg) |
215 | .addMBB(MBB: LoopMBB); |
216 | } |
217 | |
218 | static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL, |
219 | MachineBasicBlock *MBB, Register DestReg, |
220 | Register OldValReg, Register NewValReg, |
221 | Register MaskReg, Register ScratchReg) { |
222 | assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique" ); |
223 | assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique" ); |
224 | assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique" ); |
225 | |
226 | // res = oldval ^ ((oldval ^ newval) & masktargetdata); |
227 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XOR), DestReg: ScratchReg) |
228 | .addReg(RegNo: OldValReg) |
229 | .addReg(RegNo: NewValReg); |
230 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::AND), DestReg: ScratchReg) |
231 | .addReg(RegNo: ScratchReg) |
232 | .addReg(RegNo: MaskReg); |
233 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XOR), DestReg) |
234 | .addReg(RegNo: OldValReg) |
235 | .addReg(RegNo: ScratchReg); |
236 | } |
237 | |
238 | static void doMaskedAtomicBinOpExpansion( |
239 | const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL, |
240 | MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, |
241 | MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { |
242 | assert(Width == 32 && "Should never need to expand masked 64-bit operations" ); |
243 | Register DestReg = MI.getOperand(i: 0).getReg(); |
244 | Register ScratchReg = MI.getOperand(i: 1).getReg(); |
245 | Register AddrReg = MI.getOperand(i: 2).getReg(); |
246 | Register IncrReg = MI.getOperand(i: 3).getReg(); |
247 | Register MaskReg = MI.getOperand(i: 4).getReg(); |
248 | |
249 | // .loop: |
250 | // ll.w destreg, (alignedaddr) |
251 | // binop scratch, destreg, incr |
252 | // xor scratch, destreg, scratch |
253 | // and scratch, scratch, masktargetdata |
254 | // xor scratch, destreg, scratch |
255 | // sc.w scratch, scratch, (alignedaddr) |
256 | // beqz scratch, loop |
257 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::LL_W), DestReg) |
258 | .addReg(RegNo: AddrReg) |
259 | .addImm(Val: 0); |
260 | switch (BinOp) { |
261 | default: |
262 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
263 | case AtomicRMWInst::Xchg: |
264 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: ScratchReg) |
265 | .addReg(RegNo: IncrReg) |
266 | .addImm(Val: 0); |
267 | break; |
268 | case AtomicRMWInst::Add: |
269 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADD_W), DestReg: ScratchReg) |
270 | .addReg(RegNo: DestReg) |
271 | .addReg(RegNo: IncrReg); |
272 | break; |
273 | case AtomicRMWInst::Sub: |
274 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUB_W), DestReg: ScratchReg) |
275 | .addReg(RegNo: DestReg) |
276 | .addReg(RegNo: IncrReg); |
277 | break; |
278 | case AtomicRMWInst::Nand: |
279 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::AND), DestReg: ScratchReg) |
280 | .addReg(RegNo: DestReg) |
281 | .addReg(RegNo: IncrReg); |
282 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::NOR), DestReg: ScratchReg) |
283 | .addReg(RegNo: ScratchReg) |
284 | .addReg(RegNo: LoongArch::R0); |
285 | // TODO: support other AtomicRMWInst. |
286 | } |
287 | |
288 | insertMaskedMerge(TII, DL, MBB: LoopMBB, DestReg: ScratchReg, OldValReg: DestReg, NewValReg: ScratchReg, MaskReg, |
289 | ScratchReg); |
290 | |
291 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SC_W), DestReg: ScratchReg) |
292 | .addReg(RegNo: ScratchReg) |
293 | .addReg(RegNo: AddrReg) |
294 | .addImm(Val: 0); |
295 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BEQZ)) |
296 | .addReg(RegNo: ScratchReg) |
297 | .addMBB(MBB: LoopMBB); |
298 | } |
299 | |
300 | bool LoongArchExpandAtomicPseudo::expandAtomicBinOp( |
301 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
302 | AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, |
303 | MachineBasicBlock::iterator &NextMBBI) { |
304 | MachineInstr &MI = *MBBI; |
305 | DebugLoc DL = MI.getDebugLoc(); |
306 | |
307 | MachineFunction *MF = MBB.getParent(); |
308 | auto LoopMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
309 | auto DoneMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
310 | |
311 | // Insert new MBBs. |
312 | MF->insert(MBBI: ++MBB.getIterator(), MBB: LoopMBB); |
313 | MF->insert(MBBI: ++LoopMBB->getIterator(), MBB: DoneMBB); |
314 | |
315 | // Set up successors and transfer remaining instructions to DoneMBB. |
316 | LoopMBB->addSuccessor(Succ: LoopMBB); |
317 | LoopMBB->addSuccessor(Succ: DoneMBB); |
318 | DoneMBB->splice(Where: DoneMBB->end(), Other: &MBB, From: MI, To: MBB.end()); |
319 | DoneMBB->transferSuccessors(FromMBB: &MBB); |
320 | MBB.addSuccessor(Succ: LoopMBB); |
321 | |
322 | if (IsMasked) |
323 | doMaskedAtomicBinOpExpansion(TII, MI, DL, ThisMBB: &MBB, LoopMBB, DoneMBB, BinOp, |
324 | Width); |
325 | else |
326 | doAtomicBinOpExpansion(TII, MI, DL, ThisMBB: &MBB, LoopMBB, DoneMBB, BinOp, Width); |
327 | |
328 | NextMBBI = MBB.end(); |
329 | MI.eraseFromParent(); |
330 | |
331 | LivePhysRegs LiveRegs; |
332 | computeAndAddLiveIns(LiveRegs, MBB&: *LoopMBB); |
333 | computeAndAddLiveIns(LiveRegs, MBB&: *DoneMBB); |
334 | |
335 | return true; |
336 | } |
337 | |
338 | static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL, |
339 | MachineBasicBlock *MBB, Register ValReg, |
340 | Register ShamtReg) { |
341 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SLL_W), DestReg: ValReg) |
342 | .addReg(RegNo: ValReg) |
343 | .addReg(RegNo: ShamtReg); |
344 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SRA_W), DestReg: ValReg) |
345 | .addReg(RegNo: ValReg) |
346 | .addReg(RegNo: ShamtReg); |
347 | } |
348 | |
349 | bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( |
350 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
351 | AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, |
352 | MachineBasicBlock::iterator &NextMBBI) { |
353 | assert(IsMasked == true && |
354 | "Should only need to expand masked atomic max/min" ); |
355 | assert(Width == 32 && "Should never need to expand masked 64-bit operations" ); |
356 | |
357 | MachineInstr &MI = *MBBI; |
358 | DebugLoc DL = MI.getDebugLoc(); |
359 | MachineFunction *MF = MBB.getParent(); |
360 | auto LoopHeadMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
361 | auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
362 | auto LoopTailMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
363 | auto DoneMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
364 | |
365 | // Insert new MBBs. |
366 | MF->insert(MBBI: ++MBB.getIterator(), MBB: LoopHeadMBB); |
367 | MF->insert(MBBI: ++LoopHeadMBB->getIterator(), MBB: LoopIfBodyMBB); |
368 | MF->insert(MBBI: ++LoopIfBodyMBB->getIterator(), MBB: LoopTailMBB); |
369 | MF->insert(MBBI: ++LoopTailMBB->getIterator(), MBB: DoneMBB); |
370 | |
371 | // Set up successors and transfer remaining instructions to DoneMBB. |
372 | LoopHeadMBB->addSuccessor(Succ: LoopIfBodyMBB); |
373 | LoopHeadMBB->addSuccessor(Succ: LoopTailMBB); |
374 | LoopIfBodyMBB->addSuccessor(Succ: LoopTailMBB); |
375 | LoopTailMBB->addSuccessor(Succ: LoopHeadMBB); |
376 | LoopTailMBB->addSuccessor(Succ: DoneMBB); |
377 | DoneMBB->splice(Where: DoneMBB->end(), Other: &MBB, From: MI, To: MBB.end()); |
378 | DoneMBB->transferSuccessors(FromMBB: &MBB); |
379 | MBB.addSuccessor(Succ: LoopHeadMBB); |
380 | |
381 | Register DestReg = MI.getOperand(i: 0).getReg(); |
382 | Register Scratch1Reg = MI.getOperand(i: 1).getReg(); |
383 | Register Scratch2Reg = MI.getOperand(i: 2).getReg(); |
384 | Register AddrReg = MI.getOperand(i: 3).getReg(); |
385 | Register IncrReg = MI.getOperand(i: 4).getReg(); |
386 | Register MaskReg = MI.getOperand(i: 5).getReg(); |
387 | |
388 | // |
389 | // .loophead: |
390 | // ll.w destreg, (alignedaddr) |
391 | // and scratch2, destreg, mask |
392 | // move scratch1, destreg |
393 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::LL_W), DestReg) |
394 | .addReg(RegNo: AddrReg) |
395 | .addImm(Val: 0); |
396 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::AND), DestReg: Scratch2Reg) |
397 | .addReg(RegNo: DestReg) |
398 | .addReg(RegNo: MaskReg); |
399 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::OR), DestReg: Scratch1Reg) |
400 | .addReg(RegNo: DestReg) |
401 | .addReg(RegNo: LoongArch::R0); |
402 | |
403 | switch (BinOp) { |
404 | default: |
405 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
406 | // bgeu scratch2, incr, .looptail |
407 | case AtomicRMWInst::UMax: |
408 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BGEU)) |
409 | .addReg(RegNo: Scratch2Reg) |
410 | .addReg(RegNo: IncrReg) |
411 | .addMBB(MBB: LoopTailMBB); |
412 | break; |
413 | // bgeu incr, scratch2, .looptail |
414 | case AtomicRMWInst::UMin: |
415 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BGEU)) |
416 | .addReg(RegNo: IncrReg) |
417 | .addReg(RegNo: Scratch2Reg) |
418 | .addMBB(MBB: LoopTailMBB); |
419 | break; |
420 | case AtomicRMWInst::Max: |
421 | insertSext(TII, DL, MBB: LoopHeadMBB, ValReg: Scratch2Reg, ShamtReg: MI.getOperand(i: 6).getReg()); |
422 | // bge scratch2, incr, .looptail |
423 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BGE)) |
424 | .addReg(RegNo: Scratch2Reg) |
425 | .addReg(RegNo: IncrReg) |
426 | .addMBB(MBB: LoopTailMBB); |
427 | break; |
428 | case AtomicRMWInst::Min: |
429 | insertSext(TII, DL, MBB: LoopHeadMBB, ValReg: Scratch2Reg, ShamtReg: MI.getOperand(i: 6).getReg()); |
430 | // bge incr, scratch2, .looptail |
431 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BGE)) |
432 | .addReg(RegNo: IncrReg) |
433 | .addReg(RegNo: Scratch2Reg) |
434 | .addMBB(MBB: LoopTailMBB); |
435 | break; |
436 | // TODO: support other AtomicRMWInst. |
437 | } |
438 | |
439 | // .loopifbody: |
440 | // xor scratch1, destreg, incr |
441 | // and scratch1, scratch1, mask |
442 | // xor scratch1, destreg, scratch1 |
443 | insertMaskedMerge(TII, DL, MBB: LoopIfBodyMBB, DestReg: Scratch1Reg, OldValReg: DestReg, NewValReg: IncrReg, |
444 | MaskReg, ScratchReg: Scratch1Reg); |
445 | |
446 | // .looptail: |
447 | // sc.w scratch1, scratch1, (addr) |
448 | // beqz scratch1, loop |
449 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SC_W), DestReg: Scratch1Reg) |
450 | .addReg(RegNo: Scratch1Reg) |
451 | .addReg(RegNo: AddrReg) |
452 | .addImm(Val: 0); |
453 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BEQZ)) |
454 | .addReg(RegNo: Scratch1Reg) |
455 | .addMBB(MBB: LoopHeadMBB); |
456 | |
457 | NextMBBI = MBB.end(); |
458 | MI.eraseFromParent(); |
459 | |
460 | LivePhysRegs LiveRegs; |
461 | computeAndAddLiveIns(LiveRegs, MBB&: *LoopHeadMBB); |
462 | computeAndAddLiveIns(LiveRegs, MBB&: *LoopIfBodyMBB); |
463 | computeAndAddLiveIns(LiveRegs, MBB&: *LoopTailMBB); |
464 | computeAndAddLiveIns(LiveRegs, MBB&: *DoneMBB); |
465 | |
466 | return true; |
467 | } |
468 | |
469 | bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( |
470 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked, |
471 | int Width, MachineBasicBlock::iterator &NextMBBI) { |
472 | MachineInstr &MI = *MBBI; |
473 | DebugLoc DL = MI.getDebugLoc(); |
474 | MachineFunction *MF = MBB.getParent(); |
475 | auto LoopHeadMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
476 | auto LoopTailMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
477 | auto TailMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
478 | auto DoneMBB = MF->CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
479 | |
480 | // Insert new MBBs. |
481 | MF->insert(MBBI: ++MBB.getIterator(), MBB: LoopHeadMBB); |
482 | MF->insert(MBBI: ++LoopHeadMBB->getIterator(), MBB: LoopTailMBB); |
483 | MF->insert(MBBI: ++LoopTailMBB->getIterator(), MBB: TailMBB); |
484 | MF->insert(MBBI: ++TailMBB->getIterator(), MBB: DoneMBB); |
485 | |
486 | // Set up successors and transfer remaining instructions to DoneMBB. |
487 | LoopHeadMBB->addSuccessor(Succ: LoopTailMBB); |
488 | LoopHeadMBB->addSuccessor(Succ: TailMBB); |
489 | LoopTailMBB->addSuccessor(Succ: DoneMBB); |
490 | LoopTailMBB->addSuccessor(Succ: LoopHeadMBB); |
491 | TailMBB->addSuccessor(Succ: DoneMBB); |
492 | DoneMBB->splice(Where: DoneMBB->end(), Other: &MBB, From: MI, To: MBB.end()); |
493 | DoneMBB->transferSuccessors(FromMBB: &MBB); |
494 | MBB.addSuccessor(Succ: LoopHeadMBB); |
495 | |
496 | Register DestReg = MI.getOperand(i: 0).getReg(); |
497 | Register ScratchReg = MI.getOperand(i: 1).getReg(); |
498 | Register AddrReg = MI.getOperand(i: 2).getReg(); |
499 | Register CmpValReg = MI.getOperand(i: 3).getReg(); |
500 | Register NewValReg = MI.getOperand(i: 4).getReg(); |
501 | |
502 | if (!IsMasked) { |
503 | // .loophead: |
504 | // ll.[w|d] dest, (addr) |
505 | // bne dest, cmpval, tail |
506 | BuildMI(BB: LoopHeadMBB, MIMD: DL, |
507 | MCID: TII->get(Opcode: Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg) |
508 | .addReg(RegNo: AddrReg) |
509 | .addImm(Val: 0); |
510 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BNE)) |
511 | .addReg(RegNo: DestReg) |
512 | .addReg(RegNo: CmpValReg) |
513 | .addMBB(MBB: TailMBB); |
514 | // .looptail: |
515 | // move scratch, newval |
516 | // sc.[w|d] scratch, scratch, (addr) |
517 | // beqz scratch, loophead |
518 | // b done |
519 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::OR), DestReg: ScratchReg) |
520 | .addReg(RegNo: NewValReg) |
521 | .addReg(RegNo: LoongArch::R0); |
522 | BuildMI(BB: LoopTailMBB, MIMD: DL, |
523 | MCID: TII->get(Opcode: Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), |
524 | DestReg: ScratchReg) |
525 | .addReg(RegNo: ScratchReg) |
526 | .addReg(RegNo: AddrReg) |
527 | .addImm(Val: 0); |
528 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BEQZ)) |
529 | .addReg(RegNo: ScratchReg) |
530 | .addMBB(MBB: LoopHeadMBB); |
531 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::B)).addMBB(MBB: DoneMBB); |
532 | } else { |
533 | // .loophead: |
534 | // ll.[w|d] dest, (addr) |
535 | // and scratch, dest, mask |
536 | // bne scratch, cmpval, tail |
537 | Register MaskReg = MI.getOperand(i: 5).getReg(); |
538 | BuildMI(BB: LoopHeadMBB, MIMD: DL, |
539 | MCID: TII->get(Opcode: Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg) |
540 | .addReg(RegNo: AddrReg) |
541 | .addImm(Val: 0); |
542 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::AND), DestReg: ScratchReg) |
543 | .addReg(RegNo: DestReg) |
544 | .addReg(RegNo: MaskReg); |
545 | BuildMI(BB: LoopHeadMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BNE)) |
546 | .addReg(RegNo: ScratchReg) |
547 | .addReg(RegNo: CmpValReg) |
548 | .addMBB(MBB: TailMBB); |
549 | |
550 | // .looptail: |
551 | // andn scratch, dest, mask |
552 | // or scratch, scratch, newval |
553 | // sc.[w|d] scratch, scratch, (addr) |
554 | // beqz scratch, loophead |
555 | // b done |
556 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ANDN), DestReg: ScratchReg) |
557 | .addReg(RegNo: DestReg) |
558 | .addReg(RegNo: MaskReg); |
559 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::OR), DestReg: ScratchReg) |
560 | .addReg(RegNo: ScratchReg) |
561 | .addReg(RegNo: NewValReg); |
562 | BuildMI(BB: LoopTailMBB, MIMD: DL, |
563 | MCID: TII->get(Opcode: Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), |
564 | DestReg: ScratchReg) |
565 | .addReg(RegNo: ScratchReg) |
566 | .addReg(RegNo: AddrReg) |
567 | .addImm(Val: 0); |
568 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BEQZ)) |
569 | .addReg(RegNo: ScratchReg) |
570 | .addMBB(MBB: LoopHeadMBB); |
571 | BuildMI(BB: LoopTailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::B)).addMBB(MBB: DoneMBB); |
572 | } |
573 | |
574 | AtomicOrdering FailureOrdering = |
575 | static_cast<AtomicOrdering>(MI.getOperand(i: IsMasked ? 6 : 5).getImm()); |
576 | int hint; |
577 | |
578 | switch (FailureOrdering) { |
579 | case AtomicOrdering::Acquire: |
580 | case AtomicOrdering::AcquireRelease: |
581 | case AtomicOrdering::SequentiallyConsistent: |
582 | // acquire |
583 | hint = 0b10100; |
584 | break; |
585 | default: |
586 | hint = 0x700; |
587 | } |
588 | |
589 | // .tail: |
590 | // dbar 0x700 | acquire |
591 | BuildMI(BB: TailMBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::DBAR)).addImm(Val: hint); |
592 | |
593 | NextMBBI = MBB.end(); |
594 | MI.eraseFromParent(); |
595 | |
596 | LivePhysRegs LiveRegs; |
597 | computeAndAddLiveIns(LiveRegs, MBB&: *LoopHeadMBB); |
598 | computeAndAddLiveIns(LiveRegs, MBB&: *LoopTailMBB); |
599 | computeAndAddLiveIns(LiveRegs, MBB&: *TailMBB); |
600 | computeAndAddLiveIns(LiveRegs, MBB&: *DoneMBB); |
601 | |
602 | return true; |
603 | } |
604 | |
605 | } // end namespace |
606 | |
// Register the pass with the LLVM pass registry under the command-line name
// "loongarch-expand-atomic-pseudo"; it neither only analyzes CFGs nor is an
// analysis pass (hence the two trailing 'false' arguments).
INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo" ,
                LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
609 | |
610 | namespace llvm { |
611 | |
612 | FunctionPass *createLoongArchExpandAtomicPseudoPass() { |
613 | return new LoongArchExpandAtomicPseudo(); |
614 | } |
615 | |
616 | } // end namespace llvm |
617 | |