//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
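// As an illustration (derived from doAtomicBinOpExpansion below), a
// PseudoAtomicLoadNand32 becomes an LR.W/SC.W retry loop of the form
//
//   .loop:
//     lr.w dest, (addr)
//     and scratch, dest, incr
//     xori scratch, scratch, -1
//     sc.w scratch, scratch, (addr)
//     bnez scratch, .loop
//
// where the aq/rl bits on lr.w/sc.w are chosen from the atomic ordering. It
// is this loop whose forward-progress guarantee other passes must not break.
//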
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISC-V atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
    initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
  unsigned getInstSizeInBytes(const MachineFunction &MF) const {
    unsigned Size = 0;
    for (auto &MBB : MF)
      for (auto &MI : MBB)
        Size += TII->getInstSizeInBytes(MI);
    return Size;
  }
#endif
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<RISCVSubtarget>();
  TII = STI->getInstrInfo();

#ifndef NDEBUG
  const unsigned OldSize = getInstSizeInBytes(MF);
#endif

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);

#ifndef NDEBUG
  const unsigned NewSize = getInstSizeInBytes(MF);
  assert(OldSize >= NewSize);
#endif
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

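// The helpers below map an atomic ordering to the LR/SC variant carrying the
// matching aq/rl bits. Note that when the Ztso extension is present the
// hardware already provides RVTSO ordering, so (as the hasStdExtZtso checks
// encode) only seq_cst operations keep their annotations.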
static unsigned getLRForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQ_RL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQ_RL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getLRForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getLRForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getSCForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getSCForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected SC width\n");
}

static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width,
                                   const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
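  // For example, with oldval = 0xAABBCCDD, newval = 0x000000EE and a mask of
  // 0x000000FF: (oldval ^ newval) & mask = 0x00000033, and
  // oldval ^ 0x00000033 = 0xAABBCCEE, i.e. only the masked byte is replaced.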
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                                         MachineInstr &MI, DebugLoc DL,
                                         MachineBasicBlock *ThisMBB,
                                         MachineBasicBlock *LoopMBB,
                                         MachineBasicBlock *DoneMBB,
                                         AtomicRMWInst::BinOp BinOp, int Width,
                                         const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
                           STI);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

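// Sign-extend the sub-word field held in ValReg in place: shift it left and
// then arithmetic-shift it back right by the same amount. ShamtReg is
// expected to hold the shift amount that moves the field's sign bit to the
// MSB, so that the signed min/max expansions below compare correctly.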
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
      .addReg(AddrReg)
      .addReg(Scratch1Reg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by modifying the
// branch within 'LoopHead' (which performs the same comparison). This is a
// valid transformation because after altering the LoopHead's BNE destination,
// the BNE following the cmpxchg becomes redundant and can be deleted. In the
// case of a masked cmpxchg, an appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
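//
// For example, in the unmasked case
//
//   PseudoCmpXchg32 dest, scratch, addr, cmpval, newval, ordering
//   bne dest, cmpval, .Lfail
//
// the loop head's BNE can branch directly to .Lfail, making the trailing BNE
// dead; .Lfail is then returned via LoopHeadBNETarget.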
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;

  // Make sure the branch is the only user of the AND.
  if (MaskReg.isValid()) {
    if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
      return false;
    if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
      return false;
  }

  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}

bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();

  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg,
                              MaskReg, LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

namespace llvm {

FunctionPass *createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}

} // end of namespace llvm