//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// so that no later pass can break the forward-progress requirements of the
// LR/SC block.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISC-V atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
  unsigned getInstSizeInBytes(const MachineFunction &MF) const {
    unsigned Size = 0;
    for (auto &MBB : MF)
      for (auto &MI : MBB)
        Size += TII->getInstSizeInBytes(MI);
    return Size;
  }
#endif
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<RISCVSubtarget>();
  TII = STI->getInstrInfo();

#ifndef NDEBUG
  const unsigned OldSize = getInstSizeInBytes(MF);
#endif

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);

#ifndef NDEBUG
  const unsigned NewSize = getInstSizeInBytes(MF);
  assert(OldSize >= NewSize);
#endif
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

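// The helpers below pick the LR/SC opcode variant (plain, .aq, .rl, or .aqrl)
// that implements a given atomic ordering. As the code shows, when the Ztso
// extension is present (loads and stores are already totally ordered) the
// explicit annotations are dropped for acquire, release, and acq_rel
// orderings; sequentially consistent accesses keep their annotations.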
static unsigned getLRForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQ_RL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQ_RL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getLRForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getLRForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getSCForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getSCForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected SC width\n");
}

static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width,
                                   const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
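  // For example, with oldval = 0xAAAA1234, newval = 0xBBBB5678, and
  // masktargetdata = 0x0000FFFF, the result is 0xAAAA5678: only the bits
  // selected by the mask are taken from newval.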
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                                         MachineInstr &MI, DebugLoc DL,
                                         MachineBasicBlock *ThisMBB,
                                         MachineBasicBlock *LoopMBB,
                                         MachineBasicBlock *DoneMBB,
                                         AtomicRMWInst::BinOp BinOp, int Width,
                                         const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
                           STI);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

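// Sign-extend the sub-word value in ValReg in place: shift it left so its
// sign bit lands in the MSB, then arithmetic-shift it back right. ShamtReg
// holds the shift amount, which is supplied by the caller.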
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
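  // For signed min/max the pseudo carries one extra operand: the shift amount
  // used below to sign-extend the loaded value (operand 6). That pushes the
  // ordering immediate from operand 6 to operand 7.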

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loophead
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
      .addReg(AddrReg)
      .addReg(Scratch1Reg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by modifying the
// branch within 'LoopHead' (which performs the same comparison). This is a
// valid transformation because after altering the LoopHead's BNE destination,
// the BNE following the cmpxchg becomes redundant and can be deleted. In the
// case of a masked cmpxchg, an appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
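//
// An illustrative sketch of the unmasked case: an input sequence such as
//
//   dest, scratch = PseudoCmpXchg32 addr, cmpval, newval, ordering
//   bne dest, cmpval, .Lfail
//
// expands to an LR/SC loop whose loop-head BNE branches directly to .Lfail,
// so the trailing BNE above is erased.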
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;

  // Make sure the branch is the only user of the AND.
  if (MaskReg.isValid()) {
    if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
      return false;
    if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
      return false;
  }

  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}

bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();

  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
                              LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

FunctionPass *llvm::createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}