//===- RISCVVectorPeephole.cpp - MI Vector Pseudo Peepholes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs various vector pseudo peephole optimisations after
// instruction selection.
//
// Currently it converts vmerge.vvm to vmv.v.v
// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
// ->
// PseudoVMV_V_V %false, %true, %vl, %sew
//
// And masked pseudos to unmasked pseudos
// PseudoVADD_V_V_MASK %passthru, %a, %b, %allonesmask, %vl, sew, policy
// ->
// PseudoVADD_V_V %passthru, %a, %b, %vl, sew, policy
//
// It also converts AVLs to VLMAX where possible
// %vl = VLENB * something
// PseudoVADD_V_V %passthru, %a, %b, %vl, sew, policy
// ->
// PseudoVADD_V_V %passthru, %a, %b, -1, sew, policy
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-vector-peephole"

namespace {

class RISCVVectorPeephole : public MachineFunctionPass {
public:
  static char ID;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  const TargetRegisterInfo *TRI;
  const RISCVSubtarget *ST;
  RISCVVectorPeephole() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }

  StringRef getPassName() const override {
    return "RISC-V Vector Peephole Optimization";
  }

private:
  bool tryToReduceVL(MachineInstr &MI) const;
  bool convertToVLMAX(MachineInstr &MI) const;
  bool convertToWholeRegister(MachineInstr &MI) const;
  bool convertToUnmasked(MachineInstr &MI) const;
  bool convertAllOnesVMergeToVMv(MachineInstr &MI) const;
  bool convertSameMaskVMergeToVMv(MachineInstr &MI);
  bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
  bool foldVMV_V_V(MachineInstr &MI);

  bool hasSameEEW(const MachineInstr &User, const MachineInstr &Src) const;
  bool isAllOnesMask(const MachineInstr *MaskDef) const;
  std::optional<unsigned> getConstant(const MachineOperand &VL) const;
  bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const;
  bool isKnownSameDefs(const MachineOperand &A, const MachineOperand &B) const;
};

} // namespace

char RISCVVectorPeephole::ID = 0;

INITIALIZE_PASS(RISCVVectorPeephole, DEBUG_TYPE, "RISC-V Fold Masks", false,
                false)

/// Given \p User that has an input operand with EEW=SEW, which uses the dest
/// operand of \p Src with an unknown EEW, return true if their EEWs match.
bool RISCVVectorPeephole::hasSameEEW(const MachineInstr &User,
                                     const MachineInstr &Src) const {
  unsigned UserLog2SEW =
      User.getOperand(RISCVII::getSEWOpNum(User.getDesc())).getImm();
  unsigned SrcLog2SEW =
      Src.getOperand(RISCVII::getSEWOpNum(Src.getDesc())).getImm();
  unsigned SrcLog2EEW = RISCV::getDestLog2EEW(
      TII->get(RISCV::getRVVMCOpcode(Src.getOpcode())), SrcLog2SEW);
  return SrcLog2EEW == UserLog2SEW;
}

// Attempt to reduce the VL of an instruction whose sole use is feeding an
// instruction with a narrower VL. This currently works backwards from the
// user instruction (which might have a smaller VL).
bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
  // Note that the goal here is a bit multifaceted.
  // 1) For stores, reducing the VL of the value being stored may help to
  //    reduce VL toggles. This is somewhat of an artifact of the fact we
  //    promote arithmetic instructions but VL predicate stores.
  // 2) For vmv.v.v reducing VL eagerly on the source instruction allows us
  //    to share code with the foldVMV_V_V transform below.
  //
  // Note that to the best of our knowledge, reducing VL is generally not
  // a significant win on real hardware unless we can also reduce LMUL which
  // this code doesn't try to do.
  //
  // TODO: We can handle a bunch more instructions here, and probably
  // recurse backwards through operands too.
  SmallVector<unsigned, 2> SrcIndices = {0};
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSE8_V:
  case RISCV::VSE16_V:
  case RISCV::VSE32_V:
  case RISCV::VSE64_V:
    break;
  case RISCV::VMV_V_V:
    SrcIndices[0] = 2;
    break;
  case RISCV::VMERGE_VVM:
    SrcIndices.assign({2, 3});
    break;
  case RISCV::VREDSUM_VS:
  case RISCV::VREDMAXU_VS:
  case RISCV::VREDMAX_VS:
  case RISCV::VREDMINU_VS:
  case RISCV::VREDMIN_VS:
  case RISCV::VREDAND_VS:
  case RISCV::VREDOR_VS:
  case RISCV::VREDXOR_VS:
  case RISCV::VWREDSUM_VS:
  case RISCV::VWREDSUMU_VS:
  case RISCV::VFREDUSUM_VS:
  case RISCV::VFREDOSUM_VS:
  case RISCV::VFREDMAX_VS:
  case RISCV::VFREDMIN_VS:
  case RISCV::VFWREDUSUM_VS:
  case RISCV::VFWREDOSUM_VS:
    SrcIndices[0] = 2;
    break;
  }

  MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
  if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel)
    return false;

  bool Changed = false;
  for (unsigned SrcIdx : SrcIndices) {
    Register SrcReg = MI.getOperand(SrcIdx).getReg();
    // Note: one *use*, not one *user*.
    if (!MRI->hasOneUse(SrcReg))
      continue;

    MachineInstr *Src = MRI->getVRegDef(SrcReg);
    if (!Src || Src->hasUnmodeledSideEffects() ||
        Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
        !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
        !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
      continue;

    // Src's dest needs to have the same EEW as MI's input.
    if (!hasSameEEW(MI, *Src))
      continue;

    bool ElementsDependOnVL = RISCVII::elementsDependOnVL(
        TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
    if (ElementsDependOnVL || Src->mayRaiseFPException())
      continue;

    MachineOperand &SrcVL =
        Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
    if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL))
      continue;

    if (!ensureDominates(VL, *Src))
      continue;

    if (VL.isImm())
      SrcVL.ChangeToImmediate(VL.getImm());
    else if (VL.isReg())
      SrcVL.ChangeToRegister(VL.getReg(), false);

    Changed = true;
  }

  // TODO: For instructions with a passthru, we could clear the passthru
  // and tail policy since we've just proven the tail is not demanded.
  return Changed;
}

/// Check if an operand is an immediate or a materialized ADDI $x0, imm.
std::optional<unsigned>
RISCVVectorPeephole::getConstant(const MachineOperand &VL) const {
  if (VL.isImm())
    return VL.getImm();

  MachineInstr *Def = MRI->getVRegDef(VL.getReg());
  if (!Def || Def->getOpcode() != RISCV::ADDI ||
      Def->getOperand(1).getReg() != RISCV::X0)
    return std::nullopt;
  return Def->getOperand(2).getImm();
}

/// Convert AVLs that are known to be VLMAX to the VLMAX sentinel.
bool RISCVVectorPeephole::convertToVLMAX(MachineInstr &MI) const {
  if (!RISCVII::hasVLOp(MI.getDesc().TSFlags) ||
      !RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;

  auto LMUL = RISCVVType::decodeVLMUL(RISCVII::getLMul(MI.getDesc().TSFlags));
  // Fixed-point value, denominator=8
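  // e.g. LMUL=1/2 gives LMULFixed=4; LMUL=4 gives LMULFixed=32.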
  unsigned LMULFixed = LMUL.second ? (8 / LMUL.first) : 8 * LMUL.first;
  unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
  // A Log2SEW of 0 is an operation on mask registers only
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
  assert(8 * LMULFixed / SEW > 0);

  // If the exact VLEN is known then we know VLMAX, check if the AVL == VLMAX.
  MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
  if (auto VLen = ST->getRealVLen(), AVL = getConstant(VL);
      VLen && AVL && (*VLen * LMULFixed) / SEW == *AVL * 8) {
    VL.ChangeToImmediate(RISCV::VLMaxSentinel);
    return true;
  }

  // If an AVL is a VLENB that's possibly scaled to be equal to VLMAX, convert
  // it to the VLMAX sentinel value.
  if (!VL.isReg())
    return false;
  MachineInstr *Def = MRI->getVRegDef(VL.getReg());
  if (!Def)
    return false;

  // Fixed-point value, denominator=8
  uint64_t ScaleFixed = 8;
  // Check if the VLENB was potentially scaled with slli/srli
  if (Def->getOpcode() == RISCV::SLLI) {
    assert(Def->getOperand(2).getImm() < 64);
    ScaleFixed <<= Def->getOperand(2).getImm();
    Def = MRI->getVRegDef(Def->getOperand(1).getReg());
  } else if (Def->getOpcode() == RISCV::SRLI) {
    assert(Def->getOperand(2).getImm() < 64);
    ScaleFixed >>= Def->getOperand(2).getImm();
    Def = MRI->getVRegDef(Def->getOperand(1).getReg());
  }

  if (!Def || Def->getOpcode() != RISCV::PseudoReadVLENB)
    return false;

  // AVL = (VLENB * Scale)
  //
  // VLMAX = (VLENB * 8 * LMUL) / SEW
  //
  // AVL == VLMAX
  // -> VLENB * Scale == (VLENB * 8 * LMUL) / SEW
  // -> Scale == (8 * LMUL) / SEW
  if (ScaleFixed != 8 * LMULFixed / SEW)
    return false;

  VL.ChangeToImmediate(RISCV::VLMaxSentinel);

  return true;
}

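/// Return true if \p MaskDef, looking through COPYs of virtual registers, is a
/// PseudoVMSET, i.e. an all-ones mask.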
bool RISCVVectorPeephole::isAllOnesMask(const MachineInstr *MaskDef) const {
  while (MaskDef->isCopy() && MaskDef->getOperand(1).getReg().isVirtual())
    MaskDef = MRI->getVRegDef(MaskDef->getOperand(1).getReg());

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  switch (MaskDef->getOpcode()) {
  case RISCV::PseudoVMSET_M_B1:
  case RISCV::PseudoVMSET_M_B2:
  case RISCV::PseudoVMSET_M_B4:
  case RISCV::PseudoVMSET_M_B8:
  case RISCV::PseudoVMSET_M_B16:
  case RISCV::PseudoVMSET_M_B32:
  case RISCV::PseudoVMSET_M_B64:
    return true;
  default:
    return false;
  }
}

/// Convert unit strided unmasked loads and stores to whole-register
/// equivalents to avoid the dependency on $vl and $vtype.
///
/// %x = PseudoVLE8_V_M1 %passthru, %ptr, %vlmax, policy
/// PseudoVSE8_V_M1 %v, %ptr, %vlmax
///
/// ->
///
/// %x = VL1RE8_V %ptr
/// VS1R_V %v, %ptr
bool RISCVVectorPeephole::convertToWholeRegister(MachineInstr &MI) const {
#define CASE_WHOLE_REGISTER_LMUL_SEW(lmul, sew)                                \
  case RISCV::PseudoVLE##sew##_V_M##lmul:                                      \
    NewOpc = RISCV::VL##lmul##RE##sew##_V;                                     \
    break;                                                                     \
  case RISCV::PseudoVSE##sew##_V_M##lmul:                                      \
    NewOpc = RISCV::VS##lmul##R_V;                                             \
    break;
#define CASE_WHOLE_REGISTER_LMUL(lmul)                                         \
  CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 8)                                        \
  CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 16)                                       \
  CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 32)                                       \
  CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 64)

  unsigned NewOpc;
  switch (MI.getOpcode()) {
    CASE_WHOLE_REGISTER_LMUL(1)
    CASE_WHOLE_REGISTER_LMUL(2)
    CASE_WHOLE_REGISTER_LMUL(4)
    CASE_WHOLE_REGISTER_LMUL(8)
  default:
    return false;
  }

  MachineOperand &VLOp = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
  if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel)
    return false;

  // Whole register instructions aren't pseudos so they don't have
  // policy/SEW/AVL ops, and they don't have passthrus.
  if (RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags))
    MI.removeOperand(RISCVII::getVecPolicyOpNum(MI.getDesc()));
  MI.removeOperand(RISCVII::getSEWOpNum(MI.getDesc()));
  MI.removeOperand(RISCVII::getVLOpNum(MI.getDesc()));
  if (RISCVII::isFirstDefTiedToFirstUse(MI.getDesc()))
    MI.removeOperand(1);

  MI.setDesc(TII->get(NewOpc));

  return true;
}

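/// Return the PseudoVMV_V_V opcode with the same LMUL as \p MI if \p MI is a
/// PseudoVMERGE_VVM, otherwise return 0.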
static unsigned getVMV_V_VOpcodeForVMERGE_VVM(const MachineInstr &MI) {
#define CASE_VMERGE_TO_VMV(lmul)                                               \
  case RISCV::PseudoVMERGE_VVM_##lmul:                                         \
    return RISCV::PseudoVMV_V_V_##lmul;
  switch (MI.getOpcode()) {
  default:
    return 0;
    CASE_VMERGE_TO_VMV(MF8)
    CASE_VMERGE_TO_VMV(MF4)
    CASE_VMERGE_TO_VMV(MF2)
    CASE_VMERGE_TO_VMV(M1)
    CASE_VMERGE_TO_VMV(M2)
    CASE_VMERGE_TO_VMV(M4)
    CASE_VMERGE_TO_VMV(M8)
  }
}

/// Convert a PseudoVMERGE_VVM with an all ones mask to a PseudoVMV_V_V.
///
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %allones, vl, sew
/// ->
/// %x = PseudoVMV_V_V %passthru, %true, vl, sew, tu_mu
bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
  unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
  if (!NewOpc)
    return false;
  if (!isAllOnesMask(MRI->getVRegDef(MI.getOperand(4).getReg())))
    return false;

  MI.setDesc(TII->get(NewOpc));
  MI.removeOperand(2); // False operand
  MI.removeOperand(3); // Mask operand
  MI.addOperand(
      MachineOperand::CreateImm(RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED));

  // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
  // register class for the destination and passthru operands e.g. VRNoV0 -> VR
  MRI->recomputeRegClass(MI.getOperand(0).getReg());
  if (MI.getOperand(1).getReg() != RISCV::NoRegister)
    MRI->recomputeRegClass(MI.getOperand(1).getReg());
  return true;
}

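/// Return true if operands \p A and \p B are known to refer to the same
/// definition, looking through copy-like instructions. Physical registers are
/// conservatively treated as different.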
bool RISCVVectorPeephole::isKnownSameDefs(const MachineOperand &A,
                                          const MachineOperand &B) const {
  if (A.getReg().isPhysical() || B.getReg().isPhysical())
    return false;

  return TRI->lookThruCopyLike(A.getReg(), MRI) ==
         TRI->lookThruCopyLike(B.getReg(), MRI);
}

/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
/// same mask, and the masked pseudo's passthru is the same as the false
/// operand, we can convert the PseudoVMERGE_VVM to a PseudoVMV_V_V.
///
/// %true = PseudoVADD_VV_M1_MASK %false, %x, %y, %mask, vl1, sew, policy
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %mask, vl2, sew
/// ->
/// %true = PseudoVADD_VV_M1_MASK %false, %x, %y, %mask, vl1, sew, policy
/// %x = PseudoVMV_V_V %passthru, %true, vl2, sew, tu_mu
bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
  unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
  if (!NewOpc)
    return false;
  MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg());

  if (!True || True->getParent() != MI.getParent())
    return false;

  auto *TrueMaskedInfo = RISCV::getMaskedPseudoInfo(True->getOpcode());
  if (!TrueMaskedInfo || !hasSameEEW(MI, *True))
    return false;

  const MachineOperand &TrueMask =
      True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs());
  const MachineOperand &MIMask = MI.getOperand(4);
  if (!isKnownSameDefs(TrueMask, MIMask))
    return false;

  // True's passthru needs to be equivalent to False
  Register TruePassthruReg = True->getOperand(1).getReg();
  Register FalseReg = MI.getOperand(2).getReg();
  if (TruePassthruReg != FalseReg) {
    // If True's passthru is undef see if we can change it to False
    if (TruePassthruReg != RISCV::NoRegister ||
        !MRI->hasOneUse(MI.getOperand(3).getReg()) ||
        !ensureDominates(MI.getOperand(2), *True))
      return false;
    True->getOperand(1).setReg(MI.getOperand(2).getReg());
    // If True is masked then its passthru needs to be in VRNoV0.
    MRI->constrainRegClass(True->getOperand(1).getReg(),
                           TII->getRegClass(True->getDesc(), 1, TRI,
                                            *True->getParent()->getParent()));
  }

  MI.setDesc(TII->get(NewOpc));
  MI.removeOperand(2); // False operand
  MI.removeOperand(3); // Mask operand
  MI.addOperand(
      MachineOperand::CreateImm(RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED));

  // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
  // register class for the destination and passthru operands e.g. VRNoV0 -> VR
  MRI->recomputeRegClass(MI.getOperand(0).getReg());
  if (MI.getOperand(1).getReg() != RISCV::NoRegister)
    MRI->recomputeRegClass(MI.getOperand(1).getReg());
  return true;
}

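/// Convert a masked pseudo whose mask is all ones into its unmasked
/// equivalent, e.g.:
///
/// %x = PseudoVADD_V_V_MASK %passthru, %a, %b, %allonesmask, %vl, sew, policy
/// ->
/// %x = PseudoVADD_V_V %passthru, %a, %b, %vl, sew, policy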
bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(MI.getOpcode());
  if (!I)
    return false;

  if (!isAllOnesMask(MRI->getVRegDef(
          MI.getOperand(I->MaskOpIdx + MI.getNumExplicitDefs()).getReg())))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  [[maybe_unused]] const bool HasPolicyOp =
      RISCVII::hasVecPolicyOp(MCID.TSFlags);
  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
  const MCInstrDesc &MaskedMCID = TII->get(MI.getOpcode());
  assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
          !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
         "Unmasked pseudo has policy but masked pseudo doesn't?");
  assert(HasPolicyOp == HasPassthru && "Unexpected pseudo structure");
  assert(!(HasPassthru && !RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) &&
         "Unmasked with passthru but masked with no passthru?");
  (void)HasPolicyOp;

  MI.setDesc(MCID);

  // Drop the policy operand if unmasked doesn't need it.
  if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
      !RISCVII::hasVecPolicyOp(MCID.TSFlags))
    MI.removeOperand(RISCVII::getVecPolicyOpNum(MaskedMCID));

  // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs?
  unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs();
  MI.removeOperand(MaskOpIdx);

  // The unmasked pseudo will no longer be constrained to the vrnov0 reg class,
  // so try and relax it to vr.
  MRI->recomputeRegClass(MI.getOperand(0).getReg());

  // If the original masked pseudo had a passthru, relax it or remove it.
  if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) {
    unsigned PassthruOpIdx = MI.getNumExplicitDefs();
    if (HasPassthru) {
      if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
        MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
    } else
      MI.removeOperand(PassthruOpIdx);
  }

  return true;
}

/// Check if it's safe to move From down to To, checking that no physical
/// registers are clobbered.
static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
  assert(From.getParent() == To.getParent() && !From.hasImplicitDef());
  SmallVector<Register> PhysUses;
  for (const MachineOperand &MO : From.all_uses())
    if (MO.getReg().isPhysical())
      PhysUses.push_back(MO.getReg());
  bool SawStore = false;
  for (auto II = From.getIterator(); II != To.getIterator(); II++) {
    for (Register PhysReg : PhysUses)
      if (II->definesRegister(PhysReg, nullptr))
        return false;
    if (II->mayStore()) {
      SawStore = true;
      break;
    }
  }
  return From.isSafeToMove(SawStore);
}

/// Given A and B are in the same MBB, returns true if A comes before B.
static bool dominates(MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  assert(A->getParent() == B->getParent());
  const MachineBasicBlock *MBB = A->getParent();
  auto MBBEnd = MBB->end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB->begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// If the register in \p MO doesn't dominate \p Src, try to move \p Src so it
/// does. Returns false if it doesn't dominate and we can't move. \p MO must be
/// in the same basic block as \p Src.
bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
                                          MachineInstr &Src) const {
  assert(MO.getParent()->getParent() == Src.getParent());
  if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
    return true;

  MachineInstr *Def = MRI->getVRegDef(MO.getReg());
  if (Def->getParent() == Src.getParent() && !dominates(Def, Src)) {
    if (!isSafeToMove(Src, *Def->getNextNode()))
      return false;
    Src.moveBefore(Def->getNextNode());
  }

  return true;
}

/// If a PseudoVMV_V_V's passthru is undef then we can replace it with its input
bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
  if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
    return false;
  if (MI.getOperand(1).getReg() != RISCV::NoRegister)
    return false;

  // If the input was a pseudo with a policy operand, we can give it a tail
  // agnostic policy if MI's undef tail subsumes the input's.
  MachineInstr *Src = MRI->getVRegDef(MI.getOperand(2).getReg());
  if (Src && !Src->hasUnmodeledSideEffects() &&
      MRI->hasOneUse(MI.getOperand(2).getReg()) &&
      RISCVII::hasVLOp(Src->getDesc().TSFlags) &&
      RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags) && hasSameEEW(MI, *Src)) {
    const MachineOperand &MIVL = MI.getOperand(3);
    const MachineOperand &SrcVL =
        Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));

    MachineOperand &SrcPolicy =
        Src->getOperand(RISCVII::getVecPolicyOpNum(Src->getDesc()));

    if (RISCV::isVLKnownLE(MIVL, SrcVL))
      SrcPolicy.setImm(SrcPolicy.getImm() | RISCVVType::TAIL_AGNOSTIC);
  }

  MRI->constrainRegClass(MI.getOperand(2).getReg(),
                         MRI->getRegClass(MI.getOperand(0).getReg()));
  MRI->replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
  MRI->clearKillFlags(MI.getOperand(2).getReg());
  MI.eraseFromParent();
  return true;
}

/// If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
/// into it.
///
/// %x = PseudoVADD_V_V_M1 %passthru, %a, %b, %vl1, sew, policy
/// %y = PseudoVMV_V_V_M1 %passthru, %x, %vl2, sew, policy
/// (where %vl1 <= %vl2, see related tryToReduceVL)
///
/// ->
///
/// %y = PseudoVADD_V_V_M1 %passthru, %a, %b, vl1, sew, policy
bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
  if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
    return false;

  MachineOperand &Passthru = MI.getOperand(1);

  if (!MRI->hasOneUse(MI.getOperand(2).getReg()))
    return false;

  MachineInstr *Src = MRI->getVRegDef(MI.getOperand(2).getReg());
  if (!Src || Src->hasUnmodeledSideEffects() ||
      Src->getParent() != MI.getParent() ||
      !RISCVII::isFirstDefTiedToFirstUse(Src->getDesc()) ||
      !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
      !RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags))
    return false;

  // Src's dest needs to have the same EEW as MI's input.
  if (!hasSameEEW(MI, *Src))
    return false;

  // Src needs to have the same passthru as VMV_V_V
  MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
  if (SrcPassthru.getReg() != RISCV::NoRegister &&
      SrcPassthru.getReg() != Passthru.getReg())
    return false;

  // Src VL will have already been reduced if legal (see tryToReduceVL),
  // so we don't need to handle a smaller source VL here. However, the
  // user's VL may be larger
  MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
  if (!RISCV::isVLKnownLE(SrcVL, MI.getOperand(3)))
    return false;

  // If the new passthru doesn't dominate Src, try to move Src so it does.
  if (!ensureDominates(Passthru, *Src))
    return false;

  if (SrcPassthru.getReg() != Passthru.getReg()) {
    SrcPassthru.setReg(Passthru.getReg());
    // If Src is masked then its passthru needs to be in VRNoV0.
    if (Passthru.getReg() != RISCV::NoRegister)
      MRI->constrainRegClass(Passthru.getReg(),
                             TII->getRegClass(Src->getDesc(),
                                              SrcPassthru.getOperandNo(), TRI,
                                              *Src->getParent()->getParent()));
  }

  // If MI was tail agnostic and the VL didn't increase, preserve it.
  int64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
  if ((MI.getOperand(5).getImm() & RISCVVType::TAIL_AGNOSTIC) &&
      RISCV::isVLKnownLE(MI.getOperand(3), SrcVL))
    Policy |= RISCVVType::TAIL_AGNOSTIC;
  Src->getOperand(RISCVII::getVecPolicyOpNum(Src->getDesc())).setImm(Policy);

  MRI->constrainRegClass(Src->getOperand(0).getReg(),
                         MRI->getRegClass(MI.getOperand(0).getReg()));
  MRI->replaceRegWith(MI.getOperand(0).getReg(), Src->getOperand(0).getReg());
  MI.eraseFromParent();

  return true;
}

bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  TRI = MRI->getTargetRegisterInfo();

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      Changed |= convertToVLMAX(MI);
      Changed |= tryToReduceVL(MI);
      Changed |= convertToUnmasked(MI);
      Changed |= convertToWholeRegister(MI);
      Changed |= convertAllOnesVMergeToVMv(MI);
      Changed |= convertSameMaskVMergeToVMv(MI);
      if (foldUndefPassthruVMV_V_V(MI)) {
        Changed |= true;
        continue; // MI is erased
      }
      Changed |= foldVMV_V_V(MI);
    }
  }

  return Changed;
}

FunctionPass *llvm::createRISCVVectorPeepholePass() {
  return new RISCVVectorPeephole();
}