//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
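// For example (an illustrative, simplified MIR sketch, not exact pass
// output), a vector add pseudo operating at SEW=32, LMUL=1 with AVL in
// %avl gets a configuration instruction materialized in front of it:
//
//   dead $x0 = PseudoVSETVLI %avl, e32, m1, ta, ma
//   %dst = PseudoVADD_VV_M1 $noreg, %a, %b, %avl, 5 /*e32*/, ...
//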
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}
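// For example, "%c = ADDI $x0, 16" (the expansion of "li 16") is a non-zero
// load immediate, so an AVL defined by it is provably non-zero (see
// hasNonZeroAVL below).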

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0                 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    };
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}
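// For example, RISCVVType::decodeVLMUL returns {2, Fractional=true} for mf2
// and {2, Fractional=false} for m2, so mf8 through m1 satisfy this predicate
// while m2 through m8 do not.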

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
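// For example, with Used.SEW == SEWGreaterThanOrEqual, moving from an e32
// VTYPE to an e64 VTYPE is compatible while moving to e16 is not. With
// Used.SEWLMULRatio set, e32/m1 and e64/m2 are compatible (both ratios are
// 32), whereas e32/m1 and e32/m2 are not.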

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too. We can still use the value of a
  // SEW, VL, or Policy operand even though it might not be the exact value in
  // the VL or VTYPE, since we only care about what the instruction originally
  // demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) &&
          !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* merge op can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL. We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on VL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedMergeOp(MI)) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
    // semantically the same as vmv.s.x. This is particularly useful since we
    // don't have an immediate form of vmv.s.x, and thus frequently use
    // vmv.v.i in its place. Since a splat is non-constant time in LMUL, we
    // do need to be careful to not increase the number of active vector
    // registers (unlike for vmv.s.x.)
    if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedMergeOp(MI)) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (isFloatScalarMoveOrScalarSplatInstr(MI) &&
          !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
  // a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot and this will return nullptr. If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register. Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
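  // For example, e8/mf8 and e64/m1 both have a SEW/LMUL ratio of 64, so
  // with VLEN=128 both yield VLMAX=2 and thus the same VL for any AVL.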
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
//   %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
//   $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
//   %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
//   $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
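// For example, with VLEN=128: computeVLMAX(128, /*SEW=*/32, LMUL_2) is
// (128 * 2) / 32 = 8, and computeVLMAX(128, /*SEW=*/64, LMUL_F2) is
// (128 / 2) / 64 = 1.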

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // Pick a random value for state tracking purposes, will be ignored via
    // the demanded fields mechanism
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                        .addReg(RISCV::X0, RegState::Kill)
                        .addImm(Info.encodeVTYPE())
                        .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    // If the AVL value isn't live at MI, do a quick check to see if it's easily
    // extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != Info.getAVLVNInfo()) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock::iterator II;
        if (Info.getAVLVNInfo()->isPHIDef())
          II = LIS->getMBBFromIndex(Info.getAVLVNInfo()->def)->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy =
            BuildMI(*II->getParent(), II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}
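// For example, if PrevInfo is e32/m2 (ratio 16) and NewInfo demands only
// SEW=e64, getSameRatioLMUL yields m4, which keeps the ratio at 16 so that
// VLMAX is unchanged and a VL-preserving "vsetvli x0, x0, vtype" can often
// be used instead of a full VL toggle.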
1206 | |
1207 | // Given an incoming state reaching MI, minimally modifies that state so that it |
1208 | // is compatible with MI. The resulting state is guaranteed to be semantically |
1209 | // legal for MI, but may not be the state requested by MI. |
1210 | void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, |
1211 | const MachineInstr &MI) const { |
1212 | if (!RISCVII::hasSEWOp(TSFlags: MI.getDesc().TSFlags)) |
1213 | return; |
1214 | |
1215 | DemandedFields Demanded = getDemanded(MI, ST); |
1216 | |
1217 | const VSETVLIInfo NewInfo = computeInfoForInstr(MI); |
1218 | assert(NewInfo.isValid() && !NewInfo.isUnknown()); |
1219 | if (Info.isValid() && !needVSETVLI(Used: Demanded, Require: NewInfo, CurInfo: Info)) |
1220 | return; |
1221 | |
1222 | const VSETVLIInfo PrevInfo = Info; |
1223 | if (!Info.isValid() || Info.isUnknown()) |
1224 | Info = NewInfo; |
1225 | |
1226 | const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded); |
1227 | |
1228 | // If MI only demands that VL has the same zeroness, we only need to set the |
1229 | // AVL if the zeroness differs. This removes a vsetvli entirely if the types |
1230 | // match or allows use of cheaper avl preserving variant if VLMAX doesn't |
1231 | // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype" |
1232 | // variant, so we avoid the transform to prevent extending live range of an |
1233 | // avl register operand. |
1234 | // TODO: We can probably relax this for immediates. |
1235 | bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(Other: PrevInfo, LIS) && |
1236 | IncomingInfo.hasSameVLMAX(Other: PrevInfo); |
1237 | if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero)) |
1238 | Info.setAVL(IncomingInfo); |
1239 | |
1240 | Info.setVTYPE( |
1241 | L: ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info) |
1242 | .getVLMUL(), |
1243 | S: ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(), |
1244 | // Prefer tail/mask agnostic since it can be relaxed to undisturbed later |
1245 | // if needed. |
1246 | TA: (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() || |
1247 | IncomingInfo.getTailAgnostic(), |
1248 | MA: (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() || |
1249 | IncomingInfo.getMaskAgnostic()); |
1250 | |
1251 | // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep |
1252 | // the AVL. |
1253 | if (Info.hasSEWLMULRatioOnly()) { |
1254 | VSETVLIInfo RatiolessInfo = IncomingInfo; |
1255 | RatiolessInfo.setAVL(Info); |
1256 | Info = RatiolessInfo; |
1257 | } |
1258 | } |
1259 | |
1260 | // Given a state with which we evaluated MI (see transferBefore above for why |
1261 | // this might be different that the state MI requested), modify the state to |
1262 | // reflect the changes MI might make. |
1263 | void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, |
1264 | const MachineInstr &MI) const { |
1265 | if (isVectorConfigInstr(MI)) { |
1266 | Info = getInfoForVSETVLI(MI); |
1267 | return; |
1268 | } |
1269 | |
1270 | if (RISCV::isFaultFirstLoad(MI)) { |
1271 | // Update AVL to vl-output of the fault first load. |
1272 | assert(MI.getOperand(1).getReg().isVirtual()); |
1273 | if (LIS) { |
1274 | auto &LI = LIS->getInterval(Reg: MI.getOperand(i: 1).getReg()); |
1275 | SlotIndex SI = |
1276 | LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); |
1277 | VNInfo *VNI = LI.getVNInfoAt(Idx: SI); |
1278 | Info.setAVLRegDef(VNInfo: VNI, AVLReg: MI.getOperand(i: 1).getReg()); |
1279 | } else |
1280 | Info.setAVLRegDef(VNInfo: nullptr, AVLReg: MI.getOperand(i: 1).getReg()); |
1281 | return; |
1282 | } |
1283 | |
1284 | // If this is something that updates VL/VTYPE that we don't know about, set |
1285 | // the state to unknown. |
1286 | if (MI.isCall() || MI.isInlineAsm() || |
1287 | MI.modifiesRegister(Reg: RISCV::VL, /*TRI=*/nullptr) || |
1288 | MI.modifiesRegister(Reg: RISCV::VTYPE, /*TRI=*/nullptr)) |
1289 | Info = VSETVLIInfo::getUnknown(); |
1290 | } |
1291 | |
1292 | bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB, |
1293 | VSETVLIInfo &Info) const { |
1294 | bool HadVectorOp = false; |
1295 | |
1296 | Info = BlockInfo[MBB.getNumber()].Pred; |
1297 | for (const MachineInstr &MI : MBB) { |
1298 | transferBefore(Info, MI); |
1299 | |
1300 | if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(TSFlags: MI.getDesc().TSFlags)) |
1301 | HadVectorOp = true; |
1302 | |
1303 | transferAfter(Info, MI); |
1304 | } |
1305 | |
1306 | return HadVectorOp; |
1307 | } |
1308 | |
1309 | void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { |
1310 | |
1311 | BlockData &BBInfo = BlockInfo[MBB.getNumber()]; |
1312 | |
1313 | BBInfo.InQueue = false; |
1314 | |
1315 | // Start with the previous entry so that we keep the most conservative state |
1316 | // we have ever found. |
1317 | VSETVLIInfo InInfo = BBInfo.Pred; |
1318 | if (MBB.pred_empty()) { |
1319 | // There are no predecessors, so use the default starting status. |
1320 | InInfo.setUnknown(); |
1321 | } else { |
1322 | for (MachineBasicBlock *P : MBB.predecessors()) |
1323 | InInfo = InInfo.intersect(Other: BlockInfo[P->getNumber()].Exit); |
1324 | } |
1325 | |
1326 | // If we don't have any valid predecessor value, wait until we do. |
1327 | if (!InInfo.isValid()) |
1328 | return; |
1329 | |
1330 | // If no change, no need to rerun block |
1331 | if (InInfo == BBInfo.Pred) |
1332 | return; |
1333 | |
1334 | BBInfo.Pred = InInfo; |
1335 | LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB) |
1336 | << " changed to " << BBInfo.Pred << "\n" ); |
1337 | |
1338 | // Note: It's tempting to cache the state changes here, but due to the |
1339 | // compatibility checks performed a blocks output state can change based on |
1340 | // the input state. To cache, we'd have to add logic for finding |
1341 | // never-compatible state changes. |
1342 | VSETVLIInfo TmpStatus; |
1343 | computeVLVTYPEChanges(MBB, Info&: TmpStatus); |
1344 | |
1345 | // If the new exit value matches the old exit value, we don't need to revisit |
1346 | // any blocks. |
1347 | if (BBInfo.Exit == TmpStatus) |
1348 | return; |
1349 | |
1350 | BBInfo.Exit = TmpStatus; |
1351 | LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB) |
1352 | << " changed to " << BBInfo.Exit << "\n" ); |
1353 | |
1354 | // Add the successors to the work list so we can propagate the changed exit |
1355 | // status. |
1356 | for (MachineBasicBlock *S : MBB.successors()) |
1357 | if (!BlockInfo[S->getNumber()].InQueue) { |
1358 | BlockInfo[S->getNumber()].InQueue = true; |
1359 | WorkList.push(x: S); |
1360 | } |
1361 | } |
1362 | |
1363 | // If we weren't able to prove a vsetvli was directly unneeded, it might still |
1364 | // be unneeded if the AVL was a phi node where all incoming values are VL |
1365 | // outputs from the last VSETVLI in their respective basic blocks. |
1366 | bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, |
1367 | const MachineBasicBlock &MBB) const { |
1368 | if (!Require.hasAVLReg()) |
1369 | return true; |
1370 | |
1371 | if (!LIS) |
1372 | return true; |
1373 | |
1374 | // We need the AVL to have been produced by a PHI node in this basic block. |
1375 | const VNInfo *Valno = Require.getAVLVNInfo(); |
1376 | if (!Valno->isPHIDef() || LIS->getMBBFromIndex(index: Valno->def) != &MBB) |
1377 | return true; |
1378 | |
1379 | const LiveRange &LR = LIS->getInterval(Reg: Require.getAVLReg()); |
1380 | |
1381 | for (auto *PBB : MBB.predecessors()) { |
1382 | const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit; |
1383 | |
1384 | // We need the PHI input to the be the output of a VSET(I)VLI. |
1385 | const VNInfo *Value = LR.getVNInfoBefore(Idx: LIS->getMBBEndIdx(mbb: PBB)); |
1386 | if (!Value) |
1387 | return true; |
1388 | MachineInstr *DefMI = LIS->getInstructionFromIndex(index: Value->def); |
1389 | if (!DefMI || !isVectorConfigInstr(MI: *DefMI)) |
1390 | return true; |
1391 | |
1392 | // We found a VSET(I)VLI make sure it matches the output of the |
1393 | // predecessor block. |
1394 | VSETVLIInfo DefInfo = getInfoForVSETVLI(MI: *DefMI); |
1395 | if (DefInfo != PBBExit) |
1396 | return true; |
1397 | |
1398 | // Require has the same VL as PBBExit, so if the exit from the |
1399 | // predecessor has the VTYPE we are looking for we might be able |
1400 | // to avoid a VSETVLI. |
1401 | if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Other: Require)) |
1402 | return true; |
1403 | } |
1404 | |
1405 | // If all the incoming values to the PHI checked out, we don't need |
1406 | // to insert a VSETVLI. |
1407 | return false; |
1408 | } |
1409 | |
1410 | void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { |
1411 | VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred; |
1412 | // Track whether the prefix of the block we've scanned is transparent |
1413 | // (meaning has not yet changed the abstract state). |
1414 | bool PrefixTransparent = true; |
1415 | for (MachineInstr &MI : MBB) { |
1416 | const VSETVLIInfo PrevInfo = CurInfo; |
1417 | transferBefore(Info&: CurInfo, MI); |
1418 | |
1419 | // If this is an explicit VSETVLI or VSETIVLI, update our state. |
1420 | if (isVectorConfigInstr(MI)) { |
1421 | // Conservatively, mark the VL and VTYPE as live. |
1422 | assert(MI.getOperand(3).getReg() == RISCV::VL && |
1423 | MI.getOperand(4).getReg() == RISCV::VTYPE && |
1424 | "Unexpected operands where VL and VTYPE should be" ); |
1425 | MI.getOperand(i: 3).setIsDead(false); |
1426 | MI.getOperand(i: 4).setIsDead(false); |
1427 | PrefixTransparent = false; |
1428 | } |
1429 | |
1430 | uint64_t TSFlags = MI.getDesc().TSFlags; |
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had: we know the GPR result of the implicit state change would go
        // unused and the VL/VTYPE registers already hold the correct values.
        // Note that we *do* still need to model the state as if it changed,
        // because even though the register contents are unchanged, the
        // abstract state can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need to be split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been
            // emitted as an ADDI. However, the ADDI might not have been used
            // in the vsetvli, or a vsetvli might not have been emitted, so it
            // may be dead now.
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed-length vsetvli in a
/// single-block loop when it could execute once in the preheader instead.
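///
/// A sketch of the transform (illustrative assembly; names hypothetical):
///
///   preheader:                            preheader:
///     ...                                   ...
///                                           vsetivli zero, 4, e32, m1, ta, ma
///   loop:                          =>     loop:
///     vsetivli zero, 4, e32, m1, ta, ma     ...
///     ...                                   bnez a0, loop
///     bnez a0, loop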
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here: one of legality,
  // one of profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  // may hit the end of the block with a different end state. We cannot
  // make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  // into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
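  // The loop below simulates both input states in lockstep. A transition that
  // becomes a no-op under AvailableInfo (CurInfo == LastInfo) counts as
  // removed; one that was already a no-op under the old input state
  // (LastOldInfo == OldInfo) cancels that out, since PRE gains nothing there.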
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
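//
// A sketch of the case this enables (illustrative; registers and VTYPEs are
// hypothetical):
//
//   %x = PseudoVSETVLI %avl, <vtype0>, ...
//   ... no instruction here demands a VL/VTYPE field on which
//       <vtype0> and <vtype1> differ ...
//   %y = PseudoVSETVLI %avl, <vtype1>, ...
//
// Here the earlier vsetvli can be rewritten to produce %y with <vtype1>,
// after which the later one is redundant and can be deleted.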
bool RISCVInsertVSETVLI::canMutatePriorConfig(
    const MachineInstr &PrevMI, const MachineInstr &MI,
    const DemandedFields &Used) const {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates
    // PrevMI. For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI->hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;

  // Update LIS and cleanup dead AVLs given a value which has had one use
  // (as an AVL) removed.
  auto afterDroppedAVLUse = [&](Register OldVLReg) {
    if (LIS)
      LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

    MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
    if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
        MRI->use_nodbg_empty(OldVLReg)) {
      if (LIS) {
        LIS->removeInterval(OldVLReg);
        LIS->RemoveMachineInstrFromMaps(*VLOpDef);
      }
      VLOpDef->eraseFromParent();
    }
  };

  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    if (!MI.getOperand(0).isDead())
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual() && LIS) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          if (OldVLReg && OldVLReg.isVirtual())
            afterDroppedAVLUse(OldVLReg);

          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, ST);
  }

  NumCoalescedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete) {
    if (LIS)
      LIS->RemoveMachineInstrFromMaps(*MI);
    Register OldAVLReg;
    if (MI->getOperand(1).isReg())
      OldAVLReg = MI->getOperand(1).getReg();
    MI->eraseFromParent();
    if (OldAVLReg && OldAVLReg.isVirtual())
      afterDroppedAVLUse(OldAVLReg);
  }
}

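/// Expand the VL output of fault-only-first loads into a PseudoReadVL placed
/// right after the load. A sketch of the rewrite (illustrative opcode and
/// operands):
///
///   %vd, %vl:gpr = PseudoVLE32FF_V ...       %vd, dead $x0 = PseudoVLE32FF_V ...
///   use %vl                            =>    %vl:gpr = PseudoReadVL
///                                            use %vl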
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      assert(VLOutput.isVirtual());
      if (!MI.getOperand(1).isDead()) {
        auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
                                TII->get(RISCV::PseudoReadVL), VLOutput);
        // Move the LiveInterval's definition down to PseudoReadVL.
        if (LIS) {
          SlotIndex NewDefSI =
              LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
          LiveInterval &DefLI = LIS->getInterval(VLOutput);
          VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
          DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
          DefVNI->def = NewDefSI;
        }
      }
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block-local
  // DSE and peephole transforms based on demanded fields. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross-block analysis within the dataflow. We can't have both
  // demanded-fields-based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    coalesceVSETVLIs(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}