//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
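//
// For example (illustrative only; exact operand encodings vary), phase 3
// turns
//
//   %v = PseudoVADD_VV_M1 $noreg, %a, %b, %avl, 5 /*e32*/, 3 /*ta, ma*/
//
// into
//
//   dead $x0 = PseudoVSETVLI %avl, 208 /*e32, m1, ta, ma*/,
//              implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 $noreg, %a, %b, $noreg, 5 /*e32*/, 3 /*ta, ma*/,
//        implicit $vl, implicit $vtype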
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
    DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
    cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
             "vill is cleared"),
    cl::init(true));

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedPassthru(const MachineInstr &MI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the passthru
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
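/// For example (illustrative), a whole register group copy such as
/// $v8m2 = COPY $v10m2 is lowered to vmv2r.v, which requires a valid vtype
/// (i.e. vill must be clear) to execute.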
static bool isVectorCopy(const TargetRegisterInfo *TRI,
                         const MachineInstr &MI) {
  return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
         RISCVRegisterInfo::isRVVRegClass(
             TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqualAndLessThan64 =
        2, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;
  // If this is true, we demand that VTYPE is set to some legal state, i.e.
  // that vill is unset.
  bool VILL = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
  }

  // Return true if any property of VL was used
  bool usedVL() { return VLAny || VLZeroness; }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
    VILL = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
    VILL |= B.VILL;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    }
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    }
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy << ", ";
    OS << "VILL=" << VILL;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
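/// For example (illustrative): if only the SEW/LMUL ratio is demanded, then
/// {e32, m1} (ratio 32) and {e16, mf2} (ratio 32) are compatible, while
/// {e32, m1} and {e32, m2} (ratio 16) are not.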
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too. We can still use the value of
  // a SEW, VL, or Policy operand even though it might not be the exact value
  // in the VL or VTYPE, since we only care about what the instruction
  // originally demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two, which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
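  // For example (illustrative): vle32.v has EEW 32 encoded in its opcode, so
  // it behaves identically under {e32, m1} and {e16, mf2}; both keep the
  // SEW/LMUL ratio at 32 and therefore produce the same EMUL of m1.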
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (RISCVInstrInfo::isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedPassthru(MI)) {
      if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
          !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (RISCVInstrInfo::isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* passthru can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL. We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on VL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
        VLOp.getImm() == 1 && hasUndefinedPassthru(MI)) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the
    // same semantically as vmv.s.x. This is particularly useful since we
    // don't have an immediate form of vmv.s.x, and thus frequently use
    // vmv.v.i in its place. Since a splat is non-constant time in LMUL, we
    // do need to be careful to not increase the number of active vector
    // registers (unlike for vmv.s.x).
    if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
        VLOp.getImm() == 1 && hasUndefinedPassthru(MI)) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
          !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // In §32.16.6, whole vector register moves have a dependency on SEW. At the
  // MIR level though we don't encode the element type, and it gives the same
  // result whatever the SEW may be.
  //
  // However it does need valid SEW, i.e. vill must be cleared. The entry to a
  // function, calls and inline assembly may all set it, so make sure we clear
  // it for whole register copies. Do this by leaving VILL demanded.
  if (isVectorCopy(ST->getRegisterInfo(), MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEW = DemandedFields::SEWNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVInstrInfo::isVExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    // TODO: LMUL can be any larger value (without cost)
    Res.TailPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it
  // was a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot and this will return nullptr. If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(const VSETVLIInfo &Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVVType::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return RISCVInstrInfo::isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register. Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", ";

    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

    OS << "VLMul=";
    if (Fractional)
      OS << "mf";
    else
      OS << "m";
    OS << LMul << ", "
       << "SEW=e" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all
  // predecessor blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      Register AVLReg = MI.getOperand(1).getReg();
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

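// For example (illustrative): with VLEN=128, SEW=32, and LMUL=m2 this returns
// (128 * 2) / 32 = 8; with LMUL=mf2 it returns (128 / 2) / 32 = 2.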
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVVType::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedPassthru(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <=
                 (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVVType::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVVType::MASK_AGNOSTIC;
    }

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMAX sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(RISCVInstrInfo::isScalarExtractInstr(MI) ||
           RISCVInstrInfo::isVExtractInstr(MI));
    // Pick a random value for state tracking purposes; it will be ignored via
    // the demanded fields mechanism.
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI.
    // If it has the same VLMAX we want and the last VL/VTYPE we observed is
    // the same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && RISCVInstrInfo::isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI =
              BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE())
                  .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    const VNInfo *CurVNI = Info.getAVLVNInfo();
    // If the AVL value isn't live at MI, do a quick check to see if it's
    // easily extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != CurVNI) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock *MBB = LIS->getMBBFromIndex(CurVNI->def);
        MachineBasicBlock::iterator II;
        if (CurVNI->isPHIDef())
          II = MBB->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(CurVNI->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy = BuildMI(*MBB, II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                           .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
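// For example (illustrative): if the previous state is {e64, m2} (ratio 32)
// and the next instruction demands SEW=32 but neither LMUL nor the ratio, we
// pick m1 so the ratio stays 32 and the cheaper "vsetvli x0, x0" form remains
// usable.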
static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
                                  const VSETVLIInfo &NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that
// it is compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (isVectorCopy(ST->getRegisterInfo(), MI) &&
      (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
    // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It
    // may be coalesced into another vsetvli since we won't demand any fields.
    VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
    NewInfo.setAVLImm(1);
    NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
    Info = NewInfo;
    return;
  }

  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match, or allows use of the cheaper avl preserving variant if VLMAX
  // doesn't change. If VLMAX might change, we couldn't use the
  // 'vsetvli x0, x0, vtype' variant, so we avoid the transform to prevent
  // extending the live range of an avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed
      // later if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
    // Update AVL to the vl output of the fault-only-first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
        RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
        isVectorCopy(ST->getRegisterInfo(), MI))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to
  // revisit any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
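//
// For example (illustrative MIR sketch):
//
//   bb.1:
//     %vl1 = PseudoVSETVLI %avl1, <vtype>
//     ...
//   bb.2:
//     %vl2 = PseudoVSETVLI %avl2, <vtype>
//     ...
//   bb.3:
//     %avl = PHI %vl1, %bb.1, %vl2, %bb.2
//     ... vector op using %avl as its AVL ...
//
// If each incoming vsetvli matches the exit state of its predecessor, the
// vector op in bb.3 can reuse VL/VTYPE without a new vsetvli.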
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (!Require.hasAVLReg())
    return true;

  if (!LIS)
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    if (EnsureWholeVectorRegisterMoveValidVTYPE &&
        isVectorCopy(ST->getRegisterInfo(), MI)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been
            // emitted as an ADDI. However, the ADDI might not have been used
            // in the vsetvli, or a vsetvli might not have been emitted, so it
            // may be dead now.
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isInlineAsm()) {
      MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ true,
                                              /*isImp*/ true));
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ true,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed-length vsetvli in a
/// single-block loop when it could execute once in the preheader instead.
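///
/// For example (illustrative): a loop of the form
///
///   loop:
///     vsetivli zero, 4, e32, m1, ta, ma
///     ...
///     bnez a0, loop
///
/// can have the vsetivli executed once at the end of the preheader instead,
/// leaving the loop body free of the redundant toggle.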
1510void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
1511 if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
1512 return;
1513
1514 MachineBasicBlock *UnavailablePred = nullptr;
1515 VSETVLIInfo AvailableInfo;
1516 for (MachineBasicBlock *P : MBB.predecessors()) {
1517 const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
1518 if (PredInfo.isUnknown()) {
1519 if (UnavailablePred)
1520 return;
1521 UnavailablePred = P;
1522 } else if (!AvailableInfo.isValid()) {
1523 AvailableInfo = PredInfo;
1524 } else if (AvailableInfo != PredInfo) {
1525 return;
1526 }
1527 }
1528
1529 // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1530 // phase 3.
1531 if (!UnavailablePred || !AvailableInfo.isValid())
1532 return;
1533
1534 if (!LIS)
1535 return;
1536
1537 // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
1538 // the unavailable pred.
1539 if (AvailableInfo.hasSEWLMULRatioOnly())
1540 return;
1541
1542 // Critical edge - TODO: consider splitting?
1543 if (UnavailablePred->succ_size() != 1)
1544 return;
1545
  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point where we're
  // going to insert the vsetvli.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here: one of legality,
  // one of profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We cannot
  //    make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
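    // Count a transition as removed when the proposed entry state makes it
    // unnecessary, and subtract one whenever the old entry state already made
    // it unnecessary.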
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
bool RISCVInsertVSETVLI::canMutatePriorConfig(
    const MachineInstr &PrevMI, const MachineInstr &MI,
    const DemandedFields &Used) const {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!RISCVInstrInfo::isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (RISCVInstrInfo::isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);

    // If the AVL is a register, we need to make sure its definition is the
    // same at PrevMI as it was at MI.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
      VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
      VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
      if (!VNI || !PrevVNI || VNI != PrevVNI)
        return false;
    }
  }

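  // Operand 2 of a vector config instruction holds the VTYPE immediate.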
  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;

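  // Clear the AVL register use on a config instruction, shrink the old
  // value's live range, and record a now-dead ADDI that fed it so it can be
  // deleted once iteration is done.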
  auto dropAVLUse = [&](MachineOperand &MO) {
    if (!MO.isReg() || !MO.getReg().isVirtual())
      return;
    Register OldVLReg = MO.getReg();
    MO.setReg(RISCV::NoRegister);

    if (LIS)
      LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

    MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
    if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
        MRI->use_nodbg_empty(OldVLReg))
      ToDelete.push_back(VLOpDef);
  };

  for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
    if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

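    // If the config instruction's GPR result is live, its VL output is
    // observable by later code.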
    if (!MI.getOperand(0).isDead())
      Used.demandVL();

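    // NextMI is the closest config instruction after MI with no VL/VTYPE
    // clobber in between; see if MI can be removed or folded into it.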
    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        dropAVLUse(MI.getOperand(1));
        if (LIS)
          LIS->RemoveMachineInstrFromMaps(MI);
        MI.eraseFromParent();
        NumCoalescedVSETVL++;
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!RISCVInstrInfo::isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // Move the AVL from NextMI to MI
          dropAVLUse(MI.getOperand(1));
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          dropAVLUse(NextMI->getOperand(1));

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual() && LIS) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            SlotIndex NextMISlot =
                LIS->getInstructionIndex(*NextMI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(NextMISlot);
            LiveInterval::Segment S(MISlot, NextMISlot, DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());

        dropAVLUse(NextMI->getOperand(1));
        if (LIS)
          LIS->RemoveMachineInstrFromMaps(*NextMI);
        NextMI->eraseFromParent();
        NumCoalescedVSETVL++;
        // fallthrough
      }
    }
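    // MI survives as the new candidate for coalescing with configs earlier
    // in the block.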
    NextMI = &MI;
    Used = getDemanded(MI, ST);
  }

  // Loop over the dead AVL values, and delete them now. This has
  // to be outside the above loop to avoid invalidating iterators.
  for (auto *MI : ToDelete) {
    if (LIS) {
      LIS->removeInterval(MI->getOperand(0).getReg());
      LIS->RemoveMachineInstrFromMaps(*MI);
    }
    MI->eraseFromParent();
  }
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      assert(VLOutput.isVirtual());
      if (!MI.getOperand(1).isDead()) {
        auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
                                TII->get(RISCV::PseudoReadVL), VLOutput);
        // Move the LiveInterval's definition down to PseudoReadVL.
        if (LIS) {
          SlotIndex NewDefSI =
              LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
          LiveInterval &DefLI = LIS->getInterval(VLOutput);
          LiveRange::Segment *DefSeg = DefLI.getSegmentContaining(NewDefSI);
          VNInfo *DefVNI = DefLI.getVNInfoAt(DefSeg->start);
          DefLI.removeSegment(DefSeg->start, NewDefSI);
          DefVNI->def = NewDefSI;
        }
      }
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
      MI.addRegisterDefined(RISCV::VL, MRI->getTargetRegisterInfo());
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
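  // LiveIntervals is optional; when it is unavailable, all live-range
  // bookkeeping in this pass is skipped.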
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole-based, demanded-fields driven transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded-fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  // We're visiting blocks from the bottom up because a VSETVLI in an
  // earlier block might become dead when its uses in later blocks are
  // optimized away.
  for (MachineBasicBlock *MBB : post_order(&MF))
    coalesceVSETVLIs(*MBB);

  // Insert PseudoReadVL after each VLEFF/VLSEGFF and replace the vl output
  // of the VLEFF/VLSEGFF with it.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}