1//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass inserts writes to the VXRM CSR as needed by vector instructions.
10// Each instruction that uses VXRM carries an operand that contains its required
11// VXRM value. This pass tries to optimize placement to avoid redundant writes
12// to VXRM.
13//
14// This is done using 2 dataflow algorithms. The first is a forward data flow
15// to calculate where a VXRM value is available. The second is a backwards
16// dataflow to determine where a VXRM value is anticipated.
17//
18// Finally, we use the results of these two dataflows to insert VXRM writes
19// where a value is anticipated, but not available.
20//
21// FIXME: This pass does not split critical edges, so there can still be some
22// redundancy.
23//
24// FIXME: If we are willing to have writes that aren't always needed, we could
25// reduce the number of VXRM writes in some cases.
26//===----------------------------------------------------------------------===//
27
28#include "MCTargetDesc/RISCVBaseInfo.h"
29#include "RISCV.h"
30#include "RISCVSubtarget.h"
31#include "llvm/CodeGen/MachineFunctionPass.h"
32#include <queue>
33
34using namespace llvm;
35
36#define DEBUG_TYPE "riscv-insert-write-vxrm"
37#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
38
39namespace {
40
41class VXRMInfo {
42 uint8_t VXRMImm = 0;
43
44 enum : uint8_t {
45 Uninitialized,
46 Static,
47 Unknown,
48 } State = Uninitialized;
49
50public:
51 VXRMInfo() {}
52
53 static VXRMInfo getUnknown() {
54 VXRMInfo Info;
55 Info.setUnknown();
56 return Info;
57 }
58
59 bool isValid() const { return State != Uninitialized; }
60 void setUnknown() { State = Unknown; }
61 bool isUnknown() const { return State == Unknown; }
62
63 bool isStatic() const { return State == Static; }
64
65 void setVXRMImm(unsigned Imm) {
66 assert(Imm <= 3 && "Unexpected VXRM value");
67 VXRMImm = Imm;
68 State = Static;
69 }
70 unsigned getVXRMImm() const {
71 assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
72 return VXRMImm;
73 }
74
75 bool operator==(const VXRMInfo &Other) const {
76 // Uninitialized is only equal to another Uninitialized.
77 if (State != Other.State)
78 return false;
79
80 if (isStatic())
81 return VXRMImm == Other.VXRMImm;
82
83 assert((isValid() || isUnknown()) && "Unexpected state");
84 return true;
85 }
86
87 bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
88
89 // Calculate the VXRMInfo visible to a block assuming this and Other are
90 // both predecessors.
91 VXRMInfo intersect(const VXRMInfo &Other) const {
92 // If the new value isn't valid, ignore it.
93 if (!Other.isValid())
94 return *this;
95
96 // If this value isn't valid, this must be the first predecessor, use it.
97 if (!isValid())
98 return Other;
99
100 // If either is unknown, the result is unknown.
101 if (isUnknown() || Other.isUnknown())
102 return VXRMInfo::getUnknown();
103
104 // If we have an exact match, return this.
105 if (*this == Other)
106 return *this;
107
108 // Otherwise the result is unknown.
109 return VXRMInfo::getUnknown();
110 }
111
112#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
113 /// Support for debugging, callable in GDB: V->dump()
114 LLVM_DUMP_METHOD void dump() const {
115 print(dbgs());
116 dbgs() << "\n";
117 }
118
119 void print(raw_ostream &OS) const {
120 OS << '{';
121 if (!isValid())
122 OS << "Uninitialized";
123 else if (isUnknown())
124 OS << "Unknown";
125 else
126 OS << getVXRMImm();
127 OS << '}';
128 }
129#endif
130};
131
132#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
133LLVM_ATTRIBUTE_USED
134inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
135 V.print(OS);
136 return OS;
137}
138#endif
139
140struct BlockData {
141 // Indicates if the block uses VXRM. Uninitialized means no use.
142 VXRMInfo VXRMUse;
143
144 // Indicates the VXRM output from the block. Unitialized means transparent.
145 VXRMInfo VXRMOut;
146
147 // Keeps track of the available VXRM value at the start of the basic bloc.
148 VXRMInfo AvailableIn;
149
150 // Keeps track of the available VXRM value at the end of the basic block.
151 VXRMInfo AvailableOut;
152
153 // Keeps track of what VXRM is anticipated at the start of the basic block.
154 VXRMInfo AnticipatedIn;
155
156 // Keeps track of what VXRM is anticipated at the end of the basic block.
157 VXRMInfo AnticipatedOut;
158
159 // Keeps track of whether the block is already in the queue.
160 bool InQueue;
161
162 BlockData() = default;
163};
164
165class RISCVInsertWriteVXRM : public MachineFunctionPass {
166 const TargetInstrInfo *TII;
167
168 std::vector<BlockData> BlockInfo;
169 std::queue<const MachineBasicBlock *> WorkList;
170
171public:
172 static char ID;
173
174 RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}
175
176 bool runOnMachineFunction(MachineFunction &MF) override;
177
178 void getAnalysisUsage(AnalysisUsage &AU) const override {
179 AU.setPreservesCFG();
180 MachineFunctionPass::getAnalysisUsage(AU);
181 }
182
183 StringRef getPassName() const override {
184 return RISCV_INSERT_WRITE_VXRM_NAME;
185 }
186
187private:
188 bool computeVXRMChanges(const MachineBasicBlock &MBB);
189 void computeAvailable(const MachineBasicBlock &MBB);
190 void computeAnticipated(const MachineBasicBlock &MBB);
191 void emitWriteVXRM(MachineBasicBlock &MBB);
192};
193
194} // end anonymous namespace
195
196char RISCVInsertWriteVXRM::ID = 0;
197
198INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
199 false, false)
200
201static bool ignoresVXRM(const MachineInstr &MI) {
202 switch (RISCV::getRVVMCOpcode(RVVPseudoOpcode: MI.getOpcode())) {
203 default:
204 return false;
205 case RISCV::VNCLIP_WI:
206 case RISCV::VNCLIPU_WI:
207 return MI.getOperand(i: 3).getImm() == 0;
208 }
209}
210
211bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
212 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
213
214 bool NeedVXRMWrite = false;
215 for (const MachineInstr &MI : MBB) {
216 int VXRMIdx = RISCVII::getVXRMOpNum(Desc: MI.getDesc());
217 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
218 unsigned NewVXRMImm = MI.getOperand(i: VXRMIdx).getImm();
219
220 if (!BBInfo.VXRMUse.isValid())
221 BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
222
223 BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
224 NeedVXRMWrite = true;
225 continue;
226 }
227
228 if (MI.isCall() || MI.isInlineAsm() ||
229 MI.modifiesRegister(Reg: RISCV::VXRM, /*TRI=*/nullptr)) {
230 if (!BBInfo.VXRMUse.isValid())
231 BBInfo.VXRMUse.setUnknown();
232
233 BBInfo.VXRMOut.setUnknown();
234 }
235 }
236
237 return NeedVXRMWrite;
238}
239
240void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
241 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
242
243 BBInfo.InQueue = false;
244
245 VXRMInfo Available;
246 if (MBB.pred_empty()) {
247 Available.setUnknown();
248 } else {
249 for (const MachineBasicBlock *P : MBB.predecessors())
250 Available = Available.intersect(Other: BlockInfo[P->getNumber()].AvailableOut);
251 }
252
253 // If we don't have any valid available info, wait until we do.
254 if (!Available.isValid())
255 return;
256
257 if (Available != BBInfo.AvailableIn) {
258 BBInfo.AvailableIn = Available;
259 LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
260 << " changed to " << BBInfo.AvailableIn << "\n");
261 }
262
263 if (BBInfo.VXRMOut.isValid())
264 Available = BBInfo.VXRMOut;
265
266 if (Available == BBInfo.AvailableOut)
267 return;
268
269 BBInfo.AvailableOut = Available;
270 LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
271 << " changed to " << BBInfo.AvailableOut << "\n");
272
273 // Add the successors to the work list so that we can propagate.
274 for (MachineBasicBlock *S : MBB.successors()) {
275 if (!BlockInfo[S->getNumber()].InQueue) {
276 BlockInfo[S->getNumber()].InQueue = true;
277 WorkList.push(x: S);
278 }
279 }
280}
281
282void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) {
283 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
284
285 BBInfo.InQueue = false;
286
287 VXRMInfo Anticipated;
288 if (MBB.succ_empty()) {
289 Anticipated.setUnknown();
290 } else {
291 for (const MachineBasicBlock *S : MBB.successors())
292 Anticipated =
293 Anticipated.intersect(Other: BlockInfo[S->getNumber()].AnticipatedIn);
294 }
295
296 // If we don't have any valid anticipated info, wait until we do.
297 if (!Anticipated.isValid())
298 return;
299
300 if (Anticipated != BBInfo.AnticipatedOut) {
301 BBInfo.AnticipatedOut = Anticipated;
302 LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
303 << " changed to " << BBInfo.AnticipatedOut << "\n");
304 }
305
306 // If this block reads VXRM, copy it.
307 if (BBInfo.VXRMUse.isValid())
308 Anticipated = BBInfo.VXRMUse;
309
310 if (Anticipated == BBInfo.AnticipatedIn)
311 return;
312
313 BBInfo.AnticipatedIn = Anticipated;
314 LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
315 << " changed to " << BBInfo.AnticipatedIn << "\n");
316
317 // Add the predecessors to the work list so that we can propagate.
318 for (MachineBasicBlock *P : MBB.predecessors()) {
319 if (!BlockInfo[P->getNumber()].InQueue) {
320 BlockInfo[P->getNumber()].InQueue = true;
321 WorkList.push(x: P);
322 }
323 }
324}
325
326void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
327 const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
328
329 VXRMInfo Info = BBInfo.AvailableIn;
330
331 // Flag to indicates we need to insert a VXRM write. We want to delay it as
332 // late as possible in this block.
333 bool PendingInsert = false;
334
335 // Insert VXRM write if anticipated and not available.
336 if (BBInfo.AnticipatedIn.isStatic()) {
337 // If this is the entry block and the value is anticipated, insert.
338 if (MBB.isEntryBlock()) {
339 PendingInsert = true;
340 } else {
341 // Search for any predecessors that wouldn't satisfy our requirement and
342 // insert a write VXRM if needed.
343 // NOTE: If one predecessor is able to provide the requirement, but
344 // another isn't, it means we have a critical edge. The better placement
345 // would be to split the critical edge.
346 for (MachineBasicBlock *P : MBB.predecessors()) {
347 const BlockData &PInfo = BlockInfo[P->getNumber()];
348 // If it's available out of the predecessor, then we're ok.
349 if (PInfo.AvailableOut.isStatic() &&
350 PInfo.AvailableOut.getVXRMImm() ==
351 BBInfo.AnticipatedIn.getVXRMImm())
352 continue;
353 // If the predecessor anticipates this value for all its succesors,
354 // then a write to VXRM would have already occured before this block is
355 // executed.
356 if (PInfo.AnticipatedOut.isStatic() &&
357 PInfo.AnticipatedOut.getVXRMImm() ==
358 BBInfo.AnticipatedIn.getVXRMImm())
359 continue;
360 PendingInsert = true;
361 break;
362 }
363 }
364
365 Info = BBInfo.AnticipatedIn;
366 }
367
368 for (MachineInstr &MI : MBB) {
369 int VXRMIdx = RISCVII::getVXRMOpNum(Desc: MI.getDesc());
370 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
371 unsigned NewVXRMImm = MI.getOperand(i: VXRMIdx).getImm();
372
373 if (PendingInsert || !Info.isStatic() ||
374 Info.getVXRMImm() != NewVXRMImm) {
375 assert((!PendingInsert ||
376 (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) &&
377 "Pending VXRM insertion mismatch");
378 LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
379 BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: RISCV::WriteVXRMImm))
380 .addImm(Val: NewVXRMImm);
381 PendingInsert = false;
382 }
383
384 MI.addOperand(Op: MachineOperand::CreateReg(Reg: RISCV::VXRM, /*IsDef*/ isDef: false,
385 /*IsImp*/ isImp: true));
386 Info.setVXRMImm(NewVXRMImm);
387 continue;
388 }
389
390 if (MI.isCall() || MI.isInlineAsm() ||
391 MI.modifiesRegister(Reg: RISCV::VXRM, /*TRI=*/nullptr))
392 Info.setUnknown();
393 }
394
395 // If all our successors anticipate a value, do the insert.
396 // NOTE: It's possible that not all predecessors of our successor provide the
397 // correct value. This can occur on critical edges. If we don't split the
398 // critical edge we'll also have a write vxrm in the succesor that is
399 // redundant with this one.
400 if (PendingInsert ||
401 (BBInfo.AnticipatedOut.isStatic() &&
402 (!Info.isStatic() ||
403 Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) {
404 assert((!PendingInsert ||
405 (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() &&
406 Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) &&
407 "Pending VXRM insertion mismatch");
408 LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
409 << " changing to " << BBInfo.AnticipatedOut << "\n");
410 BuildMI(BB&: MBB, I: MBB.getFirstTerminator(), MIMD: DebugLoc(),
411 MCID: TII->get(Opcode: RISCV::WriteVXRMImm))
412 .addImm(Val: BBInfo.AnticipatedOut.getVXRMImm());
413 }
414}
415
416bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
417 // Skip if the vector extension is not enabled.
418 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
419 if (!ST.hasVInstructions())
420 return false;
421
422 TII = ST.getInstrInfo();
423
424 assert(BlockInfo.empty() && "Expect empty block infos");
425 BlockInfo.resize(new_size: MF.getNumBlockIDs());
426
427 // Phase 1 - collect block information.
428 bool NeedVXRMChange = false;
429 for (const MachineBasicBlock &MBB : MF)
430 NeedVXRMChange |= computeVXRMChanges(MBB);
431
432 if (!NeedVXRMChange) {
433 BlockInfo.clear();
434 return false;
435 }
436
437 // Phase 2 - Compute available VXRM using a forward walk.
438 for (const MachineBasicBlock &MBB : MF) {
439 WorkList.push(x: &MBB);
440 BlockInfo[MBB.getNumber()].InQueue = true;
441 }
442 while (!WorkList.empty()) {
443 const MachineBasicBlock &MBB = *WorkList.front();
444 WorkList.pop();
445 computeAvailable(MBB);
446 }
447
448 // Phase 3 - Compute anticipated VXRM using a backwards walk.
449 for (const MachineBasicBlock &MBB : llvm::reverse(C&: MF)) {
450 WorkList.push(x: &MBB);
451 BlockInfo[MBB.getNumber()].InQueue = true;
452 }
453 while (!WorkList.empty()) {
454 const MachineBasicBlock &MBB = *WorkList.front();
455 WorkList.pop();
456 computeAnticipated(MBB);
457 }
458
459 // Phase 4 - Emit VXRM writes at the earliest place possible.
460 for (MachineBasicBlock &MBB : MF)
461 emitWriteVXRM(MBB);
462
463 BlockInfo.clear();
464
465 return true;
466}
467
468FunctionPass *llvm::createRISCVInsertWriteVXRMPass() {
469 return new RISCVInsertWriteVXRM();
470}
471