//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
12
13#include "RISCVInstrInfo.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMatInt.h"
16#include "RISCV.h"
17#include "RISCVMachineFunctionInfo.h"
18#include "RISCVSubtarget.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/MemoryLocation.h"
23#include "llvm/Analysis/ValueTracking.h"
24#include "llvm/CodeGen/LiveIntervals.h"
25#include "llvm/CodeGen/LiveVariables.h"
26#include "llvm/CodeGen/MachineCombinerPattern.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/MachineTraceMetrics.h"
30#include "llvm/CodeGen/RegisterScavenging.h"
31#include "llvm/CodeGen/StackMaps.h"
32#include "llvm/IR/DebugInfoMetadata.h"
33#include "llvm/IR/Module.h"
34#include "llvm/MC/MCDwarf.h"
35#include "llvm/MC/MCInstBuilder.h"
36#include "llvm/MC/TargetRegistry.h"
37#include "llvm/Support/ErrorHandling.h"
38
39using namespace llvm;
40
41#define GEN_CHECK_COMPRESS_INSTR
42#include "RISCVGenCompressInstEmitter.inc"
43
44#define GET_INSTRINFO_CTOR_DTOR
45#include "RISCVGenInstrInfo.inc"
46
47#define DEBUG_TYPE "riscv-instr-info"
48STATISTIC(NumVRegSpilled,
49 "Number of registers within vector register groups spilled");
50STATISTIC(NumVRegReloaded,
51 "Number of registers within vector register groups reloaded");
52
53static cl::opt<bool> PreferWholeRegisterMove(
54 "riscv-prefer-whole-register-move", cl::init(Val: false), cl::Hidden,
55 cl::desc("Prefer whole register move for vector registers."));
56
57static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
58 "riscv-force-machine-combiner-strategy", cl::Hidden,
59 cl::desc("Force machine combiner to use a specific strategy for machine "
60 "trace metrics evaluation."),
61 cl::init(Val: MachineTraceStrategy::TS_NumStrategies),
62 cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
63 "Local strategy."),
64 clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
65 "MinInstrCount strategy.")));
66
67static cl::opt<bool> OutlinerEnableRegSave(
68 "riscv-outliner-regsave", cl::init(Val: true), cl::Hidden,
69 cl::desc("Enable RegSave strategy in machine outliner (save X5 to a "
70 "temporary register when X5 is live across outlined calls)."));
71
72namespace llvm::RISCVVPseudosTable {
73
74using namespace RISCV;
75
76#define GET_RISCVVPseudosTable_IMPL
77#include "RISCVGenSearchableTables.inc"
78
79} // namespace llvm::RISCVVPseudosTable
80
81namespace llvm::RISCV {
82
83#define GET_RISCVMaskedPseudosTable_IMPL
84#include "RISCVGenSearchableTables.inc"
85
86} // end namespace llvm::RISCV
87
88RISCVInstrInfo::RISCVInstrInfo(const RISCVSubtarget &STI)
89 : RISCVGenInstrInfo(STI, RegInfo, RISCV::ADJCALLSTACKDOWN,
90 RISCV::ADJCALLSTACKUP),
91 RegInfo(STI.getHwMode()), STI(STI) {}
92
93#define GET_INSTRINFO_HELPERS
94#include "RISCVGenInstrInfo.inc"
95
96MCInst RISCVInstrInfo::getNop() const {
97 if (STI.hasStdExtZca())
98 return MCInstBuilder(RISCV::C_NOP);
99 return MCInstBuilder(RISCV::ADDI)
100 .addReg(Reg: RISCV::X0)
101 .addReg(Reg: RISCV::X0)
102 .addImm(Val: 0);
103}
104
105Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
106 int &FrameIndex) const {
107 TypeSize Dummy = TypeSize::getZero();
108 return isLoadFromStackSlot(MI, FrameIndex, MemBytes&: Dummy);
109}
110
111static std::optional<unsigned> getLMULForRVVWholeLoadStore(unsigned Opcode) {
112 switch (Opcode) {
113 default:
114 return std::nullopt;
115 case RISCV::VS1R_V:
116 case RISCV::VL1RE8_V:
117 case RISCV::VL1RE16_V:
118 case RISCV::VL1RE32_V:
119 case RISCV::VL1RE64_V:
120 return 1;
121 case RISCV::VS2R_V:
122 case RISCV::VL2RE8_V:
123 case RISCV::VL2RE16_V:
124 case RISCV::VL2RE32_V:
125 case RISCV::VL2RE64_V:
126 return 2;
127 case RISCV::VS4R_V:
128 case RISCV::VL4RE8_V:
129 case RISCV::VL4RE16_V:
130 case RISCV::VL4RE32_V:
131 case RISCV::VL4RE64_V:
132 return 4;
133 case RISCV::VS8R_V:
134 case RISCV::VL8RE8_V:
135 case RISCV::VL8RE16_V:
136 case RISCV::VL8RE32_V:
137 case RISCV::VL8RE64_V:
138 return 8;
139 }
140}
141
142Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
143 int &FrameIndex,
144 TypeSize &MemBytes) const {
145 switch (MI.getOpcode()) {
146 default:
147 return 0;
148 case RISCV::LB:
149 case RISCV::LBU:
150 MemBytes = TypeSize::getFixed(ExactSize: 1);
151 break;
152 case RISCV::LH:
153 case RISCV::LH_INX:
154 case RISCV::LHU:
155 case RISCV::FLH:
156 MemBytes = TypeSize::getFixed(ExactSize: 2);
157 break;
158 case RISCV::LW:
159 case RISCV::LW_INX:
160 case RISCV::FLW:
161 case RISCV::LWU:
162 MemBytes = TypeSize::getFixed(ExactSize: 4);
163 break;
164 case RISCV::LD:
165 case RISCV::LD_RV32:
166 case RISCV::FLD:
167 MemBytes = TypeSize::getFixed(ExactSize: 8);
168 break;
169 case RISCV::VL1RE8_V:
170 case RISCV::VL2RE8_V:
171 case RISCV::VL4RE8_V:
172 case RISCV::VL8RE8_V:
173 if (!MI.getOperand(i: 1).isFI())
174 return Register();
175 FrameIndex = MI.getOperand(i: 1).getIndex();
176 unsigned LMUL = *getLMULForRVVWholeLoadStore(Opcode: MI.getOpcode());
177 MemBytes = TypeSize::getScalable(MinimumSize: RISCV::RVVBytesPerBlock * LMUL);
178 return MI.getOperand(i: 0).getReg();
179 }
180
181 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
182 MI.getOperand(i: 2).getImm() == 0) {
183 FrameIndex = MI.getOperand(i: 1).getIndex();
184 return MI.getOperand(i: 0).getReg();
185 }
186
187 return 0;
188}
189
190Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
191 int &FrameIndex) const {
192 TypeSize Dummy = TypeSize::getZero();
193 return isStoreToStackSlot(MI, FrameIndex, MemBytes&: Dummy);
194}
195
196Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
197 int &FrameIndex,
198 TypeSize &MemBytes) const {
199 switch (MI.getOpcode()) {
200 default:
201 return 0;
202 case RISCV::SB:
203 MemBytes = TypeSize::getFixed(ExactSize: 1);
204 break;
205 case RISCV::SH:
206 case RISCV::SH_INX:
207 case RISCV::FSH:
208 MemBytes = TypeSize::getFixed(ExactSize: 2);
209 break;
210 case RISCV::SW:
211 case RISCV::SW_INX:
212 case RISCV::FSW:
213 MemBytes = TypeSize::getFixed(ExactSize: 4);
214 break;
215 case RISCV::SD:
216 case RISCV::SD_RV32:
217 case RISCV::FSD:
218 MemBytes = TypeSize::getFixed(ExactSize: 8);
219 break;
220 case RISCV::VS1R_V:
221 case RISCV::VS2R_V:
222 case RISCV::VS4R_V:
223 case RISCV::VS8R_V:
224 if (!MI.getOperand(i: 1).isFI())
225 return Register();
226 FrameIndex = MI.getOperand(i: 1).getIndex();
227 unsigned LMUL = *getLMULForRVVWholeLoadStore(Opcode: MI.getOpcode());
228 MemBytes = TypeSize::getScalable(MinimumSize: RISCV::RVVBytesPerBlock * LMUL);
229 return MI.getOperand(i: 0).getReg();
230 }
231
232 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
233 MI.getOperand(i: 2).getImm() == 0) {
234 FrameIndex = MI.getOperand(i: 1).getIndex();
235 return MI.getOperand(i: 0).getReg();
236 }
237
238 return 0;
239}
240
241bool RISCVInstrInfo::isReMaterializableImpl(
242 const MachineInstr &MI) const {
243 switch (RISCV::getRVVMCOpcode(RVVPseudoOpcode: MI.getOpcode())) {
244 case RISCV::VMV_V_X:
245 case RISCV::VFMV_V_F:
246 case RISCV::VMV_V_I:
247 case RISCV::VMV_S_X:
248 case RISCV::VFMV_S_F:
249 case RISCV::VID_V:
250 return MI.getOperand(i: 1).isUndef();
251 default:
252 return TargetInstrInfo::isReMaterializableImpl(MI);
253 }
254}
255
/// Decide whether copying a register tuple in ascending order would overwrite
/// source registers before they have been read.
///
/// \param DstReg   Encoding of the first destination register.
/// \param SrcReg   Encoding of the first source register.
/// \param NumRegs  Number of registers in the tuple.
/// \returns true when the destination starts strictly above the source and
/// within \p NumRegs of it, i.e. the two ranges overlap with the destination
/// on the high side.
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
}
260
261static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
262 const MachineBasicBlock &MBB,
263 MachineBasicBlock::const_iterator MBBI,
264 MachineBasicBlock::const_iterator &DefMBBI,
265 RISCVVType::VLMUL LMul) {
266 if (PreferWholeRegisterMove)
267 return false;
268
269 assert(MBBI->getOpcode() == TargetOpcode::COPY &&
270 "Unexpected COPY instruction.");
271 Register SrcReg = MBBI->getOperand(i: 1).getReg();
272 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
273
274 bool FoundDef = false;
275 bool FirstVSetVLI = false;
276 unsigned FirstSEW = 0;
277 while (MBBI != MBB.begin()) {
278 --MBBI;
279 if (MBBI->isMetaInstruction())
280 continue;
281
282 if (RISCVInstrInfo::isVectorConfigInstr(MI: *MBBI)) {
283 // There is a vsetvli between COPY and source define instruction.
284 // vy = def_vop ... (producing instruction)
285 // ...
286 // vsetvli
287 // ...
288 // vx = COPY vy
289 if (!FoundDef) {
290 if (!FirstVSetVLI) {
291 FirstVSetVLI = true;
292 unsigned FirstVType = MBBI->getOperand(i: 2).getImm();
293 RISCVVType::VLMUL FirstLMul = RISCVVType::getVLMUL(VType: FirstVType);
294 FirstSEW = RISCVVType::getSEW(VType: FirstVType);
295 // The first encountered vsetvli must have the same lmul as the
296 // register class of COPY.
297 if (FirstLMul != LMul)
298 return false;
299 }
300 // Only permit `vsetvli x0, x0, vtype` between COPY and the source
301 // define instruction.
302 if (!RISCVInstrInfo::isVLPreservingConfig(MI: *MBBI))
303 return false;
304 continue;
305 }
306
307 // MBBI is the first vsetvli before the producing instruction.
308 unsigned VType = MBBI->getOperand(i: 2).getImm();
309 // If there is a vsetvli between COPY and the producing instruction.
310 if (FirstVSetVLI) {
311 // If SEW is different, return false.
312 if (RISCVVType::getSEW(VType) != FirstSEW)
313 return false;
314 }
315
316 // If the vsetvli is tail undisturbed, keep the whole register move.
317 if (!RISCVVType::isTailAgnostic(VType))
318 return false;
319
320 // The checking is conservative. We only have register classes for
321 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
322 // for fractional LMUL operations. However, we could not use the vsetvli
323 // lmul for widening operations. The result of widening operation is
324 // 2 x LMUL.
325 return LMul == RISCVVType::getVLMUL(VType);
326 } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
327 return false;
328 } else if (MBBI->getNumDefs()) {
329 // Check all the instructions which will change VL.
330 // For example, vleff has implicit def VL.
331 if (MBBI->modifiesRegister(Reg: RISCV::VL, /*TRI=*/nullptr))
332 return false;
333
334 // Only converting whole register copies to vmv.v.v when the defining
335 // value appears in the explicit operands.
336 for (const MachineOperand &MO : MBBI->explicit_operands()) {
337 if (!MO.isReg() || !MO.isDef())
338 continue;
339 if (!FoundDef && TRI->regsOverlap(RegA: MO.getReg(), RegB: SrcReg)) {
340 // We only permit the source of COPY has the same LMUL as the defined
341 // operand.
342 // There are cases we need to keep the whole register copy if the LMUL
343 // is different.
344 // For example,
345 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
346 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
347 // # The COPY may be created by vlmul_trunc intrinsic.
348 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
349 //
350 // After widening, the valid value will be 4 x e32 elements. If we
351 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
352 // FIXME: The COPY of subregister of Zvlsseg register will not be able
353 // to convert to vmv.v.[v|i] under the constraint.
354 if (MO.getReg() != SrcReg)
355 return false;
356
357 // In widening reduction instructions with LMUL_1 input vector case,
358 // only checking the LMUL is insufficient due to reduction result is
359 // always LMUL_1.
360 // For example,
361 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
362 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
363 // $v26 = COPY killed renamable $v8
364 // After widening, The valid value will be 1 x e16 elements. If we
365 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
366 uint64_t TSFlags = MBBI->getDesc().TSFlags;
367 if (RISCVII::isRVVWideningReduction(TSFlags))
368 return false;
369
370 // If the producing instruction does not depend on vsetvli, do not
371 // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
372 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
373 return false;
374
375 // Found the definition.
376 FoundDef = true;
377 DefMBBI = MBBI;
378 break;
379 }
380 }
381 }
382 }
383
384 return false;
385}
386
387void RISCVInstrInfo::copyPhysRegVector(
388 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
389 const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
390 const TargetRegisterClass *RegClass) const {
391 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
392 RISCVVType::VLMUL LMul = RISCVRI::getLMul(TSFlags: RegClass->TSFlags);
393 unsigned NF = RISCVRI::getNF(TSFlags: RegClass->TSFlags);
394
395 uint16_t SrcEncoding = TRI->getEncodingValue(Reg: SrcReg);
396 uint16_t DstEncoding = TRI->getEncodingValue(Reg: DstReg);
397 auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(VLMul: LMul);
398 assert(!Fractional && "It is impossible be fractional lmul here.");
399 unsigned NumRegs = NF * LMulVal;
400 bool ReversedCopy =
401 forwardCopyWillClobberTuple(DstReg: DstEncoding, SrcReg: SrcEncoding, NumRegs);
402 if (ReversedCopy) {
403 // If the src and dest overlap when copying a tuple, we need to copy the
404 // registers in reverse.
405 SrcEncoding += NumRegs - 1;
406 DstEncoding += NumRegs - 1;
407 }
408
409 unsigned I = 0;
410 auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
411 -> std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned,
412 unsigned, unsigned> {
413 if (ReversedCopy) {
414 // For reversed copying, if there are enough aligned registers(8/4/2), we
415 // can do a larger copy(LMUL8/4/2).
416 // Besides, we have already known that DstEncoding is larger than
417 // SrcEncoding in forwardCopyWillClobberTuple, so the difference between
418 // DstEncoding and SrcEncoding should be >= LMUL value we try to use to
419 // avoid clobbering.
420 uint16_t Diff = DstEncoding - SrcEncoding;
421 if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
422 DstEncoding % 8 == 7)
423 return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
424 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
425 if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
426 DstEncoding % 4 == 3)
427 return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
428 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
429 if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
430 DstEncoding % 2 == 1)
431 return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
432 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
433 // Or we should do LMUL1 copying.
434 return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
435 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
436 }
437
438 // For forward copying, if source register encoding and destination register
439 // encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.
440 if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
441 return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
442 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
443 if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
444 return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
445 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
446 if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
447 return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
448 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
449 // Or we should do LMUL1 copying.
450 return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
451 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
452 };
453
454 while (I != NumRegs) {
455 // For non-segment copying, we only do this once as the registers are always
456 // aligned.
457 // For segment copying, we may do this several times. If the registers are
458 // aligned to larger LMUL, we can eliminate some copyings.
459 auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
460 GetCopyInfo(SrcEncoding, DstEncoding);
461 auto [NumCopied, _] = RISCVVType::decodeVLMUL(VLMul: LMulCopied);
462
463 MachineBasicBlock::const_iterator DefMBBI;
464 if (LMul == LMulCopied &&
465 isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
466 Opc = VVOpc;
467 if (DefMBBI->getOpcode() == VIOpc)
468 Opc = VIOpc;
469 }
470
471 // Emit actual copying.
472 // For reversed copying, the encoding should be decreased.
473 MCRegister ActualSrcReg = TRI->findVRegWithEncoding(
474 RegClass, Encoding: ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
475 MCRegister ActualDstReg = TRI->findVRegWithEncoding(
476 RegClass, Encoding: ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
477
478 auto MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Opc), DestReg: ActualDstReg);
479 bool UseVMV_V_I = RISCV::getRVVMCOpcode(RVVPseudoOpcode: Opc) == RISCV::VMV_V_I;
480 bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(RVVPseudoOpcode: Opc) == RISCV::VMV_V_V;
481 if (UseVMV)
482 MIB.addReg(RegNo: ActualDstReg, Flags: RegState::Undef);
483 if (UseVMV_V_I)
484 MIB = MIB.add(MO: DefMBBI->getOperand(i: 2));
485 else
486 MIB = MIB.addReg(RegNo: ActualSrcReg, Flags: getKillRegState(B: KillSrc));
487 if (UseVMV) {
488 const MCInstrDesc &Desc = DefMBBI->getDesc();
489 MIB.add(MO: DefMBBI->getOperand(i: RISCVII::getVLOpNum(Desc))); // AVL
490 unsigned Log2SEW =
491 DefMBBI->getOperand(i: RISCVII::getSEWOpNum(Desc)).getImm();
492 MIB.addImm(Val: Log2SEW ? Log2SEW : 3); // SEW
493 MIB.addImm(Val: 0); // tu, mu
494 MIB.addReg(RegNo: RISCV::VL, Flags: RegState::Implicit);
495 MIB.addReg(RegNo: RISCV::VTYPE, Flags: RegState::Implicit);
496 }
497 // Add an implicit read of the original source to silence the verifier
498 // in the cases where some of the smaller VRs we're copying from might be
499 // undef, caused by the fact that the original, larger source VR might not
500 // be fully initialized at the time this COPY happens.
501 MIB.addReg(RegNo: SrcReg, Flags: RegState::Implicit);
502
503 // If we are copying reversely, we should decrease the encoding.
504 SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
505 DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
506 I += NumCopied;
507 }
508}
509
510void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
511 MachineBasicBlock::iterator MBBI,
512 const DebugLoc &DL, Register DstReg,
513 Register SrcReg, bool KillSrc,
514 bool RenamableDest, bool RenamableSrc) const {
515 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
516 RegState KillFlag = getKillRegState(B: KillSrc);
517
518 if (RISCV::GPRRegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
519 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::ADDI), DestReg: DstReg)
520 .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc))
521 .addImm(Val: 0);
522 return;
523 }
524
525 if (RISCV::GPRF16RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
526 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::PseudoMV_FPR16INX), DestReg: DstReg)
527 .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc));
528 return;
529 }
530
531 if (RISCV::GPRF32RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
532 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::PseudoMV_FPR32INX), DestReg: DstReg)
533 .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc));
534 return;
535 }
536
537 if (RISCV::GPRPairRegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
538 if (STI.isRV32()) {
539 if (STI.hasStdExtZdinx()) {
540 // On RV32_Zdinx, FMV.D will move a pair of registers to another pair of
541 // registers, in one instruction.
542 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FSGNJ_D_IN32X), DestReg: DstReg)
543 .addReg(RegNo: SrcReg, Flags: getRenamableRegState(B: RenamableSrc))
544 .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc));
545 return;
546 }
547
548 if (STI.hasStdExtP()) {
549 // On RV32P, `padd.dw` is a GPR Pair Add
550 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::PADD_DW), DestReg: DstReg)
551 .addReg(RegNo: RISCV::X0_Pair)
552 .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc));
553 return;
554 }
555 }
556
557 MCRegister EvenReg = TRI->getSubReg(Reg: SrcReg, Idx: RISCV::sub_gpr_even);
558 MCRegister OddReg = TRI->getSubReg(Reg: SrcReg, Idx: RISCV::sub_gpr_odd);
559 // We need to correct the odd register of X0_Pair.
560 if (OddReg == RISCV::DUMMY_REG_PAIR_WITH_X0)
561 OddReg = RISCV::X0;
562 assert(DstReg != RISCV::X0_Pair && "Cannot write to X0_Pair");
563
564 // Emit an ADDI for both parts of GPRPair.
565 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::ADDI),
566 DestReg: TRI->getSubReg(Reg: DstReg, Idx: RISCV::sub_gpr_even))
567 .addReg(RegNo: EvenReg, Flags: KillFlag)
568 .addImm(Val: 0);
569 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::ADDI),
570 DestReg: TRI->getSubReg(Reg: DstReg, Idx: RISCV::sub_gpr_odd))
571 .addReg(RegNo: OddReg, Flags: KillFlag)
572 .addImm(Val: 0);
573 return;
574 }
575
576 // Handle copy from csr
577 if (RISCV::VCSRRegClass.contains(Reg: SrcReg) &&
578 RISCV::GPRRegClass.contains(Reg: DstReg)) {
579 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::CSRRS), DestReg: DstReg)
580 .addImm(Val: RISCVSysReg::lookupSysRegByName(Name: TRI->getName(RegNo: SrcReg))->Encoding)
581 .addReg(RegNo: RISCV::X0);
582 return;
583 }
584
585 if (RISCV::FPR16RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
586 unsigned Opc;
587 if (STI.hasStdExtZfh()) {
588 Opc = RISCV::FSGNJ_H;
589 } else {
590 assert(STI.hasStdExtF() &&
591 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
592 "Unexpected extensions");
593 // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
594 DstReg = TRI->getMatchingSuperReg(Reg: DstReg, SubIdx: RISCV::sub_16,
595 RC: &RISCV::FPR32RegClass);
596 SrcReg = TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: RISCV::sub_16,
597 RC: &RISCV::FPR32RegClass);
598 Opc = RISCV::FSGNJ_S;
599 }
600 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Opc), DestReg: DstReg)
601 .addReg(RegNo: SrcReg, Flags: KillFlag)
602 .addReg(RegNo: SrcReg, Flags: KillFlag);
603 return;
604 }
605
606 if (RISCV::FPR32RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
607 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FSGNJ_S), DestReg: DstReg)
608 .addReg(RegNo: SrcReg, Flags: KillFlag)
609 .addReg(RegNo: SrcReg, Flags: KillFlag);
610 return;
611 }
612
613 if (RISCV::FPR64RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
614 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FSGNJ_D), DestReg: DstReg)
615 .addReg(RegNo: SrcReg, Flags: KillFlag)
616 .addReg(RegNo: SrcReg, Flags: KillFlag);
617 return;
618 }
619
620 if (RISCV::FPR32RegClass.contains(Reg: DstReg) &&
621 RISCV::GPRRegClass.contains(Reg: SrcReg)) {
622 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FMV_W_X), DestReg: DstReg)
623 .addReg(RegNo: SrcReg, Flags: KillFlag);
624 return;
625 }
626
627 if (RISCV::GPRRegClass.contains(Reg: DstReg) &&
628 RISCV::FPR32RegClass.contains(Reg: SrcReg)) {
629 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FMV_X_W), DestReg: DstReg)
630 .addReg(RegNo: SrcReg, Flags: KillFlag);
631 return;
632 }
633
634 if (RISCV::FPR64RegClass.contains(Reg: DstReg) &&
635 RISCV::GPRRegClass.contains(Reg: SrcReg)) {
636 assert(STI.getXLen() == 64 && "Unexpected GPR size");
637 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FMV_D_X), DestReg: DstReg)
638 .addReg(RegNo: SrcReg, Flags: KillFlag);
639 return;
640 }
641
642 if (RISCV::GPRRegClass.contains(Reg: DstReg) &&
643 RISCV::FPR64RegClass.contains(Reg: SrcReg)) {
644 assert(STI.getXLen() == 64 && "Unexpected GPR size");
645 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FMV_X_D), DestReg: DstReg)
646 .addReg(RegNo: SrcReg, Flags: KillFlag);
647 return;
648 }
649
650 // VR->VR copies.
651 const TargetRegisterClass *RegClass =
652 TRI->getCommonMinimalPhysRegClass(Reg1: SrcReg, Reg2: DstReg);
653 if (RISCVRegisterInfo::isRVVRegClass(RC: RegClass)) {
654 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
655 return;
656 }
657
658 llvm_unreachable("Impossible reg-to-reg copy");
659}
660
661void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
662 MachineBasicBlock::iterator I,
663 Register SrcReg, bool IsKill, int FI,
664 const TargetRegisterClass *RC,
665 Register VReg,
666 MachineInstr::MIFlag Flags) const {
667 MachineFunction *MF = MBB.getParent();
668 MachineFrameInfo &MFI = MF->getFrameInfo();
669 Align Alignment = MFI.getObjectAlign(ObjectIdx: FI);
670
671 unsigned Opcode;
672 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
673 Opcode = RegInfo.getRegSizeInBits(RC: RISCV::GPRRegClass) == 32 ? RISCV::SW
674 : RISCV::SD;
675 } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
676 Opcode = RISCV::SH_INX;
677 } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
678 Opcode = RISCV::SW_INX;
679 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
680 if (!STI.is64Bit() && STI.hasStdExtZilsd() &&
681 Alignment >= STI.getZilsdAlign()) {
682 Opcode = RISCV::SD_RV32;
683 } else {
684 Opcode = RISCV::PseudoRV32ZdinxSD;
685 }
686 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
687 Opcode = RISCV::FSH;
688 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
689 Opcode = RISCV::FSW;
690 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
691 Opcode = RISCV::FSD;
692 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
693 Opcode = RISCV::VS1R_V;
694 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
695 Opcode = RISCV::VS2R_V;
696 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
697 Opcode = RISCV::VS4R_V;
698 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
699 Opcode = RISCV::VS8R_V;
700 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
701 Opcode = RISCV::PseudoVSPILL2_M1;
702 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
703 Opcode = RISCV::PseudoVSPILL2_M2;
704 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
705 Opcode = RISCV::PseudoVSPILL2_M4;
706 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
707 Opcode = RISCV::PseudoVSPILL3_M1;
708 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
709 Opcode = RISCV::PseudoVSPILL3_M2;
710 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
711 Opcode = RISCV::PseudoVSPILL4_M1;
712 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
713 Opcode = RISCV::PseudoVSPILL4_M2;
714 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
715 Opcode = RISCV::PseudoVSPILL5_M1;
716 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
717 Opcode = RISCV::PseudoVSPILL6_M1;
718 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
719 Opcode = RISCV::PseudoVSPILL7_M1;
720 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
721 Opcode = RISCV::PseudoVSPILL8_M1;
722 else
723 llvm_unreachable("Can't store this register to stack slot");
724
725 if (RISCVRegisterInfo::isRVVRegClass(RC)) {
726 MachineMemOperand *MMO = MF->getMachineMemOperand(
727 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOStore,
728 Size: TypeSize::getScalable(MinimumSize: MFI.getObjectSize(ObjectIdx: FI)), BaseAlignment: Alignment);
729
730 MFI.setStackID(ObjectIdx: FI, ID: TargetStackID::ScalableVector);
731 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode))
732 .addReg(RegNo: SrcReg, Flags: getKillRegState(B: IsKill))
733 .addFrameIndex(Idx: FI)
734 .addMemOperand(MMO)
735 .setMIFlag(Flags);
736 NumVRegSpilled += RegInfo.getRegSizeInBits(RC: *RC) / RISCV::RVVBitsPerBlock;
737 } else {
738 MachineMemOperand *MMO = MF->getMachineMemOperand(
739 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOStore,
740 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment);
741
742 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode))
743 .addReg(RegNo: SrcReg, Flags: getKillRegState(B: IsKill))
744 .addFrameIndex(Idx: FI)
745 .addImm(Val: 0)
746 .addMemOperand(MMO)
747 .setMIFlag(Flags);
748 }
749}
750
751void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
752 MachineBasicBlock::iterator I,
753 Register DstReg, int FI,
754 const TargetRegisterClass *RC,
755 Register VReg, unsigned SubReg,
756 MachineInstr::MIFlag Flags) const {
757 MachineFunction *MF = MBB.getParent();
758 MachineFrameInfo &MFI = MF->getFrameInfo();
759 Align Alignment = MFI.getObjectAlign(ObjectIdx: FI);
760 DebugLoc DL =
761 Flags & MachineInstr::FrameDestroy ? MBB.findDebugLoc(MBBI: I) : DebugLoc();
762
763 unsigned Opcode;
764 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
765 Opcode = RegInfo.getRegSizeInBits(RC: RISCV::GPRRegClass) == 32 ? RISCV::LW
766 : RISCV::LD;
767 } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
768 Opcode = RISCV::LH_INX;
769 } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
770 Opcode = RISCV::LW_INX;
771 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
772 if (!STI.is64Bit() && STI.hasStdExtZilsd() &&
773 Alignment >= STI.getZilsdAlign()) {
774 Opcode = RISCV::LD_RV32;
775 } else {
776 Opcode = RISCV::PseudoRV32ZdinxLD;
777 }
778 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
779 Opcode = RISCV::FLH;
780 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
781 Opcode = RISCV::FLW;
782 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
783 Opcode = RISCV::FLD;
784 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
785 Opcode = RISCV::VL1RE8_V;
786 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
787 Opcode = RISCV::VL2RE8_V;
788 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
789 Opcode = RISCV::VL4RE8_V;
790 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
791 Opcode = RISCV::VL8RE8_V;
792 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
793 Opcode = RISCV::PseudoVRELOAD2_M1;
794 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
795 Opcode = RISCV::PseudoVRELOAD2_M2;
796 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
797 Opcode = RISCV::PseudoVRELOAD2_M4;
798 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
799 Opcode = RISCV::PseudoVRELOAD3_M1;
800 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
801 Opcode = RISCV::PseudoVRELOAD3_M2;
802 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
803 Opcode = RISCV::PseudoVRELOAD4_M1;
804 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
805 Opcode = RISCV::PseudoVRELOAD4_M2;
806 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
807 Opcode = RISCV::PseudoVRELOAD5_M1;
808 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
809 Opcode = RISCV::PseudoVRELOAD6_M1;
810 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
811 Opcode = RISCV::PseudoVRELOAD7_M1;
812 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
813 Opcode = RISCV::PseudoVRELOAD8_M1;
814 else
815 llvm_unreachable("Can't load this register from stack slot");
816
817 if (RISCVRegisterInfo::isRVVRegClass(RC)) {
818 MachineMemOperand *MMO = MF->getMachineMemOperand(
819 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOLoad,
820 Size: TypeSize::getScalable(MinimumSize: MFI.getObjectSize(ObjectIdx: FI)), BaseAlignment: Alignment);
821
822 MFI.setStackID(ObjectIdx: FI, ID: TargetStackID::ScalableVector);
823 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode), DestReg: DstReg)
824 .addFrameIndex(Idx: FI)
825 .addMemOperand(MMO)
826 .setMIFlag(Flags);
827 NumVRegReloaded += RegInfo.getRegSizeInBits(RC: *RC) / RISCV::RVVBitsPerBlock;
828 } else {
829 MachineMemOperand *MMO = MF->getMachineMemOperand(
830 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOLoad,
831 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment);
832
833 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode), DestReg: DstReg)
834 .addFrameIndex(Idx: FI)
835 .addImm(Val: 0)
836 .addMemOperand(MMO)
837 .setMIFlag(Flags);
838 }
839}
840std::optional<unsigned> getFoldedOpcode(MachineFunction &MF, MachineInstr &MI,
841 ArrayRef<unsigned> Ops,
842 const RISCVSubtarget &ST) {
843
844 // The below optimizations narrow the load so they are only valid for little
845 // endian.
846 // TODO: Support big endian by adding an offset into the frame object?
847 if (MF.getDataLayout().isBigEndian())
848 return std::nullopt;
849
850 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
851 if (Ops.size() != 1 || Ops[0] != 1)
852 return std::nullopt;
853
854 switch (MI.getOpcode()) {
855 default:
856 if (RISCVInstrInfo::isSEXT_W(MI))
857 return RISCV::LW;
858 if (RISCVInstrInfo::isZEXT_W(MI))
859 return RISCV::LWU;
860 if (RISCVInstrInfo::isZEXT_B(MI))
861 return RISCV::LBU;
862 break;
863 case RISCV::SEXT_H:
864 return RISCV::LH;
865 case RISCV::SEXT_B:
866 return RISCV::LB;
867 case RISCV::ZEXT_H_RV32:
868 case RISCV::ZEXT_H_RV64:
869 return RISCV::LHU;
870 }
871
872 switch (RISCV::getRVVMCOpcode(RVVPseudoOpcode: MI.getOpcode())) {
873 default:
874 return std::nullopt;
875 case RISCV::VMV_X_S: {
876 unsigned Log2SEW =
877 MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm();
878 if (ST.getXLen() < (1U << Log2SEW))
879 return std::nullopt;
880 switch (Log2SEW) {
881 case 3:
882 return RISCV::LB;
883 case 4:
884 return RISCV::LH;
885 case 5:
886 return RISCV::LW;
887 case 6:
888 return RISCV::LD;
889 default:
890 llvm_unreachable("Unexpected SEW");
891 }
892 }
893 case RISCV::VFMV_F_S: {
894 unsigned Log2SEW =
895 MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm();
896 switch (Log2SEW) {
897 case 4:
898 return RISCV::FLH;
899 case 5:
900 return RISCV::FLW;
901 case 6:
902 return RISCV::FLD;
903 default:
904 llvm_unreachable("Unexpected SEW");
905 }
906 }
907 }
908}
909
910// This is the version used during InlineSpiller::spillAroundUses
911MachineInstr *
912RISCVInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
913 ArrayRef<unsigned> Ops, int FrameIndex,
914 MachineInstr *&CopyMI, LiveIntervals *LIS,
915 VirtRegMap *VRM) const {
916 MachineBasicBlock::iterator InsertPt = MI;
917 std::optional<unsigned> LoadOpc = getFoldedOpcode(MF, MI, Ops, ST: STI);
918 if (!LoadOpc)
919 return nullptr;
920 Register DstReg = MI.getOperand(i: 0).getReg();
921 return BuildMI(BB&: *MI.getParent(), I: InsertPt, MIMD: MI.getDebugLoc(), MCID: get(Opcode: *LoadOpc),
922 DestReg: DstReg)
923 .addFrameIndex(Idx: FrameIndex)
924 .addImm(Val: 0);
925}
926
927static unsigned getLoadPredicatedOpcode(unsigned Opcode) {
928 switch (Opcode) {
929 case RISCV::LB:
930 return RISCV::PseudoCCLB;
931 case RISCV::LBU:
932 return RISCV::PseudoCCLBU;
933 case RISCV::LH:
934 return RISCV::PseudoCCLH;
935 case RISCV::LHU:
936 return RISCV::PseudoCCLHU;
937 case RISCV::LW:
938 return RISCV::PseudoCCLW;
939 case RISCV::LWU:
940 return RISCV::PseudoCCLWU;
941 case RISCV::LD:
942 return RISCV::PseudoCCLD;
943 case RISCV::QC_E_LB:
944 return RISCV::PseudoCCQC_E_LB;
945 case RISCV::QC_E_LBU:
946 return RISCV::PseudoCCQC_E_LBU;
947 case RISCV::QC_E_LH:
948 return RISCV::PseudoCCQC_E_LH;
949 case RISCV::QC_E_LHU:
950 return RISCV::PseudoCCQC_E_LHU;
951 case RISCV::QC_E_LW:
952 return RISCV::PseudoCCQC_E_LW;
953 default:
954 return 0;
955 }
956}
957
958MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
959 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
960 MachineInstr &LoadMI, MachineInstr *&CopyMI, LiveIntervals *LIS,
961 VirtRegMap *VRM) const {
962 MachineBasicBlock::iterator InsertPt = MI;
963 // For now, only handle RISCV::PseudoCCMOVGPR.
964 if (MI.getOpcode() != RISCV::PseudoCCMOVGPR)
965 return nullptr;
966
967 unsigned PredOpc = getLoadPredicatedOpcode(Opcode: LoadMI.getOpcode());
968
969 if (!STI.hasShortForwardBranchILoad() || !PredOpc)
970 return nullptr;
971
972 MachineRegisterInfo &MRI = MF.getRegInfo();
973 if (Ops.size() != 1 || (Ops[0] != 1 && Ops[0] != 2))
974 return nullptr;
975
976 bool Invert = Ops[0] == 2;
977 const MachineOperand &FalseReg = MI.getOperand(i: !Invert ? 2 : 1);
978 Register DestReg = MI.getOperand(i: 0).getReg();
979 const TargetRegisterClass *PreviousClass = MRI.getRegClass(Reg: FalseReg.getReg());
980 if (!MRI.constrainRegClass(Reg: DestReg, RC: PreviousClass))
981 return nullptr;
982
983 // Create a new predicated version of DefMI.
984 MachineInstrBuilder NewMI = BuildMI(BB&: *MI.getParent(), I: InsertPt,
985 MIMD: MI.getDebugLoc(), MCID: get(Opcode: PredOpc), DestReg);
986
987 // Copy the false register.
988 NewMI.add(MO: FalseReg);
989
990 // Copy all the DefMI operands.
991 const MCInstrDesc &DefDesc = LoadMI.getDesc();
992 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
993 NewMI.add(MO: LoadMI.getOperand(i));
994
995 // Add branch opcode, inverting if necessary.
996 unsigned BCC = MI.getOperand(i: MI.getNumExplicitOperands() - 3).getImm();
997 if (!Invert)
998 BCC = RISCVCC::getInverseBranchOpcode(BCC);
999 NewMI.addImm(Val: BCC);
1000
1001 // Copy condition portion
1002 NewMI.add(MOs: {MI.getOperand(i: MI.getNumExplicitOperands() - 2),
1003 MI.getOperand(i: MI.getNumExplicitOperands() - 1)});
1004 NewMI.cloneMemRefs(OtherMI: LoadMI);
1005 return NewMI;
1006}
1007
1008void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
1009 MachineBasicBlock::iterator MBBI,
1010 const DebugLoc &DL, Register DstReg, uint64_t Val,
1011 MachineInstr::MIFlag Flag, bool DstRenamable,
1012 bool DstIsDead) const {
1013 Register SrcReg = RISCV::X0;
1014
1015 // For RV32, allow a sign or unsigned 32 bit value.
1016 if (!STI.is64Bit() && !isInt<32>(x: Val)) {
1017 // If have a uimm32 it will still fit in a register so we can allow it.
1018 if (!isUInt<32>(x: Val))
1019 report_fatal_error(reason: "Should only materialize 32-bit constants for RV32");
1020
1021 // Sign extend for generateInstSeq.
1022 Val = SignExtend64<32>(x: Val);
1023 }
1024
1025 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
1026 assert(!Seq.empty());
1027
1028 bool SrcRenamable = false;
1029 unsigned Num = 0;
1030
1031 for (const RISCVMatInt::Inst &Inst : Seq) {
1032 bool LastItem = ++Num == Seq.size();
1033 RegState DstRegState = getDeadRegState(B: DstIsDead && LastItem) |
1034 getRenamableRegState(B: DstRenamable);
1035 RegState SrcRegState = getKillRegState(B: SrcReg != RISCV::X0) |
1036 getRenamableRegState(B: SrcRenamable);
1037 switch (Inst.getOpndKind()) {
1038 case RISCVMatInt::Imm:
1039 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Inst.getOpcode()))
1040 .addReg(RegNo: DstReg, Flags: RegState::Define | DstRegState)
1041 .addImm(Val: Inst.getImm())
1042 .setMIFlag(Flag);
1043 break;
1044 case RISCVMatInt::RegX0:
1045 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Inst.getOpcode()))
1046 .addReg(RegNo: DstReg, Flags: RegState::Define | DstRegState)
1047 .addReg(RegNo: SrcReg, Flags: SrcRegState)
1048 .addReg(RegNo: RISCV::X0)
1049 .setMIFlag(Flag);
1050 break;
1051 case RISCVMatInt::RegReg:
1052 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Inst.getOpcode()))
1053 .addReg(RegNo: DstReg, Flags: RegState::Define | DstRegState)
1054 .addReg(RegNo: SrcReg, Flags: SrcRegState)
1055 .addReg(RegNo: SrcReg, Flags: SrcRegState)
1056 .setMIFlag(Flag);
1057 break;
1058 case RISCVMatInt::RegImm:
1059 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Inst.getOpcode()))
1060 .addReg(RegNo: DstReg, Flags: RegState::Define | DstRegState)
1061 .addReg(RegNo: SrcReg, Flags: SrcRegState)
1062 .addImm(Val: Inst.getImm())
1063 .setMIFlag(Flag);
1064 break;
1065 }
1066
1067 // Only the first instruction has X0 as its source.
1068 SrcReg = DstReg;
1069 SrcRenamable = DstRenamable;
1070 }
1071}
1072
1073RISCVCC::CondCode RISCVInstrInfo::getCondFromBranchOpc(unsigned Opc) {
1074 switch (Opc) {
1075 default:
1076 return RISCVCC::COND_INVALID;
1077 case RISCV::BEQ:
1078 case RISCV::BEQI:
1079 case RISCV::CV_BEQIMM:
1080 case RISCV::QC_BEQI:
1081 case RISCV::QC_E_BEQI:
1082 case RISCV::NDS_BBC:
1083 case RISCV::NDS_BEQC:
1084 return RISCVCC::COND_EQ;
1085 case RISCV::BNE:
1086 case RISCV::BNEI:
1087 case RISCV::QC_BNEI:
1088 case RISCV::QC_E_BNEI:
1089 case RISCV::CV_BNEIMM:
1090 case RISCV::NDS_BBS:
1091 case RISCV::NDS_BNEC:
1092 return RISCVCC::COND_NE;
1093 case RISCV::BLT:
1094 case RISCV::QC_BLTI:
1095 case RISCV::QC_E_BLTI:
1096 return RISCVCC::COND_LT;
1097 case RISCV::BGE:
1098 case RISCV::QC_BGEI:
1099 case RISCV::QC_E_BGEI:
1100 return RISCVCC::COND_GE;
1101 case RISCV::BLTU:
1102 case RISCV::QC_BLTUI:
1103 case RISCV::QC_E_BLTUI:
1104 return RISCVCC::COND_LTU;
1105 case RISCV::BGEU:
1106 case RISCV::QC_BGEUI:
1107 case RISCV::QC_E_BGEUI:
1108 return RISCVCC::COND_GEU;
1109 }
1110}
1111
1112bool RISCVInstrInfo::evaluateCondBranch(RISCVCC::CondCode CC, int64_t C0,
1113 int64_t C1) {
1114 switch (CC) {
1115 default:
1116 llvm_unreachable("Unexpected CC");
1117 case RISCVCC::COND_EQ:
1118 return C0 == C1;
1119 case RISCVCC::COND_NE:
1120 return C0 != C1;
1121 case RISCVCC::COND_LT:
1122 return C0 < C1;
1123 case RISCVCC::COND_GE:
1124 return C0 >= C1;
1125 case RISCVCC::COND_LTU:
1126 return (uint64_t)C0 < (uint64_t)C1;
1127 case RISCVCC::COND_GEU:
1128 return (uint64_t)C0 >= (uint64_t)C1;
1129 }
1130}
1131
1132// The contents of values added to Cond are not examined outside of
1133// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
1134// push BranchOpcode, Reg1, Reg2.
1135static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
1136 SmallVectorImpl<MachineOperand> &Cond) {
1137 // Block ends with fall-through condbranch.
1138 assert(LastInst.getDesc().isConditionalBranch() &&
1139 "Unknown conditional branch");
1140 Target = LastInst.getOperand(i: 2).getMBB();
1141 Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst.getOpcode()));
1142 Cond.push_back(Elt: LastInst.getOperand(i: 0));
1143 Cond.push_back(Elt: LastInst.getOperand(i: 1));
1144}
1145
1146static unsigned getInverseXqcicmOpcode(unsigned Opcode) {
1147 switch (Opcode) {
1148 default:
1149 llvm_unreachable("Unexpected Opcode");
1150 case RISCV::QC_MVEQ:
1151 return RISCV::QC_MVNE;
1152 case RISCV::QC_MVNE:
1153 return RISCV::QC_MVEQ;
1154 case RISCV::QC_MVLT:
1155 return RISCV::QC_MVGE;
1156 case RISCV::QC_MVGE:
1157 return RISCV::QC_MVLT;
1158 case RISCV::QC_MVLTU:
1159 return RISCV::QC_MVGEU;
1160 case RISCV::QC_MVGEU:
1161 return RISCV::QC_MVLTU;
1162 case RISCV::QC_MVEQI:
1163 return RISCV::QC_MVNEI;
1164 case RISCV::QC_MVNEI:
1165 return RISCV::QC_MVEQI;
1166 case RISCV::QC_MVLTI:
1167 return RISCV::QC_MVGEI;
1168 case RISCV::QC_MVGEI:
1169 return RISCV::QC_MVLTI;
1170 case RISCV::QC_MVLTUI:
1171 return RISCV::QC_MVGEUI;
1172 case RISCV::QC_MVGEUI:
1173 return RISCV::QC_MVLTUI;
1174 }
1175}
1176
1177unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
1178 switch (SelectOpc) {
1179 default:
1180 switch (CC) {
1181 default:
1182 llvm_unreachable("Unexpected condition code!");
1183 case RISCVCC::COND_EQ:
1184 return RISCV::BEQ;
1185 case RISCVCC::COND_NE:
1186 return RISCV::BNE;
1187 case RISCVCC::COND_LT:
1188 return RISCV::BLT;
1189 case RISCVCC::COND_GE:
1190 return RISCV::BGE;
1191 case RISCVCC::COND_LTU:
1192 return RISCV::BLTU;
1193 case RISCVCC::COND_GEU:
1194 return RISCV::BGEU;
1195 }
1196 break;
1197 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
1198 switch (CC) {
1199 default:
1200 llvm_unreachable("Unexpected condition code!");
1201 case RISCVCC::COND_EQ:
1202 return RISCV::BEQI;
1203 case RISCVCC::COND_NE:
1204 return RISCV::BNEI;
1205 }
1206 break;
1207 case RISCV::Select_GPR_Using_CC_SImm5_CV:
1208 switch (CC) {
1209 default:
1210 llvm_unreachable("Unexpected condition code!");
1211 case RISCVCC::COND_EQ:
1212 return RISCV::CV_BEQIMM;
1213 case RISCVCC::COND_NE:
1214 return RISCV::CV_BNEIMM;
1215 }
1216 break;
1217 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
1218 switch (CC) {
1219 default:
1220 llvm_unreachable("Unexpected condition code!");
1221 case RISCVCC::COND_EQ:
1222 return RISCV::QC_BEQI;
1223 case RISCVCC::COND_NE:
1224 return RISCV::QC_BNEI;
1225 case RISCVCC::COND_LT:
1226 return RISCV::QC_BLTI;
1227 case RISCVCC::COND_GE:
1228 return RISCV::QC_BGEI;
1229 }
1230 break;
1231 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
1232 switch (CC) {
1233 default:
1234 llvm_unreachable("Unexpected condition code!");
1235 case RISCVCC::COND_LTU:
1236 return RISCV::QC_BLTUI;
1237 case RISCVCC::COND_GEU:
1238 return RISCV::QC_BGEUI;
1239 }
1240 break;
1241 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
1242 switch (CC) {
1243 default:
1244 llvm_unreachable("Unexpected condition code!");
1245 case RISCVCC::COND_EQ:
1246 return RISCV::QC_E_BEQI;
1247 case RISCVCC::COND_NE:
1248 return RISCV::QC_E_BNEI;
1249 case RISCVCC::COND_LT:
1250 return RISCV::QC_E_BLTI;
1251 case RISCVCC::COND_GE:
1252 return RISCV::QC_E_BGEI;
1253 }
1254 break;
1255 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
1256 switch (CC) {
1257 default:
1258 llvm_unreachable("Unexpected condition code!");
1259 case RISCVCC::COND_LTU:
1260 return RISCV::QC_E_BLTUI;
1261 case RISCVCC::COND_GEU:
1262 return RISCV::QC_E_BGEUI;
1263 }
1264 break;
1265 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
1266 switch (CC) {
1267 default:
1268 llvm_unreachable("Unexpected condition code!");
1269 case RISCVCC::COND_EQ:
1270 return RISCV::NDS_BBC;
1271 case RISCVCC::COND_NE:
1272 return RISCV::NDS_BBS;
1273 }
1274 break;
1275 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
1276 switch (CC) {
1277 default:
1278 llvm_unreachable("Unexpected condition code!");
1279 case RISCVCC::COND_EQ:
1280 return RISCV::NDS_BEQC;
1281 case RISCVCC::COND_NE:
1282 return RISCV::NDS_BNEC;
1283 }
1284 break;
1285 }
1286}
1287
1288RISCVCC::CondCode RISCVCC::getInverseBranchCondition(RISCVCC::CondCode CC) {
1289 switch (CC) {
1290 default:
1291 llvm_unreachable("Unrecognized conditional branch");
1292 case RISCVCC::COND_EQ:
1293 return RISCVCC::COND_NE;
1294 case RISCVCC::COND_NE:
1295 return RISCVCC::COND_EQ;
1296 case RISCVCC::COND_LT:
1297 return RISCVCC::COND_GE;
1298 case RISCVCC::COND_GE:
1299 return RISCVCC::COND_LT;
1300 case RISCVCC::COND_LTU:
1301 return RISCVCC::COND_GEU;
1302 case RISCVCC::COND_GEU:
1303 return RISCVCC::COND_LTU;
1304 }
1305}
1306
1307// Return inverse branch
1308unsigned RISCVCC::getInverseBranchOpcode(unsigned BCC) {
1309 switch (BCC) {
1310 default:
1311 llvm_unreachable("Unexpected branch opcode!");
1312 case RISCV::BEQ:
1313 return RISCV::BNE;
1314 case RISCV::BEQI:
1315 return RISCV::BNEI;
1316 case RISCV::BNE:
1317 return RISCV::BEQ;
1318 case RISCV::BNEI:
1319 return RISCV::BEQI;
1320 case RISCV::BLT:
1321 return RISCV::BGE;
1322 case RISCV::BGE:
1323 return RISCV::BLT;
1324 case RISCV::BLTU:
1325 return RISCV::BGEU;
1326 case RISCV::BGEU:
1327 return RISCV::BLTU;
1328 case RISCV::CV_BEQIMM:
1329 return RISCV::CV_BNEIMM;
1330 case RISCV::CV_BNEIMM:
1331 return RISCV::CV_BEQIMM;
1332 case RISCV::QC_BEQI:
1333 return RISCV::QC_BNEI;
1334 case RISCV::QC_BNEI:
1335 return RISCV::QC_BEQI;
1336 case RISCV::QC_BLTI:
1337 return RISCV::QC_BGEI;
1338 case RISCV::QC_BGEI:
1339 return RISCV::QC_BLTI;
1340 case RISCV::QC_BLTUI:
1341 return RISCV::QC_BGEUI;
1342 case RISCV::QC_BGEUI:
1343 return RISCV::QC_BLTUI;
1344 case RISCV::QC_E_BEQI:
1345 return RISCV::QC_E_BNEI;
1346 case RISCV::QC_E_BNEI:
1347 return RISCV::QC_E_BEQI;
1348 case RISCV::QC_E_BLTI:
1349 return RISCV::QC_E_BGEI;
1350 case RISCV::QC_E_BGEI:
1351 return RISCV::QC_E_BLTI;
1352 case RISCV::QC_E_BLTUI:
1353 return RISCV::QC_E_BGEUI;
1354 case RISCV::QC_E_BGEUI:
1355 return RISCV::QC_E_BLTUI;
1356 case RISCV::NDS_BBC:
1357 return RISCV::NDS_BBS;
1358 case RISCV::NDS_BBS:
1359 return RISCV::NDS_BBC;
1360 case RISCV::NDS_BEQC:
1361 return RISCV::NDS_BNEC;
1362 case RISCV::NDS_BNEC:
1363 return RISCV::NDS_BEQC;
1364 }
1365}
1366
// Inspect the terminators at the end of MBB and describe them through the
// output parameters. TBB, FBB and Cond are reset on entry.
//
// Returns false when the block was understood:
//  - no unpredicated terminator at the end: outputs stay empty (fall through);
//  - a single unconditional branch: TBB is its destination;
//  - a single conditional branch: TBB and Cond are filled by parseCondBranch;
//  - a conditional branch followed by an unconditional one: TBB and Cond come
//    from the conditional branch, FBB from the unconditional one.
// Returns true when the block cannot be handled: it ends in an indirect
// branch, uses a pre-ISel (generic) opcode, has more than two terminators, or
// matches none of the shapes above.
//
// With AllowModify set, every instruction after the first unconditional or
// indirect branch is erased from MBB before the analysis continues.
1367bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
1368 MachineBasicBlock *&TBB,
1369 MachineBasicBlock *&FBB,
1370 SmallVectorImpl<MachineOperand> &Cond,
1371 bool AllowModify) const {
1372 TBB = FBB = nullptr;
1373 Cond.clear();
1374
1375 // If the block has no terminators, it just falls into the block after it.
1376 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1377 if (I == MBB.end() || !isUnpredicatedTerminator(MI: *I))
1378 return false;
1379
1380 // Count the number of terminators and find the first unconditional or
1381 // indirect branch.
// The walk runs backwards from the last non-debug instruction, so the value
// left in FirstUncondOrIndirectBr is the earliest such branch in the run.
1382 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
1383 int NumTerminators = 0;
1384 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(MI: *J);
1385 J++) {
1386 NumTerminators++;
1387 if (J->getDesc().isUnconditionalBranch() ||
1388 J->getDesc().isIndirectBranch()) {
1389 FirstUncondOrIndirectBr = J.getReverse();
1390 }
1391 }
1392
1393 // If AllowModify is true, we can erase any terminators after
1394 // FirstUncondOrIndirectBR.
1395 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
// Always erase the instruction immediately after the kept branch; the loop
// ends once that branch is the last instruction in the block.
1396 while (std::next(x: FirstUncondOrIndirectBr) != MBB.end()) {
1397 std::next(x: FirstUncondOrIndirectBr)->eraseFromParent();
1398 NumTerminators--;
1399 }
// From here on I refers to the kept branch, which is now the block's last
// instruction.
1400 I = FirstUncondOrIndirectBr;
1401 }
1402
1403 // We can't handle blocks that end in an indirect branch.
1404 if (I->getDesc().isIndirectBranch())
1405 return true;
1406
1407 // We can't handle Generic branch opcodes from Global ISel.
1408 if (I->isPreISelOpcode())
1409 return true;
1410
1411 // We can't handle blocks with more than 2 terminators.
1412 if (NumTerminators > 2)
1413 return true;
1414
1415 // Handle a single unconditional branch.
1416 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
1417 TBB = getBranchDestBlock(MI: *I);
1418 return false;
1419 }
1420
1421 // Handle a single conditional branch.
1422 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
1423 parseCondBranch(LastInst&: *I, Target&: TBB, Cond);
1424 return false;
1425 }
1426
1427 // Handle a conditional branch followed by an unconditional branch.
1428 if (NumTerminators == 2 && std::prev(x: I)->getDesc().isConditionalBranch() &&
1429 I->getDesc().isUnconditionalBranch()) {
1430 parseCondBranch(LastInst&: *std::prev(x: I), Target&: TBB, Cond);
1431 FBB = getBranchDestBlock(MI: *I);
1432 return false;
1433 }
1434
1435 // Otherwise, we can't handle this.
1436 return true;
1437}
1438
1439unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
1440 int *BytesRemoved) const {
1441 if (BytesRemoved)
1442 *BytesRemoved = 0;
1443 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1444 if (I == MBB.end())
1445 return 0;
1446
1447 if (!I->getDesc().isUnconditionalBranch() &&
1448 !I->getDesc().isConditionalBranch())
1449 return 0;
1450
1451 // Remove the branch.
1452 if (BytesRemoved)
1453 *BytesRemoved += getInstSizeInBytes(MI: *I);
1454 I->eraseFromParent();
1455
1456 I = MBB.end();
1457
1458 if (I == MBB.begin())
1459 return 1;
1460 --I;
1461 if (!I->getDesc().isConditionalBranch())
1462 return 1;
1463
1464 // Remove the branch.
1465 if (BytesRemoved)
1466 *BytesRemoved += getInstSizeInBytes(MI: *I);
1467 I->eraseFromParent();
1468 return 2;
1469}
1470
1471// Inserts a branch into the end of the specific MachineBasicBlock, returning
1472// the number of instructions inserted.
1473unsigned RISCVInstrInfo::insertBranch(
1474 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
1475 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
1476 if (BytesAdded)
1477 *BytesAdded = 0;
1478
1479 // Shouldn't be a fall through.
1480 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1481 assert((Cond.size() == 3 || Cond.size() == 0) &&
1482 "RISC-V branch conditions have two components!");
1483
1484 // Unconditional branch.
1485 if (Cond.empty()) {
1486 MachineInstr &MI = *BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: RISCV::PseudoBR)).addMBB(MBB: TBB);
1487 if (BytesAdded)
1488 *BytesAdded += getInstSizeInBytes(MI);
1489 return 1;
1490 }
1491
1492 // Either a one or two-way conditional branch.
1493 MachineInstr &CondMI = *BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond[0].getImm()))
1494 .add(MO: Cond[1])
1495 .add(MO: Cond[2])
1496 .addMBB(MBB: TBB);
1497 if (BytesAdded)
1498 *BytesAdded += getInstSizeInBytes(MI: CondMI);
1499
1500 // One-way conditional branch.
1501 if (!FBB)
1502 return 1;
1503
1504 // Two-way conditional branch.
1505 MachineInstr &MI = *BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: RISCV::PseudoBR)).addMBB(MBB: FBB);
1506 if (BytesAdded)
1507 *BytesAdded += getInstSizeInBytes(MI);
1508 return 2;
1509}
1510
// Emit a long-range jump from the (empty) block MBB to DestBB as a PseudoJump
// that needs a scratch GPR.
//
// The scratch register is first a virtual register and is replaced at the end
// by a physical one. That physical register is obtained by scavenging
// backwards from the end of MBB; if nothing is free, a fixed register is
// stored to the branch-relaxation scratch frame index before the jump, the
// jump is retargeted to RestoreBB, and the reload is placed in RestoreBB.
//
// Fatal errors: BrOffset does not fit in a signed 32-bit value, or no scratch
// frame index was reserved (index == -1) when a spill is required.
// Preconditions (asserted): RS is non-null, MBB is empty with exactly one
// predecessor, and RestoreBB is empty.
1511void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1512 MachineBasicBlock &DestBB,
1513 MachineBasicBlock &RestoreBB,
1514 const DebugLoc &DL, int64_t BrOffset,
1515 RegScavenger *RS) const {
1516 assert(RS && "RegScavenger required for long branching");
1517 assert(MBB.empty() &&
1518 "new block should be inserted for expanding unconditional branch");
1519 assert(MBB.pred_size() == 1);
1520 assert(RestoreBB.empty() &&
1521 "restore block should be inserted for restoring clobbered registers");
1522
1523 MachineFunction *MF = MBB.getParent();
1524 MachineRegisterInfo &MRI = MF->getRegInfo();
1525 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
1526 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1527
1528 if (!isInt<32>(x: BrOffset))
1529 report_fatal_error(
1530 reason: "Branch offsets outside of the signed 32-bit range not supported");
1531
1532 // FIXME: A virtual register must be used initially, as the register
1533 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
1534 // uses the same workaround).
1535 Register ScratchReg = MRI.createVirtualRegister(RegClass: &RISCV::GPRJALRRegClass);
1536 auto II = MBB.end();
1537 // We may also update the jump target to RestoreBB later.
1538 MachineInstr &MI = *BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::PseudoJump))
1539 .addReg(RegNo: ScratchReg, Flags: RegState::Define | RegState::Dead)
1540 .addMBB(MBB: &DestBB, TargetFlags: RISCVII::MO_CALL);
1541
1542 RS->enterBasicBlockEnd(MBB);
1543 // When cf-protection-branch is enabled, we must use t2 (x7) for software
1544 // guarded branches to hold the landing pad label.
// NOTE(review): this queries the same RISCVMachineFunctionInfo that RVFI
// already holds above.
1545 bool HasCFBranch =
1546 MF->getInfo<RISCVMachineFunctionInfo>()->hasCFProtectionBranch();
1547 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1548 if (HasCFBranch)
1549 RC = &RISCV::GPRX7RegClass;
// Scavenging is attempted with AllowSpill=false; an invalid result means no
// register in RC was free, which is handled by the else-branch below.
1550 Register TmpGPR =
1551 RS->scavengeRegisterBackwards(RC: *RC, To: MI.getIterator(),
1552 /*RestoreAfter=*/false, /*SpAdj=*/SPAdj: 0,
1553 /*AllowSpill=*/false);
1554 if (TmpGPR.isValid())
1555 RS->setRegUsed(Reg: TmpGPR);
1556 else {
1557 // The case when there is no scavenged register needs special handling.
1558
1559 // Pick s11(or s1 for rve) because it doesn't make a difference.
1560 TmpGPR = STI.hasStdExtE() ? RISCV::X9 : RISCV::X27;
1561 // Force t2 if cf-protection-branch is enabled
1562 if (HasCFBranch)
1563 TmpGPR = RISCV::X7;
1564
1565 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
1566 if (FrameIndex == -1)
1567 report_fatal_error(reason: "underestimated function size");
1568
// Save TmpGPR right before the jump, then resolve the frame index of the
// store that was just inserted (the instruction preceding MI).
1569 storeRegToStackSlot(MBB, I: MI, SrcReg: TmpGPR, /*IsKill=*/true, FI: FrameIndex,
1570 RC: &RISCV::GPRRegClass, VReg: Register());
1571 TRI->eliminateFrameIndex(MI: std::prev(x: MI.getIterator()),
1572 /*SpAdj=*/SPAdj: 0, /*FIOperandNum=*/1);
1573
// Jump to RestoreBB instead of DestBB so the reload emitted below runs.
1574 MI.getOperand(i: 1).setMBB(&RestoreBB);
1575
1576 loadRegFromStackSlot(MBB&: RestoreBB, I: RestoreBB.end(), DstReg: TmpGPR, FI: FrameIndex,
1577 RC: &RISCV::GPRRegClass, VReg: Register());
1578 TRI->eliminateFrameIndex(MI: RestoreBB.back(),
1579 /*SpAdj=*/SPAdj: 0, /*FIOperandNum=*/1);
1580 }
1581
// Swap the placeholder virtual register for the chosen physical register and
// drop all virtual register state.
1582 MRI.replaceRegWith(FromReg: ScratchReg, ToReg: TmpGPR);
1583 MRI.clearVirtRegs();
1584}
1585
1586bool RISCVInstrInfo::reverseBranchCondition(
1587 SmallVectorImpl<MachineOperand> &Cond) const {
1588 assert((Cond.size() == 3) && "Invalid branch condition!");
1589
1590 Cond[0].setImm(RISCVCC::getInverseBranchOpcode(BCC: Cond[0].getImm()));
1591
1592 return false;
1593}
1594
1595// Return true if the instruction is a load immediate instruction (i.e.
1596// (ADDI x0, imm) or (BSETI x0, imm)).
1597static bool isLoadImm(const MachineInstr *MI, int64_t &Imm) {
1598 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(i: 1).isReg() &&
1599 MI->getOperand(i: 1).getReg() == RISCV::X0) {
1600 Imm = MI->getOperand(i: 2).getImm();
1601 return true;
1602 }
1603 // BSETI can be used to create power of 2 constants. Only 2048 is currently
1604 // interesting because it is 1 more than the maximum ADDI constant.
1605 if (MI->getOpcode() == RISCV::BSETI && MI->getOperand(i: 1).isReg() &&
1606 MI->getOperand(i: 1).getReg() == RISCV::X0 &&
1607 MI->getOperand(i: 2).getImm() == 11) {
1608 Imm = 2048;
1609 return true;
1610 }
1611 return false;
1612}
1613
1614bool RISCVInstrInfo::isFromLoadImm(const MachineRegisterInfo &MRI,
1615 const MachineOperand &Op, int64_t &Imm) {
1616 // Either a load from immediate instruction or X0.
1617 if (!Op.isReg())
1618 return false;
1619
1620 Register Reg = Op.getReg();
1621 if (Reg == RISCV::X0) {
1622 Imm = 0;
1623 return true;
1624 }
1625 return Reg.isVirtual() && isLoadImm(MI: MRI.getVRegDef(Reg), Imm);
1626}
1627
// Try to rewrite the conditional branch MI in place. Only BEQ/BNE/BGE/BLT/
// BGEU/BLTU are considered. Returns true when MI was replaced by a new branch
// (and erased), false when it was left untouched.
//
// Two rewrites are attempted:
//  1. Both compared operands are known constants: the comparison is evaluated
//     and MI becomes `beq x0, x0` (condition true) or `bne x0, x0` (false).
//  2. For the non-equality branches, one operand is a single-use constant C0
//     and a load of C0+1 (constant on the left) or C0-1 (constant on the
//     right) already exists earlier in the block: the branch is inverted to
//     use that other constant's register instead.
1628bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
1629 bool IsSigned = false;
1630 bool IsEquality = false;
1631 switch (MI.getOpcode()) {
1632 default:
1633 return false;
1634 case RISCV::BEQ:
1635 case RISCV::BNE:
1636 IsEquality = true;
1637 break;
1638 case RISCV::BGE:
1639 case RISCV::BLT:
1640 IsSigned = true;
1641 break;
1642 case RISCV::BGEU:
1643 case RISCV::BLTU:
1644 break;
1645 }
1646
1647 MachineBasicBlock *MBB = MI.getParent();
1648 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1649
1650 const MachineOperand &LHS = MI.getOperand(i: 0);
1651 const MachineOperand &RHS = MI.getOperand(i: 1);
1652 MachineBasicBlock *TBB = MI.getOperand(i: 2).getMBB();
1653
1654 RISCVCC::CondCode CC = getCondFromBranchOpc(Opc: MI.getOpcode());
1655 assert(CC != RISCVCC::COND_INVALID);
1656
1657 // Canonicalize conditional branches which can be constant folded into
1658 // beqz or bnez. We can't modify the CFG here.
1659 int64_t C0, C1;
1660 if (isFromLoadImm(MRI, Op: LHS, Imm&: C0) && isFromLoadImm(MRI, Op: RHS, Imm&: C1)) {
1661 unsigned NewOpc = evaluateCondBranch(CC, C0, C1) ? RISCV::BEQ : RISCV::BNE;
1662 // Build the new branch and remove the old one.
1663 BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
1664 .addReg(RegNo: RISCV::X0)
1665 .addReg(RegNo: RISCV::X0)
1666 .addMBB(MBB: TBB);
1667 MI.eraseFromParent();
1668 return true;
1669 }
1670
// The +/-1 rewrite below only applies to the ordering comparisons.
1671 if (IsEquality)
1672 return false;
1673
1674 // For two constants C0 and C1 from
1675 // ```
1676 // li Y, C0
1677 // li Z, C1
1678 // ```
1679 // 1. if C1 = C0 + 1
1680 // we can turn:
1681 // (a) blt Y, X -> bge X, Z
1682 // (b) bge Y, X -> blt X, Z
1683 //
1684 // 2. if C1 = C0 - 1
1685 // we can turn:
1686 // (a) blt X, Y -> bge Z, X
1687 // (b) bge X, Y -> blt Z, X
1688 //
1689 // To make sure this optimization is really beneficial, we only
1690 // optimize for cases where Y had only one use (i.e. only used by the branch).
1691 // Try to find the register for constant Z; return
1692 // invalid register otherwise.
// The search walks backwards from the instruction just before MI to the start
// of MI's block and only accepts definitions of virtual registers.
1693 auto searchConst = [&](int64_t C1) -> Register {
1694 MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
1695 auto DefC1 = std::find_if(first: ++II, last: E, pred: [&](const MachineInstr &I) -> bool {
1696 int64_t Imm;
1697 return isLoadImm(MI: &I, Imm) && Imm == C1 &&
1698 I.getOperand(i: 0).getReg().isVirtual();
1699 });
1700 if (DefC1 != E)
1701 return DefC1->getOperand(i: 0).getReg();
1702
1703 return Register();
1704 };
1705
// Both rewrites swap the operand order, so the replacement branch uses the
// inverse condition.
1706 unsigned NewOpc = RISCVCC::getBrCond(CC: getInverseBranchCondition(CC));
1707
1708 // Might be case 1.
1709 // Don't change 0 to 1 since we can use x0.
1710 // For unsigned cases changing -1U to 0 would be incorrect.
1711 // The incorrect case for signed would be INT_MAX, but isFromLoadImm can't
1712 // return that.
1713 if (isFromLoadImm(MRI, Op: LHS, Imm&: C0) && C0 != 0 && LHS.getReg().isVirtual() &&
1714 MRI.hasOneUse(RegNo: LHS.getReg()) && (IsSigned || C0 != -1)) {
1715 assert((isInt<12>(C0) || C0 == 2048) && "Unexpected immediate");
1716 if (Register RegZ = searchConst(C0 + 1)) {
1717 BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
1718 .add(MO: RHS)
1719 .addReg(RegNo: RegZ)
1720 .addMBB(MBB: TBB);
1721 // We might extend the live range of Z, clear its kill flag to
1722 // account for this.
1723 MRI.clearKillFlags(Reg: RegZ);
1724 MI.eraseFromParent();
1725 return true;
1726 }
1727 }
1728
1729 // Might be case 2.
1730 // For signed cases we don't want to change 0 since we can use x0.
1731 // For unsigned cases changing 0 to -1U would be incorrect.
1732 // The incorrect case for signed would be INT_MIN, but isFromLoadImm can't
1733 // return that.
1734 if (isFromLoadImm(MRI, Op: RHS, Imm&: C0) && C0 != 0 && RHS.getReg().isVirtual() &&
1735 MRI.hasOneUse(RegNo: RHS.getReg())) {
1736 assert((isInt<12>(C0) || C0 == 2048) && "Unexpected immediate");
1737 if (Register RegZ = searchConst(C0 - 1)) {
1738 BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
1739 .addReg(RegNo: RegZ)
1740 .add(MO: LHS)
1741 .addMBB(MBB: TBB);
1742 // We might extend the live range of Z, clear its kill flag to
1743 // account for this.
1744 MRI.clearKillFlags(Reg: RegZ);
1745 MI.eraseFromParent();
1746 return true;
1747 }
1748 }
1749
1750 return false;
1751}
1752
1753MachineBasicBlock *
1754RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
1755 assert(MI.getDesc().isBranch() && "Unexpected opcode!");
1756 // The branch target is always the last operand.
1757 int NumOp = MI.getNumExplicitOperands();
1758 return MI.getOperand(i: NumOp - 1).getMBB();
1759}
1760
1761bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1762 int64_t BrOffset) const {
1763 unsigned XLen = STI.getXLen();
1764 // Ideally we could determine the supported branch offset from the
1765 // RISCVII::FormMask, but this can't be used for Pseudo instructions like
1766 // PseudoBR.
1767 switch (BranchOp) {
1768 default:
1769 llvm_unreachable("Unexpected opcode!");
1770 case RISCV::NDS_BBC:
1771 case RISCV::NDS_BBS:
1772 case RISCV::NDS_BEQC:
1773 case RISCV::NDS_BNEC:
1774 return isInt<11>(x: BrOffset);
1775 case RISCV::BEQ:
1776 case RISCV::BNE:
1777 case RISCV::BLT:
1778 case RISCV::BGE:
1779 case RISCV::BLTU:
1780 case RISCV::BGEU:
1781 case RISCV::BEQI:
1782 case RISCV::BNEI:
1783 case RISCV::CV_BEQIMM:
1784 case RISCV::CV_BNEIMM:
1785 case RISCV::QC_BEQI:
1786 case RISCV::QC_BNEI:
1787 case RISCV::QC_BGEI:
1788 case RISCV::QC_BLTI:
1789 case RISCV::QC_BLTUI:
1790 case RISCV::QC_BGEUI:
1791 case RISCV::QC_E_BEQI:
1792 case RISCV::QC_E_BNEI:
1793 case RISCV::QC_E_BGEI:
1794 case RISCV::QC_E_BLTI:
1795 case RISCV::QC_E_BLTUI:
1796 case RISCV::QC_E_BGEUI:
1797 return isInt<13>(x: BrOffset);
1798 case RISCV::JAL:
1799 case RISCV::PseudoBR:
1800 return isInt<21>(x: BrOffset);
1801 case RISCV::PseudoJump:
1802 return isInt<32>(x: SignExtend64(X: BrOffset + 0x800, B: XLen));
1803 }
1804}
1805
1806// If the operation has a predicated pseudo instruction, return the pseudo
1807// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1808// TODO: Support more operations.
1809unsigned getPredicatedOpcode(unsigned Opcode) {
1810 // clang-format off
1811 switch (Opcode) {
1812 case RISCV::ADD: return RISCV::PseudoCCADD;
1813 case RISCV::SUB: return RISCV::PseudoCCSUB;
1814 case RISCV::SLL: return RISCV::PseudoCCSLL;
1815 case RISCV::SRL: return RISCV::PseudoCCSRL;
1816 case RISCV::SRA: return RISCV::PseudoCCSRA;
1817 case RISCV::AND: return RISCV::PseudoCCAND;
1818 case RISCV::OR: return RISCV::PseudoCCOR;
1819 case RISCV::XOR: return RISCV::PseudoCCXOR;
1820 case RISCV::MAX: return RISCV::PseudoCCMAX;
1821 case RISCV::MAXU: return RISCV::PseudoCCMAXU;
1822 case RISCV::MIN: return RISCV::PseudoCCMIN;
1823 case RISCV::MINU: return RISCV::PseudoCCMINU;
1824 case RISCV::MUL: return RISCV::PseudoCCMUL;
1825 case RISCV::LUI: return RISCV::PseudoCCLUI;
1826 case RISCV::QC_LI: return RISCV::PseudoCCQC_LI;
1827 case RISCV::QC_E_LI: return RISCV::PseudoCCQC_E_LI;
1828
1829 case RISCV::ADDI: return RISCV::PseudoCCADDI;
1830 case RISCV::SLLI: return RISCV::PseudoCCSLLI;
1831 case RISCV::SRLI: return RISCV::PseudoCCSRLI;
1832 case RISCV::SRAI: return RISCV::PseudoCCSRAI;
1833 case RISCV::ANDI: return RISCV::PseudoCCANDI;
1834 case RISCV::ORI: return RISCV::PseudoCCORI;
1835 case RISCV::XORI: return RISCV::PseudoCCXORI;
1836
1837 case RISCV::ADDW: return RISCV::PseudoCCADDW;
1838 case RISCV::SUBW: return RISCV::PseudoCCSUBW;
1839 case RISCV::SLLW: return RISCV::PseudoCCSLLW;
1840 case RISCV::SRLW: return RISCV::PseudoCCSRLW;
1841 case RISCV::SRAW: return RISCV::PseudoCCSRAW;
1842
1843 case RISCV::ADDIW: return RISCV::PseudoCCADDIW;
1844 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW;
1845 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW;
1846 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW;
1847
1848 case RISCV::ANDN: return RISCV::PseudoCCANDN;
1849 case RISCV::ORN: return RISCV::PseudoCCORN;
1850 case RISCV::XNOR: return RISCV::PseudoCCXNOR;
1851
1852 case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS;
1853 case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ;
1854 }
1855 // clang-format on
1856
1857 return RISCV::INSTRUCTION_LIST_END;
1858}
1859
1860/// Identify instructions that can be folded into a CCMOV instruction, and
1861/// return the defining instruction.
1862static MachineInstr *canFoldAsPredicatedOp(Register Reg,
1863 const MachineRegisterInfo &MRI,
1864 const TargetInstrInfo *TII,
1865 const RISCVSubtarget &STI) {
1866 if (!Reg.isVirtual())
1867 return nullptr;
1868 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
1869 return nullptr;
1870 MachineInstr *MI = MRI.getVRegDef(Reg);
1871 if (!MI)
1872 return nullptr;
1873
1874 if (!STI.hasShortForwardBranchIMinMax() &&
1875 (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN ||
1876 MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
1877 return nullptr;
1878
1879 if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL)
1880 return nullptr;
1881
1882 // Check if MI can be predicated and folded into the CCMOV.
1883 if (getPredicatedOpcode(Opcode: MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
1884 return nullptr;
1885 // Don't predicate li idiom.
1886 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(i: 1).isReg() &&
1887 MI->getOperand(i: 1).getReg() == RISCV::X0)
1888 return nullptr;
1889 // Check if MI has any other defs or physreg uses.
1890 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
1891 // Reject frame index operands, PEI can't handle the predicated pseudos.
1892 if (MO.isFI() || MO.isCPI() || MO.isJTI())
1893 return nullptr;
1894 if (!MO.isReg())
1895 continue;
1896 // MI can't have any tied operands, that would conflict with predication.
1897 if (MO.isTied())
1898 return nullptr;
1899 if (MO.isDef())
1900 return nullptr;
1901 // Allow constant physregs.
1902 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(PhysReg: MO.getReg()))
1903 return nullptr;
1904 }
1905 bool DontMoveAcrossStores = true;
1906 if (!MI->isSafeToMove(SawStore&: DontMoveAcrossStores))
1907 return nullptr;
1908 return MI;
1909}
1910
1911MachineInstr *
1912RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
1913 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
1914 bool PreferFalse) const {
1915 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1916 "Unknown select instruction");
1917 if (!STI.hasShortForwardBranchIALU())
1918 return nullptr;
1919
1920 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1921 MachineInstr *DefMI =
1922 canFoldAsPredicatedOp(Reg: MI.getOperand(i: 2).getReg(), MRI, TII: this, STI);
1923 bool Invert = !DefMI;
1924 if (!DefMI)
1925 DefMI = canFoldAsPredicatedOp(Reg: MI.getOperand(i: 1).getReg(), MRI, TII: this, STI);
1926 if (!DefMI)
1927 return nullptr;
1928
1929 // Find new register class to use.
1930 MachineOperand FalseReg = MI.getOperand(i: Invert ? 2 : 1);
1931 Register DestReg = MI.getOperand(i: 0).getReg();
1932 const TargetRegisterClass *PreviousClass = MRI.getRegClass(Reg: FalseReg.getReg());
1933 if (!MRI.constrainRegClass(Reg: DestReg, RC: PreviousClass))
1934 return nullptr;
1935
1936 unsigned PredOpc = getPredicatedOpcode(Opcode: DefMI->getOpcode());
1937 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
1938
1939 // Create a new predicated version of DefMI.
1940 MachineInstrBuilder NewMI =
1941 BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: PredOpc), DestReg);
1942
1943 // Copy the false register.
1944 NewMI.add(MO: FalseReg);
1945
1946 // Copy all the DefMI operands.
1947 const MCInstrDesc &DefDesc = DefMI->getDesc();
1948 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
1949 NewMI.add(MO: DefMI->getOperand(i));
1950
1951 // Add branch opcode, inverting if necessary.
1952 unsigned BCCOpcode = MI.getOperand(i: MI.getNumExplicitOperands() - 3).getImm();
1953 if (Invert)
1954 BCCOpcode = RISCVCC::getInverseBranchOpcode(BCC: BCCOpcode);
1955 NewMI.addImm(Val: BCCOpcode);
1956
1957 // Copy the condition portion.
1958 NewMI.add(MO: MI.getOperand(i: MI.getNumExplicitOperands() - 2));
1959 NewMI.add(MO: MI.getOperand(i: MI.getNumExplicitOperands() - 1));
1960
1961 // Update SeenMIs set: register newly created MI and erase removed DefMI.
1962 SeenMIs.insert(Ptr: NewMI);
1963 SeenMIs.erase(Ptr: DefMI);
1964
1965 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
1966 // DefMI would be invalid when transferred inside the loop. Checking for a
1967 // loop is expensive, but at least remove kill flags if they are in different
1968 // BBs.
1969 if (DefMI->getParent() != MI.getParent())
1970 NewMI->clearKillInfo();
1971
1972 // The caller will erase MI, but not DefMI.
1973 DefMI->eraseFromParent();
1974 return NewMI;
1975}
1976
1977unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
1978 if (MI.isMetaInstruction())
1979 return 0;
1980
1981 unsigned Opcode = MI.getOpcode();
1982
1983 if (Opcode == TargetOpcode::INLINEASM ||
1984 Opcode == TargetOpcode::INLINEASM_BR) {
1985 const MachineFunction &MF = *MI.getParent()->getParent();
1986 return getInlineAsmLength(Str: MI.getOperand(i: 0).getSymbolName(),
1987 MAI: MF.getTarget().getMCAsmInfo());
1988 }
1989
1990 if (requiresNTLHint(MI)) {
1991 if (STI.hasStdExtZca()) {
1992 if (isCompressibleInst(MI, STI))
1993 return 4; // c.ntl.all + c.load/c.store
1994 return 6; // c.ntl.all + load/store
1995 }
1996 return 8; // ntl.all + load/store
1997 }
1998
1999 if (Opcode == TargetOpcode::BUNDLE)
2000 return getInstBundleSize(MI);
2001
2002 if (MI.getParent() && MI.getParent()->getParent()) {
2003 if (isCompressibleInst(MI, STI))
2004 return 2;
2005 }
2006
2007 switch (Opcode) {
2008 case RISCV::PseudoMV_FPR16INX:
2009 case RISCV::PseudoMV_FPR32INX:
2010 case RISCV::PseudoClearGPR:
2011 // MV is always compressible to either c.mv or c.li rd, 0.
2012 return STI.hasStdExtZca() ? 2 : 4;
2013 // Below cases are for short forward branch pseudos
2014 case RISCV::PseudoCCMOVGPRNoX0:
2015 return get(Opcode: MI.getOperand(i: MI.getNumExplicitOperands() - 3).getImm())
2016 .getSize() +
2017 2;
2018 case RISCV::PseudoCCMOVGPR:
2019 case RISCV::PseudoCCADD:
2020 case RISCV::PseudoCCSUB:
2021 case RISCV::PseudoCCSLL:
2022 case RISCV::PseudoCCSRL:
2023 case RISCV::PseudoCCSRA:
2024 case RISCV::PseudoCCAND:
2025 case RISCV::PseudoCCOR:
2026 case RISCV::PseudoCCXOR:
2027 case RISCV::PseudoCCADDI:
2028 case RISCV::PseudoCCANDI:
2029 case RISCV::PseudoCCORI:
2030 case RISCV::PseudoCCXORI:
2031 case RISCV::PseudoCCLUI:
2032 case RISCV::PseudoCCSLLI:
2033 case RISCV::PseudoCCSRLI:
2034 case RISCV::PseudoCCSRAI:
2035 case RISCV::PseudoCCADDW:
2036 case RISCV::PseudoCCSUBW:
2037 case RISCV::PseudoCCSLLW:
2038 case RISCV::PseudoCCSRLW:
2039 case RISCV::PseudoCCSRAW:
2040 case RISCV::PseudoCCADDIW:
2041 case RISCV::PseudoCCSLLIW:
2042 case RISCV::PseudoCCSRLIW:
2043 case RISCV::PseudoCCSRAIW:
2044 case RISCV::PseudoCCANDN:
2045 case RISCV::PseudoCCORN:
2046 case RISCV::PseudoCCXNOR:
2047 case RISCV::PseudoCCMAX:
2048 case RISCV::PseudoCCMIN:
2049 case RISCV::PseudoCCMAXU:
2050 case RISCV::PseudoCCMINU:
2051 case RISCV::PseudoCCMUL:
2052 case RISCV::PseudoCCLB:
2053 case RISCV::PseudoCCLH:
2054 case RISCV::PseudoCCLW:
2055 case RISCV::PseudoCCLHU:
2056 case RISCV::PseudoCCLBU:
2057 case RISCV::PseudoCCLWU:
2058 case RISCV::PseudoCCLD:
2059 case RISCV::PseudoCCQC_LI:
2060 return get(Opcode: MI.getOperand(i: MI.getNumExplicitOperands() - 3).getImm())
2061 .getSize() +
2062 4;
2063 case RISCV::PseudoCCQC_E_LI:
2064 case RISCV::PseudoCCQC_E_LB:
2065 case RISCV::PseudoCCQC_E_LH:
2066 case RISCV::PseudoCCQC_E_LW:
2067 case RISCV::PseudoCCQC_E_LHU:
2068 case RISCV::PseudoCCQC_E_LBU:
2069 return get(Opcode: MI.getOperand(i: MI.getNumExplicitOperands() - 3).getImm())
2070 .getSize() +
2071 6;
2072 case TargetOpcode::STACKMAP:
2073 // The upper bound for a stackmap intrinsic is the full length of its shadow
2074 return StackMapOpers(&MI).getNumPatchBytes();
2075 case TargetOpcode::PATCHPOINT:
2076 // The size of the patchpoint intrinsic is the number of bytes requested
2077 return PatchPointOpers(&MI).getNumPatchBytes();
2078 case TargetOpcode::STATEPOINT: {
2079 // The size of the statepoint intrinsic is the number of bytes requested
2080 unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
2081 // No patch bytes means at most a PseudoCall is emitted
2082 return std::max(a: NumBytes, b: 8U);
2083 }
2084 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2085 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
2086 case TargetOpcode::PATCHABLE_TAIL_CALL: {
2087 const MachineFunction &MF = *MI.getParent()->getParent();
2088 const Function &F = MF.getFunction();
2089 if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER &&
2090 F.hasFnAttribute(Kind: "patchable-function-entry")) {
2091 unsigned Num =
2092 F.getFnAttributeAsParsedInteger(Kind: "patchable-function-entry");
2093 // Number of C.NOP or NOP
2094 return (STI.hasStdExtZca() ? 2 : 4) * Num;
2095 }
2096 // XRay uses C.JAL + 21 or 33 C.NOP for each sled in RV32 and RV64,
2097 // respectively.
2098 return STI.is64Bit() ? 68 : 44;
2099 }
2100 default:
2101 return get(Opcode).getSize();
2102 }
2103}
2104
2105bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
2106 const unsigned Opcode = MI.getOpcode();
2107 switch (Opcode) {
2108 default:
2109 break;
2110 case RISCV::FSGNJ_D:
2111 case RISCV::FSGNJ_S:
2112 case RISCV::FSGNJ_H:
2113 case RISCV::FSGNJ_D_INX:
2114 case RISCV::FSGNJ_D_IN32X:
2115 case RISCV::FSGNJ_S_INX:
2116 case RISCV::FSGNJ_H_INX:
2117 // The canonical floating-point move is fsgnj rd, rs, rs.
2118 return MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
2119 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg();
2120 case RISCV::ADDI:
2121 case RISCV::ORI:
2122 case RISCV::XORI:
2123 return (MI.getOperand(i: 1).isReg() &&
2124 MI.getOperand(i: 1).getReg() == RISCV::X0) ||
2125 (MI.getOperand(i: 2).isImm() && MI.getOperand(i: 2).getImm() == 0);
2126 }
2127 return MI.isAsCheapAsAMove();
2128}
2129
2130std::optional<DestSourcePair>
2131RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
2132 if (MI.isMoveReg())
2133 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
2134 switch (MI.getOpcode()) {
2135 default:
2136 break;
2137 case RISCV::ADD:
2138 case RISCV::OR:
2139 case RISCV::XOR:
2140 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 1).getReg() == RISCV::X0 &&
2141 MI.getOperand(i: 2).isReg())
2142 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
2143 if (MI.getOperand(i: 2).isReg() && MI.getOperand(i: 2).getReg() == RISCV::X0 &&
2144 MI.getOperand(i: 1).isReg())
2145 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
2146 break;
2147 case RISCV::ADDI:
2148 // Operand 1 can be a frameindex but callers expect registers
2149 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isImm() &&
2150 MI.getOperand(i: 2).getImm() == 0)
2151 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
2152 break;
2153 case RISCV::SUB:
2154 if (MI.getOperand(i: 2).isReg() && MI.getOperand(i: 2).getReg() == RISCV::X0 &&
2155 MI.getOperand(i: 1).isReg())
2156 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
2157 break;
2158 case RISCV::SH1ADD:
2159 case RISCV::SH1ADD_UW:
2160 case RISCV::SH2ADD:
2161 case RISCV::SH2ADD_UW:
2162 case RISCV::SH3ADD:
2163 case RISCV::SH3ADD_UW:
2164 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 1).getReg() == RISCV::X0 &&
2165 MI.getOperand(i: 2).isReg())
2166 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
2167 break;
2168 case RISCV::FSGNJ_D:
2169 case RISCV::FSGNJ_S:
2170 case RISCV::FSGNJ_H:
2171 case RISCV::FSGNJ_D_INX:
2172 case RISCV::FSGNJ_D_IN32X:
2173 case RISCV::FSGNJ_S_INX:
2174 case RISCV::FSGNJ_H_INX:
2175 // The canonical floating-point move is fsgnj rd, rs, rs.
2176 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
2177 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg())
2178 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
2179 break;
2180 }
2181 return std::nullopt;
2182}
2183
2184MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
2185 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
2186 // The option is unused. Choose Local strategy only for in-order cores. When
2187 // scheduling model is unspecified, use MinInstrCount strategy as more
2188 // generic one.
2189 const auto &SchedModel = STI.getSchedModel();
2190 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
2191 ? MachineTraceStrategy::TS_MinInstrCount
2192 : MachineTraceStrategy::TS_Local;
2193 }
2194 // The strategy was forced by the option.
2195 return ForceMachineCombinerStrategy;
2196}
2197
2198void RISCVInstrInfo::finalizeInsInstrs(
2199 MachineInstr &Root, unsigned &Pattern,
2200 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
2201 int16_t FrmOpIdx =
2202 RISCV::getNamedOperandIdx(Opcode: Root.getOpcode(), Name: RISCV::OpName::frm);
2203 if (FrmOpIdx < 0) {
2204 assert(all_of(InsInstrs,
2205 [](MachineInstr *MI) {
2206 return RISCV::getNamedOperandIdx(MI->getOpcode(),
2207 RISCV::OpName::frm) < 0;
2208 }) &&
2209 "New instructions require FRM whereas the old one does not have it");
2210 return;
2211 }
2212
2213 const MachineOperand &FRM = Root.getOperand(i: FrmOpIdx);
2214 MachineFunction &MF = *Root.getMF();
2215
2216 for (auto *NewMI : InsInstrs) {
2217 // We'd already added the FRM operand.
2218 if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
2219 Opcode: NewMI->getOpcode(), Name: RISCV::OpName::frm)) != NewMI->getNumOperands())
2220 continue;
2221 MachineInstrBuilder MIB(MF, NewMI);
2222 MIB.add(MO: FRM);
2223 if (FRM.getImm() == RISCVFPRndMode::DYN)
2224 MIB.addUse(RegNo: RISCV::FRM, Flags: RegState::Implicit);
2225 }
2226}
2227
2228static bool isFADD(unsigned Opc) {
2229 switch (Opc) {
2230 default:
2231 return false;
2232 case RISCV::FADD_H:
2233 case RISCV::FADD_S:
2234 case RISCV::FADD_D:
2235 return true;
2236 }
2237}
2238
2239static bool isFSUB(unsigned Opc) {
2240 switch (Opc) {
2241 default:
2242 return false;
2243 case RISCV::FSUB_H:
2244 case RISCV::FSUB_S:
2245 case RISCV::FSUB_D:
2246 return true;
2247 }
2248}
2249
2250static bool isFMUL(unsigned Opc) {
2251 switch (Opc) {
2252 default:
2253 return false;
2254 case RISCV::FMUL_H:
2255 case RISCV::FMUL_S:
2256 case RISCV::FMUL_D:
2257 return true;
2258 }
2259}
2260
2261bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst,
2262 bool Invert) const {
2263#define OPCODE_LMUL_CASE(OPC) \
2264 case RISCV::OPC##_M1: \
2265 case RISCV::OPC##_M2: \
2266 case RISCV::OPC##_M4: \
2267 case RISCV::OPC##_M8: \
2268 case RISCV::OPC##_MF2: \
2269 case RISCV::OPC##_MF4: \
2270 case RISCV::OPC##_MF8
2271
2272#define OPCODE_LMUL_MASK_CASE(OPC) \
2273 case RISCV::OPC##_M1_MASK: \
2274 case RISCV::OPC##_M2_MASK: \
2275 case RISCV::OPC##_M4_MASK: \
2276 case RISCV::OPC##_M8_MASK: \
2277 case RISCV::OPC##_MF2_MASK: \
2278 case RISCV::OPC##_MF4_MASK: \
2279 case RISCV::OPC##_MF8_MASK
2280
2281 unsigned Opcode = Inst.getOpcode();
2282 if (Invert) {
2283 if (auto InvOpcode = getInverseOpcode(Opcode))
2284 Opcode = *InvOpcode;
2285 else
2286 return false;
2287 }
2288
2289 // clang-format off
2290 switch (Opcode) {
2291 default:
2292 return false;
2293 OPCODE_LMUL_CASE(PseudoVADD_VV):
2294 OPCODE_LMUL_MASK_CASE(PseudoVADD_VV):
2295 OPCODE_LMUL_CASE(PseudoVMUL_VV):
2296 OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV):
2297 return true;
2298 }
2299 // clang-format on
2300
2301#undef OPCODE_LMUL_MASK_CASE
2302#undef OPCODE_LMUL_CASE
2303}
2304
2305bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root,
2306 const MachineInstr &Prev) const {
2307 if (!areOpcodesEqualOrInverse(Opcode1: Root.getOpcode(), Opcode2: Prev.getOpcode()))
2308 return false;
2309
2310 assert(Root.getMF() == Prev.getMF());
2311 const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo();
2312 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
2313
2314 // Make sure vtype operands are also the same.
2315 const MCInstrDesc &Desc = get(Opcode: Root.getOpcode());
2316 const uint64_t TSFlags = Desc.TSFlags;
2317
2318 auto checkImmOperand = [&](unsigned OpIdx) {
2319 return Root.getOperand(i: OpIdx).getImm() == Prev.getOperand(i: OpIdx).getImm();
2320 };
2321
2322 auto checkRegOperand = [&](unsigned OpIdx) {
2323 return Root.getOperand(i: OpIdx).getReg() == Prev.getOperand(i: OpIdx).getReg();
2324 };
2325
2326 // PassThru
2327 // TODO: Potentially we can loosen the condition to consider Root to be
2328 // associable with Prev if Root has NoReg as passthru. In which case we
2329 // also need to loosen the condition on vector policies between these.
2330 if (!checkRegOperand(1))
2331 return false;
2332
2333 // SEW
2334 if (RISCVII::hasSEWOp(TSFlags) &&
2335 !checkImmOperand(RISCVII::getSEWOpNum(Desc)))
2336 return false;
2337
2338 // Mask
2339 if (RISCVII::usesMaskPolicy(TSFlags)) {
2340 const MachineBasicBlock *MBB = Root.getParent();
2341 const MachineBasicBlock::const_reverse_iterator It1(&Root);
2342 const MachineBasicBlock::const_reverse_iterator It2(&Prev);
2343 Register MI1VReg;
2344
2345 bool SeenMI2 = false;
2346 for (auto End = MBB->rend(), It = It1; It != End; ++It) {
2347 if (It == It2) {
2348 SeenMI2 = true;
2349 if (!MI1VReg.isValid())
2350 // There is no V0 def between Root and Prev; they're sharing the
2351 // same V0.
2352 break;
2353 }
2354
2355 if (It->modifiesRegister(Reg: RISCV::V0, TRI)) {
2356 Register SrcReg = It->getOperand(i: 1).getReg();
2357 // If it's not VReg it'll be more difficult to track its defs, so
2358 // bailing out here just to be safe.
2359 if (!SrcReg.isVirtual())
2360 return false;
2361
2362 if (!MI1VReg.isValid()) {
2363 // This is the V0 def for Root.
2364 MI1VReg = SrcReg;
2365 continue;
2366 }
2367
2368 // Some random mask updates.
2369 if (!SeenMI2)
2370 continue;
2371
2372 // This is the V0 def for Prev; check if it's the same as that of
2373 // Root.
2374 if (MI1VReg != SrcReg)
2375 return false;
2376 else
2377 break;
2378 }
2379 }
2380
2381 // If we haven't encountered Prev, it's likely that this function was
2382 // called in a wrong way (e.g. Root is before Prev).
2383 assert(SeenMI2 && "Prev is expected to appear before Root");
2384 }
2385
2386 // Tail / Mask policies
2387 if (RISCVII::hasVecPolicyOp(TSFlags) &&
2388 !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))
2389 return false;
2390
2391 // VL
2392 if (RISCVII::hasVLOp(TSFlags)) {
2393 unsigned OpIdx = RISCVII::getVLOpNum(Desc);
2394 const MachineOperand &Op1 = Root.getOperand(i: OpIdx);
2395 const MachineOperand &Op2 = Prev.getOperand(i: OpIdx);
2396 if (Op1.getType() != Op2.getType())
2397 return false;
2398 switch (Op1.getType()) {
2399 case MachineOperand::MO_Register:
2400 if (Op1.getReg() != Op2.getReg())
2401 return false;
2402 break;
2403 case MachineOperand::MO_Immediate:
2404 if (Op1.getImm() != Op2.getImm())
2405 return false;
2406 break;
2407 default:
2408 llvm_unreachable("Unrecognized VL operand type");
2409 }
2410 }
2411
2412 // Rounding modes
2413 if (int Idx = RISCVII::getFRMOpNum(Desc); Idx >= 0 && !checkImmOperand(Idx))
2414 return false;
2415 if (int Idx = RISCVII::getVXRMOpNum(Desc); Idx >= 0 && !checkImmOperand(Idx))
2416 return false;
2417
2418 return true;
2419}
2420
2421// Most of our RVV pseudos have passthru operand, so the real operands
2422// start from index = 2.
2423bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,
2424 bool &Commuted) const {
2425 const MachineBasicBlock *MBB = Inst.getParent();
2426 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2427 assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) &&
2428 "Expect the present of passthrough operand.");
2429 MachineInstr *MI1 = MRI.getUniqueVRegDef(Reg: Inst.getOperand(i: 2).getReg());
2430 MachineInstr *MI2 = MRI.getUniqueVRegDef(Reg: Inst.getOperand(i: 3).getReg());
2431
2432 // If only one operand has the same or inverse opcode and it's the second
2433 // source operand, the operands must be commuted.
2434 Commuted = !areRVVInstsReassociable(Root: Inst, Prev: *MI1) &&
2435 areRVVInstsReassociable(Root: Inst, Prev: *MI2);
2436 if (Commuted)
2437 std::swap(a&: MI1, b&: MI2);
2438
2439 return areRVVInstsReassociable(Root: Inst, Prev: *MI1) &&
2440 (isVectorAssociativeAndCommutative(Inst: *MI1) ||
2441 isVectorAssociativeAndCommutative(Inst: *MI1, /* Invert */ true)) &&
2442 hasReassociableOperands(Inst: *MI1, MBB) &&
2443 MRI.hasOneNonDBGUse(RegNo: MI1->getOperand(i: 0).getReg());
2444}
2445
2446bool RISCVInstrInfo::hasReassociableOperands(
2447 const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
2448 if (!isVectorAssociativeAndCommutative(Inst) &&
2449 !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
2450 return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
2451
2452 const MachineOperand &Op1 = Inst.getOperand(i: 2);
2453 const MachineOperand &Op2 = Inst.getOperand(i: 3);
2454 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2455
2456 // We need virtual register definitions for the operands that we will
2457 // reassociate.
2458 MachineInstr *MI1 = nullptr;
2459 MachineInstr *MI2 = nullptr;
2460 if (Op1.isReg() && Op1.getReg().isVirtual())
2461 MI1 = MRI.getUniqueVRegDef(Reg: Op1.getReg());
2462 if (Op2.isReg() && Op2.getReg().isVirtual())
2463 MI2 = MRI.getUniqueVRegDef(Reg: Op2.getReg());
2464
2465 // And at least one operand must be defined in MBB.
2466 return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
2467}
2468
2469void RISCVInstrInfo::getReassociateOperandIndices(
2470 const MachineInstr &Root, unsigned Pattern,
2471 std::array<unsigned, 5> &OperandIndices) const {
2472 TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices);
2473 if (RISCV::getRVVMCOpcode(RVVPseudoOpcode: Root.getOpcode())) {
2474 // Skip the passthrough operand, so increment all indices by one.
2475 for (unsigned I = 0; I < 5; ++I)
2476 ++OperandIndices[I];
2477 }
2478}
2479
2480bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
2481 bool &Commuted) const {
2482 if (isVectorAssociativeAndCommutative(Inst) ||
2483 isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
2484 return hasReassociableVectorSibling(Inst, Commuted);
2485
2486 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
2487 return false;
2488
2489 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
2490 unsigned OperandIdx = Commuted ? 2 : 1;
2491 const MachineInstr &Sibling =
2492 *MRI.getVRegDef(Reg: Inst.getOperand(i: OperandIdx).getReg());
2493
2494 int16_t InstFrmOpIdx =
2495 RISCV::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: RISCV::OpName::frm);
2496 int16_t SiblingFrmOpIdx =
2497 RISCV::getNamedOperandIdx(Opcode: Sibling.getOpcode(), Name: RISCV::OpName::frm);
2498
2499 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
2500 RISCV::hasEqualFRM(MI1: Inst, MI2: Sibling);
2501}
2502
2503bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
2504 bool Invert) const {
2505 if (isVectorAssociativeAndCommutative(Inst, Invert))
2506 return true;
2507
2508 unsigned Opc = Inst.getOpcode();
2509 if (Invert) {
2510 auto InverseOpcode = getInverseOpcode(Opcode: Opc);
2511 if (!InverseOpcode)
2512 return false;
2513 Opc = *InverseOpcode;
2514 }
2515
2516 if (isFADD(Opc) || isFMUL(Opc))
2517 return Inst.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
2518 Inst.getFlag(Flag: MachineInstr::MIFlag::FmNsz);
2519
2520 switch (Opc) {
2521 default:
2522 return false;
2523 case RISCV::ADD:
2524 case RISCV::ADDW:
2525 case RISCV::AND:
2526 case RISCV::OR:
2527 case RISCV::XOR:
2528 // From RISC-V ISA spec, if both the high and low bits of the same product
2529 // are required, then the recommended code sequence is:
2530 //
2531 // MULH[[S]U] rdh, rs1, rs2
2532 // MUL rdl, rs1, rs2
2533 // (source register specifiers must be in same order and rdh cannot be the
2534 // same as rs1 or rs2)
2535 //
2536 // Microarchitectures can then fuse these into a single multiply operation
2537 // instead of performing two separate multiplies.
2538 // MachineCombiner may reassociate MUL operands and lose the fusion
2539 // opportunity.
2540 case RISCV::MUL:
2541 case RISCV::MULW:
2542 case RISCV::MIN:
2543 case RISCV::MINU:
2544 case RISCV::MAX:
2545 case RISCV::MAXU:
2546 case RISCV::FMIN_H:
2547 case RISCV::FMIN_S:
2548 case RISCV::FMIN_D:
2549 case RISCV::FMAX_H:
2550 case RISCV::FMAX_S:
2551 case RISCV::FMAX_D:
2552 return true;
2553 }
2554
2555 return false;
2556}
2557
2558std::optional<unsigned>
2559RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
2560#define RVV_OPC_LMUL_CASE(OPC, INV) \
2561 case RISCV::OPC##_M1: \
2562 return RISCV::INV##_M1; \
2563 case RISCV::OPC##_M2: \
2564 return RISCV::INV##_M2; \
2565 case RISCV::OPC##_M4: \
2566 return RISCV::INV##_M4; \
2567 case RISCV::OPC##_M8: \
2568 return RISCV::INV##_M8; \
2569 case RISCV::OPC##_MF2: \
2570 return RISCV::INV##_MF2; \
2571 case RISCV::OPC##_MF4: \
2572 return RISCV::INV##_MF4; \
2573 case RISCV::OPC##_MF8: \
2574 return RISCV::INV##_MF8
2575
2576#define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \
2577 case RISCV::OPC##_M1_MASK: \
2578 return RISCV::INV##_M1_MASK; \
2579 case RISCV::OPC##_M2_MASK: \
2580 return RISCV::INV##_M2_MASK; \
2581 case RISCV::OPC##_M4_MASK: \
2582 return RISCV::INV##_M4_MASK; \
2583 case RISCV::OPC##_M8_MASK: \
2584 return RISCV::INV##_M8_MASK; \
2585 case RISCV::OPC##_MF2_MASK: \
2586 return RISCV::INV##_MF2_MASK; \
2587 case RISCV::OPC##_MF4_MASK: \
2588 return RISCV::INV##_MF4_MASK; \
2589 case RISCV::OPC##_MF8_MASK: \
2590 return RISCV::INV##_MF8_MASK
2591
2592 switch (Opcode) {
2593 default:
2594 return std::nullopt;
2595 case RISCV::FADD_H:
2596 return RISCV::FSUB_H;
2597 case RISCV::FADD_S:
2598 return RISCV::FSUB_S;
2599 case RISCV::FADD_D:
2600 return RISCV::FSUB_D;
2601 case RISCV::FSUB_H:
2602 return RISCV::FADD_H;
2603 case RISCV::FSUB_S:
2604 return RISCV::FADD_S;
2605 case RISCV::FSUB_D:
2606 return RISCV::FADD_D;
2607 case RISCV::ADD:
2608 return RISCV::SUB;
2609 case RISCV::SUB:
2610 return RISCV::ADD;
2611 case RISCV::ADDW:
2612 return RISCV::SUBW;
2613 case RISCV::SUBW:
2614 return RISCV::ADDW;
2615 // clang-format off
2616 RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV);
2617 RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV);
2618 RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV);
2619 RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV);
2620 // clang-format on
2621 }
2622
2623#undef RVV_OPC_LMUL_MASK_CASE
2624#undef RVV_OPC_LMUL_CASE
2625}
2626
2627static bool canCombineFPFusedMultiply(const MachineInstr &Root,
2628 const MachineOperand &MO,
2629 bool DoRegPressureReduce) {
2630 if (!MO.isReg() || !MO.getReg().isVirtual())
2631 return false;
2632 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2633 MachineInstr *MI = MRI.getVRegDef(Reg: MO.getReg());
2634 if (!MI || !isFMUL(Opc: MI->getOpcode()))
2635 return false;
2636
2637 if (!Root.getFlag(Flag: MachineInstr::MIFlag::FmContract) ||
2638 !MI->getFlag(Flag: MachineInstr::MIFlag::FmContract))
2639 return false;
2640
2641 // Try combining even if fmul has more than one use as it eliminates
2642 // dependency between fadd(fsub) and fmul. However, it can extend liveranges
2643 // for fmul operands, so reject the transformation in register pressure
2644 // reduction mode.
2645 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
2646 return false;
2647
2648 // Do not combine instructions from different basic blocks.
2649 if (Root.getParent() != MI->getParent())
2650 return false;
2651 return RISCV::hasEqualFRM(MI1: Root, MI2: *MI);
2652}
2653
2654static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
2655 SmallVectorImpl<unsigned> &Patterns,
2656 bool DoRegPressureReduce) {
2657 unsigned Opc = Root.getOpcode();
2658 bool IsFAdd = isFADD(Opc);
2659 if (!IsFAdd && !isFSUB(Opc))
2660 return false;
2661 bool Added = false;
2662 if (canCombineFPFusedMultiply(Root, MO: Root.getOperand(i: 1),
2663 DoRegPressureReduce)) {
2664 Patterns.push_back(Elt: IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
2665 : RISCVMachineCombinerPattern::FMSUB);
2666 Added = true;
2667 }
2668 if (canCombineFPFusedMultiply(Root, MO: Root.getOperand(i: 2),
2669 DoRegPressureReduce)) {
2670 Patterns.push_back(Elt: IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
2671 : RISCVMachineCombinerPattern::FNMSUB);
2672 Added = true;
2673 }
2674 return Added;
2675}
2676
2677static bool getFPPatterns(MachineInstr &Root,
2678 SmallVectorImpl<unsigned> &Patterns,
2679 bool DoRegPressureReduce) {
2680 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
2681}
2682
2683/// Utility routine that checks if \param MO is defined by an
2684/// \param CombineOpc instruction in the basic block \param MBB
2685static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
2686 const MachineOperand &MO,
2687 unsigned CombineOpc) {
2688 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2689 const MachineInstr *MI = nullptr;
2690
2691 if (MO.isReg() && MO.getReg().isVirtual())
2692 MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
2693 // And it needs to be in the trace (otherwise, it won't have a depth).
2694 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
2695 return nullptr;
2696 // Must only used by the user we combine with.
2697 if (!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
2698 return nullptr;
2699
2700 return MI;
2701}
2702
2703/// Utility routine that checks if \param MO is defined by a SLLI in \param
2704/// MBB that can be combined by splitting across 2 SHXADD instructions. The
2705/// first SHXADD shift amount is given by \param OuterShiftAmt.
2706static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
2707 const MachineOperand &MO,
2708 unsigned OuterShiftAmt) {
2709 const MachineInstr *ShiftMI = canCombine(MBB, MO, CombineOpc: RISCV::SLLI);
2710 if (!ShiftMI)
2711 return false;
2712
2713 unsigned InnerShiftAmt = ShiftMI->getOperand(i: 2).getImm();
2714 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
2715 return false;
2716
2717 return true;
2718}
2719
2720// Returns the shift amount from a SHXADD instruction. Returns 0 if the
2721// instruction is not a SHXADD.
2722static unsigned getSHXADDShiftAmount(unsigned Opc) {
2723 switch (Opc) {
2724 default:
2725 return 0;
2726 case RISCV::SH1ADD:
2727 return 1;
2728 case RISCV::SH2ADD:
2729 return 2;
2730 case RISCV::SH3ADD:
2731 return 3;
2732 }
2733}
2734
2735// Returns the shift amount from a SHXADD.UW instruction. Returns 0 if the
2736// instruction is not a SHXADD.UW.
2737static unsigned getSHXADDUWShiftAmount(unsigned Opc) {
2738 switch (Opc) {
2739 default:
2740 return 0;
2741 case RISCV::SH1ADD_UW:
2742 return 1;
2743 case RISCV::SH2ADD_UW:
2744 return 2;
2745 case RISCV::SH3ADD_UW:
2746 return 3;
2747 }
2748}
2749
2750// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
2751// (sh3add (sh2add Y, Z), X).
2752static bool getSHXADDPatterns(const MachineInstr &Root,
2753 SmallVectorImpl<unsigned> &Patterns) {
2754 unsigned ShiftAmt = getSHXADDShiftAmount(Opc: Root.getOpcode());
2755 if (!ShiftAmt)
2756 return false;
2757
2758 const MachineBasicBlock &MBB = *Root.getParent();
2759
2760 const MachineInstr *AddMI = canCombine(MBB, MO: Root.getOperand(i: 2), CombineOpc: RISCV::ADD);
2761 if (!AddMI)
2762 return false;
2763
2764 bool Found = false;
2765 if (canCombineShiftIntoShXAdd(MBB, MO: AddMI->getOperand(i: 1), OuterShiftAmt: ShiftAmt)) {
2766 Patterns.push_back(Elt: RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
2767 Found = true;
2768 }
2769 if (canCombineShiftIntoShXAdd(MBB, MO: AddMI->getOperand(i: 2), OuterShiftAmt: ShiftAmt)) {
2770 Patterns.push_back(Elt: RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
2771 Found = true;
2772 }
2773
2774 return Found;
2775}
2776
2777CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {
2778 switch (Pattern) {
2779 case RISCVMachineCombinerPattern::FMADD_AX:
2780 case RISCVMachineCombinerPattern::FMADD_XA:
2781 case RISCVMachineCombinerPattern::FMSUB:
2782 case RISCVMachineCombinerPattern::FNMSUB:
2783 return CombinerObjective::MustReduceDepth;
2784 default:
2785 return TargetInstrInfo::getCombinerObjective(Pattern);
2786 }
2787}
2788
2789bool RISCVInstrInfo::getMachineCombinerPatterns(
2790 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
2791 bool DoRegPressureReduce) const {
2792
2793 if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
2794 return true;
2795
2796 if (getSHXADDPatterns(Root, Patterns))
2797 return true;
2798
2799 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
2800 DoRegPressureReduce);
2801}
2802
2803static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
2804 switch (RootOpc) {
2805 default:
2806 llvm_unreachable("Unexpected opcode");
2807 case RISCV::FADD_H:
2808 return RISCV::FMADD_H;
2809 case RISCV::FADD_S:
2810 return RISCV::FMADD_S;
2811 case RISCV::FADD_D:
2812 return RISCV::FMADD_D;
2813 case RISCV::FSUB_H:
2814 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
2815 : RISCV::FNMSUB_H;
2816 case RISCV::FSUB_S:
2817 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
2818 : RISCV::FNMSUB_S;
2819 case RISCV::FSUB_D:
2820 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
2821 : RISCV::FNMSUB_D;
2822 }
2823}
2824
2825static unsigned getAddendOperandIdx(unsigned Pattern) {
2826 switch (Pattern) {
2827 default:
2828 llvm_unreachable("Unexpected pattern");
2829 case RISCVMachineCombinerPattern::FMADD_AX:
2830 case RISCVMachineCombinerPattern::FMSUB:
2831 return 2;
2832 case RISCVMachineCombinerPattern::FMADD_XA:
2833 case RISCVMachineCombinerPattern::FNMSUB:
2834 return 1;
2835 }
2836}
2837
2838static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
2839 unsigned Pattern,
2840 SmallVectorImpl<MachineInstr *> &InsInstrs,
2841 SmallVectorImpl<MachineInstr *> &DelInstrs) {
2842 MachineFunction *MF = Root.getMF();
2843 MachineRegisterInfo &MRI = MF->getRegInfo();
2844 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2845
2846 MachineOperand &Mul1 = Prev.getOperand(i: 1);
2847 MachineOperand &Mul2 = Prev.getOperand(i: 2);
2848 MachineOperand &Dst = Root.getOperand(i: 0);
2849 MachineOperand &Addend = Root.getOperand(i: getAddendOperandIdx(Pattern));
2850
2851 Register DstReg = Dst.getReg();
2852 unsigned FusedOpc = getFPFusedMultiplyOpcode(RootOpc: Root.getOpcode(), Pattern);
2853 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
2854 DebugLoc MergedLoc =
2855 DILocation::getMergedLocation(LocA: Root.getDebugLoc(), LocB: Prev.getDebugLoc());
2856
2857 bool Mul1IsKill = Mul1.isKill();
2858 bool Mul2IsKill = Mul2.isKill();
2859 bool AddendIsKill = Addend.isKill();
2860
2861 // We need to clear kill flags since we may be extending the live range past
2862 // a kill. If the mul had kill flags, we can preserve those since we know
2863 // where the previous range stopped.
2864 MRI.clearKillFlags(Reg: Mul1.getReg());
2865 MRI.clearKillFlags(Reg: Mul2.getReg());
2866
2867 MachineInstrBuilder MIB =
2868 BuildMI(MF&: *MF, MIMD: MergedLoc, MCID: TII->get(Opcode: FusedOpc), DestReg: DstReg)
2869 .addReg(RegNo: Mul1.getReg(), Flags: getKillRegState(B: Mul1IsKill))
2870 .addReg(RegNo: Mul2.getReg(), Flags: getKillRegState(B: Mul2IsKill))
2871 .addReg(RegNo: Addend.getReg(), Flags: getKillRegState(B: AddendIsKill))
2872 .setMIFlags(IntersectedFlags);
2873
2874 InsInstrs.push_back(Elt: MIB);
2875 if (MRI.hasOneNonDBGUse(RegNo: Prev.getOperand(i: 0).getReg()))
2876 DelInstrs.push_back(Elt: &Prev);
2877 DelInstrs.push_back(Elt: &Root);
2878}
2879
2880// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
2881// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
2882// shXadd instructions. The outer shXadd keeps its original opcode.
2883static void
2884genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
2885 SmallVectorImpl<MachineInstr *> &InsInstrs,
2886 SmallVectorImpl<MachineInstr *> &DelInstrs,
2887 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
2888 MachineFunction *MF = Root.getMF();
2889 MachineRegisterInfo &MRI = MF->getRegInfo();
2890 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2891
2892 unsigned OuterShiftAmt = getSHXADDShiftAmount(Opc: Root.getOpcode());
2893 assert(OuterShiftAmt != 0 && "Unexpected opcode");
2894
2895 MachineInstr *AddMI = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 2).getReg());
2896 MachineInstr *ShiftMI =
2897 MRI.getUniqueVRegDef(Reg: AddMI->getOperand(i: AddOpIdx).getReg());
2898
2899 unsigned InnerShiftAmt = ShiftMI->getOperand(i: 2).getImm();
2900 assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
2901
2902 unsigned InnerOpc;
2903 switch (InnerShiftAmt - OuterShiftAmt) {
2904 default:
2905 llvm_unreachable("Unexpected shift amount");
2906 case 0:
2907 InnerOpc = RISCV::ADD;
2908 break;
2909 case 1:
2910 InnerOpc = RISCV::SH1ADD;
2911 break;
2912 case 2:
2913 InnerOpc = RISCV::SH2ADD;
2914 break;
2915 case 3:
2916 InnerOpc = RISCV::SH3ADD;
2917 break;
2918 }
2919
2920 const MachineOperand &X = AddMI->getOperand(i: 3 - AddOpIdx);
2921 const MachineOperand &Y = ShiftMI->getOperand(i: 1);
2922 const MachineOperand &Z = Root.getOperand(i: 1);
2923
2924 Register NewVR = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
2925
2926 auto MIB1 = BuildMI(MF&: *MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: InnerOpc), DestReg: NewVR)
2927 .addReg(RegNo: Y.getReg(), Flags: getKillRegState(B: Y.isKill()))
2928 .addReg(RegNo: Z.getReg(), Flags: getKillRegState(B: Z.isKill()));
2929 auto MIB2 = BuildMI(MF&: *MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: Root.getOpcode()),
2930 DestReg: Root.getOperand(i: 0).getReg())
2931 .addReg(RegNo: NewVR, Flags: RegState::Kill)
2932 .addReg(RegNo: X.getReg(), Flags: getKillRegState(B: X.isKill()));
2933
2934 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
2935 InsInstrs.push_back(Elt: MIB1);
2936 InsInstrs.push_back(Elt: MIB2);
2937 DelInstrs.push_back(Elt: ShiftMI);
2938 DelInstrs.push_back(Elt: AddMI);
2939 DelInstrs.push_back(Elt: &Root);
2940}
2941
2942void RISCVInstrInfo::genAlternativeCodeSequence(
2943 MachineInstr &Root, unsigned Pattern,
2944 SmallVectorImpl<MachineInstr *> &InsInstrs,
2945 SmallVectorImpl<MachineInstr *> &DelInstrs,
2946 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
2947 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2948 switch (Pattern) {
2949 default:
2950 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
2951 DelInstrs, InstIdxForVirtReg&: InstrIdxForVirtReg);
2952 return;
2953 case RISCVMachineCombinerPattern::FMADD_AX:
2954 case RISCVMachineCombinerPattern::FMSUB: {
2955 MachineInstr &Prev = *MRI.getVRegDef(Reg: Root.getOperand(i: 1).getReg());
2956 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2957 return;
2958 }
2959 case RISCVMachineCombinerPattern::FMADD_XA:
2960 case RISCVMachineCombinerPattern::FNMSUB: {
2961 MachineInstr &Prev = *MRI.getVRegDef(Reg: Root.getOperand(i: 2).getReg());
2962 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2963 return;
2964 }
2965 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
2966 genShXAddAddShift(Root, AddOpIdx: 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2967 return;
2968 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
2969 genShXAddAddShift(Root, AddOpIdx: 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2970 return;
2971 }
2972}
2973
// Target-specific verification of MI. Two groups of checks are performed:
//  1. Every operand declared in the instruction description is checked
//     against its RISC-V operand type (operand kind and immediate range).
//  2. Operands located through the description's TSFlags (VL, SEW, vector
//     policy, FRM) are checked for kind, range and mutual consistency.
// On the first failure ErrInfo is set to a short description and false is
// returned; true is returned when every check passes.
2974 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
2975 StringRef &ErrInfo) const {
2976 MCInstrDesc const &Desc = MI.getDesc();
2977 
// Per-operand checks, driven by the operand type recorded in the description.
2978 for (const auto &[Index, Operand] : enumerate(First: Desc.operands())) {
2979 const MachineOperand &MO = MI.getOperand(i: Index);
2980 unsigned OpType = Operand.OperandType;
2981 switch (OpType) {
2982 default:
// Operand types inside [OPERAND_FIRST_RISCV_IMM, OPERAND_LAST_RISCV_IMM] must
// be immediates; the nested switch validates the value for each type. Operand
// types outside that range and without an explicit case below are not checked.
2983 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
2984 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
2985 if (!MO.isImm()) {
2986 ErrInfo = "Expected an immediate operand.";
2987 return false;
2988 }
2989 int64_t Imm = MO.getImm();
2990 bool Ok;
2991 switch (OpType) {
2992 default:
2993 llvm_unreachable("Unexpected operand type");
2994 
// Helper macros generating the plain range-check cases. Each expands to a
// case label that assigns Ok from Imm and breaks out of the nested switch.
// In CASE_OPERAND_UIMM_LSB_ZEROS the number of characters in SUFFIX is the
// number of low bits that must be zero.
2995 #define CASE_OPERAND_UIMM(NUM) \
2996 case RISCVOp::OPERAND_UIMM##NUM: \
2997 Ok = isUInt<NUM>(Imm); \
2998 break;
2999 #define CASE_OPERAND_UIMM_LSB_ZEROS(BITS, SUFFIX) \
3000 case RISCVOp::OPERAND_UIMM##BITS##_LSB##SUFFIX: { \
3001 constexpr size_t NumZeros = sizeof(#SUFFIX) - 1; \
3002 Ok = isShiftedUInt<BITS - NumZeros, NumZeros>(Imm); \
3003 break; \
3004 }
3005 #define CASE_OPERAND_SIMM(NUM) \
3006 case RISCVOp::OPERAND_SIMM##NUM: \
3007 Ok = isInt<NUM>(Imm); \
3008 break;
3009 // clang-format off
3010 CASE_OPERAND_UIMM(1)
3011 CASE_OPERAND_UIMM(2)
3012 CASE_OPERAND_UIMM(3)
3013 CASE_OPERAND_UIMM(4)
3014 CASE_OPERAND_UIMM(5)
3015 CASE_OPERAND_UIMM(6)
3016 CASE_OPERAND_UIMM(7)
3017 CASE_OPERAND_UIMM(8)
3018 CASE_OPERAND_UIMM(9)
3019 CASE_OPERAND_UIMM(10)
3020 CASE_OPERAND_UIMM(12)
3021 CASE_OPERAND_UIMM(16)
3022 CASE_OPERAND_UIMM(32)
3023 CASE_OPERAND_UIMM(48)
3024 CASE_OPERAND_UIMM(64)
3025 CASE_OPERAND_UIMM_LSB_ZEROS(2, 0)
3026 CASE_OPERAND_UIMM_LSB_ZEROS(5, 0)
3027 CASE_OPERAND_UIMM_LSB_ZEROS(6, 0)
3028 CASE_OPERAND_UIMM_LSB_ZEROS(7, 00)
3029 CASE_OPERAND_UIMM_LSB_ZEROS(7, 000)
3030 CASE_OPERAND_UIMM_LSB_ZEROS(8, 00)
3031 CASE_OPERAND_UIMM_LSB_ZEROS(8, 000)
3032 CASE_OPERAND_UIMM_LSB_ZEROS(9, 000)
3033 // clang-format on
3034 case RISCVOp::OPERAND_UIMM5_NONZERO:
3035 Ok = isUInt<5>(x: Imm) && (Imm != 0);
3036 break;
3037 case RISCVOp::OPERAND_UIMM5_GT3:
3038 Ok = isUInt<5>(x: Imm) && (Imm > 3);
3039 break;
3040 case RISCVOp::OPERAND_UIMM5_PLUS1:
3041 Ok = Imm >= 1 && Imm <= 32;
3042 break;
3043 case RISCVOp::OPERAND_UIMM6_PLUS1:
3044 Ok = Imm >= 1 && Imm <= 64;
3045 break;
3046 case RISCVOp::OPERAND_UIMM7_EQ_XLEN:
3047 Ok = Imm == STI.getXLen();
3048 break;
3049 case RISCVOp::OPERAND_UIMM8_GE32:
3050 Ok = isUInt<8>(x: Imm) && Imm >= 32;
3051 break;
3052 case RISCVOp::OPERAND_UIMM9_YBNDSWI:
3053 Ok = RISCV::isValidYBNDSWImm(Imm);
3054 break;
3055 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
3056 Ok = isShiftedInt<6, 4>(x: Imm) && (Imm != 0);
3057 break;
3058 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
3059 Ok = isShiftedUInt<8, 2>(x: Imm) && (Imm != 0);
3060 break;
3061 case RISCVOp::OPERAND_UIMM16_NONZERO:
3062 Ok = isUInt<16>(x: Imm) && (Imm != 0);
3063 break;
3064 case RISCVOp::OPERAND_THREE:
3065 Ok = Imm == 3;
3066 break;
3067 case RISCVOp::OPERAND_FOUR:
3068 Ok = Imm == 4;
3069 break;
3070 case RISCVOp::OPERAND_IMM5_ZIBI:
3071 Ok = (isUInt<5>(x: Imm) && Imm != 0) || Imm == -1;
3072 break;
3073 // clang-format off
3074 CASE_OPERAND_SIMM(5)
3075 CASE_OPERAND_SIMM(6)
3076 CASE_OPERAND_SIMM(8)
3077 CASE_OPERAND_SIMM(10)
3078 CASE_OPERAND_SIMM(11)
3079 CASE_OPERAND_SIMM(12)
3080 CASE_OPERAND_SIMM(26)
3081 // clang-format on
3082 case RISCVOp::OPERAND_SIMM5_PLUS1:
3083 Ok = Imm >= -15 && Imm <= 16;
3084 break;
3085 case RISCVOp::OPERAND_SIMM5_NONZERO:
3086 Ok = isInt<5>(x: Imm) && (Imm != 0);
3087 break;
3088 case RISCVOp::OPERAND_SIMM6_NONZERO:
3089 Ok = Imm != 0 && isInt<6>(x: Imm);
3090 break;
3091 case RISCVOp::OPERAND_VTYPEI10:
3092 Ok = isUInt<10>(x: Imm) && RISCVVType::isValidVType(VType: Imm);
3093 break;
3094 case RISCVOp::OPERAND_VTYPEI11:
3095 Ok = isUInt<11>(x: Imm) && RISCVVType::isValidVType(VType: Imm);
3096 break;
3097 case RISCVOp::OPERAND_SIMM12_LSB00000:
3098 Ok = isShiftedInt<7, 5>(x: Imm);
3099 break;
3100 case RISCVOp::OPERAND_SIMM16_NONZERO:
3101 Ok = isInt<16>(x: Imm) && (Imm != 0);
3102 break;
3103 case RISCVOp::OPERAND_SIMM20_LI:
3104 Ok = isInt<20>(x: Imm);
3105 break;
// The permitted width depends on the subtarget: 6 bits when STI.is64Bit(),
// 5 bits otherwise.
3106 case RISCVOp::OPERAND_UIMMLOG2XLEN:
3107 Ok = STI.is64Bit() ? isUInt<6>(x: Imm) : isUInt<5>(x: Imm);
3108 break;
3109 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
3110 Ok = STI.is64Bit() ? isUInt<6>(x: Imm) : isUInt<5>(x: Imm);
3111 Ok = Ok && Imm != 0;
3112 break;
3113 case RISCVOp::OPERAND_CLUI_IMM:
3114 Ok = (isUInt<5>(x: Imm) && Imm != 0) || (Imm >= 0xfffe0 && Imm <= 0xfffff);
3115 break;
3116 case RISCVOp::OPERAND_RVKRNUM:
3117 Ok = Imm >= 0 && Imm <= 10;
3118 break;
3119 case RISCVOp::OPERAND_RVKRNUM_0_7:
3120 Ok = Imm >= 0 && Imm <= 7;
3121 break;
3122 case RISCVOp::OPERAND_RVKRNUM_1_10:
3123 Ok = Imm >= 1 && Imm <= 10;
3124 break;
3125 case RISCVOp::OPERAND_RVKRNUM_2_14:
3126 Ok = Imm >= 2 && Imm <= 14;
3127 break;
3128 case RISCVOp::OPERAND_RLIST:
3129 Ok = Imm >= RISCVZC::RA && Imm <= RISCVZC::RA_S0_S11;
3130 break;
3131 case RISCVOp::OPERAND_RLIST_S0:
3132 Ok = Imm >= RISCVZC::RA_S0 && Imm <= RISCVZC::RA_S0_S11;
3133 break;
3134 case RISCVOp::OPERAND_STACKADJ:
3135 Ok = Imm >= 0 && Imm <= 48 && Imm % 16 == 0;
3136 break;
3137 case RISCVOp::OPERAND_FRMARG:
3138 Ok = RISCVFPRndMode::isValidRoundingMode(Mode: Imm);
3139 break;
3140 case RISCVOp::OPERAND_RTZARG:
3141 Ok = Imm == RISCVFPRndMode::RTZ;
3142 break;
3143 case RISCVOp::OPERAND_COND_CODE:
3144 Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID;
3145 break;
3146 case RISCVOp::OPERAND_ATOMIC_ORDERING:
3147 Ok = isValidAtomicOrdering(I: Imm);
3148 break;
// Only the TAIL_AGNOSTIC and MASK_AGNOSTIC bits may be set.
3149 case RISCVOp::OPERAND_VEC_POLICY:
3150 Ok = (Imm & (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)) ==
3151 Imm;
3152 break;
// The immediate is used as a shift amount (1 << Imm) to obtain the SEW value
// that is validated.
3153 case RISCVOp::OPERAND_SEW:
3154 Ok = (isUInt<5>(x: Imm) && RISCVVType::isValidSEW(SEW: 1 << Imm));
3155 break;
3156 case RISCVOp::OPERAND_SEW_MASK:
3157 Ok = Imm == 0;
3158 break;
// The valid range depends on which rounding mode the instruction uses: a
// 2-bit value when usesVXRM(TSFlags) holds, otherwise a valid FP rounding
// mode.
3159 case RISCVOp::OPERAND_VEC_RM:
3160 assert(RISCVII::hasRoundModeOp(Desc.TSFlags));
3161 if (RISCVII::usesVXRM(TSFlags: Desc.TSFlags))
3162 Ok = isUInt<2>(x: Imm);
3163 else
3164 Ok = RISCVFPRndMode::isValidRoundingMode(Mode: Imm);
3165 break;
3166 case RISCVOp::OPERAND_XSFMM_VTYPE:
3167 Ok = RISCVVType::isValidXSfmmVType(VTypeI: Imm);
3168 break;
3169 case RISCVOp::OPERAND_XSFMM_TWIDEN:
3170 Ok = Imm == 1 || Imm == 2 || Imm == 4;
3171 break;
3172 }
3173 if (!Ok) {
3174 ErrInfo = "Invalid immediate";
3175 return false;
3176 }
3177 }
3178 break;
// The next three groups accept any non-register operand; the range check is
// applied only when the operand actually is an immediate.
3179 case RISCVOp::OPERAND_SIMM12_LO:
3180 // TODO: We could be stricter about what non-register operands are
3181 // allowed.
3182 if (MO.isReg()) {
3183 ErrInfo = "Expected a non-register operand.";
3184 return false;
3185 }
3186 if (MO.isImm() && !isInt<12>(x: MO.getImm())) {
3187 ErrInfo = "Invalid immediate";
3188 return false;
3189 }
3190 break;
3191 case RISCVOp::OPERAND_UIMM20_LUI:
3192 case RISCVOp::OPERAND_UIMM20_AUIPC:
3193 // TODO: We could be stricter about what non-register operands are
3194 // allowed.
3195 if (MO.isReg()) {
3196 ErrInfo = "Expected a non-register operand.";
3197 return false;
3198 }
3199 if (MO.isImm() && !isUInt<20>(x: MO.getImm())) {
3200 ErrInfo = "Invalid immediate";
3201 return false;
3202 }
3203 break;
3204 case RISCVOp::OPERAND_BARE_SIMM32:
3205 // TODO: We could be stricter about what non-register operands are
3206 // allowed.
3207 if (MO.isReg()) {
3208 ErrInfo = "Expected a non-register operand.";
3209 return false;
3210 }
3211 if (MO.isImm() && !isInt<32>(x: MO.getImm())) {
3212 ErrInfo = "Invalid immediate";
3213 return false;
3214 }
3215 break;
// AVL may be a register or an immediate; an immediate must fit in 5 unsigned
// bits or be the -1 sentinel.
3216 case RISCVOp::OPERAND_AVL:
3217 if (MO.isImm()) {
3218 int64_t Imm = MO.getImm();
3219 // VLMAX is represented as -1.
3220 if (!isUInt<5>(x: Imm) && Imm != -1) {
3221 ErrInfo = "Invalid immediate";
3222 return false;
3223 }
3224 } else if (!MO.isReg()) {
3225 ErrInfo = "Expected a register or immediate operand.";
3226 return false;
3227 }
3228 break;
3229 case RISCVOp::OPERAND_SFB_RHS:
3230 if (!MO.isReg() && !MO.isImm()) {
3231 ErrInfo = "Expected a register or immediate operand.";
3232 return false;
3233 }
3234 break;
3235 }
3236 }
3237 
// Checks for operands that are located through the description's TSFlags.
3238 const uint64_t TSFlags = Desc.TSFlags;
// VL operand: must be an immediate or a register; a valid register must
// belong to a register class contained in GPRNoX0; and a VL operand requires
// a SEW operand to be present as well.
3239 if (RISCVII::hasVLOp(TSFlags)) {
3240 const MachineOperand &Op = MI.getOperand(i: RISCVII::getVLOpNum(Desc));
3241 if (!Op.isImm() && !Op.isReg()) {
3242 ErrInfo = "Invalid operand type for VL operand";
3243 return false;
3244 }
3245 if (Op.isReg() && Op.getReg().isValid()) {
3246 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3247 auto *RC = MRI.getRegClass(Reg: Op.getReg());
3248 if (!RISCV::GPRNoX0RegClass.hasSubClassEq(RC)) {
3249 ErrInfo = "Invalid register class for VL operand";
3250 return false;
3251 }
3252 }
3253 if (!RISCVII::hasSEWOp(TSFlags)) {
3254 ErrInfo = "VL operand w/o SEW operand?";
3255 return false;
3256 }
3257 }
// SEW operand: an immediate holding log2(SEW). A value of 0 is treated as
// SEW = 8; values above 31 are rejected before shifting.
3258 if (RISCVII::hasSEWOp(TSFlags)) {
3259 unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
3260 if (!MI.getOperand(i: OpIdx).isImm()) {
3261 ErrInfo = "SEW value expected to be an immediate";
3262 return false;
3263 }
3264 uint64_t Log2SEW = MI.getOperand(i: OpIdx).getImm();
3265 if (Log2SEW > 31) {
3266 ErrInfo = "Unexpected SEW value";
3267 return false;
3268 }
3269 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
3270 if (!RISCVVType::isValidSEW(SEW)) {
3271 ErrInfo = "Unexpected SEW value";
3272 return false;
3273 }
3274 }
// Policy operand: an immediate no larger than TAIL_AGNOSTIC | MASK_AGNOSTIC,
// only allowed together with a VL operand and a def that is tied to a use.
3275 if (RISCVII::hasVecPolicyOp(TSFlags)) {
3276 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
3277 if (!MI.getOperand(i: OpIdx).isImm()) {
3278 ErrInfo = "Policy operand expected to be an immediate";
3279 return false;
3280 }
3281 uint64_t Policy = MI.getOperand(i: OpIdx).getImm();
3282 if (Policy > (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)) {
3283 ErrInfo = "Invalid Policy Value";
3284 return false;
3285 }
3286 if (!RISCVII::hasVLOp(TSFlags)) {
3287 ErrInfo = "policy operand w/o VL operand?";
3288 return false;
3289 }
3290 
3291 // VecPolicy operands can only exist on instructions with passthru/merge
3292 // arguments. Note that not all arguments with passthru have vec policy
3293 // operands- some instructions have implicit policies.
3294 unsigned UseOpIdx;
3295 if (!MI.isRegTiedToUseOperand(DefOpIdx: 0, UseOpIdx: &UseOpIdx)) {
3296 ErrInfo = "policy operand w/o tied operand?";
3297 return false;
3298 }
3299 }
3300 
// An instruction whose FRM operand is DYN must read the FRM register.
3301 if (int Idx = RISCVII::getFRMOpNum(Desc);
3302 Idx >= 0 && MI.getOperand(i: Idx).getImm() == RISCVFPRndMode::DYN &&
3303 !MI.readsRegister(Reg: RISCV::FRM, /*TRI=*/nullptr)) {
3304 ErrInfo = "dynamic rounding mode should read FRM";
3305 return false;
3306 }
3307 
3308 return true;
3309 }
3310
3311bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
3312 const MachineInstr &AddrI,
3313 ExtAddrMode &AM) const {
3314 switch (MemI.getOpcode()) {
3315 default:
3316 return false;
3317 case RISCV::LB:
3318 case RISCV::LBU:
3319 case RISCV::LH:
3320 case RISCV::LH_INX:
3321 case RISCV::LHU:
3322 case RISCV::LW:
3323 case RISCV::LW_INX:
3324 case RISCV::LWU:
3325 case RISCV::LD:
3326 case RISCV::LD_RV32:
3327 case RISCV::FLH:
3328 case RISCV::FLW:
3329 case RISCV::FLD:
3330 case RISCV::SB:
3331 case RISCV::SH:
3332 case RISCV::SH_INX:
3333 case RISCV::SW:
3334 case RISCV::SW_INX:
3335 case RISCV::SD:
3336 case RISCV::SD_RV32:
3337 case RISCV::FSH:
3338 case RISCV::FSW:
3339 case RISCV::FSD:
3340 break;
3341 }
3342
3343 if (MemI.getOperand(i: 0).getReg() == Reg)
3344 return false;
3345
3346 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(i: 1).isReg() ||
3347 !AddrI.getOperand(i: 2).isImm())
3348 return false;
3349
3350 int64_t OldOffset = MemI.getOperand(i: 2).getImm();
3351 int64_t Disp = AddrI.getOperand(i: 2).getImm();
3352 int64_t NewOffset = OldOffset + Disp;
3353 if (!STI.is64Bit())
3354 NewOffset = SignExtend64<32>(x: NewOffset);
3355
3356 if (!isInt<12>(x: NewOffset))
3357 return false;
3358
3359 AM.BaseReg = AddrI.getOperand(i: 1).getReg();
3360 AM.ScaledReg = 0;
3361 AM.Scale = 0;
3362 AM.Displacement = NewOffset;
3363 AM.Form = ExtAddrMode::Formula::Basic;
3364 return true;
3365}
3366
3367MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3368 const ExtAddrMode &AM) const {
3369
3370 const DebugLoc &DL = MemI.getDebugLoc();
3371 MachineBasicBlock &MBB = *MemI.getParent();
3372
3373 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3374 "Addressing mode not supported for folding");
3375
3376 return BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode: MemI.getOpcode()))
3377 .addReg(RegNo: MemI.getOperand(i: 0).getReg(), Flags: getDefRegState(B: MemI.mayLoad()))
3378 .addReg(RegNo: AM.BaseReg)
3379 .addImm(Val: AM.Displacement)
3380 .setMemRefs(MemI.memoperands())
3381 .setMIFlags(MemI.getFlags());
3382}
3383
3384// TODO: At the moment, MIPS introduced paring of instructions operating with
3385// word or double word. This should be extended with more instructions when more
3386// vendors support load/store pairing.
3387bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) {
3388 switch (Opc) {
3389 default:
3390 return false;
3391 case RISCV::SW:
3392 case RISCV::SD:
3393 case RISCV::LD:
3394 case RISCV::LW:
3395 return true;
3396 }
3397}
3398
3399bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt,
3400 const TargetRegisterInfo *TRI) {
3401 // If this is a volatile load/store, don't mess with it.
3402 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
3403 return false;
3404
3405 if (LdSt.getOperand(i: 1).isFI())
3406 return true;
3407
3408 assert(LdSt.getOperand(1).isReg() && "Expected a reg operand.");
3409 // Can't cluster if the instruction modifies the base register
3410 // or it is update form. e.g. ld x5,8(x5)
3411 if (LdSt.modifiesRegister(Reg: LdSt.getOperand(i: 1).getReg(), TRI))
3412 return false;
3413
3414 if (!LdSt.getOperand(i: 2).isImm())
3415 return false;
3416
3417 return true;
3418}
3419
3420bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
3421 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
3422 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3423 const TargetRegisterInfo *TRI) const {
3424 if (!LdSt.mayLoadOrStore())
3425 return false;
3426
3427 // Conservatively, only handle scalar loads/stores for now.
3428 switch (LdSt.getOpcode()) {
3429 case RISCV::LB:
3430 case RISCV::LBU:
3431 case RISCV::SB:
3432 case RISCV::LH:
3433 case RISCV::LH_INX:
3434 case RISCV::LHU:
3435 case RISCV::FLH:
3436 case RISCV::SH:
3437 case RISCV::SH_INX:
3438 case RISCV::FSH:
3439 case RISCV::LW:
3440 case RISCV::LW_INX:
3441 case RISCV::LWU:
3442 case RISCV::FLW:
3443 case RISCV::SW:
3444 case RISCV::SW_INX:
3445 case RISCV::FSW:
3446 case RISCV::LD:
3447 case RISCV::LD_RV32:
3448 case RISCV::FLD:
3449 case RISCV::SD:
3450 case RISCV::SD_RV32:
3451 case RISCV::FSD:
3452 break;
3453 default:
3454 return false;
3455 }
3456 const MachineOperand *BaseOp;
3457 OffsetIsScalable = false;
3458 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
3459 return false;
3460 BaseOps.push_back(Elt: BaseOp);
3461 return true;
3462}
3463
3464// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
3465// helper?
3466static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
3467 ArrayRef<const MachineOperand *> BaseOps1,
3468 const MachineInstr &MI2,
3469 ArrayRef<const MachineOperand *> BaseOps2) {
3470 // Only examine the first "base" operand of each instruction, on the
3471 // assumption that it represents the real base address of the memory access.
3472 // Other operands are typically offsets or indices from this base address.
3473 if (BaseOps1.front()->isIdenticalTo(Other: *BaseOps2.front()))
3474 return true;
3475
3476 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
3477 return false;
3478
3479 auto MO1 = *MI1.memoperands_begin();
3480 auto MO2 = *MI2.memoperands_begin();
3481 if (MO1->getAddrSpace() != MO2->getAddrSpace())
3482 return false;
3483
3484 auto Base1 = MO1->getValue();
3485 auto Base2 = MO2->getValue();
3486 if (!Base1 || !Base2)
3487 return false;
3488 Base1 = getUnderlyingObject(V: Base1);
3489 Base2 = getUnderlyingObject(V: Base2);
3490
3491 if (isa<UndefValue>(Val: Base1) || isa<UndefValue>(Val: Base2))
3492 return false;
3493
3494 return Base1 == Base2;
3495}
3496
3497bool RISCVInstrInfo::shouldClusterMemOps(
3498 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
3499 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
3500 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
3501 unsigned NumBytes) const {
3502 // If the mem ops (to be clustered) do not have the same base ptr, then they
3503 // should not be clustered
3504 if (!BaseOps1.empty() && !BaseOps2.empty()) {
3505 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
3506 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
3507 if (!memOpsHaveSameBasePtr(MI1: FirstLdSt, BaseOps1, MI2: SecondLdSt, BaseOps2))
3508 return false;
3509 } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
3510 // If only one base op is empty, they do not have the same base ptr
3511 return false;
3512 }
3513
3514 unsigned CacheLineSize =
3515 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
3516 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
3517 CacheLineSize = CacheLineSize ? CacheLineSize : 64;
3518 // Cluster if the memory operations are on the same or a neighbouring cache
3519 // line, but limit the maximum ClusterSize to avoid creating too much
3520 // additional register pressure.
3521 return ClusterSize <= 4 && std::abs(i: Offset1 - Offset2) < CacheLineSize;
3522}
3523
3524// Set BaseReg (the base register operand), Offset (the byte offset being
3525// accessed) and the access Width of the passed instruction that reads/writes
3526// memory. Returns false if the instruction does not read/write memory or the
3527// BaseReg/Offset/Width can't be determined. Is not guaranteed to always
3528// recognise base operands and offsets in all cases.
3529// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
3530// function) and set it as appropriate.
3531bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
3532 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
3533 LocationSize &Width, const TargetRegisterInfo *TRI) const {
3534 if (!LdSt.mayLoadOrStore())
3535 return false;
3536
3537 // Here we assume the standard RISC-V ISA, which uses a base+offset
3538 // addressing mode. You'll need to relax these conditions to support custom
3539 // load/store instructions.
3540 if (LdSt.getNumExplicitOperands() != 3)
3541 return false;
3542 if ((!LdSt.getOperand(i: 1).isReg() && !LdSt.getOperand(i: 1).isFI()) ||
3543 !LdSt.getOperand(i: 2).isImm())
3544 return false;
3545
3546 if (!LdSt.hasOneMemOperand())
3547 return false;
3548
3549 Width = (*LdSt.memoperands_begin())->getSize();
3550 BaseReg = &LdSt.getOperand(i: 1);
3551 Offset = LdSt.getOperand(i: 2).getImm();
3552 return true;
3553}
3554
3555bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
3556 const MachineInstr &MIa, const MachineInstr &MIb) const {
3557 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
3558 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
3559
3560 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
3561 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
3562 return false;
3563
3564 // Retrieve the base register, offset from the base register and width. Width
3565 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
3566 // base registers are identical, and the offset of a lower memory access +
3567 // the width doesn't overlap the offset of a higher memory access,
3568 // then the memory accesses are different.
3569 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3570 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
3571 int64_t OffsetA = 0, OffsetB = 0;
3572 LocationSize WidthA = LocationSize::precise(Value: 0),
3573 WidthB = LocationSize::precise(Value: 0);
3574 if (getMemOperandWithOffsetWidth(LdSt: MIa, BaseReg&: BaseOpA, Offset&: OffsetA, Width&: WidthA, TRI) &&
3575 getMemOperandWithOffsetWidth(LdSt: MIb, BaseReg&: BaseOpB, Offset&: OffsetB, Width&: WidthB, TRI)) {
3576 if (BaseOpA->isIdenticalTo(Other: *BaseOpB)) {
3577 int LowOffset = std::min(a: OffsetA, b: OffsetB);
3578 int HighOffset = std::max(a: OffsetA, b: OffsetB);
3579 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
3580 if (LowWidth.hasValue() &&
3581 LowOffset + (int)LowWidth.getValue() <= HighOffset)
3582 return true;
3583 }
3584 }
3585 return false;
3586}
3587
3588std::pair<unsigned, unsigned>
3589RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
3590 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
3591 return std::make_pair(x: TF & Mask, y: TF & ~Mask);
3592}
3593
3594ArrayRef<std::pair<unsigned, const char *>>
3595RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
3596 using namespace RISCVII;
3597 static const std::pair<unsigned, const char *> TargetFlags[] = {
3598 {MO_CALL, "riscv-call"},
3599 {MO_LO, "riscv-lo"},
3600 {MO_HI, "riscv-hi"},
3601 {MO_PCREL_LO, "riscv-pcrel-lo"},
3602 {MO_PCREL_HI, "riscv-pcrel-hi"},
3603 {MO_GOT_HI, "riscv-got-hi"},
3604 {MO_TPREL_LO, "riscv-tprel-lo"},
3605 {MO_TPREL_HI, "riscv-tprel-hi"},
3606 {MO_TPREL_ADD, "riscv-tprel-add"},
3607 {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
3608 {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
3609 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
3610 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
3611 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
3612 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"},
3613 {MO_QC_ACCESS, "riscv-qc-access"},
3614 };
3615 return ArrayRef(TargetFlags);
3616}
3617bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
3618 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
3619 const Function &F = MF.getFunction();
3620
3621 // Can F be deduplicated by the linker? If it can, don't outline from it.
3622 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
3623 return false;
3624
3625 // Don't outline from functions with section markings; the program could
3626 // expect that all the code is in the named section.
3627 if (F.hasSection())
3628 return false;
3629
3630 // It's safe to outline from MF.
3631 return true;
3632}
3633
3634bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
3635 unsigned &Flags) const {
3636 // More accurate safety checking is done in getOutliningCandidateInfo.
3637 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
3638}
3639
// Identifies how the call to an outlined function is built at a call site.
enum MachineOutlinerConstructionID {
  // The candidate ends in a return; the call site becomes a tail call.
  MachineOutlinerTailCall = 0,
  // The call is made through X5 (t0).
  MachineOutlinerDefault = 1,
  // X5 is saved to a temporary register around the call and restored after.
  MachineOutlinerRegSave = 2
};
3646
3647bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
3648 MachineFunction &MF) const {
3649 return MF.getFunction().hasMinSize();
3650}
3651
3652static bool isCandidatePatchable(const MachineBasicBlock &MBB) {
3653 const MachineFunction *MF = MBB.getParent();
3654 const Function &F = MF->getFunction();
3655 return F.getFnAttribute(Kind: "fentry-call").getValueAsBool() ||
3656 F.hasFnAttribute(Kind: "patchable-function-entry");
3657}
3658
3659static bool isMIReadsReg(const MachineInstr &MI, const TargetRegisterInfo *TRI,
3660 MCRegister RegNo) {
3661 return MI.readsRegister(Reg: RegNo, TRI) ||
3662 MI.getDesc().hasImplicitUseOfPhysReg(Reg: RegNo);
3663}
3664
3665static bool isMIModifiesReg(const MachineInstr &MI,
3666 const TargetRegisterInfo *TRI, MCRegister RegNo) {
3667 return MI.modifiesRegister(Reg: RegNo, TRI) ||
3668 MI.getDesc().hasImplicitDefOfPhysReg(Reg: RegNo);
3669}
3670
3671static bool cannotInsertTailCall(const MachineBasicBlock &MBB) {
3672 if (!MBB.back().isReturn())
3673 return true;
3674 if (isCandidatePatchable(MBB))
3675 return true;
3676
3677 // If the candidate reads the pre-set register
3678 // that can be used for expanding PseudoTAIL instruction,
3679 // then we cannot insert tail call.
3680 const TargetSubtargetInfo &STI = MBB.getParent()->getSubtarget();
3681 MCRegister TailExpandUseRegNo =
3682 RISCVII::getTailExpandUseRegNo(FeatureBits: STI.getFeatureBits());
3683 for (const MachineInstr &MI : MBB) {
3684 if (isMIReadsReg(MI, TRI: STI.getRegisterInfo(), RegNo: TailExpandUseRegNo))
3685 return true;
3686 if (isMIModifiesReg(MI, TRI: STI.getRegisterInfo(), RegNo: TailExpandUseRegNo))
3687 break;
3688 }
3689 return false;
3690}
3691
3692static Register findRegisterToSaveX5To(outliner::Candidate &C,
3693 const TargetRegisterInfo &TRI) {
3694 // Candidate registers for saving X5: t1-t6
3695 static const MCPhysReg TempRegs[] = {
3696 RISCV::X6, // t1
3697 RISCV::X7, // t2
3698 RISCV::X28, // t3
3699 RISCV::X29, // t4
3700 RISCV::X30, // t5
3701 RISCV::X31 // t6
3702 };
3703
3704 const MachineFunction *MF = C.getMF();
3705 const MachineRegisterInfo &MRI = MF->getRegInfo();
3706
3707 for (MCPhysReg Reg : TempRegs) {
3708 if (MRI.isReserved(PhysReg: Reg))
3709 continue;
3710
3711 if (C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
3712 C.isAvailableInsideSeq(Reg, TRI)) {
3713 return Reg;
3714 }
3715 }
3716
3717 return Register();
3718}
3719
3720bool RISCVInstrInfo::analyzeCandidate(outliner::Candidate &C) const {
3721 // If the expansion register for tail calls is live across the candidate
3722 // outlined call site, we cannot outline that candidate as the expansion
3723 // would clobber the register.
3724 MCRegister TailExpandUseReg =
3725 RISCVII::getTailExpandUseRegNo(FeatureBits: STI.getFeatureBits());
3726 if (C.back().isReturn() &&
3727 !C.isAvailableAcrossAndOutOfSeq(Reg: TailExpandUseReg, TRI: RegInfo)) {
3728 LLVM_DEBUG(dbgs() << "MBB:\n" << *C.getMBB());
3729 LLVM_DEBUG(dbgs() << "Cannot be outlined between: " << C.front() << "and "
3730 << C.back());
3731 LLVM_DEBUG(dbgs() << "Because the tail-call register is live across "
3732 "the proposed outlined function call\n");
3733 return true;
3734 }
3735
3736 // If last instruction is return then we can rely on
3737 // the verification already performed in the getOutliningTypeImpl.
3738 if (C.back().isReturn()) {
3739 assert(!cannotInsertTailCall(*C.getMBB()) &&
3740 "The candidate who uses return instruction must be outlined "
3741 "using tail call");
3742 return false;
3743 }
3744
3745 // Filter out candidates where the X5 register (t0) can't be used to setup
3746 // the function call.
3747 if (!C.isAvailableInsideSeq(Reg: RISCV::X5, TRI: RegInfo))
3748 return true;
3749
3750 // If X5 is available in the region, use X5 directly (MachineOutlinerDefault).
3751 if (C.isAvailableAcrossAndOutOfSeq(Reg: RISCV::X5, TRI: RegInfo))
3752 return false;
3753
3754 // Otherwise, try to save X5 into t1-t6 (MachineOutlinerRegSave).
3755 if (OutlinerEnableRegSave && findRegisterToSaveX5To(C, TRI: RegInfo))
3756 return false;
3757
3758 return true;
3759}
3760
3761std::optional<std::unique_ptr<outliner::OutlinedFunction>>
3762RISCVInstrInfo::getOutliningCandidateInfo(
3763 const MachineModuleInfo &MMI,
3764 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
3765 unsigned MinRepeats) const {
3766
3767 // Analyze each candidate and erase the ones that are not viable.
3768 llvm::erase_if(C&: RepeatedSequenceLocs, P: [this](auto Candidate) {
3769 return analyzeCandidate(C&: Candidate);
3770 });
3771
3772 // If the sequence doesn't have enough candidates left, then we're done.
3773 if (RepeatedSequenceLocs.size() < MinRepeats)
3774 return std::nullopt;
3775
3776 // Each RepeatedSequenceLoc is identical.
3777 outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
3778 unsigned InstrSizeCExt =
3779 Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtZca() ? 2 : 4;
3780 unsigned CallOverhead = 0, FrameOverhead = 0;
3781
3782 // Count the number of CFI instructions in the candidate, if present.
3783 unsigned CFICount = 0;
3784 for (auto &I : Candidate) {
3785 if (I.isCFIInstruction())
3786 CFICount++;
3787 }
3788
3789 // Ensure CFI coverage matches: comparing the number of CFIs in the candidate
3790 // with the total number of CFIs in the parent function for each candidate.
3791 // Outlining only a subset of a function’s CFIs would split the unwind state
3792 // across two code regions and lead to incorrect address offsets between the
3793 // outlined body and the remaining code. To preserve correct unwind info, we
3794 // only outline when all CFIs in the function can be outlined together.
3795 for (outliner::Candidate &C : RepeatedSequenceLocs) {
3796 std::vector<MCCFIInstruction> CFIInstructions =
3797 C.getMF()->getFrameInstructions();
3798
3799 if (CFICount > 0 && CFICount != CFIInstructions.size())
3800 return std::nullopt;
3801 }
3802
3803 MachineOutlinerConstructionID MOCI = MachineOutlinerDefault;
3804 if (Candidate.back().isReturn()) {
3805 MOCI = MachineOutlinerTailCall;
3806 // tail call = auipc + jalr in the worst case without linker relaxation.
3807 // FIXME: This code suggests the JALR can be compressed - how?
3808 CallOverhead = 4 + InstrSizeCExt;
3809 // Using tail call we move ret instruction from caller to callee.
3810 FrameOverhead = 0;
3811 } else {
3812 // call t0, function = 8 bytes.
3813 CallOverhead = 8;
3814 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3815 FrameOverhead = InstrSizeCExt;
3816 }
3817
3818 // If we have CFI instructions, we can only outline if the outlined section
3819 // can be a tail call.
3820 if (MOCI != MachineOutlinerTailCall && CFICount > 0)
3821 return std::nullopt;
3822
3823 if (OutlinerEnableRegSave && MOCI == MachineOutlinerDefault) {
3824 // Set per-candidate overhead based on X5 availability
3825 for (auto &C : RepeatedSequenceLocs) {
3826
3827 if (C.isAvailableAcrossAndOutOfSeq(Reg: RISCV::X5, TRI: RegInfo)) {
3828 // X5 is available, just need the call
3829 unsigned CandCallOverhead = 8;
3830 C.setCallInfo(CID: MachineOutlinerDefault, CO: CandCallOverhead);
3831 } else {
3832 // X5 unavailable, need save + call + restore
3833 // Save (2-4) + Call (8) + Restore (2-4)
3834 unsigned CandCallOverhead = InstrSizeCExt + 8 + InstrSizeCExt;
3835 C.setCallInfo(CID: MachineOutlinerRegSave, CO: CandCallOverhead);
3836 }
3837 }
3838 } else {
3839 for (auto &C : RepeatedSequenceLocs)
3840 C.setCallInfo(CID: MOCI, CO: CallOverhead);
3841 }
3842
3843 unsigned SequenceSize = 0;
3844 for (auto &MI : Candidate)
3845 SequenceSize += getInstSizeInBytes(MI);
3846
3847 return std::make_unique<outliner::OutlinedFunction>(
3848 args&: RepeatedSequenceLocs, args&: SequenceSize, args&: FrameOverhead, args&: MOCI);
3849}
3850
3851outliner::InstrType
3852RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
3853 MachineBasicBlock::iterator &MBBI,
3854 unsigned Flags) const {
3855 MachineInstr &MI = *MBBI;
3856 MachineBasicBlock *MBB = MI.getParent();
3857 const TargetRegisterInfo *TRI =
3858 MBB->getParent()->getSubtarget().getRegisterInfo();
3859 const auto &F = MI.getMF()->getFunction();
3860
3861 // We can only outline CFI instructions if we will tail call the outlined
3862 // function, or fix up the CFI offsets. Currently, CFI instructions are
3863 // outlined only if in a tail call.
3864 if (MI.isCFIInstruction())
3865 return outliner::InstrType::Legal;
3866
3867 if (cannotInsertTailCall(MBB: *MBB) &&
3868 (MI.isReturn() || isMIModifiesReg(MI, TRI, RegNo: RISCV::X5)))
3869 return outliner::InstrType::Illegal;
3870
3871 // Make sure the operands don't reference something unsafe.
3872 for (const auto &MO : MI.operands()) {
3873
3874 // pcrel-hi and pcrel-lo can't put in separate sections, filter that out
3875 // if any possible.
3876 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
3877 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
3878 F.hasSection() || F.getSectionPrefix()))
3879 return outliner::InstrType::Illegal;
3880 }
3881
3882 if (isLPAD(MI))
3883 return outliner::InstrType::Illegal;
3884
3885 return outliner::InstrType::Legal;
3886}
3887
3888void RISCVInstrInfo::buildOutlinedFrame(
3889 MachineBasicBlock &MBB, MachineFunction &MF,
3890 const outliner::OutlinedFunction &OF) const {
3891
3892 if (OF.FrameConstructionID == MachineOutlinerTailCall)
3893 return;
3894
3895 MBB.addLiveIn(PhysReg: RISCV::X5);
3896
3897 // Add in a return instruction to the end of the outlined frame.
3898 MBB.insert(I: MBB.end(), MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::JALR))
3899 .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
3900 .addReg(RegNo: RISCV::X5)
3901 .addImm(Val: 0));
3902}
3903
3904MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
3905 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
3906 MachineFunction &MF, outliner::Candidate &C) const {
3907
3908 if (C.CallConstructionID == MachineOutlinerTailCall) {
3909 It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::PseudoTAIL))
3910 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()),
3911 /*Offset=*/0, TargetFlags: RISCVII::MO_CALL));
3912 return It;
3913 }
3914
3915 if (C.CallConstructionID == MachineOutlinerRegSave) {
3916 Register SaveReg = findRegisterToSaveX5To(C, TRI: RegInfo);
3917 assert(SaveReg && "Cannot find an available register to save/restore X5.");
3918
3919 // Save: ADDI SaveReg, X5, 0 (equivalent to MV SaveReg, X5)
3920 It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::ADDI), DestReg: SaveReg)
3921 .addReg(RegNo: RISCV::X5)
3922 .addImm(Val: 0));
3923 It++;
3924
3925 // Call: PseudoCALLReg X5
3926 It = MBB.insert(
3927 I: It, MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::PseudoCALLReg), DestReg: RISCV::X5)
3928 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()), Offset: 0,
3929 TargetFlags: RISCVII::MO_CALL));
3930 MachineBasicBlock::iterator CallPt = It;
3931 It++;
3932
3933 // Restore: ADDI X5, SaveReg, 0 (equivalent to MV X5, SaveReg)
3934 It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::ADDI), DestReg: RISCV::X5)
3935 .addReg(RegNo: SaveReg)
3936 .addImm(Val: 0));
3937
3938 return CallPt;
3939 }
3940
3941 // Add in a call instruction to the outlined function at the given location.
3942 It = MBB.insert(I: It,
3943 MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::PseudoCALLReg), DestReg: RISCV::X5)
3944 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()), Offset: 0,
3945 TargetFlags: RISCVII::MO_CALL));
3946 return It;
3947}
3948
3949void RISCVInstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
3950 MachineBasicBlock::iterator Iter,
3951 DebugLoc &DL,
3952 bool AllowSideEffects) const {
3953
3954 const MachineFunction &MF = *MBB.getParent();
3955 const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
3956
3957 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
3958 BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: RISCV::PseudoClearGPR), DestReg: Reg);
3959 } else if (RISCV::FPR32RegClass.contains(Reg)) {
3960 BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: RISCV::PseudoClearFPR32), DestReg: Reg);
3961 } else if (RISCV::FPR64RegClass.contains(Reg)) {
3962 BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: RISCV::PseudoClearFPR64), DestReg: Reg);
3963 } else if (RISCV::FPR128RegClass.contains(Reg)) {
3964 BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: RISCV::PseudoClearFPR128), DestReg: Reg);
3965 } else {
3966 llvm::reportFatalInternalError(
3967 reason: "buildClearRegister is not implemented for vector registers");
3968 }
3969}
3970
3971std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
3972 Register Reg) const {
3973 // TODO: Handle cases where Reg is a super- or sub-register of the
3974 // destination register.
3975 const MachineOperand &Op0 = MI.getOperand(i: 0);
3976 if (!Op0.isReg() || Reg != Op0.getReg())
3977 return std::nullopt;
3978
3979 // Don't consider ADDIW as a candidate because the caller may not be aware
3980 // of its sign extension behaviour.
3981 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(i: 1).isReg() &&
3982 MI.getOperand(i: 2).isImm())
3983 return RegImmPair{MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getImm()};
3984
3985 return std::nullopt;
3986}
3987
3988// MIR printer helper function to annotate Operands with a comment.
3989std::string RISCVInstrInfo::createMIROperandComment(
3990 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
3991 const TargetRegisterInfo *TRI) const {
3992 // Print a generic comment for this operand if there is one.
3993 std::string GenericComment =
3994 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
3995 if (!GenericComment.empty())
3996 return GenericComment;
3997
3998 const MCInstrDesc &Desc = MI.getDesc();
3999 if (OpIdx >= Desc.getNumOperands())
4000 return std::string();
4001
4002 std::string Comment;
4003 raw_string_ostream OS(Comment);
4004
4005 const MCOperandInfo &OpInfo = Desc.operands()[OpIdx];
4006
4007 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
4008 // operand of vector codegen pseudos.
4009 switch (OpInfo.OperandType) {
4010 case RISCVOp::OPERAND_VTYPEI10:
4011 case RISCVOp::OPERAND_VTYPEI11: {
4012 unsigned Imm = Op.getImm();
4013 RISCVVType::printVType(VType: Imm, OS);
4014 break;
4015 }
4016 case RISCVOp::OPERAND_XSFMM_VTYPE: {
4017 unsigned Imm = Op.getImm();
4018 RISCVVType::printXSfmmVType(VType: Imm, OS);
4019 break;
4020 }
4021 case RISCVOp::OPERAND_XSFMM_TWIDEN: {
4022 unsigned Imm = Op.getImm();
4023 OS << "w" << Imm;
4024 break;
4025 }
4026 case RISCVOp::OPERAND_SEW:
4027 case RISCVOp::OPERAND_SEW_MASK: {
4028 unsigned Log2SEW = Op.getImm();
4029 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
4030 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
4031 OS << "e" << SEW;
4032 break;
4033 }
4034 case RISCVOp::OPERAND_VEC_POLICY: {
4035 unsigned Policy = Op.getImm();
4036 assert(Policy <= (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
4037 "Invalid Policy Value");
4038 OS << (Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
4039 << (Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu");
4040 break;
4041 }
4042 case RISCVOp::OPERAND_AVL:
4043 if (Op.isImm() && Op.getImm() == -1)
4044 OS << "vl=VLMAX";
4045 else
4046 OS << "vl";
4047 break;
4048 case RISCVOp::OPERAND_VEC_RM:
4049 if (RISCVII::usesVXRM(TSFlags: Desc.TSFlags)) {
4050 assert(RISCVVXRndMode::isValidRoundingMode(Op.getImm()));
4051 auto VXRM = static_cast<RISCVVXRndMode::RoundingMode>(Op.getImm());
4052 OS << "vxrm=" << RISCVVXRndMode::roundingModeToString(RndMode: VXRM);
4053 } else {
4054 assert(RISCVFPRndMode::isValidRoundingMode(Op.getImm()));
4055 auto FRM = static_cast<RISCVFPRndMode::RoundingMode>(Op.getImm());
4056 OS << "frm=" << RISCVFPRndMode::roundingModeToString(RndMode: FRM);
4057 }
4058 break;
4059 }
4060
4061 return Comment;
4062}
4063
4064// clang-format off
// Helpers that build `case` label lists for RVV pseudo opcodes. Each macro
// expands to one enumerator, or to several joined by ": case", so it is
// written after a leading `case` and followed by `:` at the point of use.
//
// Expands to the unmasked pseudo RISCV::Pseudo<OP>_<LMUL>.
4065#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
4066 RISCV::Pseudo##OP##_##LMUL
4067
// Expands to the masked pseudo RISCV::Pseudo<OP>_<LMUL>_MASK.
4068#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
4069 RISCV::Pseudo##OP##_##LMUL##_MASK
4070
// Unmasked and masked pseudo for one LMUL.
4071#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
4072 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
4073 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
4074
// Unmasked pseudos for MF8 through M4 (M8 is not included).
4075#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
4076 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
4077 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
4078 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
4079 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
4080 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
4081 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
4082
// Unmasked pseudos for MF8 through M8.
4083#define CASE_RVV_OPCODE_UNMASK(OP) \
4084 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
4085 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
4086
// Masked pseudos for MF8 through M4 (M8 is not included).
4087#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
4088 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
4089 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
4090 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
4091 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
4092 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
4093 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
4094
// Masked pseudos for MF8 through M8.
4095#define CASE_RVV_OPCODE_MASK(OP) \
4096 CASE_RVV_OPCODE_MASK_WIDEN(OP): \
4097 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
4098
// Unmasked and masked pseudos for MF8 through M4.
4099#define CASE_RVV_OPCODE_WIDEN(OP) \
4100 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
4101 case CASE_RVV_OPCODE_MASK_WIDEN(OP)
4102
// Unmasked and masked pseudos for MF8 through M8.
4103#define CASE_RVV_OPCODE(OP) \
4104 CASE_RVV_OPCODE_UNMASK(OP): \
4105 case CASE_RVV_OPCODE_MASK(OP)
4106// clang-format on
4107
4108// clang-format off
// `case` label helpers for the vector multiply-add pseudos. As with the
// CASE_RVV_OPCODE helpers, each expands to enumerators joined by ": case" and
// is written between a leading `case` and a trailing `:`.
//
// Expands to RISCV::PseudoV<OP>_<TYPE>_<LMUL>.
4109#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
4110 RISCV::PseudoV##OP##_##TYPE##_##LMUL
4111
// All LMULs, MF8 through M8, of one integer multiply-add pseudo.
4112#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
4113 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
4114 case CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
4115 case CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
4116 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
4117 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
4118 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
4119 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
4120
4121// VFMA instructions are SEW specific.
// Expands to RISCV::PseudoV<OP>_<TYPE>_<LMUL>_<SEW>.
4122#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
4123 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
4124
// LMULs M1 through M8 for one SEW.
4125#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
4126 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
4127 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
4128 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
4129 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
4130
// LMULs MF2 through M8 for one SEW.
4131#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
4132 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
4133 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
4134
// LMULs MF4 through M8 for one SEW.
4135#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
4136 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
4137 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
4138
// Every VV pseudo of OP: E16 (plus its _ALT form) from MF4, E32 from MF2 and
// E64 from M1.
4139#define CASE_VFMA_OPCODE_VV(OP) \
4140 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
4141 case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VV, E16): \
4142 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
4143 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
4144
// Every VFPR16/VFPR32/VFPR64 pseudo of OP, using the same SEW/LMUL pairing as
// CASE_VFMA_OPCODE_VV.
4145#define CASE_VFMA_SPLATS(OP) \
4146 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
4147 case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VFPR16, E16): \
4148 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
4149 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
4150// clang-format on
4151
4152bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
4153 unsigned &SrcOpIdx1,
4154 unsigned &SrcOpIdx2) const {
4155 const MCInstrDesc &Desc = MI.getDesc();
4156 if (!Desc.isCommutable())
4157 return false;
4158
4159 switch (MI.getOpcode()) {
4160 case RISCV::TH_MVEQZ:
4161 case RISCV::TH_MVNEZ:
4162 // We can't commute operands if operand 2 (i.e., rs1 in
4163 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
4164 // not valid as the in/out-operand 1).
4165 if (MI.getOperand(i: 2).getReg() == RISCV::X0)
4166 return false;
4167 // Operands 1 and 2 are commutable, if we switch the opcode.
4168 return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 1, CommutableOpIdx2: 2);
4169 case RISCV::QC_SELECTIEQ:
4170 case RISCV::QC_SELECTINE:
4171 case RISCV::QC_SELECTIIEQ:
4172 case RISCV::QC_SELECTIINE:
4173 return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 1, CommutableOpIdx2: 2);
4174 case RISCV::QC_MVEQ:
4175 case RISCV::QC_MVNE:
4176 case RISCV::QC_MVLT:
4177 case RISCV::QC_MVGE:
4178 case RISCV::QC_MVLTU:
4179 case RISCV::QC_MVGEU:
4180 case RISCV::QC_MVEQI:
4181 case RISCV::QC_MVNEI:
4182 case RISCV::QC_MVLTI:
4183 case RISCV::QC_MVGEI:
4184 case RISCV::QC_MVLTUI:
4185 case RISCV::QC_MVGEUI:
4186 return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 1, CommutableOpIdx2: 4);
4187 case RISCV::TH_MULA:
4188 case RISCV::TH_MULAW:
4189 case RISCV::TH_MULAH:
4190 case RISCV::TH_MULS:
4191 case RISCV::TH_MULSW:
4192 case RISCV::TH_MULSH:
4193 // Operands 2 and 3 are commutable.
4194 return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 2, CommutableOpIdx2: 3);
4195 case RISCV::PseudoCCMOVGPRNoX0:
4196 case RISCV::PseudoCCMOVGPR:
4197 // Operands 1 and 2 are commutable.
4198 return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 1, CommutableOpIdx2: 2);
4199 case CASE_RVV_OPCODE(VADD_VV):
4200 case CASE_RVV_OPCODE(VAND_VV):
4201 case CASE_RVV_OPCODE(VOR_VV):
4202 case CASE_RVV_OPCODE(VXOR_VV):
4203 case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
4204 case CASE_RVV_OPCODE_MASK(VMSNE_VV):
4205 case CASE_RVV_OPCODE(VMIN_VV):
4206 case CASE_RVV_OPCODE(VMINU_VV):
4207 case CASE_RVV_OPCODE(VMAX_VV):
4208 case CASE_RVV_OPCODE(VMAXU_VV):
4209 case CASE_RVV_OPCODE(VMUL_VV):
4210 case CASE_RVV_OPCODE(VMULH_VV):
4211 case CASE_RVV_OPCODE(VMULHU_VV):
4212 case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
4213 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
4214 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
4215 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
4216 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
4217 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
4218 case CASE_RVV_OPCODE(VABD_VV):
4219 case CASE_RVV_OPCODE(VABDU_VV):
4220 case CASE_RVV_OPCODE_WIDEN(VWABDA_VV):
4221 case CASE_RVV_OPCODE_WIDEN(VWABDAU_VV):
4222 case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
4223 case CASE_RVV_OPCODE(VSADD_VV):
4224 case CASE_RVV_OPCODE(VSADDU_VV):
4225 case CASE_RVV_OPCODE(VAADD_VV):
4226 case CASE_RVV_OPCODE(VAADDU_VV):
4227 case CASE_RVV_OPCODE(VSMUL_VV):
4228 case CASE_RVV_OPCODE_LMUL(VDOT4A_VV, MF2):
4229 case CASE_RVV_OPCODE_LMUL(VDOT4A_VV, M1):
4230 case CASE_RVV_OPCODE_LMUL(VDOT4A_VV, M2):
4231 case CASE_RVV_OPCODE_LMUL(VDOT4A_VV, M4):
4232 case CASE_RVV_OPCODE_LMUL(VDOT4A_VV, M8):
4233 case CASE_RVV_OPCODE_LMUL(VDOT4AU_VV, MF2):
4234 case CASE_RVV_OPCODE_LMUL(VDOT4AU_VV, M1):
4235 case CASE_RVV_OPCODE_LMUL(VDOT4AU_VV, M2):
4236 case CASE_RVV_OPCODE_LMUL(VDOT4AU_VV, M4):
4237 case CASE_RVV_OPCODE_LMUL(VDOT4AU_VV, M8):
4238 // Operands 2 and 3 are commutable.
4239 return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 2, CommutableOpIdx2: 3);
4240 case CASE_VFMA_SPLATS(FMADD):
4241 case CASE_VFMA_SPLATS(FMSUB):
4242 case CASE_VFMA_SPLATS(FMACC):
4243 case CASE_VFMA_SPLATS(FMSAC):
4244 case CASE_VFMA_SPLATS(FNMADD):
4245 case CASE_VFMA_SPLATS(FNMSUB):
4246 case CASE_VFMA_SPLATS(FNMACC):
4247 case CASE_VFMA_SPLATS(FNMSAC):
4248 case CASE_VFMA_OPCODE_VV(FMACC):
4249 case CASE_VFMA_OPCODE_VV(FMSAC):
4250 case CASE_VFMA_OPCODE_VV(FNMACC):
4251 case CASE_VFMA_OPCODE_VV(FNMSAC):
4252 case CASE_VMA_OPCODE_LMULS(MADD, VX):
4253 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
4254 case CASE_VMA_OPCODE_LMULS(MACC, VX):
4255 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
4256 case CASE_VMA_OPCODE_LMULS(MACC, VV):
4257 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
4258 // If the tail policy is undisturbed we can't commute.
4259 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
4260 if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
4261 1) == 0)
4262 return false;
4263
4264 // For these instructions we can only swap operand 1 and operand 3 by
4265 // changing the opcode.
4266 unsigned CommutableOpIdx1 = 1;
4267 unsigned CommutableOpIdx2 = 3;
4268 if (!fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1,
4269 CommutableOpIdx2))
4270 return false;
4271 return true;
4272 }
4273 case CASE_VFMA_OPCODE_VV(FMADD):
4274 case CASE_VFMA_OPCODE_VV(FMSUB):
4275 case CASE_VFMA_OPCODE_VV(FNMADD):
4276 case CASE_VFMA_OPCODE_VV(FNMSUB):
4277 case CASE_VMA_OPCODE_LMULS(MADD, VV):
4278 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
4279 // If the tail policy is undisturbed we can't commute.
4280 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
4281 if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
4282 1) == 0)
4283 return false;
4284
4285 // For these instructions we have more freedom. We can commute with the
4286 // other multiplicand or with the addend/subtrahend/minuend.
4287
4288 // Any fixed operand must be from source 1, 2 or 3.
4289 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
4290 return false;
4291 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
4292 return false;
4293
4294 // It both ops are fixed one must be the tied source.
4295 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
4296 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
4297 return false;
4298
4299 // Look for two different register operands assumed to be commutable
4300 // regardless of the FMA opcode. The FMA opcode is adjusted later if
4301 // needed.
4302 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
4303 SrcOpIdx2 == CommuteAnyOperandIndex) {
4304 // At least one of operands to be commuted is not specified and
4305 // this method is free to choose appropriate commutable operands.
4306 unsigned CommutableOpIdx1 = SrcOpIdx1;
4307 if (SrcOpIdx1 == SrcOpIdx2) {
4308 // Both of operands are not fixed. Set one of commutable
4309 // operands to the tied source.
4310 CommutableOpIdx1 = 1;
4311 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
4312 // Only one of the operands is not fixed.
4313 CommutableOpIdx1 = SrcOpIdx2;
4314 }
4315
4316 // CommutableOpIdx1 is well defined now. Let's choose another commutable
4317 // operand and assign its index to CommutableOpIdx2.
4318 unsigned CommutableOpIdx2;
4319 if (CommutableOpIdx1 != 1) {
4320 // If we haven't already used the tied source, we must use it now.
4321 CommutableOpIdx2 = 1;
4322 } else {
4323 Register Op1Reg = MI.getOperand(i: CommutableOpIdx1).getReg();
4324
4325 // The commuted operands should have different registers.
4326 // Otherwise, the commute transformation does not change anything and
4327 // is useless. We use this as a hint to make our decision.
4328 if (Op1Reg != MI.getOperand(i: 2).getReg())
4329 CommutableOpIdx2 = 2;
4330 else
4331 CommutableOpIdx2 = 3;
4332 }
4333
4334 // Assign the found pair of commutable indices to SrcOpIdx1 and
4335 // SrcOpIdx2 to return those values.
4336 if (!fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1,
4337 CommutableOpIdx2))
4338 return false;
4339 }
4340
4341 return true;
4342 }
4343 }
4344
4345 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
4346}
4347
4348// clang-format off
// Opcode-remapping helpers. Unlike the CASE_*_OPCODE label helpers, each of
// these expands to complete statements of the form
//   case <old pseudo>: Opc = <new pseudo>; break;
// so they are used directly inside a switch body, and a variable named `Opc`
// must be in scope.
//
// One remapping: PseudoV<OLDOP>_<TYPE>_<LMUL> -> PseudoV<NEWOP>_<TYPE>_<LMUL>.
4349#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
4350 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
4351 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
4352 break;
4353
// Remappings for every LMUL, MF8 through M8.
4354#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
4355 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
4356 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
4357 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
4358 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
4359 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
4360 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
4361 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
4362
4363// VFMA depends on SEW.
// One remapping where the pseudo names also carry a _<SEW> suffix.
4364#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
4365 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
4366 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
4367 break;
4368
// Remappings for LMULs M1 through M8 at one SEW.
4369#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
4370 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
4371 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
4372 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
4373 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
4374
// Remappings for LMULs MF2 through M8 at one SEW.
4375#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
4376 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
4377 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
4378
// Remappings for LMULs MF4 through M8 at one SEW.
4379#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
4380 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
4381 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
4382
// Remappings for every VV pseudo: E16 (plus the _ALT forms) from MF4, E32
// from MF2 and E64 from M1.
4383#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
4384 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
4385 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VV, E16) \
4386 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
4387 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
4388
// Remappings for every VFPR16/VFPR32/VFPR64 pseudo, with the same SEW/LMUL
// pairing as CASE_VFMA_CHANGE_OPCODE_VV.
4389#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
4390 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
4391 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VFPR16, E16) \
4392 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
4393 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
4394// clang-format on
4395
4396MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
4397 bool NewMI,
4398 unsigned OpIdx1,
4399 unsigned OpIdx2) const {
4400 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
4401 if (NewMI)
4402 return *MI.getParent()->getParent()->CloneMachineInstr(Orig: &MI);
4403 return MI;
4404 };
4405
4406 switch (MI.getOpcode()) {
4407 case RISCV::TH_MVEQZ:
4408 case RISCV::TH_MVNEZ: {
4409 auto &WorkingMI = cloneIfNew(MI);
4410 WorkingMI.setDesc(get(Opcode: MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
4411 : RISCV::TH_MVEQZ));
4412 return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, NewMI: false, OpIdx1,
4413 OpIdx2);
4414 }
4415 case RISCV::QC_SELECTIEQ:
4416 case RISCV::QC_SELECTINE:
4417 case RISCV::QC_SELECTIIEQ:
4418 case RISCV::QC_SELECTIINE:
4419 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
4420 case RISCV::QC_MVEQ:
4421 case RISCV::QC_MVNE:
4422 case RISCV::QC_MVLT:
4423 case RISCV::QC_MVGE:
4424 case RISCV::QC_MVLTU:
4425 case RISCV::QC_MVGEU:
4426 case RISCV::QC_MVEQI:
4427 case RISCV::QC_MVNEI:
4428 case RISCV::QC_MVLTI:
4429 case RISCV::QC_MVGEI:
4430 case RISCV::QC_MVLTUI:
4431 case RISCV::QC_MVGEUI: {
4432 auto &WorkingMI = cloneIfNew(MI);
4433 WorkingMI.setDesc(get(Opcode: getInverseXqcicmOpcode(Opcode: MI.getOpcode())));
4434 return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, NewMI: false, OpIdx1,
4435 OpIdx2);
4436 }
4437 case RISCV::PseudoCCMOVGPRNoX0:
4438 case RISCV::PseudoCCMOVGPR: {
4439 // CCMOV can be commuted by inverting the condition.
4440 unsigned BCC = MI.getOperand(i: MI.getNumExplicitOperands() - 3).getImm();
4441 BCC = RISCVCC::getInverseBranchOpcode(BCC);
4442 auto &WorkingMI = cloneIfNew(MI);
4443 WorkingMI.getOperand(i: MI.getNumExplicitOperands() - 3).setImm(BCC);
4444 return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, /*NewMI*/ false,
4445 OpIdx1, OpIdx2);
4446 }
4447 case CASE_VFMA_SPLATS(FMACC):
4448 case CASE_VFMA_SPLATS(FMADD):
4449 case CASE_VFMA_SPLATS(FMSAC):
4450 case CASE_VFMA_SPLATS(FMSUB):
4451 case CASE_VFMA_SPLATS(FNMACC):
4452 case CASE_VFMA_SPLATS(FNMADD):
4453 case CASE_VFMA_SPLATS(FNMSAC):
4454 case CASE_VFMA_SPLATS(FNMSUB):
4455 case CASE_VFMA_OPCODE_VV(FMACC):
4456 case CASE_VFMA_OPCODE_VV(FMSAC):
4457 case CASE_VFMA_OPCODE_VV(FNMACC):
4458 case CASE_VFMA_OPCODE_VV(FNMSAC):
4459 case CASE_VMA_OPCODE_LMULS(MADD, VX):
4460 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
4461 case CASE_VMA_OPCODE_LMULS(MACC, VX):
4462 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
4463 case CASE_VMA_OPCODE_LMULS(MACC, VV):
4464 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
4465 // It only make sense to toggle these between clobbering the
4466 // addend/subtrahend/minuend one of the multiplicands.
4467 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
4468 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
4469 unsigned Opc;
4470 switch (MI.getOpcode()) {
4471 default:
4472 llvm_unreachable("Unexpected opcode");
4473 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
4474 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
4475 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
4476 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
4477 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
4478 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
4479 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
4480 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
4481 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
4482 CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
4483 CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
4484 CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
4485 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
4486 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
4487 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
4488 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
4489 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
4490 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
4491 }
4492
4493 auto &WorkingMI = cloneIfNew(MI);
4494 WorkingMI.setDesc(get(Opcode: Opc));
4495 return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, /*NewMI=*/false,
4496 OpIdx1, OpIdx2);
4497 }
4498 case CASE_VFMA_OPCODE_VV(FMADD):
4499 case CASE_VFMA_OPCODE_VV(FMSUB):
4500 case CASE_VFMA_OPCODE_VV(FNMADD):
4501 case CASE_VFMA_OPCODE_VV(FNMSUB):
4502 case CASE_VMA_OPCODE_LMULS(MADD, VV):
4503 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
4504 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
4505 // If one of the operands, is the addend we need to change opcode.
4506 // Otherwise we're just swapping 2 of the multiplicands.
4507 if (OpIdx1 == 3 || OpIdx2 == 3) {
4508 unsigned Opc;
4509 switch (MI.getOpcode()) {
4510 default:
4511 llvm_unreachable("Unexpected opcode");
4512 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
4513 CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
4514 CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
4515 CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
4516 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
4517 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
4518 }
4519
4520 auto &WorkingMI = cloneIfNew(MI);
4521 WorkingMI.setDesc(get(Opcode: Opc));
4522 return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, /*NewMI=*/false,
4523 OpIdx1, OpIdx2);
4524 }
4525 // Let the default code handle it.
4526 break;
4527 }
4528 }
4529
4530 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
4531}
4532
4533#undef CASE_VMA_CHANGE_OPCODE_COMMON
4534#undef CASE_VMA_CHANGE_OPCODE_LMULS
4535#undef CASE_VFMA_CHANGE_OPCODE_COMMON
4536#undef CASE_VFMA_CHANGE_OPCODE_LMULS_M1
4537#undef CASE_VFMA_CHANGE_OPCODE_LMULS_MF2
4538#undef CASE_VFMA_CHANGE_OPCODE_LMULS_MF4
4539#undef CASE_VFMA_CHANGE_OPCODE_VV
4540#undef CASE_VFMA_CHANGE_OPCODE_SPLATS
4541
4542#undef CASE_RVV_OPCODE_UNMASK_LMUL
4543#undef CASE_RVV_OPCODE_MASK_LMUL
4544#undef CASE_RVV_OPCODE_LMUL
4545#undef CASE_RVV_OPCODE_UNMASK_WIDEN
4546#undef CASE_RVV_OPCODE_UNMASK
4547#undef CASE_RVV_OPCODE_MASK_WIDEN
4548#undef CASE_RVV_OPCODE_MASK
4549#undef CASE_RVV_OPCODE_WIDEN
4550#undef CASE_RVV_OPCODE
4551
4552#undef CASE_VMA_OPCODE_COMMON
4553#undef CASE_VMA_OPCODE_LMULS
4554#undef CASE_VFMA_OPCODE_COMMON
4555#undef CASE_VFMA_OPCODE_LMULS_M1
4556#undef CASE_VFMA_OPCODE_LMULS_MF2
4557#undef CASE_VFMA_OPCODE_LMULS_MF4
4558#undef CASE_VFMA_OPCODE_VV
4559#undef CASE_VFMA_SPLATS
4560
4561bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const {
4562 switch (MI.getOpcode()) {
4563 default:
4564 break;
4565 case RISCV::ADD:
4566 case RISCV::OR:
4567 case RISCV::XOR:
4568 // Normalize (so we hit the next if clause).
4569 // add/[x]or rd, zero, rs => add/[x]or rd, rs, zero
4570 if (MI.getOperand(i: 1).getReg() == RISCV::X0)
4571 commuteInstruction(MI);
4572 // add/[x]or rd, rs, zero => addi rd, rs, 0
4573 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4574 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4575 MI.setDesc(get(Opcode: RISCV::ADDI));
4576 return true;
4577 }
4578 // xor rd, rs, rs => addi rd, zero, 0
4579 if (MI.getOpcode() == RISCV::XOR &&
4580 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg()) {
4581 MI.getOperand(i: 1).setReg(RISCV::X0);
4582 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4583 MI.setDesc(get(Opcode: RISCV::ADDI));
4584 return true;
4585 }
4586 break;
4587 case RISCV::ORI:
4588 case RISCV::XORI:
4589 // [x]ori rd, zero, N => addi rd, zero, N
4590 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4591 MI.setDesc(get(Opcode: RISCV::ADDI));
4592 return true;
4593 }
4594 break;
4595 case RISCV::SUB:
4596 // sub rd, rs, zero => addi rd, rs, 0
4597 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4598 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4599 MI.setDesc(get(Opcode: RISCV::ADDI));
4600 return true;
4601 }
4602 break;
4603 case RISCV::SUBW:
4604 // subw rd, rs, zero => addiw rd, rs, 0
4605 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4606 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4607 MI.setDesc(get(Opcode: RISCV::ADDIW));
4608 return true;
4609 }
4610 break;
4611 case RISCV::ADDW:
4612 // Normalize (so we hit the next if clause).
4613 // addw rd, zero, rs => addw rd, rs, zero
4614 if (MI.getOperand(i: 1).getReg() == RISCV::X0)
4615 commuteInstruction(MI);
4616 // addw rd, rs, zero => addiw rd, rs, 0
4617 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4618 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4619 MI.setDesc(get(Opcode: RISCV::ADDIW));
4620 return true;
4621 }
4622 break;
4623 case RISCV::SH1ADD:
4624 case RISCV::SH1ADD_UW:
4625 case RISCV::SH2ADD:
4626 case RISCV::SH2ADD_UW:
4627 case RISCV::SH3ADD:
4628 case RISCV::SH3ADD_UW:
4629 // shNadd[.uw] rd, zero, rs => addi rd, rs, 0
4630 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4631 MI.removeOperand(OpNo: 1);
4632 MI.addOperand(Op: MachineOperand::CreateImm(Val: 0));
4633 MI.setDesc(get(Opcode: RISCV::ADDI));
4634 return true;
4635 }
4636 // shNadd[.uw] rd, rs, zero => slli[.uw] rd, rs, N
4637 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4638 MI.removeOperand(OpNo: 2);
4639 unsigned Opc = MI.getOpcode();
4640 if (Opc == RISCV::SH1ADD_UW || Opc == RISCV::SH2ADD_UW ||
4641 Opc == RISCV::SH3ADD_UW) {
4642 MI.addOperand(Op: MachineOperand::CreateImm(Val: getSHXADDUWShiftAmount(Opc)));
4643 MI.setDesc(get(Opcode: RISCV::SLLI_UW));
4644 return true;
4645 }
4646 MI.addOperand(Op: MachineOperand::CreateImm(Val: getSHXADDShiftAmount(Opc)));
4647 MI.setDesc(get(Opcode: RISCV::SLLI));
4648 return true;
4649 }
4650 break;
4651 case RISCV::AND:
4652 case RISCV::MUL:
4653 case RISCV::MULH:
4654 case RISCV::MULHSU:
4655 case RISCV::MULHU:
4656 case RISCV::MULW:
4657 // and rd, zero, rs => addi rd, zero, 0
4658 // mul* rd, zero, rs => addi rd, zero, 0
4659 // and rd, rs, zero => addi rd, zero, 0
4660 // mul* rd, rs, zero => addi rd, zero, 0
4661 if (MI.getOperand(i: 1).getReg() == RISCV::X0 ||
4662 MI.getOperand(i: 2).getReg() == RISCV::X0) {
4663 MI.getOperand(i: 1).setReg(RISCV::X0);
4664 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4665 MI.setDesc(get(Opcode: RISCV::ADDI));
4666 return true;
4667 }
4668 break;
4669 case RISCV::ANDI:
4670 // andi rd, zero, C => addi rd, zero, 0
4671 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4672 MI.getOperand(i: 2).setImm(0);
4673 MI.setDesc(get(Opcode: RISCV::ADDI));
4674 return true;
4675 }
4676 break;
4677 case RISCV::SLL:
4678 case RISCV::SRL:
4679 case RISCV::SRA:
4680 // shift rd, zero, rs => addi rd, zero, 0
4681 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4682 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4683 MI.setDesc(get(Opcode: RISCV::ADDI));
4684 return true;
4685 }
4686 // shift rd, rs, zero => addi rd, rs, 0
4687 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4688 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4689 MI.setDesc(get(Opcode: RISCV::ADDI));
4690 return true;
4691 }
4692 break;
4693 case RISCV::SLLW:
4694 case RISCV::SRLW:
4695 case RISCV::SRAW:
4696 // shiftw rd, zero, rs => addi rd, zero, 0
4697 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4698 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4699 MI.setDesc(get(Opcode: RISCV::ADDI));
4700 return true;
4701 }
4702 break;
4703 case RISCV::SLLI:
4704 case RISCV::SRLI:
4705 case RISCV::SRAI:
4706 case RISCV::SLLIW:
4707 case RISCV::SRLIW:
4708 case RISCV::SRAIW:
4709 case RISCV::SLLI_UW:
4710 // shiftimm rd, zero, N => addi rd, zero, 0
4711 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4712 MI.getOperand(i: 2).setImm(0);
4713 MI.setDesc(get(Opcode: RISCV::ADDI));
4714 return true;
4715 }
4716 break;
4717 case RISCV::SLTU:
4718 case RISCV::ADD_UW:
4719 // sltu rd, zero, zero => addi rd, zero, 0
4720 // add.uw rd, zero, zero => addi rd, zero, 0
4721 if (MI.getOperand(i: 1).getReg() == RISCV::X0 &&
4722 MI.getOperand(i: 2).getReg() == RISCV::X0) {
4723 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4724 MI.setDesc(get(Opcode: RISCV::ADDI));
4725 return true;
4726 }
4727 // add.uw rd, zero, rs => addi rd, rs, 0
4728 if (MI.getOpcode() == RISCV::ADD_UW &&
4729 MI.getOperand(i: 1).getReg() == RISCV::X0) {
4730 MI.removeOperand(OpNo: 1);
4731 MI.addOperand(Op: MachineOperand::CreateImm(Val: 0));
4732 MI.setDesc(get(Opcode: RISCV::ADDI));
4733 }
4734 break;
4735 case RISCV::SLTIU:
4736 // sltiu rd, zero, NZC => addi rd, zero, 1
4737 // sltiu rd, zero, 0 => addi rd, zero, 0
4738 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4739 MI.getOperand(i: 2).setImm(MI.getOperand(i: 2).getImm() != 0);
4740 MI.setDesc(get(Opcode: RISCV::ADDI));
4741 return true;
4742 }
4743 break;
4744 case RISCV::SEXT_H:
4745 case RISCV::SEXT_B:
4746 case RISCV::ZEXT_H_RV32:
4747 case RISCV::ZEXT_H_RV64:
4748 // sext.[hb] rd, zero => addi rd, zero, 0
4749 // zext.h rd, zero => addi rd, zero, 0
4750 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4751 MI.addOperand(Op: MachineOperand::CreateImm(Val: 0));
4752 MI.setDesc(get(Opcode: RISCV::ADDI));
4753 return true;
4754 }
4755 break;
4756 case RISCV::MIN:
4757 case RISCV::MINU:
4758 case RISCV::MAX:
4759 case RISCV::MAXU:
4760 // min|max rd, rs, rs => addi rd, rs, 0
4761 if (MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg()) {
4762 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4763 MI.setDesc(get(Opcode: RISCV::ADDI));
4764 return true;
4765 }
4766 break;
4767 case RISCV::BEQ:
4768 case RISCV::BNE:
4769 // b{eq,ne} zero, rs, imm => b{eq,ne} rs, zero, imm
4770 if (MI.getOperand(i: 0).getReg() == RISCV::X0) {
4771 MachineOperand MO0 = MI.getOperand(i: 0);
4772 MI.removeOperand(OpNo: 0);
4773 MI.insert(InsertBefore: MI.operands_begin() + 1, Ops: {MO0});
4774 }
4775 break;
4776 case RISCV::BLTU:
4777 // bltu zero, rs, imm => bne rs, zero, imm
4778 if (MI.getOperand(i: 0).getReg() == RISCV::X0) {
4779 MachineOperand MO0 = MI.getOperand(i: 0);
4780 MI.removeOperand(OpNo: 0);
4781 MI.insert(InsertBefore: MI.operands_begin() + 1, Ops: {MO0});
4782 MI.setDesc(get(Opcode: RISCV::BNE));
4783 }
4784 break;
4785 case RISCV::BGEU:
4786 // bgeu zero, rs, imm => beq rs, zero, imm
4787 if (MI.getOperand(i: 0).getReg() == RISCV::X0) {
4788 MachineOperand MO0 = MI.getOperand(i: 0);
4789 MI.removeOperand(OpNo: 0);
4790 MI.insert(InsertBefore: MI.operands_begin() + 1, Ops: {MO0});
4791 MI.setDesc(get(Opcode: RISCV::BEQ));
4792 }
4793 break;
4794 }
4795 return false;
4796}
4797
// clang-format off

// Helper macros for convertToThreeAddress.
//
// The *_OPCODE_* macros expand to a list of case labels for the _TIED pseudos.
// The first label carries no `case` keyword and the last no `:`, so they are
// written as `case MACRO(OP):`. The *_CHANGE_OPCODE_* macros expand to complete
// `case ..._TIED: NewOpc = <same opcode without _TIED>; break;` statements and
// therefore require a variable named NewOpc at the point of use.

// Integer widening ops: one pseudo per LMUL.
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
  RISCV::PseudoV##OP##_##LMUL##_TIED

#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
  case RISCV::PseudoV##OP##_##LMUL##_TIED: \
    NewOpc = RISCV::PseudoV##OP##_##LMUL; \
    break;

#define CASE_WIDEOP_OPCODE_LMULS(OP) \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M4)

#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)

// FP widening ops may be SEW aware, so their pseudos are keyed by both LMUL
// and SEW.
#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
  RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED

#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
  case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
    NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
    break;

// The _ALT lists cover only the E16 pseudos (MF4 through M4).
#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP) \
  CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16)

#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)

// The full lists are the E16 pseudos above plus the E32 pseudos (MF2 through
// M4). The order of case labels is irrelevant, so the E16 part is shared.
#define CASE_FP_WIDEOP_OPCODE_LMULS(OP) \
  CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32)

#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32)
// clang-format on
4868
4869MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
4870 LiveVariables *LV,
4871 LiveIntervals *LIS) const {
4872 MachineInstrBuilder MIB;
4873 switch (MI.getOpcode()) {
4874 default:
4875 return nullptr;
4876 case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV):
4877 case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWSUB_ALT_WV):
4878 case CASE_FP_WIDEOP_OPCODE_LMULS(FWADD_WV):
4879 case CASE_FP_WIDEOP_OPCODE_LMULS(FWSUB_WV): {
4880 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
4881 MI.getNumExplicitOperands() == 7 &&
4882 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
4883 // If the tail policy is undisturbed we can't convert.
4884 if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
4885 1) == 0)
4886 return nullptr;
4887 // clang-format off
4888 unsigned NewOpc;
4889 switch (MI.getOpcode()) {
4890 default:
4891 llvm_unreachable("Unexpected opcode");
4892 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
4893 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
4894 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWADD_ALT_WV)
4895 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWSUB_ALT_WV)
4896 }
4897 // clang-format on
4898
4899 MachineBasicBlock &MBB = *MI.getParent();
4900 MIB = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
4901 .add(MO: MI.getOperand(i: 0))
4902 .addReg(RegNo: MI.getOperand(i: 0).getReg(), Flags: RegState::Undef)
4903 .add(MO: MI.getOperand(i: 1))
4904 .add(MO: MI.getOperand(i: 2))
4905 .add(MO: MI.getOperand(i: 3))
4906 .add(MO: MI.getOperand(i: 4))
4907 .add(MO: MI.getOperand(i: 5))
4908 .add(MO: MI.getOperand(i: 6));
4909 break;
4910 }
4911 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
4912 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
4913 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
4914 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
4915 // If the tail policy is undisturbed we can't convert.
4916 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
4917 MI.getNumExplicitOperands() == 6);
4918 if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
4919 1) == 0)
4920 return nullptr;
4921
4922 // clang-format off
4923 unsigned NewOpc;
4924 switch (MI.getOpcode()) {
4925 default:
4926 llvm_unreachable("Unexpected opcode");
4927 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
4928 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
4929 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
4930 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
4931 }
4932 // clang-format on
4933
4934 MachineBasicBlock &MBB = *MI.getParent();
4935 MIB = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
4936 .add(MO: MI.getOperand(i: 0))
4937 .addReg(RegNo: MI.getOperand(i: 0).getReg(), Flags: RegState::Undef)
4938 .add(MO: MI.getOperand(i: 1))
4939 .add(MO: MI.getOperand(i: 2))
4940 .add(MO: MI.getOperand(i: 3))
4941 .add(MO: MI.getOperand(i: 4))
4942 .add(MO: MI.getOperand(i: 5));
4943 break;
4944 }
4945 }
4946 MIB.copyImplicitOps(OtherMI: MI);
4947
4948 if (LV) {
4949 unsigned NumOps = MI.getNumOperands();
4950 for (unsigned I = 1; I < NumOps; ++I) {
4951 MachineOperand &Op = MI.getOperand(i: I);
4952 if (Op.isReg() && Op.isKill())
4953 LV->replaceKillInstruction(Reg: Op.getReg(), OldMI&: MI, NewMI&: *MIB);
4954 }
4955 }
4956
4957 if (LIS) {
4958 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, NewMI&: *MIB);
4959
4960 if (MI.getOperand(i: 0).isEarlyClobber()) {
4961 // Use operand 1 was tied to early-clobber def operand 0, so its live
4962 // interval could have ended at an early-clobber slot. Now they are not
4963 // tied we need to update it to the normal register slot.
4964 LiveInterval &LI = LIS->getInterval(Reg: MI.getOperand(i: 1).getReg());
4965 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
4966 if (S->end == Idx.getRegSlot(EC: true))
4967 S->end = Idx.getRegSlot();
4968 }
4969 }
4970
4971 return MIB;
4972}
4973
4974#undef CASE_WIDEOP_OPCODE_COMMON
4975#undef CASE_WIDEOP_OPCODE_LMULS
4976#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
4977#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
4978#undef CASE_FP_WIDEOP_OPCODE_COMMON
4979#undef CASE_FP_WIDEOP_OPCODE_LMULS
4980#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
4981#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
4982
4983void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
4984 MachineBasicBlock::iterator II, const DebugLoc &DL,
4985 Register DestReg, uint32_t Amount,
4986 MachineInstr::MIFlag Flag) const {
4987 MachineRegisterInfo &MRI = MF.getRegInfo();
4988 if (llvm::has_single_bit(Value: Amount)) {
4989 uint32_t ShiftAmount = Log2_32(Value: Amount);
4990 if (ShiftAmount == 0)
4991 return;
4992 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg)
4993 .addReg(RegNo: DestReg, Flags: RegState::Kill)
4994 .addImm(Val: ShiftAmount)
4995 .setMIFlag(Flag);
4996 } else if (int ShXAmount, ShiftAmount;
4997 STI.hasShlAdd(ShAmt: 3) &&
4998 (ShXAmount = isShifted359(Value: Amount, Shift&: ShiftAmount)) != 0) {
4999 // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
5000 unsigned Opc;
5001 switch (ShXAmount) {
5002 case 1:
5003 Opc = RISCV::SH1ADD;
5004 break;
5005 case 2:
5006 Opc = RISCV::SH2ADD;
5007 break;
5008 case 3:
5009 Opc = RISCV::SH3ADD;
5010 break;
5011 default:
5012 llvm_unreachable("unexpected result of isShifted359");
5013 }
5014 if (ShiftAmount)
5015 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg)
5016 .addReg(RegNo: DestReg, Flags: RegState::Kill)
5017 .addImm(Val: ShiftAmount)
5018 .setMIFlag(Flag);
5019 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: Opc), DestReg)
5020 .addReg(RegNo: DestReg, Flags: RegState::Kill)
5021 .addReg(RegNo: DestReg)
5022 .setMIFlag(Flag);
5023 } else if (llvm::has_single_bit(Value: Amount - 1)) {
5024 Register ScaledRegister = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
5025 uint32_t ShiftAmount = Log2_32(Value: Amount - 1);
5026 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg: ScaledRegister)
5027 .addReg(RegNo: DestReg)
5028 .addImm(Val: ShiftAmount)
5029 .setMIFlag(Flag);
5030 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::ADD), DestReg)
5031 .addReg(RegNo: ScaledRegister, Flags: RegState::Kill)
5032 .addReg(RegNo: DestReg, Flags: RegState::Kill)
5033 .setMIFlag(Flag);
5034 } else if (llvm::has_single_bit(Value: Amount + 1)) {
5035 Register ScaledRegister = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
5036 uint32_t ShiftAmount = Log2_32(Value: Amount + 1);
5037 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg: ScaledRegister)
5038 .addReg(RegNo: DestReg)
5039 .addImm(Val: ShiftAmount)
5040 .setMIFlag(Flag);
5041 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SUB), DestReg)
5042 .addReg(RegNo: ScaledRegister, Flags: RegState::Kill)
5043 .addReg(RegNo: DestReg, Flags: RegState::Kill)
5044 .setMIFlag(Flag);
5045 } else if (STI.hasStdExtZmmul()) {
5046 Register N = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
5047 movImm(MBB, MBBI: II, DL, DstReg: N, Val: Amount, Flag);
5048 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::MUL), DestReg)
5049 .addReg(RegNo: DestReg, Flags: RegState::Kill)
5050 .addReg(RegNo: N, Flags: RegState::Kill)
5051 .setMIFlag(Flag);
5052 } else {
5053 Register Acc;
5054 uint32_t PrevShiftAmount = 0;
5055 for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
5056 if (Amount & (1U << ShiftAmount)) {
5057 if (ShiftAmount)
5058 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg)
5059 .addReg(RegNo: DestReg, Flags: RegState::Kill)
5060 .addImm(Val: ShiftAmount - PrevShiftAmount)
5061 .setMIFlag(Flag);
5062 if (Amount >> (ShiftAmount + 1)) {
5063 // If we don't have an accmulator yet, create it and copy DestReg.
5064 if (!Acc) {
5065 Acc = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
5066 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: TargetOpcode::COPY), DestReg: Acc)
5067 .addReg(RegNo: DestReg)
5068 .setMIFlag(Flag);
5069 } else {
5070 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::ADD), DestReg: Acc)
5071 .addReg(RegNo: Acc, Flags: RegState::Kill)
5072 .addReg(RegNo: DestReg)
5073 .setMIFlag(Flag);
5074 }
5075 }
5076 PrevShiftAmount = ShiftAmount;
5077 }
5078 }
5079 assert(Acc && "Expected valid accumulator");
5080 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::ADD), DestReg)
5081 .addReg(RegNo: DestReg, Flags: RegState::Kill)
5082 .addReg(RegNo: Acc, Flags: RegState::Kill)
5083 .setMIFlag(Flag);
5084 }
5085}
5086
5087ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
5088RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
5089 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
5090 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
5091 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
5092 return ArrayRef(TargetFlags);
5093}
5094
5095unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
5096 return OptLevel >= CodeGenOptLevel::Aggressive
5097 ? STI.getTailDupAggressiveThreshold()
5098 : 2;
5099}
5100
5101bool RISCV::isRVVSpill(const MachineInstr &MI) {
5102 // RVV lacks any support for immediate addressing for stack addresses, so be
5103 // conservative.
5104 unsigned Opcode = MI.getOpcode();
5105 if (!RISCVVPseudosTable::getPseudoInfo(Pseudo: Opcode) &&
5106 !getLMULForRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
5107 return false;
5108 return true;
5109}
5110
5111/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
5112bool RISCV::isVectorCopy(const TargetRegisterInfo *TRI,
5113 const MachineInstr &MI) {
5114 return MI.isCopy() && MI.getOperand(i: 0).getReg().isPhysical() &&
5115 RISCVRegisterInfo::isRVVRegClass(
5116 RC: TRI->getMinimalPhysRegClass(Reg: MI.getOperand(i: 0).getReg()));
5117}
5118
5119std::optional<std::pair<unsigned, unsigned>>
5120RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
5121 switch (Opcode) {
5122 default:
5123 return std::nullopt;
5124 case RISCV::PseudoVSPILL2_M1:
5125 case RISCV::PseudoVRELOAD2_M1:
5126 return std::make_pair(x: 2u, y: 1u);
5127 case RISCV::PseudoVSPILL2_M2:
5128 case RISCV::PseudoVRELOAD2_M2:
5129 return std::make_pair(x: 2u, y: 2u);
5130 case RISCV::PseudoVSPILL2_M4:
5131 case RISCV::PseudoVRELOAD2_M4:
5132 return std::make_pair(x: 2u, y: 4u);
5133 case RISCV::PseudoVSPILL3_M1:
5134 case RISCV::PseudoVRELOAD3_M1:
5135 return std::make_pair(x: 3u, y: 1u);
5136 case RISCV::PseudoVSPILL3_M2:
5137 case RISCV::PseudoVRELOAD3_M2:
5138 return std::make_pair(x: 3u, y: 2u);
5139 case RISCV::PseudoVSPILL4_M1:
5140 case RISCV::PseudoVRELOAD4_M1:
5141 return std::make_pair(x: 4u, y: 1u);
5142 case RISCV::PseudoVSPILL4_M2:
5143 case RISCV::PseudoVRELOAD4_M2:
5144 return std::make_pair(x: 4u, y: 2u);
5145 case RISCV::PseudoVSPILL5_M1:
5146 case RISCV::PseudoVRELOAD5_M1:
5147 return std::make_pair(x: 5u, y: 1u);
5148 case RISCV::PseudoVSPILL6_M1:
5149 case RISCV::PseudoVRELOAD6_M1:
5150 return std::make_pair(x: 6u, y: 1u);
5151 case RISCV::PseudoVSPILL7_M1:
5152 case RISCV::PseudoVRELOAD7_M1:
5153 return std::make_pair(x: 7u, y: 1u);
5154 case RISCV::PseudoVSPILL8_M1:
5155 case RISCV::PseudoVRELOAD8_M1:
5156 return std::make_pair(x: 8u, y: 1u);
5157 }
5158}
5159
5160bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
5161 int16_t MI1FrmOpIdx =
5162 RISCV::getNamedOperandIdx(Opcode: MI1.getOpcode(), Name: RISCV::OpName::frm);
5163 int16_t MI2FrmOpIdx =
5164 RISCV::getNamedOperandIdx(Opcode: MI2.getOpcode(), Name: RISCV::OpName::frm);
5165 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
5166 return false;
5167 MachineOperand FrmOp1 = MI1.getOperand(i: MI1FrmOpIdx);
5168 MachineOperand FrmOp2 = MI2.getOperand(i: MI2FrmOpIdx);
5169 return FrmOp1.getImm() == FrmOp2.getImm();
5170}
5171
5172std::optional<unsigned>
5173RISCV::getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW) {
5174 switch (Opcode) {
5175 default:
5176 return std::nullopt;
5177
5178 // 11.6. Vector Single-Width Shift Instructions
5179 case RISCV::VSLL_VX:
5180 case RISCV::VSRL_VX:
5181 case RISCV::VSRA_VX:
5182 // 12.4. Vector Single-Width Scaling Shift Instructions
5183 case RISCV::VSSRL_VX:
5184 case RISCV::VSSRA_VX:
5185 // Zvbb
5186 case RISCV::VROL_VX:
5187 case RISCV::VROR_VX:
5188 // Only the low lg2(SEW) bits of the shift-amount value are used.
5189 return Log2SEW;
5190
5191 // 11.7 Vector Narrowing Integer Right Shift Instructions
5192 case RISCV::VNSRL_WX:
5193 case RISCV::VNSRA_WX:
5194 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
5195 case RISCV::VNCLIPU_WX:
5196 case RISCV::VNCLIP_WX:
5197 // Zvbb
5198 case RISCV::VWSLL_VX:
5199 // Only the low lg2(2*SEW) bits of the shift-amount value are used.
5200 return Log2SEW + 1;
5201
5202 // 11.1. Vector Single-Width Integer Add and Subtract
5203 case RISCV::VADD_VX:
5204 case RISCV::VSUB_VX:
5205 case RISCV::VRSUB_VX:
5206 // 11.2. Vector Widening Integer Add/Subtract
5207 case RISCV::VWADDU_VX:
5208 case RISCV::VWSUBU_VX:
5209 case RISCV::VWADD_VX:
5210 case RISCV::VWSUB_VX:
5211 case RISCV::VWADDU_WX:
5212 case RISCV::VWSUBU_WX:
5213 case RISCV::VWADD_WX:
5214 case RISCV::VWSUB_WX:
5215 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
5216 case RISCV::VADC_VXM:
5217 case RISCV::VADC_VIM:
5218 case RISCV::VMADC_VXM:
5219 case RISCV::VMADC_VIM:
5220 case RISCV::VMADC_VX:
5221 case RISCV::VSBC_VXM:
5222 case RISCV::VMSBC_VXM:
5223 case RISCV::VMSBC_VX:
5224 // 11.5 Vector Bitwise Logical Instructions
5225 case RISCV::VAND_VX:
5226 case RISCV::VOR_VX:
5227 case RISCV::VXOR_VX:
5228 // 11.8. Vector Integer Compare Instructions
5229 case RISCV::VMSEQ_VX:
5230 case RISCV::VMSNE_VX:
5231 case RISCV::VMSLTU_VX:
5232 case RISCV::VMSLT_VX:
5233 case RISCV::VMSLEU_VX:
5234 case RISCV::VMSLE_VX:
5235 case RISCV::VMSGTU_VX:
5236 case RISCV::VMSGT_VX:
5237 // 11.9. Vector Integer Min/Max Instructions
5238 case RISCV::VMINU_VX:
5239 case RISCV::VMIN_VX:
5240 case RISCV::VMAXU_VX:
5241 case RISCV::VMAX_VX:
5242 // 11.10. Vector Single-Width Integer Multiply Instructions
5243 case RISCV::VMUL_VX:
5244 case RISCV::VMULH_VX:
5245 case RISCV::VMULHU_VX:
5246 case RISCV::VMULHSU_VX:
5247 // 11.11. Vector Integer Divide Instructions
5248 case RISCV::VDIVU_VX:
5249 case RISCV::VDIV_VX:
5250 case RISCV::VREMU_VX:
5251 case RISCV::VREM_VX:
5252 // 11.12. Vector Widening Integer Multiply Instructions
5253 case RISCV::VWMUL_VX:
5254 case RISCV::VWMULU_VX:
5255 case RISCV::VWMULSU_VX:
5256 // 11.13. Vector Single-Width Integer Multiply-Add Instructions
5257 case RISCV::VMACC_VX:
5258 case RISCV::VNMSAC_VX:
5259 case RISCV::VMADD_VX:
5260 case RISCV::VNMSUB_VX:
5261 // 11.14. Vector Widening Integer Multiply-Add Instructions
5262 case RISCV::VWMACCU_VX:
5263 case RISCV::VWMACC_VX:
5264 case RISCV::VWMACCSU_VX:
5265 case RISCV::VWMACCUS_VX:
5266 // 11.15. Vector Integer Merge Instructions
5267 case RISCV::VMERGE_VXM:
5268 // 11.16. Vector Integer Move Instructions
5269 case RISCV::VMV_V_X:
5270 // 12.1. Vector Single-Width Saturating Add and Subtract
5271 case RISCV::VSADDU_VX:
5272 case RISCV::VSADD_VX:
5273 case RISCV::VSSUBU_VX:
5274 case RISCV::VSSUB_VX:
5275 // 12.2. Vector Single-Width Averaging Add and Subtract
5276 case RISCV::VAADDU_VX:
5277 case RISCV::VAADD_VX:
5278 case RISCV::VASUBU_VX:
5279 case RISCV::VASUB_VX:
5280 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
5281 case RISCV::VSMUL_VX:
5282 // 16.1. Integer Scalar Move Instructions
5283 case RISCV::VMV_S_X:
5284 // Zvbb
5285 case RISCV::VANDN_VX:
5286 return 1U << Log2SEW;
5287 }
5288}
5289
5290unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
5291 const RISCVVPseudosTable::PseudoInfo *RVV =
5292 RISCVVPseudosTable::getPseudoInfo(Pseudo: RVVPseudoOpcode);
5293 if (!RVV)
5294 return 0;
5295 return RVV->BaseInstr;
5296}
5297
5298unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) {
5299 unsigned DestEEW =
5300 (Desc.TSFlags & RISCVII::DestEEWMask) >> RISCVII::DestEEWShift;
5301 // EEW = 1
5302 if (DestEEW == 0)
5303 return 0;
5304 // EEW = SEW * n
5305 unsigned Scaled = Log2SEW + (DestEEW - 1);
5306 assert(Scaled >= 3 && Scaled <= 6);
5307 return Scaled;
5308}
5309
5310static std::optional<int64_t> getEffectiveImm(const MachineOperand &MO) {
5311 assert(MO.isImm() || MO.getReg().isVirtual());
5312 if (MO.isImm())
5313 return MO.getImm();
5314 const MachineInstr *Def =
5315 MO.getParent()->getMF()->getRegInfo().getVRegDef(Reg: MO.getReg());
5316 int64_t Imm;
5317 if (isLoadImm(MI: Def, Imm))
5318 return Imm;
5319 return std::nullopt;
5320}
5321
5322/// Given two VL operands, do we know that LHS <= RHS? Must be used in SSA form.
5323bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
5324 assert((LHS.isImm() || LHS.getParent()->getMF()->getRegInfo().isSSA()) &&
5325 (RHS.isImm() || RHS.getParent()->getMF()->getRegInfo().isSSA()));
5326 if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
5327 LHS.getReg() == RHS.getReg())
5328 return true;
5329 if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
5330 return true;
5331 if (LHS.isImm() && LHS.getImm() == 0)
5332 return true;
5333 if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
5334 return false;
5335 std::optional<int64_t> LHSImm = getEffectiveImm(MO: LHS),
5336 RHSImm = getEffectiveImm(MO: RHS);
5337 if (!LHSImm || !RHSImm)
5338 return false;
5339 return LHSImm <= RHSImm;
5340}
5341
5342namespace {
5343class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5344 const MachineInstr *LHS;
5345 const MachineInstr *RHS;
5346 SmallVector<MachineOperand, 3> Cond;
5347
5348public:
5349 RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
5350 const SmallVectorImpl<MachineOperand> &Cond)
5351 : LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}
5352
5353 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5354 // Make the instructions for loop control be placed in stage 0.
5355 // The predecessors of LHS/RHS are considered by the caller.
5356 if (LHS && MI == LHS)
5357 return true;
5358 if (RHS && MI == RHS)
5359 return true;
5360 return false;
5361 }
5362
5363 std::optional<bool> createTripCountGreaterCondition(
5364 int TC, MachineBasicBlock &MBB,
5365 SmallVectorImpl<MachineOperand> &CondParam) override {
5366 // A branch instruction will be inserted as "if (Cond) goto epilogue".
5367 // Cond is normalized for such use.
5368 // The predecessors of the branch are assumed to have already been inserted.
5369 CondParam = Cond;
5370 return {};
5371 }
5372
5373 void setPreheader(MachineBasicBlock *NewPreheader) override {}
5374
5375 void adjustTripCount(int TripCountAdjust) override {}
5376};
5377} // namespace
5378
5379std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5380RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
5381 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
5382 SmallVector<MachineOperand, 4> Cond;
5383 if (analyzeBranch(MBB&: *LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
5384 return nullptr;
5385
5386 // Infinite loops are not supported
5387 if (TBB == LoopBB && FBB == LoopBB)
5388 return nullptr;
5389
5390 // Must be conditional branch
5391 if (FBB == nullptr)
5392 return nullptr;
5393
5394 assert((TBB == LoopBB || FBB == LoopBB) &&
5395 "The Loop must be a single-basic-block loop");
5396
5397 // Normalization for createTripCountGreaterCondition()
5398 if (TBB == LoopBB)
5399 reverseBranchCondition(Cond);
5400
5401 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
5402 auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * {
5403 if (!Op.isReg())
5404 return nullptr;
5405 Register Reg = Op.getReg();
5406 if (!Reg.isVirtual())
5407 return nullptr;
5408 return MRI.getVRegDef(Reg);
5409 };
5410
5411 const MachineInstr *LHS = FindRegDef(Cond[1]);
5412 const MachineInstr *RHS = FindRegDef(Cond[2]);
5413 if (LHS && LHS->isPHI())
5414 return nullptr;
5415 if (RHS && RHS->isPHI())
5416 return nullptr;
5417
5418 return std::make_unique<RISCVPipelinerLoopInfo>(args&: LHS, args&: RHS, args&: Cond);
5419}
5420
5421// FIXME: We should remove this if we have a default generic scheduling model.
5422bool RISCVInstrInfo::isHighLatencyDef(int Opc) const {
5423 unsigned RVVMCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: Opc);
5424 Opc = RVVMCOpcode ? RVVMCOpcode : Opc;
5425 switch (Opc) {
5426 default:
5427 return false;
5428 // Integer div/rem.
5429 case RISCV::DIV:
5430 case RISCV::DIVW:
5431 case RISCV::DIVU:
5432 case RISCV::DIVUW:
5433 case RISCV::REM:
5434 case RISCV::REMW:
5435 case RISCV::REMU:
5436 case RISCV::REMUW:
5437 // Floating-point div/sqrt.
5438 case RISCV::FDIV_H:
5439 case RISCV::FDIV_S:
5440 case RISCV::FDIV_D:
5441 case RISCV::FDIV_H_INX:
5442 case RISCV::FDIV_S_INX:
5443 case RISCV::FDIV_D_INX:
5444 case RISCV::FDIV_D_IN32X:
5445 case RISCV::FSQRT_H:
5446 case RISCV::FSQRT_S:
5447 case RISCV::FSQRT_D:
5448 case RISCV::FSQRT_H_INX:
5449 case RISCV::FSQRT_S_INX:
5450 case RISCV::FSQRT_D_INX:
5451 case RISCV::FSQRT_D_IN32X:
5452 // Vector integer div/rem
5453 case RISCV::VDIV_VV:
5454 case RISCV::VDIV_VX:
5455 case RISCV::VDIVU_VV:
5456 case RISCV::VDIVU_VX:
5457 case RISCV::VREM_VV:
5458 case RISCV::VREM_VX:
5459 case RISCV::VREMU_VV:
5460 case RISCV::VREMU_VX:
5461 // Vector floating-point div/sqrt.
5462 case RISCV::VFDIV_VV:
5463 case RISCV::VFDIV_VF:
5464 case RISCV::VFRDIV_VF:
5465 case RISCV::VFSQRT_V:
5466 case RISCV::VFRSQRT7_V:
5467 return true;
5468 }
5469}
5470
5471bool RISCVInstrInfo::isVRegCopy(const MachineInstr *MI, unsigned LMul) const {
5472 if (MI->getOpcode() != TargetOpcode::COPY)
5473 return false;
5474 const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
5475 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
5476
5477 Register DstReg = MI->getOperand(i: 0).getReg();
5478 const TargetRegisterClass *RC = DstReg.isVirtual()
5479 ? MRI.getRegClass(Reg: DstReg)
5480 : TRI->getMinimalPhysRegClass(Reg: DstReg);
5481
5482 if (!RISCVRegisterInfo::isRVVRegClass(RC))
5483 return false;
5484
5485 if (!LMul)
5486 return true;
5487
5488 // TODO: Perhaps we could distinguish segment register classes (e.g. VRN3M2)
5489 // in the future.
5490 auto [RCLMul, RCFractional] =
5491 RISCVVType::decodeVLMUL(VLMul: RISCVRI::getLMul(TSFlags: RC->TSFlags));
5492 return (!RCFractional && LMul == RCLMul) || (RCFractional && LMul == 1);
5493}
5494
5495bool RISCVInstrInfo::requiresNTLHint(const MachineInstr &MI) const {
5496 if (MI.memoperands_empty())
5497 return false;
5498
5499 MachineMemOperand *MMO = *(MI.memoperands_begin());
5500 if (!MMO->isNonTemporal())
5501 return false;
5502
5503 return true;
5504}
5505
5506bool RISCVInstrInfo::isSafeToMove(const MachineInstr &From,
5507 const MachineBasicBlock::iterator &To) {
5508 assert(To == From.getParent()->end() || From.getParent() == To->getParent());
5509 SmallVector<Register> PhysUses, PhysDefs;
5510 for (const MachineOperand &MO : From.all_uses())
5511 if (MO.getReg().isPhysical())
5512 PhysUses.push_back(Elt: MO.getReg());
5513 for (const MachineOperand &MO : From.all_defs())
5514 if (MO.getReg().isPhysical())
5515 PhysDefs.push_back(Elt: MO.getReg());
5516 bool SawStore = false;
5517 for (auto II = std::next(x: From.getIterator()); II != To; II++) {
5518 for (Register PhysReg : PhysUses)
5519 if (II->definesRegister(Reg: PhysReg, TRI: nullptr))
5520 return false;
5521 for (Register PhysReg : PhysDefs)
5522 if (II->definesRegister(Reg: PhysReg, TRI: nullptr) ||
5523 II->readsRegister(Reg: PhysReg, TRI: nullptr))
5524 return false;
5525 if (II->mayStore()) {
5526 SawStore = true;
5527 break;
5528 }
5529 }
5530 return From.isSafeToMove(SawStore);
5531}
5532