1//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the RISC-V implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVInstrInfo.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMatInt.h"
16#include "RISCV.h"
17#include "RISCVMachineFunctionInfo.h"
18#include "RISCVSubtarget.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/MemoryLocation.h"
23#include "llvm/Analysis/ValueTracking.h"
24#include "llvm/CodeGen/LiveIntervals.h"
25#include "llvm/CodeGen/LiveVariables.h"
26#include "llvm/CodeGen/MachineCombinerPattern.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/MachineTraceMetrics.h"
30#include "llvm/CodeGen/RegisterScavenging.h"
31#include "llvm/CodeGen/StackMaps.h"
32#include "llvm/IR/DebugInfoMetadata.h"
33#include "llvm/IR/Module.h"
34#include "llvm/MC/MCDwarf.h"
35#include "llvm/MC/MCInstBuilder.h"
36#include "llvm/MC/TargetRegistry.h"
37#include "llvm/Support/ErrorHandling.h"
38
39using namespace llvm;
40
41#define GEN_CHECK_COMPRESS_INSTR
42#include "RISCVGenCompressInstEmitter.inc"
43
44#define GET_INSTRINFO_CTOR_DTOR
45#include "RISCVGenInstrInfo.inc"
46
47#define DEBUG_TYPE "riscv-instr-info"
// -stats counters: spill/reload work is counted in units of single vector
// registers, so an LMUL=4 group spill bumps the counter by 4.
STATISTIC(NumVRegSpilled,
          "Number of registers within vector register groups spilled");
STATISTIC(NumVRegReloaded,
          "Number of registers within vector register groups reloaded");

// When set, vector COPYs are kept as whole-register moves (vmv<N>r.v)
// instead of being narrowed to vmv.v.v/vmv.v.i; checked first in
// isConvertibleToVMV_V_V below.
static cl::opt<bool> PreferWholeRegisterMove(
    "riscv-prefer-whole-register-move", cl::init(Val: false), cl::Hidden,
    cl::desc("Prefer whole register move for vector registers."));

// Debug knob forcing the MachineCombiner's trace-metrics strategy. The
// TS_NumStrategies default presumably acts as a "no override" sentinel —
// confirm at the option's use site.
static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
    "riscv-force-machine-combiner-strategy", cl::Hidden,
    cl::desc("Force machine combiner to use a specific strategy for machine "
             "trace metrics evaluation."),
    cl::init(Val: MachineTraceStrategy::TS_NumStrategies),
    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
                          "Local strategy."),
               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
                          "MinInstrCount strategy.")));
66
// Instantiate the tablegen-erated RVV pseudo-instruction lookup table.
namespace llvm::RISCVVPseudosTable {

using namespace RISCV;

#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVPseudosTable

// Instantiate the tablegen-erated masked-pseudo lookup table.
namespace llvm::RISCV {

#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // end namespace llvm::RISCV
82
// Construct the target instruction info. ADJCALLSTACKDOWN/ADJCALLSTACKUP are
// the call-frame setup/destroy pseudo opcodes handed to the generated base
// class; RegInfo is constructed from the subtarget's hardware mode.
RISCVInstrInfo::RISCVInstrInfo(const RISCVSubtarget &STI)
    : RISCVGenInstrInfo(STI, RegInfo, RISCV::ADJCALLSTACKDOWN,
                        RISCV::ADJCALLSTACKUP),
      RegInfo(STI.getHwMode()), STI(STI) {}
87
88#define GET_INSTRINFO_HELPERS
89#include "RISCVGenInstrInfo.inc"
90
91MCInst RISCVInstrInfo::getNop() const {
92 if (STI.hasStdExtZca())
93 return MCInstBuilder(RISCV::C_NOP);
94 return MCInstBuilder(RISCV::ADDI)
95 .addReg(Reg: RISCV::X0)
96 .addReg(Reg: RISCV::X0)
97 .addImm(Val: 0);
98}
99
100Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
101 int &FrameIndex) const {
102 TypeSize Dummy = TypeSize::getZero();
103 return isLoadFromStackSlot(MI, FrameIndex, MemBytes&: Dummy);
104}
105
// Map a whole-register vector load/store opcode (vl<N>rE<sew>.v / vs<N>r.v)
// to the number of vector registers it transfers (1, 2, 4, or 8). Returns
// std::nullopt for any opcode that is not a whole-register access.
static std::optional<unsigned> getLMULForRVVWholeLoadStore(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
  case RISCV::VS1R_V:
  case RISCV::VL1RE8_V:
  case RISCV::VL1RE16_V:
  case RISCV::VL1RE32_V:
  case RISCV::VL1RE64_V:
    return 1;
  case RISCV::VS2R_V:
  case RISCV::VL2RE8_V:
  case RISCV::VL2RE16_V:
  case RISCV::VL2RE32_V:
  case RISCV::VL2RE64_V:
    return 2;
  case RISCV::VS4R_V:
  case RISCV::VL4RE8_V:
  case RISCV::VL4RE16_V:
  case RISCV::VL4RE32_V:
  case RISCV::VL4RE64_V:
    return 4;
  case RISCV::VS8R_V:
  case RISCV::VL8RE8_V:
  case RISCV::VL8RE16_V:
  case RISCV::VL8RE32_V:
  case RISCV::VL8RE64_V:
    return 8;
  }
}
136
// If MI is a simple load from a stack slot, return the destination register,
// set FrameIndex to the slot, and report the access size in MemBytes
// (scalable for RVV whole-register loads). Returns a null Register
// otherwise.
Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex,
                                             TypeSize &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::LB:
  case RISCV::LBU:
    MemBytes = TypeSize::getFixed(ExactSize: 1);
    break;
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::FLH:
    MemBytes = TypeSize::getFixed(ExactSize: 2);
    break;
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::FLW:
  case RISCV::LWU:
    MemBytes = TypeSize::getFixed(ExactSize: 4);
    break;
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::FLD:
    MemBytes = TypeSize::getFixed(ExactSize: 8);
    break;
  case RISCV::VL1RE8_V:
  case RISCV::VL2RE8_V:
  case RISCV::VL4RE8_V:
  case RISCV::VL8RE8_V:
    // Whole-register vector loads have no immediate-offset operand, so they
    // are matched (and returned from) entirely inside the switch. The size
    // is scalable: LMUL vector registers of RVVBytesPerBlock bytes each.
    if (!MI.getOperand(i: 1).isFI())
      return Register();
    FrameIndex = MI.getOperand(i: 1).getIndex();
    unsigned LMUL = *getLMULForRVVWholeLoadStore(Opcode: MI.getOpcode());
    MemBytes = TypeSize::getScalable(MinimumSize: RISCV::RVVBytesPerBlock * LMUL);
    return MI.getOperand(i: 0).getReg();
  }

  // Scalar/FP loads: this is a stack-slot access only when the base operand
  // is a frame index and the immediate offset is exactly zero.
  if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
      MI.getOperand(i: 2).getImm() == 0) {
    FrameIndex = MI.getOperand(i: 1).getIndex();
    return MI.getOperand(i: 0).getReg();
  }

  return 0;
}
184
185Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
186 int &FrameIndex) const {
187 TypeSize Dummy = TypeSize::getZero();
188 return isStoreToStackSlot(MI, FrameIndex, MemBytes&: Dummy);
189}
190
// If MI is a simple store to a stack slot, return the source register, set
// FrameIndex to the slot, and report the access size in MemBytes (scalable
// for RVV whole-register stores). Returns a null Register otherwise.
Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            TypeSize &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::SB:
    MemBytes = TypeSize::getFixed(ExactSize: 1);
    break;
  case RISCV::SH:
  case RISCV::SH_INX:
  case RISCV::FSH:
    MemBytes = TypeSize::getFixed(ExactSize: 2);
    break;
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FSW:
    MemBytes = TypeSize::getFixed(ExactSize: 4);
    break;
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FSD:
    MemBytes = TypeSize::getFixed(ExactSize: 8);
    break;
  case RISCV::VS1R_V:
  case RISCV::VS2R_V:
  case RISCV::VS4R_V:
  case RISCV::VS8R_V:
    // Whole-register vector stores have no immediate-offset operand, so
    // they are matched (and returned from) entirely inside the switch.
    if (!MI.getOperand(i: 1).isFI())
      return Register();
    FrameIndex = MI.getOperand(i: 1).getIndex();
    unsigned LMUL = *getLMULForRVVWholeLoadStore(Opcode: MI.getOpcode());
    MemBytes = TypeSize::getScalable(MinimumSize: RISCV::RVVBytesPerBlock * LMUL);
    return MI.getOperand(i: 0).getReg();
  }

  // Scalar/FP stores: this is a stack-slot access only when the base operand
  // is a frame index and the immediate offset is exactly zero.
  if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
      MI.getOperand(i: 2).getImm() == 0) {
    FrameIndex = MI.getOperand(i: 1).getIndex();
    return MI.getOperand(i: 0).getReg();
  }

  return 0;
}
235
// Decide whether MI may be rematerialized rather than spilled/reloaded.
// A handful of RVV splat/move pseudos qualify, but only when operand 1
// (presumably the merge/passthru operand — confirm against the pseudo
// definitions) is undef, so re-executing the instruction cannot depend on
// prior register contents. Everything else defers to the generic check.
bool RISCVInstrInfo::isReMaterializableImpl(
    const MachineInstr &MI) const {
  switch (RISCV::getRVVMCOpcode(RVVPseudoOpcode: MI.getOpcode())) {
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
  case RISCV::VMV_V_I:
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
  case RISCV::VID_V:
    return MI.getOperand(i: 1).isUndef();
  default:
    return TargetInstrInfo::isReMaterializableImpl(MI);
  }
}
250
// Returns true when copying a NumRegs-register tuple in forward (ascending)
// order would overwrite source registers before they have been read, i.e.
// the destination range starts strictly above the source but still overlaps
// it. In that case the copy must be emitted in reverse.
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  if (DstReg <= SrcReg)
    return false;
  return DstReg - SrcReg < NumRegs;
}
255
// Decide whether the whole-register COPY at MBBI can be lowered to vmv.v.v
// (or vmv.v.i) instead of a whole-register move. Scans backwards from the
// COPY looking for the instruction defining the source register while
// tracking the vsetvli state in effect between the two; on success DefMBBI
// is set to the defining instruction. Returns false conservatively whenever
// the vtype/VL state cannot be proven compatible, and unconditionally when
// -riscv-prefer-whole-register-move is set.
static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
                                   const MachineBasicBlock &MBB,
                                   MachineBasicBlock::const_iterator MBBI,
                                   MachineBasicBlock::const_iterator &DefMBBI,
                                   RISCVVType::VLMUL LMul) {
  if (PreferWholeRegisterMove)
    return false;

  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
         "Unexpected COPY instruction.");
  Register SrcReg = MBBI->getOperand(i: 1).getReg();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  bool FoundDef = false;
  bool FirstVSetVLI = false;
  unsigned FirstSEW = 0;
  // Walk backwards from the COPY; the scan only covers this basic block, so
  // reaching MBB.begin() without finding the def gives up (returns false).
  while (MBBI != MBB.begin()) {
    --MBBI;
    if (MBBI->isMetaInstruction())
      continue;

    if (RISCVInstrInfo::isVectorConfigInstr(MI: *MBBI)) {
      // There is a vsetvli between COPY and source define instruction.
      // vy = def_vop ...  (producing instruction)
      // ...
      // vsetvli
      // ...
      // vx = COPY vy
      if (!FoundDef) {
        if (!FirstVSetVLI) {
          FirstVSetVLI = true;
          unsigned FirstVType = MBBI->getOperand(i: 2).getImm();
          RISCVVType::VLMUL FirstLMul = RISCVVType::getVLMUL(VType: FirstVType);
          FirstSEW = RISCVVType::getSEW(VType: FirstVType);
          // The first encountered vsetvli must have the same lmul as the
          // register class of COPY.
          if (FirstLMul != LMul)
            return false;
        }
        // Only permit `vsetvli x0, x0, vtype` between COPY and the source
        // define instruction.
        if (!RISCVInstrInfo::isVLPreservingConfig(MI: *MBBI))
          return false;
        continue;
      }

      // MBBI is the first vsetvli before the producing instruction.
      unsigned VType = MBBI->getOperand(i: 2).getImm();
      // If there is a vsetvli between COPY and the producing instruction.
      if (FirstVSetVLI) {
        // If SEW is different, return false.
        if (RISCVVType::getSEW(VType) != FirstSEW)
          return false;
      }

      // If the vsetvli is tail undisturbed, keep the whole register move.
      if (!RISCVVType::isTailAgnostic(VType))
        return false;

      // The checking is conservative. We only have register classes for
      // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
      // for fractional LMUL operations. However, we could not use the vsetvli
      // lmul for widening operations. The result of widening operation is
      // 2 x LMUL.
      return LMul == RISCVVType::getVLMUL(VType);
    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
      // Inline asm and calls may clobber vector state in ways we cannot see.
      return false;
    } else if (MBBI->getNumDefs()) {
      // Check all the instructions which will change VL.
      // For example, vleff has implicit def VL.
      if (MBBI->modifiesRegister(Reg: RISCV::VL, /*TRI=*/nullptr))
        return false;

      // Only converting whole register copies to vmv.v.v when the defining
      // value appears in the explicit operands.
      for (const MachineOperand &MO : MBBI->explicit_operands()) {
        if (!MO.isReg() || !MO.isDef())
          continue;
        if (!FoundDef && TRI->regsOverlap(RegA: MO.getReg(), RegB: SrcReg)) {
          // We only permit the source of COPY has the same LMUL as the defined
          // operand.
          // There are cases we need to keep the whole register copy if the LMUL
          // is different.
          // For example,
          // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
          // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
          // # The COPY may be created by vlmul_trunc intrinsic.
          // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
          //
          // After widening, the valid value will be 4 x e32 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
          // FIXME: The COPY of subregister of Zvlsseg register will not be able
          // to convert to vmv.v.[v|i] under the constraint.
          if (MO.getReg() != SrcReg)
            return false;

          // In widening reduction instructions with LMUL_1 input vector case,
          // only checking the LMUL is insufficient due to reduction result is
          // always LMUL_1.
          // For example,
          // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
          // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
          // $v26 = COPY killed renamable $v8
          // After widening, The valid value will be 1 x e16 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
          uint64_t TSFlags = MBBI->getDesc().TSFlags;
          if (RISCVII::isRVVWideningReduction(TSFlags))
            return false;

          // If the producing instruction does not depend on vsetvli, do not
          // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
          if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
            return false;

          // Found the definition.
          FoundDef = true;
          DefMBBI = MBBI;
          break;
        }
      }
    }
  }

  return false;
}
381
// Copy a vector register, register group, or segment (tuple) register.
// The copy is decomposed into the largest aligned whole-register moves
// available (LMUL 8/4/2/1) by walking over register encodings. When the
// source and destination ranges overlap such that a forward copy would
// clobber not-yet-read registers, the pieces are emitted in descending
// encoding order instead. Each piece is further converted to
// vmv.v.v / vmv.v.i when isConvertibleToVMV_V_V proves it safe.
void RISCVInstrInfo::copyPhysRegVector(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
    const TargetRegisterClass *RegClass) const {
  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
  RISCVVType::VLMUL LMul = RISCVRI::getLMul(TSFlags: RegClass->TSFlags);
  unsigned NF = RISCVRI::getNF(TSFlags: RegClass->TSFlags);

  uint16_t SrcEncoding = TRI->getEncodingValue(Reg: SrcReg);
  uint16_t DstEncoding = TRI->getEncodingValue(Reg: DstReg);
  auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(VLMul: LMul);
  assert(!Fractional && "It is impossible be fractional lmul here.");
  // Total number of single vector registers covered: fields times LMUL.
  unsigned NumRegs = NF * LMulVal;
  bool ReversedCopy =
      forwardCopyWillClobberTuple(DstReg: DstEncoding, SrcReg: SrcEncoding, NumRegs);
  if (ReversedCopy) {
    // If the src and dest overlap when copying a tuple, we need to copy the
    // registers in reverse.
    SrcEncoding += NumRegs - 1;
    DstEncoding += NumRegs - 1;
  }

  // Number of single registers copied so far.
  unsigned I = 0;
  // Pick the largest piece copyable at the current encodings. Returns the
  // piece's LMUL, its register class, the whole-register move opcode, and
  // the vmv.v.v / vmv.v.i pseudo opcodes for that width.
  auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
      -> std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned,
                    unsigned, unsigned> {
    if (ReversedCopy) {
      // For reversed copying, if there are enough aligned registers(8/4/2), we
      // can do a larger copy(LMUL8/4/2).
      // Besides, we have already known that DstEncoding is larger than
      // SrcEncoding in forwardCopyWillClobberTuple, so the difference between
      // DstEncoding and SrcEncoding should be >= LMUL value we try to use to
      // avoid clobbering.
      uint16_t Diff = DstEncoding - SrcEncoding;
      if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
          DstEncoding % 8 == 7)
        return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
                RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
      if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
          DstEncoding % 4 == 3)
        return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
                RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
      if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
          DstEncoding % 2 == 1)
        return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
                RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
      // Or we should do LMUL1 copying.
      return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
              RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
    }

    // For forward copying, if source register encoding and destination register
    // encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.
    if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
      return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
              RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
    if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
      return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
              RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
    if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
      return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
              RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
    // Or we should do LMUL1 copying.
    return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
            RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
  };

  while (I != NumRegs) {
    // For non-segment copying, we only do this once as the registers are always
    // aligned.
    // For segment copying, we may do this several times. If the registers are
    // aligned to larger LMUL, we can eliminate some copyings.
    auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
        GetCopyInfo(SrcEncoding, DstEncoding);
    auto [NumCopied, _] = RISCVVType::decodeVLMUL(VLMul: LMulCopied);

    MachineBasicBlock::const_iterator DefMBBI;
    if (LMul == LMulCopied &&
        isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
      // Prefer vmv.v.v; if the source was itself defined by a vmv.v.i pseudo
      // of this width, propagate it as vmv.v.i instead.
      Opc = VVOpc;
      if (DefMBBI->getOpcode() == VIOpc)
        Opc = VIOpc;
    }

    // Emit actual copying.
    // For reversed copying, the encoding should be decreased.
    MCRegister ActualSrcReg = TRI->findVRegWithEncoding(
        RegClass, Encoding: ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
    MCRegister ActualDstReg = TRI->findVRegWithEncoding(
        RegClass, Encoding: ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);

    auto MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Opc), DestReg: ActualDstReg);
    bool UseVMV_V_I = RISCV::getRVVMCOpcode(RVVPseudoOpcode: Opc) == RISCV::VMV_V_I;
    bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(RVVPseudoOpcode: Opc) == RISCV::VMV_V_V;
    if (UseVMV)
      MIB.addReg(RegNo: ActualDstReg, Flags: RegState::Undef);
    if (UseVMV_V_I)
      MIB = MIB.add(MO: DefMBBI->getOperand(i: 2));
    else
      MIB = MIB.addReg(RegNo: ActualSrcReg, Flags: getKillRegState(B: KillSrc));
    if (UseVMV) {
      // vmv.v.v/vmv.v.i pseudos need AVL, SEW, and policy operands copied
      // from the defining instruction, plus implicit VL/VTYPE reads.
      const MCInstrDesc &Desc = DefMBBI->getDesc();
      MIB.add(MO: DefMBBI->getOperand(i: RISCVII::getVLOpNum(Desc))); // AVL
      unsigned Log2SEW =
          DefMBBI->getOperand(i: RISCVII::getSEWOpNum(Desc)).getImm();
      MIB.addImm(Val: Log2SEW ? Log2SEW : 3); // SEW
      MIB.addImm(Val: 0); // tu, mu
      MIB.addReg(RegNo: RISCV::VL, Flags: RegState::Implicit);
      MIB.addReg(RegNo: RISCV::VTYPE, Flags: RegState::Implicit);
    }
    // Add an implicit read of the original source to silence the verifier
    // in the cases where some of the smaller VRs we're copying from might be
    // undef, caused by the fact that the original, larger source VR might not
    // be fully initialized at the time this COPY happens.
    MIB.addReg(RegNo: SrcReg, Flags: RegState::Implicit);

    // If we are copying reversely, we should decrease the encoding.
    SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
    DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
    I += NumCopied;
  }
}
504
// Emit a physical register-to-register copy, choosing the move idiom from
// the register classes involved: ADDI rd, rs, 0 for GPRs; pseudo moves for
// GPR-held FP values (Zhinx/Zfinx); a single FSGNJ.D/PADD.DW or a pair of
// ADDIs for GPR pairs; FSGNJ for FPR-to-FPR; FMV for GPR<->FPR transfers;
// CSRRS for vector-CSR reads; and copyPhysRegVector for RVV classes.
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 const DebugLoc &DL, Register DstReg,
                                 Register SrcReg, bool KillSrc,
                                 bool RenamableDest, bool RenamableSrc) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  RegState KillFlag = getKillRegState(B: KillSrc);

  if (RISCV::GPRRegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
    // GPR copy: materialized as `addi DstReg, SrcReg, 0`.
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::ADDI), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc))
        .addImm(Val: 0);
    return;
  }

  if (RISCV::GPRF16RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::PseudoMV_FPR16INX), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc));
    return;
  }

  if (RISCV::GPRF32RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::PseudoMV_FPR32INX), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc));
    return;
  }

  if (RISCV::GPRPairRegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
    if (STI.isRV32()) {
      if (STI.hasStdExtZdinx()) {
        // On RV32_Zdinx, FMV.D will move a pair of registers to another pair of
        // registers, in one instruction.
        BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FSGNJ_D_IN32X), DestReg: DstReg)
            .addReg(RegNo: SrcReg, Flags: getRenamableRegState(B: RenamableSrc))
            .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc));
        return;
      }

      if (STI.hasStdExtP()) {
        // On RV32P, `padd.dw` is a GPR Pair Add
        BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::PADD_DW), DestReg: DstReg)
            .addReg(RegNo: SrcReg, Flags: KillFlag | getRenamableRegState(B: RenamableSrc))
            .addReg(RegNo: RISCV::X0_Pair);
        return;
      }
    }

    MCRegister EvenReg = TRI->getSubReg(Reg: SrcReg, Idx: RISCV::sub_gpr_even);
    MCRegister OddReg = TRI->getSubReg(Reg: SrcReg, Idx: RISCV::sub_gpr_odd);
    // We need to correct the odd register of X0_Pair.
    if (OddReg == RISCV::DUMMY_REG_PAIR_WITH_X0)
      OddReg = RISCV::X0;
    assert(DstReg != RISCV::X0_Pair && "Cannot write to X0_Pair");

    // Emit an ADDI for both parts of GPRPair.
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::ADDI),
            DestReg: TRI->getSubReg(Reg: DstReg, Idx: RISCV::sub_gpr_even))
        .addReg(RegNo: EvenReg, Flags: KillFlag)
        .addImm(Val: 0);
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::ADDI),
            DestReg: TRI->getSubReg(Reg: DstReg, Idx: RISCV::sub_gpr_odd))
        .addReg(RegNo: OddReg, Flags: KillFlag)
        .addImm(Val: 0);
    return;
  }

  // Handle copy from csr
  if (RISCV::VCSRRegClass.contains(Reg: SrcReg) &&
      RISCV::GPRRegClass.contains(Reg: DstReg)) {
    // Read the vector CSR into a GPR via `csrrs DstReg, csr, x0`.
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::CSRRS), DestReg: DstReg)
        .addImm(Val: RISCVSysReg::lookupSysRegByName(Name: TRI->getName(RegNo: SrcReg))->Encoding)
        .addReg(RegNo: RISCV::X0);
    return;
  }

  if (RISCV::FPR16RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
    unsigned Opc;
    if (STI.hasStdExtZfh()) {
      Opc = RISCV::FSGNJ_H;
    } else {
      assert(STI.hasStdExtF() &&
             (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
             "Unexpected extensions");
      // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
      DstReg = TRI->getMatchingSuperReg(Reg: DstReg, SubIdx: RISCV::sub_16,
                                        RC: &RISCV::FPR32RegClass);
      SrcReg = TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: RISCV::sub_16,
                                        RC: &RISCV::FPR32RegClass);
      Opc = RISCV::FSGNJ_S;
    }
    // fsgnj rd, rs, rs is the canonical FP register move.
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Opc), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag)
        .addReg(RegNo: SrcReg, Flags: KillFlag);
    return;
  }

  if (RISCV::FPR32RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FSGNJ_S), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag)
        .addReg(RegNo: SrcReg, Flags: KillFlag);
    return;
  }

  if (RISCV::FPR64RegClass.contains(Reg1: DstReg, Reg2: SrcReg)) {
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FSGNJ_D), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag)
        .addReg(RegNo: SrcReg, Flags: KillFlag);
    return;
  }

  if (RISCV::FPR32RegClass.contains(Reg: DstReg) &&
      RISCV::GPRRegClass.contains(Reg: SrcReg)) {
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FMV_W_X), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag);
    return;
  }

  if (RISCV::GPRRegClass.contains(Reg: DstReg) &&
      RISCV::FPR32RegClass.contains(Reg: SrcReg)) {
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FMV_X_W), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag);
    return;
  }

  if (RISCV::FPR64RegClass.contains(Reg: DstReg) &&
      RISCV::GPRRegClass.contains(Reg: SrcReg)) {
    // FMV.D.X moves a full 64-bit GPR, so it requires RV64.
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FMV_D_X), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag);
    return;
  }

  if (RISCV::GPRRegClass.contains(Reg: DstReg) &&
      RISCV::FPR64RegClass.contains(Reg: SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: RISCV::FMV_X_D), DestReg: DstReg)
        .addReg(RegNo: SrcReg, Flags: KillFlag);
    return;
  }

  // VR->VR copies.
  const TargetRegisterClass *RegClass =
      TRI->getCommonMinimalPhysRegClass(Reg1: SrcReg, Reg2: DstReg);
  if (RISCVRegisterInfo::isRVVRegClass(RC: RegClass)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
    return;
  }

  llvm_unreachable("Impossible reg-to-reg copy");
}
655
// Spill SrcReg (of class RC) to stack slot FI before instruction I.
// The store opcode is selected from the register class: scalar/FP stores
// take a zero immediate offset, while RVV whole-register stores and the
// segment-spill pseudos take only the frame index and mark the slot as a
// scalable-vector stack object.
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         Register SrcReg, bool IsKill, int FI,
                                         const TargetRegisterClass *RC,
                                         Register VReg,
                                         MachineInstr::MIFlag Flags) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  Align Alignment = MFI.getObjectAlign(ObjectIdx: FI);

  unsigned Opcode;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    // SW on RV32, SD on RV64.
    Opcode = RegInfo.getRegSizeInBits(RC: RISCV::GPRRegClass) == 32 ? RISCV::SW
                                                                 : RISCV::SD;
  } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::SH_INX;
  } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::SW_INX;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    // On RV32 with Zilsd and a sufficiently aligned slot, store the pair
    // with a single SD; otherwise fall back to the Zdinx spill pseudo.
    if (!STI.is64Bit() && STI.hasStdExtZilsd() &&
        Alignment >= STI.getZilsdAlign()) {
      Opcode = RISCV::SD_RV32;
    } else {
      Opcode = RISCV::PseudoRV32ZdinxSD;
    }
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSH;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSW;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSD;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS1R_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS2R_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS4R_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS8R_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    // Segment (tuple) classes use the VSPILL<NF>_M<LMUL> pseudos.
    Opcode = RISCV::PseudoVSPILL2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL8_M1;
  else
    llvm_unreachable("Can't store this register to stack slot");

  if (RISCVRegisterInfo::isRVVRegClass(RC)) {
    // Vector spill: scalable memory size, no immediate offset operand, and
    // the slot is retagged as a scalable-vector stack object.
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOStore,
        Size: TypeSize::getScalable(MinimumSize: MFI.getObjectSize(ObjectIdx: FI)), BaseAlignment: Alignment);

    MFI.setStackID(ObjectIdx: FI, ID: TargetStackID::ScalableVector);
    BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode))
        .addReg(RegNo: SrcReg, Flags: getKillRegState(B: IsKill))
        .addFrameIndex(Idx: FI)
        .addMemOperand(MMO)
        .setMIFlag(Flags);
    NumVRegSpilled += RegInfo.getRegSizeInBits(RC: *RC) / RISCV::RVVBitsPerBlock;
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOStore,
        Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment);

    BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode))
        .addReg(RegNo: SrcReg, Flags: getKillRegState(B: IsKill))
        .addFrameIndex(Idx: FI)
        .addImm(Val: 0)
        .addMemOperand(MMO)
        .setMIFlag(Flags);
  }
}
745
// Reload DstReg (of class RC) from stack slot FI before instruction I.
// Mirror of storeRegToStackSlot: scalar/FP loads take a zero immediate
// offset, RVV whole-register loads and segment-reload pseudos take only the
// frame index and mark the slot as a scalable-vector stack object.
void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register DstReg, int FI,
                                          const TargetRegisterClass *RC,
                                          Register VReg, unsigned SubReg,
                                          MachineInstr::MIFlag Flags) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  Align Alignment = MFI.getObjectAlign(ObjectIdx: FI);
  // Keep a real debug location only for epilogue (FrameDestroy) reloads.
  DebugLoc DL =
      Flags & MachineInstr::FrameDestroy ? MBB.findDebugLoc(MBBI: I) : DebugLoc();

  unsigned Opcode;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    // LW on RV32, LD on RV64.
    Opcode = RegInfo.getRegSizeInBits(RC: RISCV::GPRRegClass) == 32 ? RISCV::LW
                                                                 : RISCV::LD;
  } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::LH_INX;
  } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::LW_INX;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    // On RV32 with Zilsd and a sufficiently aligned slot, load the pair with
    // a single LD; otherwise fall back to the Zdinx reload pseudo.
    if (!STI.is64Bit() && STI.hasStdExtZilsd() &&
        Alignment >= STI.getZilsdAlign()) {
      Opcode = RISCV::LD_RV32;
    } else {
      Opcode = RISCV::PseudoRV32ZdinxLD;
    }
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLH;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLW;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLD;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL1RE8_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL2RE8_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL4RE8_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL8RE8_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    // Segment (tuple) classes use the VRELOAD<NF>_M<LMUL> pseudos.
    Opcode = RISCV::PseudoVRELOAD2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD8_M1;
  else
    llvm_unreachable("Can't load this register from stack slot");

  if (RISCVRegisterInfo::isRVVRegClass(RC)) {
    // Vector reload: scalable memory size, no immediate offset operand, and
    // the slot is retagged as a scalable-vector stack object.
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOLoad,
        Size: TypeSize::getScalable(MinimumSize: MFI.getObjectSize(ObjectIdx: FI)), BaseAlignment: Alignment);

    MFI.setStackID(ObjectIdx: FI, ID: TargetStackID::ScalableVector);
    BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode), DestReg: DstReg)
        .addFrameIndex(Idx: FI)
        .addMemOperand(MMO)
        .setMIFlag(Flags);
    NumVRegReloaded += RegInfo.getRegSizeInBits(RC: *RC) / RISCV::RVVBitsPerBlock;
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOLoad,
        Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment);

    BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode), DestReg: DstReg)
        .addFrameIndex(Idx: FI)
        .addImm(Val: 0)
        .addMemOperand(MMO)
        .setMIFlag(Flags);
  }
}
835std::optional<unsigned> getFoldedOpcode(MachineFunction &MF, MachineInstr &MI,
836 ArrayRef<unsigned> Ops,
837 const RISCVSubtarget &ST) {
838
839 // The below optimizations narrow the load so they are only valid for little
840 // endian.
841 // TODO: Support big endian by adding an offset into the frame object?
842 if (MF.getDataLayout().isBigEndian())
843 return std::nullopt;
844
845 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
846 if (Ops.size() != 1 || Ops[0] != 1)
847 return std::nullopt;
848
849 switch (MI.getOpcode()) {
850 default:
851 if (RISCVInstrInfo::isSEXT_W(MI))
852 return RISCV::LW;
853 if (RISCVInstrInfo::isZEXT_W(MI))
854 return RISCV::LWU;
855 if (RISCVInstrInfo::isZEXT_B(MI))
856 return RISCV::LBU;
857 break;
858 case RISCV::SEXT_H:
859 return RISCV::LH;
860 case RISCV::SEXT_B:
861 return RISCV::LB;
862 case RISCV::ZEXT_H_RV32:
863 case RISCV::ZEXT_H_RV64:
864 return RISCV::LHU;
865 }
866
867 switch (RISCV::getRVVMCOpcode(RVVPseudoOpcode: MI.getOpcode())) {
868 default:
869 return std::nullopt;
870 case RISCV::VMV_X_S: {
871 unsigned Log2SEW =
872 MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm();
873 if (ST.getXLen() < (1U << Log2SEW))
874 return std::nullopt;
875 switch (Log2SEW) {
876 case 3:
877 return RISCV::LB;
878 case 4:
879 return RISCV::LH;
880 case 5:
881 return RISCV::LW;
882 case 6:
883 return RISCV::LD;
884 default:
885 llvm_unreachable("Unexpected SEW");
886 }
887 }
888 case RISCV::VFMV_F_S: {
889 unsigned Log2SEW =
890 MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm();
891 switch (Log2SEW) {
892 case 4:
893 return RISCV::FLH;
894 case 5:
895 return RISCV::FLW;
896 case 6:
897 return RISCV::FLD;
898 default:
899 llvm_unreachable("Unexpected SEW");
900 }
901 }
902 }
903}
904
905// This is the version used during InlineSpiller::spillAroundUses
906MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
907 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
908 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
909 VirtRegMap *VRM) const {
910
911 std::optional<unsigned> LoadOpc = getFoldedOpcode(MF, MI, Ops, ST: STI);
912 if (!LoadOpc)
913 return nullptr;
914 Register DstReg = MI.getOperand(i: 0).getReg();
915 return BuildMI(BB&: *MI.getParent(), I: InsertPt, MIMD: MI.getDebugLoc(), MCID: get(Opcode: *LoadOpc),
916 DestReg: DstReg)
917 .addFrameIndex(Idx: FrameIndex)
918 .addImm(Val: 0);
919}
920
921static unsigned getLoadPredicatedOpcode(unsigned Opcode) {
922 switch (Opcode) {
923 case RISCV::LB:
924 return RISCV::PseudoCCLB;
925 case RISCV::LBU:
926 return RISCV::PseudoCCLBU;
927 case RISCV::LH:
928 return RISCV::PseudoCCLH;
929 case RISCV::LHU:
930 return RISCV::PseudoCCLHU;
931 case RISCV::LW:
932 return RISCV::PseudoCCLW;
933 case RISCV::LWU:
934 return RISCV::PseudoCCLWU;
935 case RISCV::LD:
936 return RISCV::PseudoCCLD;
937 case RISCV::QC_E_LB:
938 return RISCV::PseudoCCQC_E_LB;
939 case RISCV::QC_E_LBU:
940 return RISCV::PseudoCCQC_E_LBU;
941 case RISCV::QC_E_LH:
942 return RISCV::PseudoCCQC_E_LH;
943 case RISCV::QC_E_LHU:
944 return RISCV::PseudoCCQC_E_LHU;
945 case RISCV::QC_E_LW:
946 return RISCV::PseudoCCQC_E_LW;
947 default:
948 return 0;
949 }
950}
951
/// Try to fold \p LoadMI into a PseudoCCMOVGPR \p MI, producing a
/// short-forward-branch predicated-load pseudo. Returns the new instruction,
/// or nullptr if the fold is not possible.
MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
    LiveIntervals *LIS) const {
  // For now, only handle RISCV::PseudoCCMOVGPR.
  if (MI.getOpcode() != RISCV::PseudoCCMOVGPR)
    return nullptr;

  unsigned PredOpc = getLoadPredicatedOpcode(Opcode: LoadMI.getOpcode());

  // Need the short-forward-branch integer-load feature and a load opcode
  // that actually has a predicated pseudo form.
  if (!STI.hasShortForwardBranchILoad() || !PredOpc)
    return nullptr;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  // The load must feed either the true value (operand 4) or the false value
  // (operand 5) of the conditional move.
  if (Ops.size() != 1 || (Ops[0] != 4 && Ops[0] != 5))
    return nullptr;

  // When the load feeds operand 5, the condition is kept as-is and the other
  // operand becomes the predicated load's "false" value; otherwise invert.
  bool Invert = Ops[0] == 5;
  const MachineOperand &FalseReg = MI.getOperand(i: !Invert ? 5 : 4);
  Register DestReg = MI.getOperand(i: 0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(Reg: FalseReg.getReg());
  if (!MRI.constrainRegClass(Reg: DestReg, RC: PreviousClass))
    return nullptr;

  // Create a new predicated version of DefMI.
  // Operands 1 and 2 of PseudoCCMOVGPR are the registers being compared.
  MachineInstrBuilder NewMI = BuildMI(BB&: *MI.getParent(), I: InsertPt,
                                      MIMD: MI.getDebugLoc(), MCID: get(Opcode: PredOpc), DestReg)
                                  .add(MOs: {MI.getOperand(i: 1), MI.getOperand(i: 2)});

  // Add condition code, inverting if necessary.
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
  if (!Invert)
    CC = RISCVCC::getInverseBranchCondition(CC);
  NewMI.addImm(Val: CC);

  // Copy the false register.
  NewMI.add(MO: FalseReg);

  // Copy all the DefMI operands (skipping the def at index 0).
  const MCInstrDesc &DefDesc = LoadMI.getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
    NewMI.add(MO: LoadMI.getOperand(i));

  NewMI.cloneMemRefs(OtherMI: LoadMI);
  return NewMI;
}
998
/// Materialize the constant \p Val into \p DstReg, inserting instructions
/// before \p MBBI. The sequence is computed by RISCVMatInt::generateInstSeq;
/// the first instruction sources X0 and every later one chains off DstReg.
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, Register DstReg, uint64_t Val,
                            MachineInstr::MIFlag Flag, bool DstRenamable,
                            bool DstIsDead) const {
  Register SrcReg = RISCV::X0;

  // For RV32, allow a sign or unsigned 32 bit value.
  if (!STI.is64Bit() && !isInt<32>(x: Val)) {
    // If have a uimm32 it will still fit in a register so we can allow it.
    if (!isUInt<32>(x: Val))
      report_fatal_error(reason: "Should only materialize 32-bit constants for RV32");

    // Sign extend for generateInstSeq.
    Val = SignExtend64<32>(x: Val);
  }

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
  assert(!Seq.empty());

  bool SrcRenamable = false;
  unsigned Num = 0;

  for (const RISCVMatInt::Inst &Inst : Seq) {
    bool LastItem = ++Num == Seq.size();
    // Only the final instruction may mark DstReg dead; intermediate defs are
    // consumed by the next instruction in the chain.
    RegState DstRegState = getDeadRegState(B: DstIsDead && LastItem) |
                           getRenamableRegState(B: DstRenamable);
    RegState SrcRegState = getKillRegState(B: SrcReg != RISCV::X0) |
                           getRenamableRegState(B: SrcRenamable);
    // Build operands according to the shape this sequence entry requires.
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      // dst = op(imm)
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Inst.getOpcode()))
          .addReg(RegNo: DstReg, Flags: RegState::Define | DstRegState)
          .addImm(Val: Inst.getImm())
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegX0:
      // dst = op(src, x0)
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Inst.getOpcode()))
          .addReg(RegNo: DstReg, Flags: RegState::Define | DstRegState)
          .addReg(RegNo: SrcReg, Flags: SrcRegState)
          .addReg(RegNo: RISCV::X0)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegReg:
      // dst = op(src, src)
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Inst.getOpcode()))
          .addReg(RegNo: DstReg, Flags: RegState::Define | DstRegState)
          .addReg(RegNo: SrcReg, Flags: SrcRegState)
          .addReg(RegNo: SrcReg, Flags: SrcRegState)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegImm:
      // dst = op(src, imm)
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: Inst.getOpcode()))
          .addReg(RegNo: DstReg, Flags: RegState::Define | DstRegState)
          .addReg(RegNo: SrcReg, Flags: SrcRegState)
          .addImm(Val: Inst.getImm())
          .setMIFlag(Flag);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = DstReg;
    SrcRenamable = DstRenamable;
  }
}
1063
// Map a conditional-branch opcode to the RISCVCC condition it evaluates, or
// COND_INVALID for opcodes that are not recognized conditional branches.
// Vendor branches (Zibi, CORE-V, Xqci, Andes) are grouped with the base
// condition class they share here (e.g. the NDS bit-test branches fall under
// EQ/NE).
RISCVCC::CondCode RISCVInstrInfo::getCondFromBranchOpc(unsigned Opc) {
  switch (Opc) {
  default:
    return RISCVCC::COND_INVALID;
  case RISCV::BEQ:
  case RISCV::BEQI:
  case RISCV::CV_BEQIMM:
  case RISCV::QC_BEQI:
  case RISCV::QC_E_BEQI:
  case RISCV::NDS_BBC:
  case RISCV::NDS_BEQC:
    return RISCVCC::COND_EQ;
  case RISCV::BNE:
  case RISCV::BNEI:
  case RISCV::QC_BNEI:
  case RISCV::QC_E_BNEI:
  case RISCV::CV_BNEIMM:
  case RISCV::NDS_BBS:
  case RISCV::NDS_BNEC:
    return RISCVCC::COND_NE;
  case RISCV::BLT:
  case RISCV::QC_BLTI:
  case RISCV::QC_E_BLTI:
    return RISCVCC::COND_LT;
  case RISCV::BGE:
  case RISCV::QC_BGEI:
  case RISCV::QC_E_BGEI:
    return RISCVCC::COND_GE;
  case RISCV::BLTU:
  case RISCV::QC_BLTUI:
  case RISCV::QC_E_BLTUI:
    return RISCVCC::COND_LTU;
  case RISCV::BGEU:
  case RISCV::QC_BGEUI:
  case RISCV::QC_E_BGEUI:
    return RISCVCC::COND_GEU;
  }
}
1102
1103bool RISCVInstrInfo::evaluateCondBranch(RISCVCC::CondCode CC, int64_t C0,
1104 int64_t C1) {
1105 switch (CC) {
1106 default:
1107 llvm_unreachable("Unexpected CC");
1108 case RISCVCC::COND_EQ:
1109 return C0 == C1;
1110 case RISCVCC::COND_NE:
1111 return C0 != C1;
1112 case RISCVCC::COND_LT:
1113 return C0 < C1;
1114 case RISCVCC::COND_GE:
1115 return C0 >= C1;
1116 case RISCVCC::COND_LTU:
1117 return (uint64_t)C0 < (uint64_t)C1;
1118 case RISCVCC::COND_GEU:
1119 return (uint64_t)C0 >= (uint64_t)C1;
1120 }
1121}
1122
1123// The contents of values added to Cond are not examined outside of
1124// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
1125// push BranchOpcode, Reg1, Reg2.
1126static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
1127 SmallVectorImpl<MachineOperand> &Cond) {
1128 // Block ends with fall-through condbranch.
1129 assert(LastInst.getDesc().isConditionalBranch() &&
1130 "Unknown conditional branch");
1131 Target = LastInst.getOperand(i: 2).getMBB();
1132 Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst.getOpcode()));
1133 Cond.push_back(Elt: LastInst.getOperand(i: 0));
1134 Cond.push_back(Elt: LastInst.getOperand(i: 1));
1135}
1136
1137static unsigned getInverseXqcicmOpcode(unsigned Opcode) {
1138 switch (Opcode) {
1139 default:
1140 llvm_unreachable("Unexpected Opcode");
1141 case RISCV::QC_MVEQ:
1142 return RISCV::QC_MVNE;
1143 case RISCV::QC_MVNE:
1144 return RISCV::QC_MVEQ;
1145 case RISCV::QC_MVLT:
1146 return RISCV::QC_MVGE;
1147 case RISCV::QC_MVGE:
1148 return RISCV::QC_MVLT;
1149 case RISCV::QC_MVLTU:
1150 return RISCV::QC_MVGEU;
1151 case RISCV::QC_MVGEU:
1152 return RISCV::QC_MVLTU;
1153 case RISCV::QC_MVEQI:
1154 return RISCV::QC_MVNEI;
1155 case RISCV::QC_MVNEI:
1156 return RISCV::QC_MVEQI;
1157 case RISCV::QC_MVLTI:
1158 return RISCV::QC_MVGEI;
1159 case RISCV::QC_MVGEI:
1160 return RISCV::QC_MVLTI;
1161 case RISCV::QC_MVLTUI:
1162 return RISCV::QC_MVGEUI;
1163 case RISCV::QC_MVGEUI:
1164 return RISCV::QC_MVLTUI;
1165 }
1166}
1167
// Return the branch opcode implementing condition \p CC for the select
// pseudo \p SelectOpc. The default (plain GPR select) uses base-ISA
// branches; each vendor select pseudo maps to its matching immediate-branch
// family. Inner defaults are unreachable: each pseudo supports only the
// conditions listed for it.
unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
  switch (SelectOpc) {
  default:
    // Base ISA register-register branches.
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_EQ:
      return RISCV::BEQ;
    case RISCVCC::COND_NE:
      return RISCV::BNE;
    case RISCVCC::COND_LT:
      return RISCV::BLT;
    case RISCVCC::COND_GE:
      return RISCV::BGE;
    case RISCVCC::COND_LTU:
      return RISCV::BLTU;
    case RISCVCC::COND_GEU:
      return RISCV::BGEU;
    }
    break;
  case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
    // Zibi equality branches against a 5-bit immediate.
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_EQ:
      return RISCV::BEQI;
    case RISCVCC::COND_NE:
      return RISCV::BNEI;
    }
    break;
  case RISCV::Select_GPR_Using_CC_SImm5_CV:
    // CORE-V immediate branches.
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_EQ:
      return RISCV::CV_BEQIMM;
    case RISCVCC::COND_NE:
      return RISCV::CV_BNEIMM;
    }
    break;
  case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
    // Xqci signed-immediate branches.
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_EQ:
      return RISCV::QC_BEQI;
    case RISCVCC::COND_NE:
      return RISCV::QC_BNEI;
    case RISCVCC::COND_LT:
      return RISCV::QC_BLTI;
    case RISCVCC::COND_GE:
      return RISCV::QC_BGEI;
    }
    break;
  case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
    // Xqci unsigned-immediate branches.
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_LTU:
      return RISCV::QC_BLTUI;
    case RISCVCC::COND_GEU:
      return RISCV::QC_BGEUI;
    }
    break;
  case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
    // Xqci 48-bit-encoding signed-immediate branches.
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_EQ:
      return RISCV::QC_E_BEQI;
    case RISCVCC::COND_NE:
      return RISCV::QC_E_BNEI;
    case RISCVCC::COND_LT:
      return RISCV::QC_E_BLTI;
    case RISCVCC::COND_GE:
      return RISCV::QC_E_BGEI;
    }
    break;
  case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
    // Xqci 48-bit-encoding unsigned-immediate branches.
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_LTU:
      return RISCV::QC_E_BLTUI;
    case RISCVCC::COND_GEU:
      return RISCV::QC_E_BGEUI;
    }
    break;
  case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
    // Andes bit-test branches (treated as EQ/NE here).
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_EQ:
      return RISCV::NDS_BBC;
    case RISCVCC::COND_NE:
      return RISCV::NDS_BBS;
    }
    break;
  case RISCV::Select_GPR_Using_CC_UImm7_NDS:
    // Andes compare-immediate branches.
    switch (CC) {
    default:
      llvm_unreachable("Unexpected condition code!");
    case RISCVCC::COND_EQ:
      return RISCV::NDS_BEQC;
    case RISCVCC::COND_NE:
      return RISCV::NDS_BNEC;
    }
    break;
  }
}
1278
1279RISCVCC::CondCode RISCVCC::getInverseBranchCondition(RISCVCC::CondCode CC) {
1280 switch (CC) {
1281 default:
1282 llvm_unreachable("Unrecognized conditional branch");
1283 case RISCVCC::COND_EQ:
1284 return RISCVCC::COND_NE;
1285 case RISCVCC::COND_NE:
1286 return RISCVCC::COND_EQ;
1287 case RISCVCC::COND_LT:
1288 return RISCVCC::COND_GE;
1289 case RISCVCC::COND_GE:
1290 return RISCVCC::COND_LT;
1291 case RISCVCC::COND_LTU:
1292 return RISCVCC::COND_GEU;
1293 case RISCVCC::COND_GEU:
1294 return RISCVCC::COND_LTU;
1295 }
1296}
1297
/// Analyze the terminators of \p MBB per the TargetInstrInfo contract:
/// populate TBB/FBB/Cond for the recognized shapes (fallthrough, single
/// unconditional, single conditional, conditional + unconditional) and
/// return true when the branch structure cannot be understood.
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  TBB = FBB = nullptr;
  Cond.clear();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end() || !isUnpredicatedTerminator(MI: *I))
    return false;

  // Count the number of terminators and find the first unconditional or
  // indirect branch.
  MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
  int NumTerminators = 0;
  for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(MI: *J);
       J++) {
    NumTerminators++;
    if (J->getDesc().isUnconditionalBranch() ||
        J->getDesc().isIndirectBranch()) {
      FirstUncondOrIndirectBr = J.getReverse();
    }
  }

  // If AllowModify is true, we can erase any terminators after
  // FirstUncondOrIndirectBR.
  if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
    while (std::next(x: FirstUncondOrIndirectBr) != MBB.end()) {
      std::next(x: FirstUncondOrIndirectBr)->eraseFromParent();
      NumTerminators--;
    }
    I = FirstUncondOrIndirectBr;
  }

  // We can't handle blocks that end in an indirect branch.
  if (I->getDesc().isIndirectBranch())
    return true;

  // We can't handle Generic branch opcodes from Global ISel.
  if (I->isPreISelOpcode())
    return true;

  // We can't handle blocks with more than 2 terminators.
  if (NumTerminators > 2)
    return true;

  // Handle a single unconditional branch.
  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
    TBB = getBranchDestBlock(MI: *I);
    return false;
  }

  // Handle a single conditional branch.
  if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
    parseCondBranch(LastInst&: *I, Target&: TBB, Cond);
    return false;
  }

  // Handle a conditional branch followed by an unconditional branch.
  if (NumTerminators == 2 && std::prev(x: I)->getDesc().isConditionalBranch() &&
      I->getDesc().isUnconditionalBranch()) {
    parseCondBranch(LastInst&: *std::prev(x: I), Target&: TBB, Cond);
    FBB = getBranchDestBlock(MI: *I);
    return false;
  }

  // Otherwise, we can't handle this.
  return true;
}
1369
/// Remove up to two branches from the end of \p MBB (the trailing
/// unconditional branch, then a preceding conditional branch, if present).
/// Returns the number of instructions removed; accumulates their encoded
/// sizes into *BytesRemoved when non-null.
unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                      int *BytesRemoved) const {
  if (BytesRemoved)
    *BytesRemoved = 0;
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  // Nothing to do if the block doesn't end in a branch.
  if (!I->getDesc().isUnconditionalBranch() &&
      !I->getDesc().isConditionalBranch())
    return 0;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(MI: *I);
  I->eraseFromParent();

  // Re-scan from the (new) end of the block for a second branch.
  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!I->getDesc().isConditionalBranch())
    return 1;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(MI: *I);
  I->eraseFromParent();
  return 2;
}
1401
// Inserts a branch into the end of the specific MachineBasicBlock, returning
// the number of instructions inserted. Cond is either empty (unconditional)
// or the {opcode, op0, op1} triple produced by parseCondBranch. When
// BytesAdded is non-null, the encoded size of each emitted branch is
// accumulated into it.
unsigned RISCVInstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  if (BytesAdded)
    *BytesAdded = 0;

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 3 || Cond.size() == 0) &&
         "RISC-V branch conditions have two components!");

  // Unconditional branch.
  if (Cond.empty()) {
    MachineInstr &MI = *BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: RISCV::PseudoBR)).addMBB(MBB: TBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(MI);
    return 1;
  }

  // Either a one or two-way conditional branch.
  // Cond[0] holds the branch opcode; Cond[1]/Cond[2] its two operands.
  MachineInstr &CondMI = *BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond[0].getImm()))
                              .add(MO: Cond[1])
                              .add(MO: Cond[2])
                              .addMBB(MBB: TBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(MI: CondMI);

  // One-way conditional branch.
  if (!FBB)
    return 1;

  // Two-way conditional branch.
  MachineInstr &MI = *BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: RISCV::PseudoBR)).addMBB(MBB: FBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(MI);
  return 2;
}
1441
/// Expand an out-of-range unconditional branch into a PseudoJump through a
/// scratch GPR. \p MBB is a freshly inserted empty block; \p RestoreBB is
/// used only when no register can be scavenged, in which case the scratch
/// register is spilled around the jump and reloaded in RestoreBB.
void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                          MachineBasicBlock &DestBB,
                                          MachineBasicBlock &RestoreBB,
                                          const DebugLoc &DL, int64_t BrOffset,
                                          RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
  assert(RestoreBB.empty() &&
         "restore block should be inserted for restoring clobbered registers");

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();

  // PseudoJump reaches a signed 32-bit range; anything farther is fatal.
  if (!isInt<32>(x: BrOffset))
    report_fatal_error(
        reason: "Branch offsets outside of the signed 32-bit range not supported");

  // FIXME: A virtual register must be used initially, as the register
  // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
  // uses the same workaround).
  Register ScratchReg = MRI.createVirtualRegister(RegClass: &RISCV::GPRJALRRegClass);
  auto II = MBB.end();
  // We may also update the jump target to RestoreBB later.
  MachineInstr &MI = *BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::PseudoJump))
                          .addReg(RegNo: ScratchReg, Flags: RegState::Define | RegState::Dead)
                          .addMBB(MBB: &DestBB, TargetFlags: RISCVII::MO_CALL);

  RS->enterBasicBlockEnd(MBB);
  // With Zicfilp, restrict scavenging to x7 (see the forced choice below).
  const TargetRegisterClass *RC = &RISCV::GPRRegClass;
  if (STI.hasStdExtZicfilp())
    RC = &RISCV::GPRX7RegClass;
  // AllowSpill=false: spilling is handled manually below so the reload can be
  // placed in RestoreBB.
  Register TmpGPR =
      RS->scavengeRegisterBackwards(RC: *RC, To: MI.getIterator(),
                                    /*RestoreAfter=*/false, /*SpAdj=*/SPAdj: 0,
                                    /*AllowSpill=*/false);
  if (TmpGPR.isValid())
    RS->setRegUsed(Reg: TmpGPR);
  else {
    // The case when there is no scavenged register needs special handling.

    // Pick s11(or s1 for rve) because it doesn't make a difference.
    TmpGPR = STI.hasStdExtE() ? RISCV::X9 : RISCV::X27;
    // Force t2 if Zicfilp is on
    if (STI.hasStdExtZicfilp())
      TmpGPR = RISCV::X7;

    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
    if (FrameIndex == -1)
      report_fatal_error(reason: "underestimated function size");

    // Spill TmpGPR before the jump, and redirect the jump to RestoreBB,
    // which reloads it before falling through to the real destination.
    storeRegToStackSlot(MBB, I: MI, SrcReg: TmpGPR, /*IsKill=*/true, FI: FrameIndex,
                        RC: &RISCV::GPRRegClass, VReg: Register());
    TRI->eliminateFrameIndex(MI: std::prev(x: MI.getIterator()),
                             /*SpAdj=*/SPAdj: 0, /*FIOperandNum=*/1);

    MI.getOperand(i: 1).setMBB(&RestoreBB);

    loadRegFromStackSlot(MBB&: RestoreBB, I: RestoreBB.end(), DstReg: TmpGPR, FI: FrameIndex,
                         RC: &RISCV::GPRRegClass, VReg: Register());
    TRI->eliminateFrameIndex(MI: RestoreBB.back(),
                             /*SpAdj=*/SPAdj: 0, /*FIOperandNum=*/1);
  }

  // Replace the placeholder virtual register with the chosen physreg.
  MRI.replaceRegWith(FromReg: ScratchReg, ToReg: TmpGPR);
  MRI.clearVirtRegs();
}
1512
/// Invert the condition encoded in \p Cond (the {opcode, op0, op1} triple
/// produced by parseCondBranch) by swapping the branch opcode for its
/// logical negation. Returns false, meaning the condition was reversed.
bool RISCVInstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  assert((Cond.size() == 3) && "Invalid branch condition!");
  switch (Cond[0].getImm()) {
  default:
    llvm_unreachable("Unknown conditional branch!");
  case RISCV::BEQ:
    Cond[0].setImm(RISCV::BNE);
    break;
  case RISCV::BEQI:
    Cond[0].setImm(RISCV::BNEI);
    break;
  case RISCV::BNE:
    Cond[0].setImm(RISCV::BEQ);
    break;
  case RISCV::BNEI:
    Cond[0].setImm(RISCV::BEQI);
    break;
  case RISCV::BLT:
    Cond[0].setImm(RISCV::BGE);
    break;
  case RISCV::BGE:
    Cond[0].setImm(RISCV::BLT);
    break;
  case RISCV::BLTU:
    Cond[0].setImm(RISCV::BGEU);
    break;
  case RISCV::BGEU:
    Cond[0].setImm(RISCV::BLTU);
    break;
  case RISCV::CV_BEQIMM:
    Cond[0].setImm(RISCV::CV_BNEIMM);
    break;
  case RISCV::CV_BNEIMM:
    Cond[0].setImm(RISCV::CV_BEQIMM);
    break;
  case RISCV::QC_BEQI:
    Cond[0].setImm(RISCV::QC_BNEI);
    break;
  case RISCV::QC_BNEI:
    Cond[0].setImm(RISCV::QC_BEQI);
    break;
  case RISCV::QC_BGEI:
    Cond[0].setImm(RISCV::QC_BLTI);
    break;
  case RISCV::QC_BLTI:
    Cond[0].setImm(RISCV::QC_BGEI);
    break;
  case RISCV::QC_BGEUI:
    Cond[0].setImm(RISCV::QC_BLTUI);
    break;
  case RISCV::QC_BLTUI:
    Cond[0].setImm(RISCV::QC_BGEUI);
    break;
  case RISCV::QC_E_BEQI:
    Cond[0].setImm(RISCV::QC_E_BNEI);
    break;
  case RISCV::QC_E_BNEI:
    Cond[0].setImm(RISCV::QC_E_BEQI);
    break;
  case RISCV::QC_E_BGEI:
    Cond[0].setImm(RISCV::QC_E_BLTI);
    break;
  case RISCV::QC_E_BLTI:
    Cond[0].setImm(RISCV::QC_E_BGEI);
    break;
  case RISCV::QC_E_BGEUI:
    Cond[0].setImm(RISCV::QC_E_BLTUI);
    break;
  case RISCV::QC_E_BLTUI:
    Cond[0].setImm(RISCV::QC_E_BGEUI);
    break;
  case RISCV::NDS_BBC:
    Cond[0].setImm(RISCV::NDS_BBS);
    break;
  case RISCV::NDS_BBS:
    Cond[0].setImm(RISCV::NDS_BBC);
    break;
  case RISCV::NDS_BEQC:
    Cond[0].setImm(RISCV::NDS_BNEC);
    break;
  case RISCV::NDS_BNEC:
    Cond[0].setImm(RISCV::NDS_BEQC);
    break;
  }

  return false;
}
1601
1602// Return true if the instruction is a load immediate instruction (i.e.
1603// (ADDI x0, imm) or (BSETI x0, imm)).
1604static bool isLoadImm(const MachineInstr *MI, int64_t &Imm) {
1605 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(i: 1).isReg() &&
1606 MI->getOperand(i: 1).getReg() == RISCV::X0) {
1607 Imm = MI->getOperand(i: 2).getImm();
1608 return true;
1609 }
1610 // BSETI can be used to create power of 2 constants. Only 2048 is currently
1611 // interesting because it is 1 more than the maximum ADDI constant.
1612 if (MI->getOpcode() == RISCV::BSETI && MI->getOperand(i: 1).isReg() &&
1613 MI->getOperand(i: 1).getReg() == RISCV::X0 &&
1614 MI->getOperand(i: 2).getImm() == 11) {
1615 Imm = 2048;
1616 return true;
1617 }
1618 return false;
1619}
1620
1621bool RISCVInstrInfo::isFromLoadImm(const MachineRegisterInfo &MRI,
1622 const MachineOperand &Op, int64_t &Imm) {
1623 // Either a load from immediate instruction or X0.
1624 if (!Op.isReg())
1625 return false;
1626
1627 Register Reg = Op.getReg();
1628 if (Reg == RISCV::X0) {
1629 Imm = 0;
1630 return true;
1631 }
1632 return Reg.isVirtual() && isLoadImm(MI: MRI.getVRegDef(Reg), Imm);
1633}
1634
/// Simplify a conditional branch whose operands are known constants: either
/// constant-fold it into a canonical beqz/bnez on x0, or (for inequalities)
/// retarget one side to an adjacent constant already materialized nearby so
/// the branch can use the inverted condition. Returns true if MI was
/// replaced.
bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsSigned = false;
  bool IsEquality = false;
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::BEQ:
  case RISCV::BNE:
    IsEquality = true;
    break;
  case RISCV::BGE:
  case RISCV::BLT:
    IsSigned = true;
    break;
  case RISCV::BGEU:
  case RISCV::BLTU:
    break;
  }

  MachineBasicBlock *MBB = MI.getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineOperand &LHS = MI.getOperand(i: 0);
  const MachineOperand &RHS = MI.getOperand(i: 1);
  MachineBasicBlock *TBB = MI.getOperand(i: 2).getMBB();

  RISCVCC::CondCode CC = getCondFromBranchOpc(Opc: MI.getOpcode());
  assert(CC != RISCVCC::COND_INVALID);

  // Canonicalize conditional branches which can be constant folded into
  // beqz or bnez. We can't modify the CFG here.
  int64_t C0, C1;
  if (isFromLoadImm(MRI, Op: LHS, Imm&: C0) && isFromLoadImm(MRI, Op: RHS, Imm&: C1)) {
    // x0 == x0 is always true, so a taken fold becomes BEQ x0,x0 and a
    // not-taken fold becomes BNE x0,x0 (never taken).
    unsigned NewOpc = evaluateCondBranch(CC, C0, C1) ? RISCV::BEQ : RISCV::BNE;
    // Build the new branch and remove the old one.
    BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
        .addReg(RegNo: RISCV::X0)
        .addReg(RegNo: RISCV::X0)
        .addMBB(MBB: TBB);
    MI.eraseFromParent();
    return true;
  }

  if (IsEquality)
    return false;

  // For two constants C0 and C1 from
  // ```
  // li Y, C0
  // li Z, C1
  // ```
  // 1. if C1 = C0 + 1
  // we can turn:
  //  (a) blt Y, X -> bge X, Z
  //  (b) bge Y, X -> blt X, Z
  //
  // 2. if C1 = C0 - 1
  // we can turn:
  //  (a) blt X, Y -> bge Z, X
  //  (b) bge X, Y -> blt Z, X
  //
  // To make sure this optimization is really beneficial, we only
  // optimize for cases where Y had only one use (i.e. only used by the branch).
  // Try to find the register for constant Z; return
  // invalid register otherwise.
  auto searchConst = [&](int64_t C1) -> Register {
    // Scan backwards from the branch for a virtual-register li of C1.
    MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
    auto DefC1 = std::find_if(first: ++II, last: E, pred: [&](const MachineInstr &I) -> bool {
      int64_t Imm;
      return isLoadImm(MI: &I, Imm) && Imm == C1 &&
             I.getOperand(i: 0).getReg().isVirtual();
    });
    if (DefC1 != E)
      return DefC1->getOperand(i: 0).getReg();

    return Register();
  };

  // Swapping operands requires the inverse condition to preserve semantics.
  unsigned NewOpc = RISCVCC::getBrCond(CC: getInverseBranchCondition(CC));

  // Might be case 1.
  // Don't change 0 to 1 since we can use x0.
  // For unsigned cases changing -1U to 0 would be incorrect.
  // The incorrect case for signed would be INT_MAX, but isFromLoadImm can't
  // return that.
  if (isFromLoadImm(MRI, Op: LHS, Imm&: C0) && C0 != 0 && LHS.getReg().isVirtual() &&
      MRI.hasOneUse(RegNo: LHS.getReg()) && (IsSigned || C0 != -1)) {
    assert((isInt<12>(C0) || C0 == 2048) && "Unexpected immediate");
    if (Register RegZ = searchConst(C0 + 1)) {
      BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
          .add(MO: RHS)
          .addReg(RegNo: RegZ)
          .addMBB(MBB: TBB);
      // We might extend the live range of Z, clear its kill flag to
      // account for this.
      MRI.clearKillFlags(Reg: RegZ);
      MI.eraseFromParent();
      return true;
    }
  }

  // Might be case 2.
  // For signed cases we don't want to change 0 since we can use x0.
  // For unsigned cases changing 0 to -1U would be incorrect.
  // The incorrect case for signed would be INT_MIN, but isFromLoadImm can't
  // return that.
  if (isFromLoadImm(MRI, Op: RHS, Imm&: C0) && C0 != 0 && RHS.getReg().isVirtual() &&
      MRI.hasOneUse(RegNo: RHS.getReg())) {
    assert((isInt<12>(C0) || C0 == 2048) && "Unexpected immediate");
    if (Register RegZ = searchConst(C0 - 1)) {
      BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
          .addReg(RegNo: RegZ)
          .add(MO: LHS)
          .addMBB(MBB: TBB);
      // We might extend the live range of Z, clear its kill flag to
      // account for this.
      MRI.clearKillFlags(Reg: RegZ);
      MI.eraseFromParent();
      return true;
    }
  }

  return false;
}
1759
1760MachineBasicBlock *
1761RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
1762 assert(MI.getDesc().isBranch() && "Unexpected opcode!");
1763 // The branch target is always the last operand.
1764 int NumOp = MI.getNumExplicitOperands();
1765 return MI.getOperand(i: NumOp - 1).getMBB();
1766}
1767
/// Return true if \p BrOffset is representable by branch opcode \p BranchOp.
bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                           int64_t BrOffset) const {
  unsigned XLen = STI.getXLen();
  // Ideally we could determine the supported branch offset from the
  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
  // PseudoBR.
  switch (BranchOp) {
  default:
    llvm_unreachable("Unexpected opcode!");
  // Andes branches: 11-bit signed offset.
  case RISCV::NDS_BBC:
  case RISCV::NDS_BBS:
  case RISCV::NDS_BEQC:
  case RISCV::NDS_BNEC:
    return isInt<11>(x: BrOffset);
  // Conditional branches (base ISA, Zibi, CORE-V, Xqci): 13-bit signed
  // offset.
  case RISCV::BEQ:
  case RISCV::BNE:
  case RISCV::BLT:
  case RISCV::BGE:
  case RISCV::BLTU:
  case RISCV::BGEU:
  case RISCV::BEQI:
  case RISCV::BNEI:
  case RISCV::CV_BEQIMM:
  case RISCV::CV_BNEIMM:
  case RISCV::QC_BEQI:
  case RISCV::QC_BNEI:
  case RISCV::QC_BGEI:
  case RISCV::QC_BLTI:
  case RISCV::QC_BLTUI:
  case RISCV::QC_BGEUI:
  case RISCV::QC_E_BEQI:
  case RISCV::QC_E_BNEI:
  case RISCV::QC_E_BGEI:
  case RISCV::QC_E_BLTI:
  case RISCV::QC_E_BLTUI:
  case RISCV::QC_E_BGEUI:
    return isInt<13>(x: BrOffset);
  // Direct jumps: 21-bit signed offset.
  case RISCV::JAL:
  case RISCV::PseudoBR:
    return isInt<21>(x: BrOffset);
  // AUIPC+JALR pair: 32-bit range, accounting for the lo12 rounding bias.
  case RISCV::PseudoJump:
    return isInt<32>(x: SignExtend64(X: BrOffset + 0x800, B: XLen));
  }
}
1812
1813// If the operation has a predicated pseudo instruction, return the pseudo
1814// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1815// TODO: Support more operations.
1816unsigned getPredicatedOpcode(unsigned Opcode) {
1817 // clang-format off
1818 switch (Opcode) {
1819 case RISCV::ADD: return RISCV::PseudoCCADD;
1820 case RISCV::SUB: return RISCV::PseudoCCSUB;
1821 case RISCV::SLL: return RISCV::PseudoCCSLL;
1822 case RISCV::SRL: return RISCV::PseudoCCSRL;
1823 case RISCV::SRA: return RISCV::PseudoCCSRA;
1824 case RISCV::AND: return RISCV::PseudoCCAND;
1825 case RISCV::OR: return RISCV::PseudoCCOR;
1826 case RISCV::XOR: return RISCV::PseudoCCXOR;
1827 case RISCV::MAX: return RISCV::PseudoCCMAX;
1828 case RISCV::MAXU: return RISCV::PseudoCCMAXU;
1829 case RISCV::MIN: return RISCV::PseudoCCMIN;
1830 case RISCV::MINU: return RISCV::PseudoCCMINU;
1831 case RISCV::MUL: return RISCV::PseudoCCMUL;
1832 case RISCV::LUI: return RISCV::PseudoCCLUI;
1833 case RISCV::QC_LI: return RISCV::PseudoCCQC_LI;
1834 case RISCV::QC_E_LI: return RISCV::PseudoCCQC_E_LI;
1835
1836 case RISCV::ADDI: return RISCV::PseudoCCADDI;
1837 case RISCV::SLLI: return RISCV::PseudoCCSLLI;
1838 case RISCV::SRLI: return RISCV::PseudoCCSRLI;
1839 case RISCV::SRAI: return RISCV::PseudoCCSRAI;
1840 case RISCV::ANDI: return RISCV::PseudoCCANDI;
1841 case RISCV::ORI: return RISCV::PseudoCCORI;
1842 case RISCV::XORI: return RISCV::PseudoCCXORI;
1843
1844 case RISCV::ADDW: return RISCV::PseudoCCADDW;
1845 case RISCV::SUBW: return RISCV::PseudoCCSUBW;
1846 case RISCV::SLLW: return RISCV::PseudoCCSLLW;
1847 case RISCV::SRLW: return RISCV::PseudoCCSRLW;
1848 case RISCV::SRAW: return RISCV::PseudoCCSRAW;
1849
1850 case RISCV::ADDIW: return RISCV::PseudoCCADDIW;
1851 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW;
1852 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW;
1853 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW;
1854
1855 case RISCV::ANDN: return RISCV::PseudoCCANDN;
1856 case RISCV::ORN: return RISCV::PseudoCCORN;
1857 case RISCV::XNOR: return RISCV::PseudoCCXNOR;
1858
1859 case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS;
1860 case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ;
1861 }
1862 // clang-format on
1863
1864 return RISCV::INSTRUCTION_LIST_END;
1865}
1866
/// Identify instructions that can be folded into a CCMOV instruction, and
/// return the defining instruction.
///
/// \p Reg must be a virtual register with a single non-debug use whose
/// unique definition has a predicated pseudo form (see getPredicatedOpcode)
/// and can be safely sunk to the CCMOV's position. Returns nullptr when any
/// of the folding preconditions fail.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
                                           const MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII,
                                           const RISCVSubtarget &STI) {
  if (!Reg.isVirtual())
    return nullptr;
  // Folding re-homes the computation under a condition; require the value to
  // have exactly one (non-debug) user so no other consumer is affected.
  if (!MRI.hasOneNonDBGUse(RegNo: Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;

  // Integer min/max may only be predicated when the subtarget's
  // short-forward-branch support covers them.
  if (!STI.hasShortForwardBranchIMinMax() &&
      (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN ||
       MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
    return nullptr;

  // Likewise, MUL needs dedicated short-forward-branch support.
  if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL)
    return nullptr;

  // Check if MI can be predicated and folded into the CCMOV.
  if (getPredicatedOpcode(Opcode: MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
    return nullptr;
  // Don't predicate li idiom.
  if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(i: 1).isReg() &&
      MI->getOperand(i: 1).getReg() == RISCV::X0)
    return nullptr;
  // Check if MI has any other defs or physreg uses.
  for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.isDef())
      return nullptr;
    // Allow constant physregs.
    if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(PhysReg: MO.getReg()))
      return nullptr;
  }
  // The definition is sunk to the CCMOV's position; refuse to move it across
  // stores or other instructions that make the motion unsafe.
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(SawStore&: DontMoveAcrossStores))
    return nullptr;
  return MI;
}
1917
/// Fold the single-use definition of one input of a PseudoCCMOVGPR into a
/// predicated pseudo, replacing the select. Operands 4 and 5 of MI are the
/// two select inputs; whichever one is not folded becomes the "false"
/// operand of the new predicated instruction, inverting the condition code
/// when the folded definition feeds operand 4. Returns the new instruction,
/// or nullptr if folding is not possible; on success DefMI is erased (the
/// caller erases MI).
MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                               bool PreferFalse) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  if (!STI.hasShortForwardBranchIALU())
    return nullptr;

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  // Prefer folding the definition of operand 5; fall back to operand 4 with
  // an inverted condition.
  MachineInstr *DefMI =
      canFoldAsPredicatedOp(Reg: MI.getOperand(i: 5).getReg(), MRI, TII: this, STI);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldAsPredicatedOp(Reg: MI.getOperand(i: 4).getReg(), MRI, TII: this, STI);
  if (!DefMI)
    return nullptr;

  // Find new register class to use.
  MachineOperand FalseReg = MI.getOperand(i: Invert ? 5 : 4);
  Register DestReg = MI.getOperand(i: 0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(Reg: FalseReg.getReg());
  if (!MRI.constrainRegClass(Reg: DestReg, RC: PreviousClass))
    return nullptr;

  unsigned PredOpc = getPredicatedOpcode(Opcode: DefMI->getOpcode());
  assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");

  // Create a new predicated version of DefMI.
  MachineInstrBuilder NewMI =
      BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: PredOpc), DestReg);

  // Copy the condition portion.
  NewMI.add(MO: MI.getOperand(i: 1));
  NewMI.add(MO: MI.getOperand(i: 2));

  // Add condition code, inverting if necessary.
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
  if (Invert)
    CC = RISCVCC::getInverseBranchCondition(CC);
  NewMI.addImm(Val: CC);

  // Copy the false register.
  NewMI.add(MO: FalseReg);

  // Copy all the DefMI operands (skipping its destination, operand 0).
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
    NewMI.add(MO: DefMI->getOperand(i));

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(Ptr: NewMI);
  SeenMIs.erase(Ptr: DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}
1983
/// Return the encoded size of \p MI in bytes, accounting for inline asm,
/// non-temporal hint prefixes, bundles, compressible instructions, and the
/// various patchable/stackmap pseudos.
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  // Meta instructions (CFI, debug values, ...) emit no bytes.
  if (MI.isMetaInstruction())
    return 0;

  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::INLINEASM ||
      Opcode == TargetOpcode::INLINEASM_BR) {
    const MachineFunction &MF = *MI.getParent()->getParent();
    return getInlineAsmLength(Str: MI.getOperand(i: 0).getSymbolName(),
                              MAI: *MF.getTarget().getMCAsmInfo());
  }

  // Non-temporal memory accesses are prefixed with an ntl.all hint, which
  // adds to the emitted size.
  if (!MI.memoperands_empty()) {
    MachineMemOperand *MMO = *(MI.memoperands_begin());
    if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) {
      if (STI.hasStdExtZca()) {
        if (isCompressibleInst(MI, STI))
          return 4; // c.ntl.all + c.load/c.store
        return 6; // c.ntl.all + load/store
      }
      return 8; // ntl.all + load/store
    }
  }

  if (Opcode == TargetOpcode::BUNDLE)
    return getInstBundleLength(MI);

  // Compressible instructions take 2 bytes; this needs a parent function for
  // the subtarget-dependent check.
  if (MI.getParent() && MI.getParent()->getParent()) {
    if (isCompressibleInst(MI, STI))
      return 2;
  }

  switch (Opcode) {
  case RISCV::PseudoMV_FPR16INX:
  case RISCV::PseudoMV_FPR32INX:
    // MV is always compressible to either c.mv or c.li rd, 0.
    return STI.hasStdExtZca() ? 2 : 4;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    return StackMapOpers(&MI).getNumPatchBytes();
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    return PatchPointOpers(&MI).getNumPatchBytes();
  case TargetOpcode::STATEPOINT: {
    // The size of the statepoint intrinsic is the number of bytes requested
    unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    // No patch bytes means at most a PseudoCall is emitted
    return std::max(a: NumBytes, b: 8U);
  }
  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
  case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
  case TargetOpcode::PATCHABLE_TAIL_CALL: {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const Function &F = MF.getFunction();
    if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER &&
        F.hasFnAttribute(Kind: "patchable-function-entry")) {
      unsigned Num;
      // Fall back to the generic size if the attribute value fails to parse.
      if (F.getFnAttribute(Kind: "patchable-function-entry")
              .getValueAsString()
              .getAsInteger(Radix: 10, Result&: Num))
        return get(Opcode).getSize();

      // Number of C.NOP or NOP
      return (STI.hasStdExtZca() ? 2 : 4) * Num;
    }
    // XRay uses C.JAL + 21 or 33 C.NOP for each sled in RV32 and RV64,
    // respectively.
    return STI.is64Bit() ? 68 : 44;
  }
  default:
    return get(Opcode).getSize();
  }
}
2058
2059unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
2060 unsigned Size = 0;
2061 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
2062 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
2063 while (++I != E && I->isInsideBundle()) {
2064 assert(!I->isBundle() && "No nested bundle!");
2065 Size += getInstSizeInBytes(MI: *I);
2066 }
2067 return Size;
2068}
2069
2070bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
2071 const unsigned Opcode = MI.getOpcode();
2072 switch (Opcode) {
2073 default:
2074 break;
2075 case RISCV::FSGNJ_D:
2076 case RISCV::FSGNJ_S:
2077 case RISCV::FSGNJ_H:
2078 case RISCV::FSGNJ_D_INX:
2079 case RISCV::FSGNJ_D_IN32X:
2080 case RISCV::FSGNJ_S_INX:
2081 case RISCV::FSGNJ_H_INX:
2082 // The canonical floating-point move is fsgnj rd, rs, rs.
2083 return MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
2084 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg();
2085 case RISCV::ADDI:
2086 case RISCV::ORI:
2087 case RISCV::XORI:
2088 return (MI.getOperand(i: 1).isReg() &&
2089 MI.getOperand(i: 1).getReg() == RISCV::X0) ||
2090 (MI.getOperand(i: 2).isImm() && MI.getOperand(i: 2).getImm() == 0);
2091 }
2092 return MI.isAsCheapAsAMove();
2093}
2094
/// Recognize instructions that are effectively register copies and return
/// the (destination, source) operand pair, or std::nullopt if \p MI is not a
/// copy-like instruction. Covers x0-based identities of ADD/OR/XOR/SUB and
/// the shNadd family, ADDI with a zero immediate, and fsgnj rd, rs, rs.
std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  if (MI.isMoveReg())
    return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
  switch (MI.getOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::OR:
  case RISCV::XOR:
    // op rd, x0, rs  or  op rd, rs, x0  copies rs.
    if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 1).getReg() == RISCV::X0 &&
        MI.getOperand(i: 2).isReg())
      return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
    if (MI.getOperand(i: 2).isReg() && MI.getOperand(i: 2).getReg() == RISCV::X0 &&
        MI.getOperand(i: 1).isReg())
      return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
    break;
  case RISCV::ADDI:
    // addi rd, rs, 0 copies rs.
    // Operand 1 can be a frameindex but callers expect registers
    if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isImm() &&
        MI.getOperand(i: 2).getImm() == 0)
      return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
    break;
  case RISCV::SUB:
    // sub rd, rs, x0 copies rs.
    if (MI.getOperand(i: 2).isReg() && MI.getOperand(i: 2).getReg() == RISCV::X0 &&
        MI.getOperand(i: 1).isReg())
      return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
    break;
  case RISCV::SH1ADD:
  case RISCV::SH1ADD_UW:
  case RISCV::SH2ADD:
  case RISCV::SH2ADD_UW:
  case RISCV::SH3ADD:
  case RISCV::SH3ADD_UW:
    // shNadd rd, x0, rs copies rs (the shifted addend is zero).
    if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 1).getReg() == RISCV::X0 &&
        MI.getOperand(i: 2).isReg())
      return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
        MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg())
      return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
    break;
  }
  return std::nullopt;
}
2148
2149MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
2150 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
2151 // The option is unused. Choose Local strategy only for in-order cores. When
2152 // scheduling model is unspecified, use MinInstrCount strategy as more
2153 // generic one.
2154 const auto &SchedModel = STI.getSchedModel();
2155 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
2156 ? MachineTraceStrategy::TS_MinInstrCount
2157 : MachineTraceStrategy::TS_Local;
2158 }
2159 // The strategy was forced by the option.
2160 return ForceMachineCombinerStrategy;
2161}
2162
/// After the machine combiner builds replacement instructions, propagate the
/// rounding-mode (frm) operand from \p Root onto each new instruction that
/// still needs one, adding an implicit FRM use when the mode is DYN.
void RISCVInstrInfo::finalizeInsInstrs(
    MachineInstr &Root, unsigned &Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  int16_t FrmOpIdx =
      RISCV::getNamedOperandIdx(Opcode: Root.getOpcode(), Name: RISCV::OpName::frm);
  // Root has no frm operand: none of the new instructions may require one.
  if (FrmOpIdx < 0) {
    assert(all_of(InsInstrs,
                  [](MachineInstr *MI) {
                    return RISCV::getNamedOperandIdx(MI->getOpcode(),
                                                     RISCV::OpName::frm) < 0;
                  }) &&
           "New instructions require FRM whereas the old one does not have it");
    return;
  }

  const MachineOperand &FRM = Root.getOperand(i: FrmOpIdx);
  MachineFunction &MF = *Root.getMF();

  for (auto *NewMI : InsInstrs) {
    // We'd already added the FRM operand.
    if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
            Opcode: NewMI->getOpcode(), Name: RISCV::OpName::frm)) != NewMI->getNumOperands())
      continue;
    MachineInstrBuilder MIB(MF, NewMI);
    MIB.add(MO: FRM);
    // Dynamic rounding reads the FRM CSR, so model that dependency.
    if (FRM.getImm() == RISCVFPRndMode::DYN)
      MIB.addUse(RegNo: RISCV::FRM, Flags: RegState::Implicit);
  }
}
2192
2193static bool isFADD(unsigned Opc) {
2194 switch (Opc) {
2195 default:
2196 return false;
2197 case RISCV::FADD_H:
2198 case RISCV::FADD_S:
2199 case RISCV::FADD_D:
2200 return true;
2201 }
2202}
2203
2204static bool isFSUB(unsigned Opc) {
2205 switch (Opc) {
2206 default:
2207 return false;
2208 case RISCV::FSUB_H:
2209 case RISCV::FSUB_S:
2210 case RISCV::FSUB_D:
2211 return true;
2212 }
2213}
2214
2215static bool isFMUL(unsigned Opc) {
2216 switch (Opc) {
2217 default:
2218 return false;
2219 case RISCV::FMUL_H:
2220 case RISCV::FMUL_S:
2221 case RISCV::FMUL_D:
2222 return true;
2223 }
2224}
2225
/// Return true if \p Inst is an RVV pseudo the machine combiner may treat as
/// associative and commutative: VADD_VV / VMUL_VV across all LMULs, masked or
/// unmasked. With \p Invert set, the check is applied to the inverse opcode
/// (see getInverseOpcode) instead.
bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst,
                                                       bool Invert) const {
#define OPCODE_LMUL_CASE(OPC)                                                  \
  case RISCV::OPC##_M1:                                                        \
  case RISCV::OPC##_M2:                                                        \
  case RISCV::OPC##_M4:                                                        \
  case RISCV::OPC##_M8:                                                        \
  case RISCV::OPC##_MF2:                                                       \
  case RISCV::OPC##_MF4:                                                       \
  case RISCV::OPC##_MF8

#define OPCODE_LMUL_MASK_CASE(OPC)                                             \
  case RISCV::OPC##_M1_MASK:                                                   \
  case RISCV::OPC##_M2_MASK:                                                   \
  case RISCV::OPC##_M4_MASK:                                                   \
  case RISCV::OPC##_M8_MASK:                                                   \
  case RISCV::OPC##_MF2_MASK:                                                  \
  case RISCV::OPC##_MF4_MASK:                                                  \
  case RISCV::OPC##_MF8_MASK

  unsigned Opcode = Inst.getOpcode();
  if (Invert) {
    // No inverse means the inverted form cannot be associative/commutative.
    if (auto InvOpcode = getInverseOpcode(Opcode))
      Opcode = *InvOpcode;
    else
      return false;
  }

  // clang-format off
  switch (Opcode) {
  default:
    return false;
  OPCODE_LMUL_CASE(PseudoVADD_VV):
  OPCODE_LMUL_MASK_CASE(PseudoVADD_VV):
  OPCODE_LMUL_CASE(PseudoVMUL_VV):
  OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV):
    return true;
  }
  // clang-format on

#undef OPCODE_LMUL_MASK_CASE
#undef OPCODE_LMUL_CASE
}
2269
/// Return true if two RVV pseudos \p Root and \p Prev may be reassociated
/// with each other: equal-or-inverse opcodes, matching passthru, SEW, mask
/// (same V0 definition), vector policy, VL, and rounding-mode operands.
/// \p Prev is expected to appear before \p Root in the same basic block.
bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root,
                                             const MachineInstr &Prev) const {
  if (!areOpcodesEqualOrInverse(Opcode1: Root.getOpcode(), Opcode2: Prev.getOpcode()))
    return false;

  assert(Root.getMF() == Prev.getMF());
  const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo();
  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();

  // Make sure vtype operands are also the same.
  const MCInstrDesc &Desc = get(Opcode: Root.getOpcode());
  const uint64_t TSFlags = Desc.TSFlags;

  auto checkImmOperand = [&](unsigned OpIdx) {
    return Root.getOperand(i: OpIdx).getImm() == Prev.getOperand(i: OpIdx).getImm();
  };

  auto checkRegOperand = [&](unsigned OpIdx) {
    return Root.getOperand(i: OpIdx).getReg() == Prev.getOperand(i: OpIdx).getReg();
  };

  // PassThru
  // TODO: Potentially we can loosen the condition to consider Root to be
  // associable with Prev if Root has NoReg as passthru. In which case we
  // also need to loosen the condition on vector policies between these.
  if (!checkRegOperand(1))
    return false;

  // SEW
  if (RISCVII::hasSEWOp(TSFlags) &&
      !checkImmOperand(RISCVII::getSEWOpNum(Desc)))
    return false;

  // Mask: walk backwards from Root looking for V0 definitions. Root and Prev
  // must either share the same V0 definition, or their two V0 definitions
  // must copy from the same virtual register.
  if (RISCVII::usesMaskPolicy(TSFlags)) {
    const MachineBasicBlock *MBB = Root.getParent();
    const MachineBasicBlock::const_reverse_iterator It1(&Root);
    const MachineBasicBlock::const_reverse_iterator It2(&Prev);
    Register MI1VReg;

    bool SeenMI2 = false;
    for (auto End = MBB->rend(), It = It1; It != End; ++It) {
      if (It == It2) {
        SeenMI2 = true;
        if (!MI1VReg.isValid())
          // There is no V0 def between Root and Prev; they're sharing the
          // same V0.
          break;
      }

      if (It->modifiesRegister(Reg: RISCV::V0, TRI)) {
        Register SrcReg = It->getOperand(i: 1).getReg();
        // If it's not VReg it'll be more difficult to track its defs, so
        // bailing out here just to be safe.
        if (!SrcReg.isVirtual())
          return false;

        if (!MI1VReg.isValid()) {
          // This is the V0 def for Root.
          MI1VReg = SrcReg;
          continue;
        }

        // Some random mask updates.
        if (!SeenMI2)
          continue;

        // This is the V0 def for Prev; check if it's the same as that of
        // Root.
        if (MI1VReg != SrcReg)
          return false;
        else
          break;
      }
    }

    // If we haven't encountered Prev, it's likely that this function was
    // called in a wrong way (e.g. Root is before Prev).
    assert(SeenMI2 && "Prev is expected to appear before Root");
  }

  // Tail / Mask policies
  if (RISCVII::hasVecPolicyOp(TSFlags) &&
      !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))
    return false;

  // VL: may be either a register or an immediate operand; both instructions
  // must agree on kind and value.
  if (RISCVII::hasVLOp(TSFlags)) {
    unsigned OpIdx = RISCVII::getVLOpNum(Desc);
    const MachineOperand &Op1 = Root.getOperand(i: OpIdx);
    const MachineOperand &Op2 = Prev.getOperand(i: OpIdx);
    if (Op1.getType() != Op2.getType())
      return false;
    switch (Op1.getType()) {
    case MachineOperand::MO_Register:
      if (Op1.getReg() != Op2.getReg())
        return false;
      break;
    case MachineOperand::MO_Immediate:
      if (Op1.getImm() != Op2.getImm())
        return false;
      break;
    default:
      llvm_unreachable("Unrecognized VL operand type");
    }
  }

  // Rounding modes
  if (int Idx = RISCVII::getFRMOpNum(Desc); Idx >= 0 && !checkImmOperand(Idx))
    return false;
  if (int Idx = RISCVII::getVXRMOpNum(Desc); Idx >= 0 && !checkImmOperand(Idx))
    return false;

  return true;
}
2385
// Most of our RVV pseudos have passthru operand, so the real operands
// start from index = 2.
//
// Return true if either source of \p Inst (operand 2 or 3) is defined by a
// reassociable RVV instruction with a single use; set \p Commuted when only
// the second source qualifies.
bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,
                                                  bool &Commuted) const {
  const MachineBasicBlock *MBB = Inst.getParent();
  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) &&
         "Expect the present of passthrough operand.");
  MachineInstr *MI1 = MRI.getUniqueVRegDef(Reg: Inst.getOperand(i: 2).getReg());
  MachineInstr *MI2 = MRI.getUniqueVRegDef(Reg: Inst.getOperand(i: 3).getReg());

  // If only one operand has the same or inverse opcode and it's the second
  // source operand, the operands must be commuted.
  Commuted = !areRVVInstsReassociable(Root: Inst, Prev: *MI1) &&
             areRVVInstsReassociable(Root: Inst, Prev: *MI2);
  if (Commuted)
    std::swap(a&: MI1, b&: MI2);

  // The sibling must itself be associative/commutative (directly or via its
  // inverse), have reassociable operands, and feed only Inst.
  return areRVVInstsReassociable(Root: Inst, Prev: *MI1) &&
         (isVectorAssociativeAndCommutative(Inst: *MI1) ||
          isVectorAssociativeAndCommutative(Inst: *MI1, /* Invert */ true)) &&
         hasReassociableOperands(Inst: *MI1, MBB) &&
         MRI.hasOneNonDBGUse(RegNo: MI1->getOperand(i: 0).getReg());
}
2410
/// For RVV associative/commutative pseudos, check the two real source
/// operands (indices 2 and 3, after the passthru); both must be virtual
/// registers with unique definitions and at least one defined in \p MBB.
/// Non-vector instructions defer to the generic TargetInstrInfo check.
bool RISCVInstrInfo::hasReassociableOperands(
    const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
  if (!isVectorAssociativeAndCommutative(Inst) &&
      !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
    return TargetInstrInfo::hasReassociableOperands(Inst, MBB);

  const MachineOperand &Op1 = Inst.getOperand(i: 2);
  const MachineOperand &Op2 = Inst.getOperand(i: 3);
  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  // We need virtual register definitions for the operands that we will
  // reassociate.
  MachineInstr *MI1 = nullptr;
  MachineInstr *MI2 = nullptr;
  if (Op1.isReg() && Op1.getReg().isVirtual())
    MI1 = MRI.getUniqueVRegDef(Reg: Op1.getReg());
  if (Op2.isReg() && Op2.getReg().isVirtual())
    MI2 = MRI.getUniqueVRegDef(Reg: Op2.getReg());

  // And at least one operand must be defined in MBB.
  return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
}
2433
2434void RISCVInstrInfo::getReassociateOperandIndices(
2435 const MachineInstr &Root, unsigned Pattern,
2436 std::array<unsigned, 5> &OperandIndices) const {
2437 TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices);
2438 if (RISCV::getRVVMCOpcode(RVVPseudoOpcode: Root.getOpcode())) {
2439 // Skip the passthrough operand, so increment all indices by one.
2440 for (unsigned I = 0; I < 5; ++I)
2441 ++OperandIndices[I];
2442 }
2443}
2444
/// Return true if \p Inst has a sibling instruction it may be reassociated
/// with. Vector pseudos take the RVV-specific path; scalar instructions use
/// the generic check plus a constraint that both instructions agree on the
/// rounding-mode (frm) operand when present.
bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
                                            bool &Commuted) const {
  if (isVectorAssociativeAndCommutative(Inst) ||
      isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
    return hasReassociableVectorSibling(Inst, Commuted);

  if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
    return false;

  // The generic check sets Commuted; the sibling is the matching source def.
  const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
  unsigned OperandIdx = Commuted ? 2 : 1;
  const MachineInstr &Sibling =
      *MRI.getVRegDef(Reg: Inst.getOperand(i: OperandIdx).getReg());

  int16_t InstFrmOpIdx =
      RISCV::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: RISCV::OpName::frm);
  int16_t SiblingFrmOpIdx =
      RISCV::getNamedOperandIdx(Opcode: Sibling.getOpcode(), Name: RISCV::OpName::frm);

  // Either neither instruction has an frm operand, or both must round the
  // same way for reassociation to preserve results.
  return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
         RISCV::hasEqualFRM(MI1: Inst, MI2: Sibling);
}
2467
/// Return true if \p Inst (or, with \p Invert, its inverse opcode) is
/// associative and commutative for the machine combiner. FP add/mul require
/// the reassoc and nsz fast-math flags; a fixed set of integer and FP
/// min/max opcodes always qualify.
bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                                 bool Invert) const {
  if (isVectorAssociativeAndCommutative(Inst, Invert))
    return true;

  unsigned Opc = Inst.getOpcode();
  if (Invert) {
    auto InverseOpcode = getInverseOpcode(Opcode: Opc);
    if (!InverseOpcode)
      return false;
    Opc = *InverseOpcode;
  }

  // FP reassociation is only legal when the instruction carries the
  // appropriate fast-math flags.
  if (isFADD(Opc) || isFMUL(Opc))
    return Inst.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
           Inst.getFlag(Flag: MachineInstr::MIFlag::FmNsz);

  switch (Opc) {
  default:
    return false;
  case RISCV::ADD:
  case RISCV::ADDW:
  case RISCV::AND:
  case RISCV::OR:
  case RISCV::XOR:
  // From RISC-V ISA spec, if both the high and low bits of the same product
  // are required, then the recommended code sequence is:
  //
  // MULH[[S]U] rdh, rs1, rs2
  // MUL        rdl, rs1, rs2
  // (source register specifiers must be in same order and rdh cannot be the
  // same as rs1 or rs2)
  //
  // Microarchitectures can then fuse these into a single multiply operation
  // instead of performing two separate multiplies.
  // MachineCombiner may reassociate MUL operands and lose the fusion
  // opportunity.
  case RISCV::MUL:
  case RISCV::MULW:
  case RISCV::MIN:
  case RISCV::MINU:
  case RISCV::MAX:
  case RISCV::MAXU:
  case RISCV::FMIN_H:
  case RISCV::FMIN_S:
  case RISCV::FMIN_D:
  case RISCV::FMAX_H:
  case RISCV::FMAX_S:
  case RISCV::FMAX_D:
    return true;
  }

  return false;
}
2522
/// Return the opcode that computes the inverse operation of \p Opcode
/// (add <-> sub, for scalar FP, integer, and the VADD/VSUB RVV pseudos across
/// all LMULs), or std::nullopt if there is no inverse.
std::optional<unsigned>
RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
#define RVV_OPC_LMUL_CASE(OPC, INV)                                            \
  case RISCV::OPC##_M1:                                                        \
    return RISCV::INV##_M1;                                                    \
  case RISCV::OPC##_M2:                                                        \
    return RISCV::INV##_M2;                                                    \
  case RISCV::OPC##_M4:                                                        \
    return RISCV::INV##_M4;                                                    \
  case RISCV::OPC##_M8:                                                        \
    return RISCV::INV##_M8;                                                    \
  case RISCV::OPC##_MF2:                                                       \
    return RISCV::INV##_MF2;                                                   \
  case RISCV::OPC##_MF4:                                                       \
    return RISCV::INV##_MF4;                                                   \
  case RISCV::OPC##_MF8:                                                       \
    return RISCV::INV##_MF8

#define RVV_OPC_LMUL_MASK_CASE(OPC, INV)                                       \
  case RISCV::OPC##_M1_MASK:                                                   \
    return RISCV::INV##_M1_MASK;                                               \
  case RISCV::OPC##_M2_MASK:                                                   \
    return RISCV::INV##_M2_MASK;                                               \
  case RISCV::OPC##_M4_MASK:                                                   \
    return RISCV::INV##_M4_MASK;                                               \
  case RISCV::OPC##_M8_MASK:                                                   \
    return RISCV::INV##_M8_MASK;                                               \
  case RISCV::OPC##_MF2_MASK:                                                  \
    return RISCV::INV##_MF2_MASK;                                              \
  case RISCV::OPC##_MF4_MASK:                                                  \
    return RISCV::INV##_MF4_MASK;                                              \
  case RISCV::OPC##_MF8_MASK:                                                  \
    return RISCV::INV##_MF8_MASK

  switch (Opcode) {
  default:
    return std::nullopt;
  case RISCV::FADD_H:
    return RISCV::FSUB_H;
  case RISCV::FADD_S:
    return RISCV::FSUB_S;
  case RISCV::FADD_D:
    return RISCV::FSUB_D;
  case RISCV::FSUB_H:
    return RISCV::FADD_H;
  case RISCV::FSUB_S:
    return RISCV::FADD_S;
  case RISCV::FSUB_D:
    return RISCV::FADD_D;
  case RISCV::ADD:
    return RISCV::SUB;
  case RISCV::SUB:
    return RISCV::ADD;
  case RISCV::ADDW:
    return RISCV::SUBW;
  case RISCV::SUBW:
    return RISCV::ADDW;
  // clang-format off
  RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV);
  RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV);
  RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV);
  RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV);
  // clang-format on
  }

#undef RVV_OPC_LMUL_MASK_CASE
#undef RVV_OPC_LMUL_CASE
}
2591
/// Return true if the value of \p MO is produced by an FMUL that can be
/// fused into \p Root: both instructions must carry the contract fast-math
/// flag, live in the same basic block, and agree on the rounding mode.
static bool canCombineFPFusedMultiply(const MachineInstr &Root,
                                      const MachineOperand &MO,
                                      bool DoRegPressureReduce) {
  if (!MO.isReg() || !MO.getReg().isVirtual())
    return false;
  const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
  MachineInstr *MI = MRI.getVRegDef(Reg: MO.getReg());
  if (!MI || !isFMUL(Opc: MI->getOpcode()))
    return false;

  // Fusion is only allowed when both the add/sub and the mul permit
  // contraction.
  if (!Root.getFlag(Flag: MachineInstr::MIFlag::FmContract) ||
      !MI->getFlag(Flag: MachineInstr::MIFlag::FmContract))
    return false;

  // Try combining even if fmul has more than one use as it eliminates
  // dependency between fadd(fsub) and fmul. However, it can extend liveranges
  // for fmul operands, so reject the transformation in register pressure
  // reduction mode.
  if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
    return false;

  // Do not combine instructions from different basic blocks.
  if (Root.getParent() != MI->getParent())
    return false;
  return RISCV::hasEqualFRM(MI1: Root, MI2: *MI);
}
2618
/// Collect fused multiply-add/sub combiner patterns rooted at an FADD/FSUB.
/// An FMUL feeding operand 1 yields FMADD_AX (or FMSUB for FSUB roots); one
/// feeding operand 2 yields FMADD_XA (or FNMSUB). Returns true if any
/// pattern was appended to \p Patterns.
static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
                                       SmallVectorImpl<unsigned> &Patterns,
                                       bool DoRegPressureReduce) {
  unsigned Opc = Root.getOpcode();
  bool IsFAdd = isFADD(Opc);
  if (!IsFAdd && !isFSUB(Opc))
    return false;
  bool Added = false;
  if (canCombineFPFusedMultiply(Root, MO: Root.getOperand(i: 1),
                                DoRegPressureReduce)) {
    Patterns.push_back(Elt: IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
                                : RISCVMachineCombinerPattern::FMSUB);
    Added = true;
  }
  if (canCombineFPFusedMultiply(Root, MO: Root.getOperand(i: 2),
                                DoRegPressureReduce)) {
    Patterns.push_back(Elt: IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
                                : RISCVMachineCombinerPattern::FNMSUB);
    Added = true;
  }
  return Added;
}
2641
/// Entry point for all floating-point combiner patterns; currently only
/// fused multiply-add/sub patterns are supported.
static bool getFPPatterns(MachineInstr &Root,
                          SmallVectorImpl<unsigned> &Patterns,
                          bool DoRegPressureReduce) {
  return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
}
2647
/// Utility routine that checks if \param MO is defined by an
/// \param CombineOpc instruction in the basic block \param MBB.
/// Returns the defining instruction, or nullptr when the definition is
/// missing, has the wrong opcode, lives outside \param MBB, or has more
/// than one non-debug user.
static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
                                      const MachineOperand &MO,
                                      unsigned CombineOpc) {
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const MachineInstr *MI = nullptr;

  if (MO.isReg() && MO.getReg().isVirtual())
    MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
    return nullptr;
  // Must be used only by the user we combine with.
  if (!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
    return nullptr;

  return MI;
}
2667
2668/// Utility routine that checks if \param MO is defined by a SLLI in \param
2669/// MBB that can be combined by splitting across 2 SHXADD instructions. The
2670/// first SHXADD shift amount is given by \param OuterShiftAmt.
2671static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
2672 const MachineOperand &MO,
2673 unsigned OuterShiftAmt) {
2674 const MachineInstr *ShiftMI = canCombine(MBB, MO, CombineOpc: RISCV::SLLI);
2675 if (!ShiftMI)
2676 return false;
2677
2678 unsigned InnerShiftAmt = ShiftMI->getOperand(i: 2).getImm();
2679 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
2680 return false;
2681
2682 return true;
2683}
2684
2685// Returns the shift amount from a SHXADD instruction. Returns 0 if the
2686// instruction is not a SHXADD.
2687static unsigned getSHXADDShiftAmount(unsigned Opc) {
2688 switch (Opc) {
2689 default:
2690 return 0;
2691 case RISCV::SH1ADD:
2692 return 1;
2693 case RISCV::SH2ADD:
2694 return 2;
2695 case RISCV::SH3ADD:
2696 return 3;
2697 }
2698}
2699
2700// Returns the shift amount from a SHXADD.UW instruction. Returns 0 if the
2701// instruction is not a SHXADD.UW.
2702static unsigned getSHXADDUWShiftAmount(unsigned Opc) {
2703 switch (Opc) {
2704 default:
2705 return 0;
2706 case RISCV::SH1ADD_UW:
2707 return 1;
2708 case RISCV::SH2ADD_UW:
2709 return 2;
2710 case RISCV::SH3ADD_UW:
2711 return 3;
2712 }
2713}
2714
// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
// (sh3add (sh2add Y, Z), X).
//
// Root must be a SHxADD whose second source is defined by an ADD; either ADD
// operand may then be a foldable SLLI. Appends one pattern per matching ADD
// operand and returns true if any was found.
static bool getSHXADDPatterns(const MachineInstr &Root,
                              SmallVectorImpl<unsigned> &Patterns) {
  unsigned ShiftAmt = getSHXADDShiftAmount(Opc: Root.getOpcode());
  if (!ShiftAmt)
    return false;

  const MachineBasicBlock &MBB = *Root.getParent();

  const MachineInstr *AddMI = canCombine(MBB, MO: Root.getOperand(i: 2), CombineOpc: RISCV::ADD);
  if (!AddMI)
    return false;

  bool Found = false;
  if (canCombineShiftIntoShXAdd(MBB, MO: AddMI->getOperand(i: 1), OuterShiftAmt: ShiftAmt)) {
    Patterns.push_back(Elt: RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
    Found = true;
  }
  if (canCombineShiftIntoShXAdd(MBB, MO: AddMI->getOperand(i: 2), OuterShiftAmt: ShiftAmt)) {
    Patterns.push_back(Elt: RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
    Found = true;
  }

  return Found;
}
2741
2742CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {
2743 switch (Pattern) {
2744 case RISCVMachineCombinerPattern::FMADD_AX:
2745 case RISCVMachineCombinerPattern::FMADD_XA:
2746 case RISCVMachineCombinerPattern::FMSUB:
2747 case RISCVMachineCombinerPattern::FNMSUB:
2748 return CombinerObjective::MustReduceDepth;
2749 default:
2750 return TargetInstrInfo::getCombinerObjective(Pattern);
2751 }
2752}
2753
2754bool RISCVInstrInfo::getMachineCombinerPatterns(
2755 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
2756 bool DoRegPressureReduce) const {
2757
2758 if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
2759 return true;
2760
2761 if (getSHXADDPatterns(Root, Patterns))
2762 return true;
2763
2764 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
2765 DoRegPressureReduce);
2766}
2767
2768static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
2769 switch (RootOpc) {
2770 default:
2771 llvm_unreachable("Unexpected opcode");
2772 case RISCV::FADD_H:
2773 return RISCV::FMADD_H;
2774 case RISCV::FADD_S:
2775 return RISCV::FMADD_S;
2776 case RISCV::FADD_D:
2777 return RISCV::FMADD_D;
2778 case RISCV::FSUB_H:
2779 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
2780 : RISCV::FNMSUB_H;
2781 case RISCV::FSUB_S:
2782 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
2783 : RISCV::FNMSUB_S;
2784 case RISCV::FSUB_D:
2785 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
2786 : RISCV::FNMSUB_D;
2787 }
2788}
2789
2790static unsigned getAddendOperandIdx(unsigned Pattern) {
2791 switch (Pattern) {
2792 default:
2793 llvm_unreachable("Unexpected pattern");
2794 case RISCVMachineCombinerPattern::FMADD_AX:
2795 case RISCVMachineCombinerPattern::FMSUB:
2796 return 2;
2797 case RISCVMachineCombinerPattern::FMADD_XA:
2798 case RISCVMachineCombinerPattern::FNMSUB:
2799 return 1;
2800 }
2801}
2802
// Rewrite Root (an FADD/FSUB) and Prev (the FMUL feeding it) into a single
// fused multiply-add/sub instruction. The new instruction is appended to
// InsInstrs; the instructions it replaces are appended to DelInstrs.
static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
                                   unsigned Pattern,
                                   SmallVectorImpl<MachineInstr *> &InsInstrs,
                                   SmallVectorImpl<MachineInstr *> &DelInstrs) {
  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  // The multiply operands always come from Prev; which Root operand is the
  // addend depends on the matched pattern.
  MachineOperand &Mul1 = Prev.getOperand(i: 1);
  MachineOperand &Mul2 = Prev.getOperand(i: 2);
  MachineOperand &Dst = Root.getOperand(i: 0);
  MachineOperand &Addend = Root.getOperand(i: getAddendOperandIdx(Pattern));

  Register DstReg = Dst.getReg();
  unsigned FusedOpc = getFPFusedMultiplyOpcode(RootOpc: Root.getOpcode(), Pattern);
  // Keep only the MI flags common to both originals on the fused op.
  uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
  DebugLoc MergedLoc =
      DILocation::getMergedLocation(LocA: Root.getDebugLoc(), LocB: Prev.getDebugLoc());

  // Snapshot kill states before they are cleared below.
  bool Mul1IsKill = Mul1.isKill();
  bool Mul2IsKill = Mul2.isKill();
  bool AddendIsKill = Addend.isKill();

  // We need to clear kill flags since we may be extending the live range past
  // a kill. If the mul had kill flags, we can preserve those since we know
  // where the previous range stopped.
  MRI.clearKillFlags(Reg: Mul1.getReg());
  MRI.clearKillFlags(Reg: Mul2.getReg());

  MachineInstrBuilder MIB =
      BuildMI(MF&: *MF, MIMD: MergedLoc, MCID: TII->get(Opcode: FusedOpc), DestReg: DstReg)
          .addReg(RegNo: Mul1.getReg(), Flags: getKillRegState(B: Mul1IsKill))
          .addReg(RegNo: Mul2.getReg(), Flags: getKillRegState(B: Mul2IsKill))
          .addReg(RegNo: Addend.getReg(), Flags: getKillRegState(B: AddendIsKill))
          .setMIFlags(IntersectedFlags);

  InsInstrs.push_back(Elt: MIB);
  // Only delete the multiply when Root was its sole non-debug user; other
  // users still need Prev's result.
  if (MRI.hasOneNonDBGUse(RegNo: Prev.getOperand(i: 0).getReg()))
    DelInstrs.push_back(Elt: &Prev);
  DelInstrs.push_back(Elt: &Root);
}
2844
// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
// shXadd instructions. The outer shXadd keeps its original opcode.
static void
genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
                  SmallVectorImpl<MachineInstr *> &InsInstrs,
                  SmallVectorImpl<MachineInstr *> &DelInstrs,
                  DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  unsigned OuterShiftAmt = getSHXADDShiftAmount(Opc: Root.getOpcode());
  assert(OuterShiftAmt != 0 && "Unexpected opcode");

  // Root's second source is the ADD; AddOpIdx (1 or 2) selects which ADD
  // source holds the SLLI. Both were validated by getSHXADDPatterns.
  MachineInstr *AddMI = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 2).getReg());
  MachineInstr *ShiftMI =
      MRI.getUniqueVRegDef(Reg: AddMI->getOperand(i: AddOpIdx).getReg());

  unsigned InnerShiftAmt = ShiftMI->getOperand(i: 2).getImm();
  assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");

  // The leftover shift (inner minus outer) selects the inner add variant.
  unsigned InnerOpc;
  switch (InnerShiftAmt - OuterShiftAmt) {
  default:
    llvm_unreachable("Unexpected shift amount");
  case 0:
    InnerOpc = RISCV::ADD;
    break;
  case 1:
    InnerOpc = RISCV::SH1ADD;
    break;
  case 2:
    InnerOpc = RISCV::SH2ADD;
    break;
  case 3:
    InnerOpc = RISCV::SH3ADD;
    break;
  }

  // X = the other ADD source, Y = the value being shifted, Z = Root's first
  // source (names match the pattern in the comment above).
  const MachineOperand &X = AddMI->getOperand(i: 3 - AddOpIdx);
  const MachineOperand &Y = ShiftMI->getOperand(i: 1);
  const MachineOperand &Z = Root.getOperand(i: 1);

  Register NewVR = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);

  // MIB1: inner shXadd computing (Y << diff) + Z into NewVR.
  auto MIB1 = BuildMI(MF&: *MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: InnerOpc), DestReg: NewVR)
                  .addReg(RegNo: Y.getReg(), Flags: getKillRegState(B: Y.isKill()))
                  .addReg(RegNo: Z.getReg(), Flags: getKillRegState(B: Z.isKill()));
  // MIB2: outer shXadd (original opcode) combining NewVR with X.
  auto MIB2 = BuildMI(MF&: *MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: Root.getOpcode()),
                      DestReg: Root.getOperand(i: 0).getReg())
                  .addReg(RegNo: NewVR, Flags: RegState::Kill)
                  .addReg(RegNo: X.getReg(), Flags: getKillRegState(B: X.isKill()));

  // Map NewVR to the index of its defining instruction in InsInstrs (0 ==
  // MIB1) for the combiner's bookkeeping.
  InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
  InsInstrs.push_back(Elt: MIB1);
  InsInstrs.push_back(Elt: MIB2);
  DelInstrs.push_back(Elt: ShiftMI);
  DelInstrs.push_back(Elt: AddMI);
  DelInstrs.push_back(Elt: &Root);
}
2906
// Expand a matched combiner pattern into its replacement sequence. New
// instructions are appended to InsInstrs, the ones they replace to DelInstrs;
// patterns not handled here are delegated to the generic implementation.
void RISCVInstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, unsigned Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
  MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
  switch (Pattern) {
  default:
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstIdxForVirtReg&: InstrIdxForVirtReg);
    return;
  case RISCVMachineCombinerPattern::FMADD_AX:
  case RISCVMachineCombinerPattern::FMSUB: {
    // For these patterns the multiply defines Root's first source operand.
    MachineInstr &Prev = *MRI.getVRegDef(Reg: Root.getOperand(i: 1).getReg());
    combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
    return;
  }
  case RISCVMachineCombinerPattern::FMADD_XA:
  case RISCVMachineCombinerPattern::FNMSUB: {
    // For these patterns the multiply defines Root's second source operand.
    MachineInstr &Prev = *MRI.getVRegDef(Reg: Root.getOperand(i: 2).getReg());
    combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
    return;
  }
  case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
    genShXAddAddShift(Root, AddOpIdx: 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
    return;
  case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
    genShXAddAddShift(Root, AddOpIdx: 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
    return;
  }
}
2938
// Machine-verifier hook: validate each operand of MI against its declared
// RISCVOp operand type, then cross-check the vector VL/SEW/policy operands
// and the rounding-mode operand implied by the instruction's TSFlags.
// On failure, ErrInfo describes the violation and false is returned.
bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
                                       StringRef &ErrInfo) const {
  MCInstrDesc const &Desc = MI.getDesc();

  for (const auto &[Index, Operand] : enumerate(First: Desc.operands())) {
    const MachineOperand &MO = MI.getOperand(i: Index);
    unsigned OpType = Operand.OperandType;
    switch (OpType) {
    default:
      // Generic range check for all RISC-V immediate operand types.
      if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
          OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
        if (!MO.isImm()) {
          ErrInfo = "Expected an immediate operand.";
          return false;
        }
        int64_t Imm = MO.getImm();
        bool Ok;
        switch (OpType) {
        default:
          llvm_unreachable("Unexpected operand type");

// Each macro expands to a case that checks Imm against the encoded
// (shifted) signed/unsigned bit-width of the operand type.
#define CASE_OPERAND_UIMM(NUM) \
  case RISCVOp::OPERAND_UIMM##NUM: \
    Ok = isUInt<NUM>(Imm); \
    break;
#define CASE_OPERAND_UIMM_LSB_ZEROS(BITS, SUFFIX) \
  case RISCVOp::OPERAND_UIMM##BITS##_LSB##SUFFIX: { \
    constexpr size_t NumZeros = sizeof(#SUFFIX) - 1; \
    Ok = isShiftedUInt<BITS - NumZeros, NumZeros>(Imm); \
    break; \
  }
#define CASE_OPERAND_SIMM(NUM) \
  case RISCVOp::OPERAND_SIMM##NUM: \
    Ok = isInt<NUM>(Imm); \
    break;
        // clang-format off
        CASE_OPERAND_UIMM(1)
        CASE_OPERAND_UIMM(2)
        CASE_OPERAND_UIMM(3)
        CASE_OPERAND_UIMM(4)
        CASE_OPERAND_UIMM(5)
        CASE_OPERAND_UIMM(6)
        CASE_OPERAND_UIMM(7)
        CASE_OPERAND_UIMM(8)
        CASE_OPERAND_UIMM(9)
        CASE_OPERAND_UIMM(10)
        CASE_OPERAND_UIMM(12)
        CASE_OPERAND_UIMM(16)
        CASE_OPERAND_UIMM(32)
        CASE_OPERAND_UIMM(48)
        CASE_OPERAND_UIMM(64)
        CASE_OPERAND_UIMM_LSB_ZEROS(2, 0)
        CASE_OPERAND_UIMM_LSB_ZEROS(5, 0)
        CASE_OPERAND_UIMM_LSB_ZEROS(6, 0)
        CASE_OPERAND_UIMM_LSB_ZEROS(7, 00)
        CASE_OPERAND_UIMM_LSB_ZEROS(7, 000)
        CASE_OPERAND_UIMM_LSB_ZEROS(8, 00)
        CASE_OPERAND_UIMM_LSB_ZEROS(8, 000)
        CASE_OPERAND_UIMM_LSB_ZEROS(9, 000)
        // clang-format on
        case RISCVOp::OPERAND_UIMM5_NONZERO:
          Ok = isUInt<5>(x: Imm) && (Imm != 0);
          break;
        case RISCVOp::OPERAND_UIMM5_GT3:
          Ok = isUInt<5>(x: Imm) && (Imm > 3);
          break;
        case RISCVOp::OPERAND_UIMM5_PLUS1:
          Ok = Imm >= 1 && Imm <= 32;
          break;
        case RISCVOp::OPERAND_UIMM8_GE32:
          Ok = isUInt<8>(x: Imm) && Imm >= 32;
          break;
        case RISCVOp::OPERAND_SIMM8_UNSIGNED:
          Ok = isInt<8>(x: Imm);
          break;
        case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
          Ok = isShiftedInt<6, 4>(x: Imm) && (Imm != 0);
          break;
        case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
          Ok = isShiftedUInt<8, 2>(x: Imm) && (Imm != 0);
          break;
        case RISCVOp::OPERAND_UIMM16_NONZERO:
          Ok = isUInt<16>(x: Imm) && (Imm != 0);
          break;
        case RISCVOp::OPERAND_THREE:
          Ok = Imm == 3;
          break;
        case RISCVOp::OPERAND_FOUR:
          Ok = Imm == 4;
          break;
        case RISCVOp::OPERAND_IMM5_ZIBI:
          Ok = (isUInt<5>(x: Imm) && Imm != 0) || Imm == -1;
          break;
        // clang-format off
        CASE_OPERAND_SIMM(5)
        CASE_OPERAND_SIMM(6)
        CASE_OPERAND_SIMM(10)
        CASE_OPERAND_SIMM(11)
        CASE_OPERAND_SIMM(26)
        // clang-format on
        case RISCVOp::OPERAND_SIMM5_PLUS1:
          Ok = Imm >= -15 && Imm <= 16;
          break;
        case RISCVOp::OPERAND_SIMM5_NONZERO:
          Ok = isInt<5>(x: Imm) && (Imm != 0);
          break;
        case RISCVOp::OPERAND_SIMM6_NONZERO:
          Ok = Imm != 0 && isInt<6>(x: Imm);
          break;
        case RISCVOp::OPERAND_VTYPEI10:
          Ok = isUInt<10>(x: Imm);
          break;
        case RISCVOp::OPERAND_VTYPEI11:
          Ok = isUInt<11>(x: Imm);
          break;
        case RISCVOp::OPERAND_SIMM12_LSB00000:
          Ok = isShiftedInt<7, 5>(x: Imm);
          break;
        case RISCVOp::OPERAND_SIMM16_NONZERO:
          Ok = isInt<16>(x: Imm) && (Imm != 0);
          break;
        case RISCVOp::OPERAND_SIMM20_LI:
          Ok = isInt<20>(x: Imm);
          break;
        case RISCVOp::OPERAND_UIMMLOG2XLEN:
          // Shift amounts are 6 bits on RV64, 5 bits on RV32.
          Ok = STI.is64Bit() ? isUInt<6>(x: Imm) : isUInt<5>(x: Imm);
          break;
        case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
          Ok = STI.is64Bit() ? isUInt<6>(x: Imm) : isUInt<5>(x: Imm);
          Ok = Ok && Imm != 0;
          break;
        case RISCVOp::OPERAND_CLUI_IMM:
          Ok = (isUInt<5>(x: Imm) && Imm != 0) || (Imm >= 0xfffe0 && Imm <= 0xfffff);
          break;
        case RISCVOp::OPERAND_RVKRNUM:
          Ok = Imm >= 0 && Imm <= 10;
          break;
        case RISCVOp::OPERAND_RVKRNUM_0_7:
          Ok = Imm >= 0 && Imm <= 7;
          break;
        case RISCVOp::OPERAND_RVKRNUM_1_10:
          Ok = Imm >= 1 && Imm <= 10;
          break;
        case RISCVOp::OPERAND_RVKRNUM_2_14:
          Ok = Imm >= 2 && Imm <= 14;
          break;
        case RISCVOp::OPERAND_RLIST:
          Ok = Imm >= RISCVZC::RA && Imm <= RISCVZC::RA_S0_S11;
          break;
        case RISCVOp::OPERAND_RLIST_S0:
          Ok = Imm >= RISCVZC::RA_S0 && Imm <= RISCVZC::RA_S0_S11;
          break;
        case RISCVOp::OPERAND_STACKADJ:
          Ok = Imm >= 0 && Imm <= 48 && Imm % 16 == 0;
          break;
        case RISCVOp::OPERAND_FRMARG:
          Ok = RISCVFPRndMode::isValidRoundingMode(Mode: Imm);
          break;
        case RISCVOp::OPERAND_RTZARG:
          Ok = Imm == RISCVFPRndMode::RTZ;
          break;
        case RISCVOp::OPERAND_COND_CODE:
          Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID;
          break;
        case RISCVOp::OPERAND_ATOMIC_ORDERING:
          Ok = isValidAtomicOrdering(I: Imm);
          break;
        case RISCVOp::OPERAND_VEC_POLICY:
          // Only the tail/mask agnostic bits may be set.
          Ok = (Imm & (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)) ==
               Imm;
          break;
        case RISCVOp::OPERAND_SEW:
          Ok = (isUInt<5>(x: Imm) && RISCVVType::isValidSEW(SEW: 1 << Imm));
          break;
        case RISCVOp::OPERAND_SEW_MASK:
          Ok = Imm == 0;
          break;
        case RISCVOp::OPERAND_VEC_RM:
          // Interpretation depends on whether the instruction uses VXRM
          // (2-bit fixed-point mode) or FRM (FP rounding mode).
          assert(RISCVII::hasRoundModeOp(Desc.TSFlags));
          if (RISCVII::usesVXRM(TSFlags: Desc.TSFlags))
            Ok = isUInt<2>(x: Imm);
          else
            Ok = RISCVFPRndMode::isValidRoundingMode(Mode: Imm);
          break;
        case RISCVOp::OPERAND_XSFMM_VTYPE:
          Ok = RISCVVType::isValidXSfmmVType(VTypeI: Imm);
          break;
        case RISCVOp::OPERAND_XSFMM_TWIDEN:
          Ok = Imm == 1 || Imm == 2 || Imm == 4;
          break;
        }
        if (!Ok) {
          ErrInfo = "Invalid immediate";
          return false;
        }
      }
      break;
    // The operand types below may also carry non-immediate operands (e.g.
    // symbols/expressions), so only the immediate form is range-checked.
    case RISCVOp::OPERAND_SIMM12_LO:
      // TODO: We could be stricter about what non-register operands are
      // allowed.
      if (MO.isReg()) {
        ErrInfo = "Expected a non-register operand.";
        return false;
      }
      if (MO.isImm() && !isInt<12>(x: MO.getImm())) {
        ErrInfo = "Invalid immediate";
        return false;
      }
      break;
    case RISCVOp::OPERAND_UIMM20_LUI:
    case RISCVOp::OPERAND_UIMM20_AUIPC:
      // TODO: We could be stricter about what non-register operands are
      // allowed.
      if (MO.isReg()) {
        ErrInfo = "Expected a non-register operand.";
        return false;
      }
      if (MO.isImm() && !isUInt<20>(x: MO.getImm())) {
        ErrInfo = "Invalid immediate";
        return false;
      }
      break;
    case RISCVOp::OPERAND_BARE_SIMM32:
      // TODO: We could be stricter about what non-register operands are
      // allowed.
      if (MO.isReg()) {
        ErrInfo = "Expected a non-register operand.";
        return false;
      }
      if (MO.isImm() && !isInt<32>(x: MO.getImm())) {
        ErrInfo = "Invalid immediate";
        return false;
      }
      break;
    case RISCVOp::OPERAND_AVL:
      if (MO.isImm()) {
        int64_t Imm = MO.getImm();
        // VLMAX is represented as -1.
        if (!isUInt<5>(x: Imm) && Imm != -1) {
          ErrInfo = "Invalid immediate";
          return false;
        }
      } else if (!MO.isReg()) {
        ErrInfo = "Expected a register or immediate operand.";
        return false;
      }
      break;
    }
  }

  // Cross-check the vector operands implied by TSFlags: a VL operand must be
  // an immediate or a GPR (no X0 subclass) and must be paired with SEW.
  const uint64_t TSFlags = Desc.TSFlags;
  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &Op = MI.getOperand(i: RISCVII::getVLOpNum(Desc));
    if (!Op.isImm() && !Op.isReg()) {
      ErrInfo = "Invalid operand type for VL operand";
      return false;
    }
    if (Op.isReg() && Op.getReg().isValid()) {
      const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
      auto *RC = MRI.getRegClass(Reg: Op.getReg());
      if (!RISCV::GPRNoX0RegClass.hasSubClassEq(RC)) {
        ErrInfo = "Invalid register class for VL operand";
        return false;
      }
    }
    if (!RISCVII::hasSEWOp(TSFlags)) {
      ErrInfo = "VL operand w/o SEW operand?";
      return false;
    }
  }
  if (RISCVII::hasSEWOp(TSFlags)) {
    unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
    if (!MI.getOperand(i: OpIdx).isImm()) {
      ErrInfo = "SEW value expected to be an immediate";
      return false;
    }
    uint64_t Log2SEW = MI.getOperand(i: OpIdx).getImm();
    if (Log2SEW > 31) {
      ErrInfo = "Unexpected SEW value";
      return false;
    }
    // Log2SEW == 0 encodes SEW 8 here (mask operations).
    unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
    if (!RISCVVType::isValidSEW(SEW)) {
      ErrInfo = "Unexpected SEW value";
      return false;
    }
  }
  if (RISCVII::hasVecPolicyOp(TSFlags)) {
    unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
    if (!MI.getOperand(i: OpIdx).isImm()) {
      ErrInfo = "Policy operand expected to be an immediate";
      return false;
    }
    uint64_t Policy = MI.getOperand(i: OpIdx).getImm();
    if (Policy > (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)) {
      ErrInfo = "Invalid Policy Value";
      return false;
    }
    if (!RISCVII::hasVLOp(TSFlags)) {
      ErrInfo = "policy operand w/o VL operand?";
      return false;
    }

    // VecPolicy operands can only exist on instructions with passthru/merge
    // arguments. Note that not all arguments with passthru have vec policy
    // operands- some instructions have implicit policies.
    unsigned UseOpIdx;
    if (!MI.isRegTiedToUseOperand(DefOpIdx: 0, UseOpIdx: &UseOpIdx)) {
      ErrInfo = "policy operand w/o tied operand?";
      return false;
    }
  }

  // An instruction with a dynamic rounding mode must model its FRM read.
  if (int Idx = RISCVII::getFRMOpNum(Desc);
      Idx >= 0 && MI.getOperand(i: Idx).getImm() == RISCVFPRndMode::DYN &&
      !MI.readsRegister(Reg: RISCV::FRM, /*TRI=*/nullptr)) {
    ErrInfo = "dynamic rounding mode should read FRM";
    return false;
  }

  return true;
}
3261
// Returns true if the ADDI defining MemI's base register (AddrI) can be
// folded into MemI's base+offset addressing mode, filling in AM with the
// resulting base register and displacement.
bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
                                         const MachineInstr &AddrI,
                                         ExtAddrMode &AM) const {
  // Only plain scalar loads/stores with the canonical reg+imm form qualify.
  switch (MemI.getOpcode()) {
  default:
    return false;
  case RISCV::LB:
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::LWU:
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::FLH:
  case RISCV::FLW:
  case RISCV::FLD:
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SH_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::FSD:
    break;
  }

  // Don't fold if the load's destination is the register being replaced.
  if (MemI.getOperand(i: 0).getReg() == Reg)
    return false;

  // Only a reg+imm ADDI can be folded into the displacement.
  if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(i: 1).isReg() ||
      !AddrI.getOperand(i: 2).isImm())
    return false;

  int64_t OldOffset = MemI.getOperand(i: 2).getImm();
  int64_t Disp = AddrI.getOperand(i: 2).getImm();
  int64_t NewOffset = OldOffset + Disp;
  // On RV32 address arithmetic wraps at 32 bits.
  if (!STI.is64Bit())
    NewOffset = SignExtend64<32>(x: NewOffset);

  // The combined displacement must still fit the 12-bit immediate field.
  if (!isInt<12>(x: NewOffset))
    return false;

  AM.BaseReg = AddrI.getOperand(i: 1).getReg();
  AM.ScaledReg = 0;
  AM.Scale = 0;
  AM.Displacement = NewOffset;
  AM.Form = ExtAddrMode::Formula::Basic;
  return true;
}
3317
// Re-emit MemI with the folded addressing mode AM (base register +
// displacement), preserving its opcode, memory operands and MI flags.
MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
                                               const ExtAddrMode &AM) const {

  const DebugLoc &DL = MemI.getDebugLoc();
  MachineBasicBlock &MBB = *MemI.getParent();

  assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
         "Addressing mode not supported for folding");

  // Operand 0 is a def for loads and a source for stores, hence the
  // conditional def state.
  return BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode: MemI.getOpcode()))
      .addReg(RegNo: MemI.getOperand(i: 0).getReg(), Flags: getDefRegState(B: MemI.mayLoad()))
      .addReg(RegNo: AM.BaseReg)
      .addImm(Val: AM.Displacement)
      .setMemRefs(MemI.memoperands())
      .setMIFlags(MemI.getFlags());
}
3334
3335// TODO: At the moment, MIPS introduced paring of instructions operating with
3336// word or double word. This should be extended with more instructions when more
3337// vendors support load/store pairing.
3338bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) {
3339 switch (Opc) {
3340 default:
3341 return false;
3342 case RISCV::SW:
3343 case RISCV::SD:
3344 case RISCV::LD:
3345 case RISCV::LW:
3346 return true;
3347 }
3348}
3349
3350bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt,
3351 const TargetRegisterInfo *TRI) {
3352 // If this is a volatile load/store, don't mess with it.
3353 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
3354 return false;
3355
3356 if (LdSt.getOperand(i: 1).isFI())
3357 return true;
3358
3359 assert(LdSt.getOperand(1).isReg() && "Expected a reg operand.");
3360 // Can't cluster if the instruction modifies the base register
3361 // or it is update form. e.g. ld x5,8(x5)
3362 if (LdSt.modifiesRegister(Reg: LdSt.getOperand(i: 1).getReg(), TRI))
3363 return false;
3364
3365 if (!LdSt.getOperand(i: 2).isImm())
3366 return false;
3367
3368 return true;
3369}
3370
// Collect the base operand, offset and access width of LdSt for the
// scheduler's load/store clustering. Returns false for anything other than
// the supported scalar load/store opcodes.
bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
    const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore())
    return false;

  // Conservatively, only handle scalar loads/stores for now.
  switch (LdSt.getOpcode()) {
  case RISCV::LB:
  case RISCV::LBU:
  case RISCV::SB:
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::FLH:
  case RISCV::SH:
  case RISCV::SH_INX:
  case RISCV::FSH:
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::LWU:
  case RISCV::FLW:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FSW:
  case RISCV::LD:
  case RISCV::LD_RV32:
  case RISCV::FLD:
  case RISCV::SD:
  case RISCV::SD_RV32:
  case RISCV::FSD:
    break;
  default:
    return false;
  }
  const MachineOperand *BaseOp;
  // Scalar base+offset addressing is never scalable.
  OffsetIsScalable = false;
  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
    return false;
  BaseOps.push_back(Elt: BaseOp);
  return true;
}
3414
// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
// helper?
// Returns true if the two memory operations provably share a base pointer,
// either via identical base operands or via the same underlying IR object.
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
                                  ArrayRef<const MachineOperand *> BaseOps1,
                                  const MachineInstr &MI2,
                                  ArrayRef<const MachineOperand *> BaseOps2) {
  // Only examine the first "base" operand of each instruction, on the
  // assumption that it represents the real base address of the memory access.
  // Other operands are typically offsets or indices from this base address.
  if (BaseOps1.front()->isIdenticalTo(Other: *BaseOps2.front()))
    return true;

  // Fall back to comparing the IR-level memory operands; this only works
  // when each instruction has exactly one.
  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
    return false;

  auto MO1 = *MI1.memoperands_begin();
  auto MO2 = *MI2.memoperands_begin();
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
    return false;

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)
    return false;
  Base1 = getUnderlyingObject(V: Base1);
  Base2 = getUnderlyingObject(V: Base2);

  // Two undef values cannot be assumed to point at the same object.
  if (isa<UndefValue>(Val: Base1) || isa<UndefValue>(Val: Base2))
    return false;

  return Base1 == Base2;
}
3447
3448bool RISCVInstrInfo::shouldClusterMemOps(
3449 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
3450 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
3451 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
3452 unsigned NumBytes) const {
3453 // If the mem ops (to be clustered) do not have the same base ptr, then they
3454 // should not be clustered
3455 if (!BaseOps1.empty() && !BaseOps2.empty()) {
3456 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
3457 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
3458 if (!memOpsHaveSameBasePtr(MI1: FirstLdSt, BaseOps1, MI2: SecondLdSt, BaseOps2))
3459 return false;
3460 } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
3461 // If only one base op is empty, they do not have the same base ptr
3462 return false;
3463 }
3464
3465 unsigned CacheLineSize =
3466 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
3467 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
3468 CacheLineSize = CacheLineSize ? CacheLineSize : 64;
3469 // Cluster if the memory operations are on the same or a neighbouring cache
3470 // line, but limit the maximum ClusterSize to avoid creating too much
3471 // additional register pressure.
3472 return ClusterSize <= 4 && std::abs(i: Offset1 - Offset2) < CacheLineSize;
3473}
3474
// Set BaseReg (the base register operand), Offset (the byte offset being
// accessed) and the access Width of the passed instruction that reads/writes
// memory. Returns false if the instruction does not read/write memory or the
// BaseReg/Offset/Width can't be determined. Is not guaranteed to always
// recognise base operands and offsets in all cases.
// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
// function) and set it as appropriate.
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
    LocationSize &Width, const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore())
    return false;

  // Here we assume the standard RISC-V ISA, which uses a base+offset
  // addressing mode. You'll need to relax these conditions to support custom
  // load/store instructions.
  if (LdSt.getNumExplicitOperands() != 3)
    return false;
  // Operand 1 is the base (register or frame index); operand 2 must be an
  // immediate offset.
  if ((!LdSt.getOperand(i: 1).isReg() && !LdSt.getOperand(i: 1).isFI()) ||
      !LdSt.getOperand(i: 2).isImm())
    return false;

  // The width is taken from the memory operand, so exactly one is required.
  if (!LdSt.hasOneMemOperand())
    return false;

  Width = (*LdSt.memoperands_begin())->getSize();
  BaseReg = &LdSt.getOperand(i: 1);
  Offset = LdSt.getOperand(i: 2).getImm();
  return true;
}
3505
// Returns true if the two memory accesses provably do not overlap: same base
// operand and the lower access ends at or before the higher one starts.
// Conservatively returns false whenever this cannot be shown.
bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  // Side effects or ordered (volatile/atomic) accesses can never be proven
  // disjoint.
  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  LocationSize WidthA = LocationSize::precise(Value: 0),
               WidthB = LocationSize::precise(Value: 0);
  if (getMemOperandWithOffsetWidth(LdSt: MIa, BaseReg: BaseOpA, Offset: OffsetA, Width: WidthA, TRI) &&
      getMemOperandWithOffsetWidth(LdSt: MIb, BaseReg: BaseOpB, Offset: OffsetB, Width: WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(Other: *BaseOpB)) {
      // NOTE(review): the int64_t offsets are narrowed to int here; this is
      // fine for the 12-bit immediates these opcodes carry, but would need
      // widening if larger offsets ever reach this path — confirm.
      int LowOffset = std::min(a: OffsetA, b: OffsetB);
      int HighOffset = std::max(a: OffsetA, b: OffsetB);
      LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowWidth.hasValue() &&
          LowOffset + (int)LowWidth.getValue() <= HighOffset)
        return true;
    }
  }
  return false;
}
3538
3539std::pair<unsigned, unsigned>
3540RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
3541 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
3542 return std::make_pair(x: TF & Mask, y: TF & ~Mask);
3543}
3544
// Table mapping the direct operand target flags to the names used when
// (de)serializing MIR.
ArrayRef<std::pair<unsigned, const char *>>
RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace RISCVII;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_CALL, "riscv-call"},
      {MO_LO, "riscv-lo"},
      {MO_HI, "riscv-hi"},
      {MO_PCREL_LO, "riscv-pcrel-lo"},
      {MO_PCREL_HI, "riscv-pcrel-hi"},
      {MO_GOT_HI, "riscv-got-hi"},
      {MO_TPREL_LO, "riscv-tprel-lo"},
      {MO_TPREL_HI, "riscv-tprel-hi"},
      {MO_TPREL_ADD, "riscv-tprel-add"},
      {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
      {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
      {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
      {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
      {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
      {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
  return ArrayRef(TargetFlags);
}
3566bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
3567 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
3568 const Function &F = MF.getFunction();
3569
3570 // Can F be deduplicated by the linker? If it can, don't outline from it.
3571 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
3572 return false;
3573
3574 // Don't outline from functions with section markings; the program could
3575 // expect that all the code is in the named section.
3576 if (F.hasSection())
3577 return false;
3578
3579 // It's safe to outline from MF.
3580 return true;
3581}
3582
// Cheap per-block screen for the outliner; always defers to the generic
// check here.
bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                            unsigned &Flags) const {
  // More accurate safety checking is done in getOutliningCandidateInfo.
  return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
}
3588
// Enum values indicating how an outlined call should be constructed.
enum MachineOutlinerConstructionID {
  // The outlined sequence ends in a return, so the call site can be replaced
  // with a tail call.
  MachineOutlinerTailCall,
  // An ordinary call/return pair; analyzeCandidate requires X5 (t0) to be
  // free to set up this form.
  MachineOutlinerDefault
};
3594
// Outline aggressively (by default) only from functions optimized for
// minimum size.
bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return MF.getFunction().hasMinSize();
}
3599
3600static bool isCandidatePatchable(const MachineBasicBlock &MBB) {
3601 const MachineFunction *MF = MBB.getParent();
3602 const Function &F = MF->getFunction();
3603 return F.getFnAttribute(Kind: "fentry-call").getValueAsBool() ||
3604 F.hasFnAttribute(Kind: "patchable-function-entry");
3605}
3606
3607static bool isMIReadsReg(const MachineInstr &MI, const TargetRegisterInfo *TRI,
3608 MCRegister RegNo) {
3609 return MI.readsRegister(Reg: RegNo, TRI) ||
3610 MI.getDesc().hasImplicitUseOfPhysReg(Reg: RegNo);
3611}
3612
3613static bool isMIModifiesReg(const MachineInstr &MI,
3614 const TargetRegisterInfo *TRI, MCRegister RegNo) {
3615 return MI.modifiesRegister(Reg: RegNo, TRI) ||
3616 MI.getDesc().hasImplicitDefOfPhysReg(Reg: RegNo);
3617}
3618
// Returns true if this block cannot be outlined via a tail call: it does not
// end in a return, its function is patchable, or the sequence reads the
// register reserved for expanding PseudoTAIL.
static bool cannotInsertTailCall(const MachineBasicBlock &MBB) {
  if (!MBB.back().isReturn())
    return true;
  if (isCandidatePatchable(MBB))
    return true;

  // If the candidate reads the pre-set register
  // that can be used for expanding PseudoTAIL instruction,
  // then we cannot insert tail call.
  const TargetSubtargetInfo &STI = MBB.getParent()->getSubtarget();
  MCRegister TailExpandUseRegNo =
      RISCVII::getTailExpandUseRegNo(FeatureBits: STI.getFeatureBits());
  for (const MachineInstr &MI : MBB) {
    if (isMIReadsReg(MI, TRI: STI.getRegisterInfo(), RegNo: TailExpandUseRegNo))
      return true;
    // Once the register is redefined, later instructions can no longer
    // observe its incoming value, so the scan can stop.
    if (isMIModifiesReg(MI, TRI: STI.getRegisterInfo(), RegNo: TailExpandUseRegNo))
      break;
  }
  return false;
}
3639
// Returns true if the candidate C must be discarded (cannot be outlined);
// false if it is viable. Used as the erase predicate in
// getOutliningCandidateInfo.
bool RISCVInstrInfo::analyzeCandidate(outliner::Candidate &C) const {
  // If the expansion register for tail calls is live across the candidate
  // outlined call site, we cannot outline that candidate as the expansion
  // would clobber the register.
  MCRegister TailExpandUseReg =
      RISCVII::getTailExpandUseRegNo(FeatureBits: STI.getFeatureBits());
  if (C.back().isReturn() &&
      !C.isAvailableAcrossAndOutOfSeq(Reg: TailExpandUseReg, TRI: RegInfo)) {
    LLVM_DEBUG(dbgs() << "MBB:\n" << *C.getMBB());
    LLVM_DEBUG(dbgs() << "Cannot be outlined between: " << C.front() << "and "
                      << C.back());
    LLVM_DEBUG(dbgs() << "Because the tail-call register is live across "
                         "the proposed outlined function call\n");
    return true;
  }

  // If last instruction is return then we can rely on
  // the verification already performed in the getOutliningTypeImpl.
  if (C.back().isReturn()) {
    assert(!cannotInsertTailCall(*C.getMBB()) &&
           "The candidate who uses return instruction must be outlined "
           "using tail call");
    return false;
  }

  // Filter out candidates where the X5 register (t0) can't be used to setup
  // the function call.
  if (llvm::any_of(Range&: C, P: [this](const MachineInstr &MI) {
        return isMIModifiesReg(MI, TRI: &RegInfo, RegNo: RISCV::X5);
      }))
    return true;

  // X5 must also be free across the whole sequence to hold the return
  // address for the outlined call.
  return !C.isAvailableAcrossAndOutOfSeq(Reg: RISCV::X5, TRI: RegInfo);
}
3674
3675std::optional<std::unique_ptr<outliner::OutlinedFunction>>
3676RISCVInstrInfo::getOutliningCandidateInfo(
3677 const MachineModuleInfo &MMI,
3678 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
3679 unsigned MinRepeats) const {
3680
3681 // Analyze each candidate and erase the ones that are not viable.
3682 llvm::erase_if(C&: RepeatedSequenceLocs, P: [this](auto Candidate) {
3683 return analyzeCandidate(C&: Candidate);
3684 });
3685
3686 // If the sequence doesn't have enough candidates left, then we're done.
3687 if (RepeatedSequenceLocs.size() < MinRepeats)
3688 return std::nullopt;
3689
3690 // Each RepeatedSequenceLoc is identical.
3691 outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
3692 unsigned InstrSizeCExt =
3693 Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtZca() ? 2 : 4;
3694 unsigned CallOverhead = 0, FrameOverhead = 0;
3695
3696 // Count the number of CFI instructions in the candidate, if present.
3697 unsigned CFICount = 0;
3698 for (auto &I : Candidate) {
3699 if (I.isCFIInstruction())
3700 CFICount++;
3701 }
3702
3703 // Ensure CFI coverage matches: comparing the number of CFIs in the candidate
3704 // with the total number of CFIs in the parent function for each candidate.
3705 // Outlining only a subset of a function’s CFIs would split the unwind state
3706 // across two code regions and lead to incorrect address offsets between the
3707 // outlined body and the remaining code. To preserve correct unwind info, we
3708 // only outline when all CFIs in the function can be outlined together.
3709 for (outliner::Candidate &C : RepeatedSequenceLocs) {
3710 std::vector<MCCFIInstruction> CFIInstructions =
3711 C.getMF()->getFrameInstructions();
3712
3713 if (CFICount > 0 && CFICount != CFIInstructions.size())
3714 return std::nullopt;
3715 }
3716
3717 MachineOutlinerConstructionID MOCI = MachineOutlinerDefault;
3718 if (Candidate.back().isReturn()) {
3719 MOCI = MachineOutlinerTailCall;
3720 // tail call = auipc + jalr in the worst case without linker relaxation.
3721 // FIXME: This code suggests the JALR can be compressed - how?
3722 CallOverhead = 4 + InstrSizeCExt;
3723 // Using tail call we move ret instruction from caller to callee.
3724 FrameOverhead = 0;
3725 } else {
3726 // call t0, function = 8 bytes.
3727 CallOverhead = 8;
3728 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3729 FrameOverhead = InstrSizeCExt;
3730 }
3731
3732 // If we have CFI instructions, we can only outline if the outlined section
3733 // can be a tail call.
3734 if (MOCI != MachineOutlinerTailCall && CFICount > 0)
3735 return std::nullopt;
3736
3737 for (auto &C : RepeatedSequenceLocs)
3738 C.setCallInfo(CID: MOCI, CO: CallOverhead);
3739
3740 unsigned SequenceSize = 0;
3741 for (auto &MI : Candidate)
3742 SequenceSize += getInstSizeInBytes(MI);
3743
3744 return std::make_unique<outliner::OutlinedFunction>(
3745 args&: RepeatedSequenceLocs, args&: SequenceSize, args&: FrameOverhead, args&: MOCI);
3746}
3747
3748outliner::InstrType
3749RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
3750 MachineBasicBlock::iterator &MBBI,
3751 unsigned Flags) const {
3752 MachineInstr &MI = *MBBI;
3753 MachineBasicBlock *MBB = MI.getParent();
3754 const TargetRegisterInfo *TRI =
3755 MBB->getParent()->getSubtarget().getRegisterInfo();
3756 const auto &F = MI.getMF()->getFunction();
3757
3758 // We can only outline CFI instructions if we will tail call the outlined
3759 // function, or fix up the CFI offsets. Currently, CFI instructions are
3760 // outlined only if in a tail call.
3761 if (MI.isCFIInstruction())
3762 return outliner::InstrType::Legal;
3763
3764 if (cannotInsertTailCall(MBB: *MBB) &&
3765 (MI.isReturn() || isMIModifiesReg(MI, TRI, RegNo: RISCV::X5)))
3766 return outliner::InstrType::Illegal;
3767
3768 // Make sure the operands don't reference something unsafe.
3769 for (const auto &MO : MI.operands()) {
3770
3771 // pcrel-hi and pcrel-lo can't put in separate sections, filter that out
3772 // if any possible.
3773 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
3774 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
3775 F.hasSection() || F.getSectionPrefix()))
3776 return outliner::InstrType::Illegal;
3777 }
3778
3779 if (isLPAD(MI))
3780 return outliner::InstrType::Illegal;
3781
3782 return outliner::InstrType::Legal;
3783}
3784
3785void RISCVInstrInfo::buildOutlinedFrame(
3786 MachineBasicBlock &MBB, MachineFunction &MF,
3787 const outliner::OutlinedFunction &OF) const {
3788
3789 if (OF.FrameConstructionID == MachineOutlinerTailCall)
3790 return;
3791
3792 MBB.addLiveIn(PhysReg: RISCV::X5);
3793
3794 // Add in a return instruction to the end of the outlined frame.
3795 MBB.insert(I: MBB.end(), MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::JALR))
3796 .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
3797 .addReg(RegNo: RISCV::X5)
3798 .addImm(Val: 0));
3799}
3800
3801MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
3802 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
3803 MachineFunction &MF, outliner::Candidate &C) const {
3804
3805 if (C.CallConstructionID == MachineOutlinerTailCall) {
3806 It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::PseudoTAIL))
3807 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()),
3808 /*Offset=*/0, TargetFlags: RISCVII::MO_CALL));
3809 return It;
3810 }
3811
3812 // Add in a call instruction to the outlined function at the given location.
3813 It = MBB.insert(I: It,
3814 MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: RISCV::PseudoCALLReg), DestReg: RISCV::X5)
3815 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()), Offset: 0,
3816 TargetFlags: RISCVII::MO_CALL));
3817 return It;
3818}
3819
3820std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
3821 Register Reg) const {
3822 // TODO: Handle cases where Reg is a super- or sub-register of the
3823 // destination register.
3824 const MachineOperand &Op0 = MI.getOperand(i: 0);
3825 if (!Op0.isReg() || Reg != Op0.getReg())
3826 return std::nullopt;
3827
3828 // Don't consider ADDIW as a candidate because the caller may not be aware
3829 // of its sign extension behaviour.
3830 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(i: 1).isReg() &&
3831 MI.getOperand(i: 2).isImm())
3832 return RegImmPair{MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getImm()};
3833
3834 return std::nullopt;
3835}
3836
// MIR printer helper function to annotate Operands with a comment.
// Falls back to the generic TargetInstrInfo comment first; otherwise decodes
// RISC-V-specific immediate operand encodings (vtype, SEW, policy, AVL,
// rounding mode) into a human-readable string. Returns "" when the operand
// needs no annotation.
std::string RISCVInstrInfo::createMIROperandComment(
    const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
    const TargetRegisterInfo *TRI) const {
  // Print a generic comment for this operand if there is one.
  std::string GenericComment =
      TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
  if (!GenericComment.empty())
    return GenericComment;

  // Implicit operands past the declared operand list carry no type info.
  const MCInstrDesc &Desc = MI.getDesc();
  if (OpIdx >= Desc.getNumOperands())
    return std::string();

  std::string Comment;
  raw_string_ostream OS(Comment);

  const MCOperandInfo &OpInfo = Desc.operands()[OpIdx];

  // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
  // operand of vector codegen pseudos.
  switch (OpInfo.OperandType) {
  case RISCVOp::OPERAND_VTYPEI10:
  case RISCVOp::OPERAND_VTYPEI11: {
    unsigned Imm = Op.getImm();
    RISCVVType::printVType(VType: Imm, OS);
    break;
  }
  case RISCVOp::OPERAND_XSFMM_VTYPE: {
    unsigned Imm = Op.getImm();
    RISCVVType::printXSfmmVType(VType: Imm, OS);
    break;
  }
  case RISCVOp::OPERAND_XSFMM_TWIDEN: {
    unsigned Imm = Op.getImm();
    OS << "w" << Imm;
    break;
  }
  case RISCVOp::OPERAND_SEW:
  case RISCVOp::OPERAND_SEW_MASK: {
    // SEW is stored as log2; a stored 0 is a special encoding meaning SEW=8.
    unsigned Log2SEW = Op.getImm();
    unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
    assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
    OS << "e" << SEW;
    break;
  }
  case RISCVOp::OPERAND_VEC_POLICY: {
    // Bit 0: tail agnostic; bit 1: mask agnostic.
    unsigned Policy = Op.getImm();
    assert(Policy <= (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
           "Invalid Policy Value");
    OS << (Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
       << (Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu");
    break;
  }
  case RISCVOp::OPERAND_AVL:
    // An immediate AVL of -1 selects VLMAX.
    if (Op.isImm() && Op.getImm() == -1)
      OS << "vl=VLMAX";
    else
      OS << "vl";
    break;
  case RISCVOp::OPERAND_VEC_RM:
    // The same operand slot encodes either a fixed-point (vxrm) or a
    // floating-point (frm) rounding mode, distinguished by the TSFlags.
    if (RISCVII::usesVXRM(TSFlags: Desc.TSFlags)) {
      assert(RISCVVXRndMode::isValidRoundingMode(Op.getImm()));
      auto VXRM = static_cast<RISCVVXRndMode::RoundingMode>(Op.getImm());
      OS << "vxrm=" << RISCVVXRndMode::roundingModeToString(RndMode: VXRM);
    } else {
      assert(RISCVFPRndMode::isValidRoundingMode(Op.getImm()));
      auto FRM = static_cast<RISCVFPRndMode::RoundingMode>(Op.getImm());
      OS << "frm=" << RISCVFPRndMode::roundingModeToString(RndMode: FRM);
    }
    break;
  }

  return Comment;
}
3912
// clang-format off
// The macros below expand to comma-free chains of case labels covering the
// masked/unmasked RVV pseudo opcodes for every LMUL, so a single
// CASE_RVV_OPCODE(OP) can be used as a case label in findCommutedOpIndices.

// Unmasked pseudo for one LMUL, e.g. PseudoVADD_VV_M1.
#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
  RISCV::Pseudo##OP##_##LMUL

// Masked pseudo for one LMUL, e.g. PseudoVADD_VV_M1_MASK.
#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
  RISCV::Pseudo##OP##_##LMUL##_MASK

// Both masked and unmasked pseudos for one LMUL.
#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
  CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
  case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)

// Unmasked pseudos for the LMULs valid as widening sources (MF8..M4).
#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
  CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
  case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
  case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
  case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
  case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
  case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)

// Unmasked pseudos for every LMUL (MF8..M8).
#define CASE_RVV_OPCODE_UNMASK(OP) \
  CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
  case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)

// Masked pseudos for the LMULs valid as widening sources (MF8..M4).
#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
  CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
  case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
  case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
  case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
  case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
  case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)

// Masked pseudos for every LMUL (MF8..M8).
#define CASE_RVV_OPCODE_MASK(OP) \
  CASE_RVV_OPCODE_MASK_WIDEN(OP): \
  case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)

// Masked + unmasked pseudos for the widening LMUL set.
#define CASE_RVV_OPCODE_WIDEN(OP) \
  CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
  case CASE_RVV_OPCODE_MASK_WIDEN(OP)

// Masked + unmasked pseudos across all LMULs.
#define CASE_RVV_OPCODE(OP) \
  CASE_RVV_OPCODE_UNMASK(OP): \
  case CASE_RVV_OPCODE_MASK(OP)
// clang-format on
3956
// clang-format off
// Case-label chains for the multiply-accumulate pseudos. VMA (integer)
// pseudos vary only by LMUL; VFMA (floating-point) pseudos additionally vary
// by SEW, so the LMUL range depends on the element width.

// One integer MA pseudo, e.g. PseudoVMACC_VV_M1.
#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
  RISCV::PseudoV##OP##_##TYPE##_##LMUL

// Integer MA pseudos across all LMULs (MF8..M8).
#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
  CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
  case CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
  case CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
  case CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
  case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
  case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
  case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)

// VFMA instructions are SEW specific.
#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
  RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW

// FP MA pseudos for LMUL M1..M8 (used with E64, the widest SEW).
#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
  CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
  case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
  case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
  case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)

// FP MA pseudos for LMUL MF2..M8 (used with E32).
#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
  CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
  case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)

// FP MA pseudos for LMUL MF4..M8 (used with E16).
#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
  CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
  case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)

// All vector-vector FP MA pseudos of OP, including the _ALT E16 variants.
#define CASE_VFMA_OPCODE_VV(OP) \
  CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
  case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VV, E16): \
  case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
  case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)

// All scalar-splat FP MA pseudos of OP (VFPR16/32/64 operand forms).
#define CASE_VFMA_SPLATS(OP) \
  CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
  case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VFPR16, E16): \
  case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
  case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
// clang-format on
4000
// Determines whether two source operands of \p MI may be swapped, filling in
// SrcOpIdx1/SrcOpIdx2. Either index may come in as CommuteAnyOperandIndex,
// meaning this function is free to choose that operand. For several opcodes
// the swap is only legal together with an opcode change, which is performed
// later in commuteInstructionImpl.
bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
                                           unsigned &SrcOpIdx1,
                                           unsigned &SrcOpIdx2) const {
  const MCInstrDesc &Desc = MI.getDesc();
  if (!Desc.isCommutable())
    return false;

  switch (MI.getOpcode()) {
  case RISCV::TH_MVEQZ:
  case RISCV::TH_MVNEZ:
    // We can't commute operands if operand 2 (i.e., rs1 in
    // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
    // not valid as the in/out-operand 1).
    if (MI.getOperand(i: 2).getReg() == RISCV::X0)
      return false;
    // Operands 1 and 2 are commutable, if we switch the opcode.
    return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 1, CommutableOpIdx2: 2);
  case RISCV::QC_SELECTIEQ:
  case RISCV::QC_SELECTINE:
  case RISCV::QC_SELECTIIEQ:
  case RISCV::QC_SELECTIINE:
    // Operands 1 and 2 are commutable without an opcode change.
    return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 1, CommutableOpIdx2: 2);
  case RISCV::QC_MVEQ:
  case RISCV::QC_MVNE:
  case RISCV::QC_MVLT:
  case RISCV::QC_MVGE:
  case RISCV::QC_MVLTU:
  case RISCV::QC_MVGEU:
  case RISCV::QC_MVEQI:
  case RISCV::QC_MVNEI:
  case RISCV::QC_MVLTI:
  case RISCV::QC_MVGEI:
  case RISCV::QC_MVLTUI:
  case RISCV::QC_MVGEUI:
    // Operands 1 and 4 are commutable (the condition is inverted via an
    // opcode change in commuteInstructionImpl).
    return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 1, CommutableOpIdx2: 4);
  case RISCV::TH_MULA:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULS:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    // Operands 2 and 3 are commutable.
    return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 2, CommutableOpIdx2: 3);
  case RISCV::PseudoCCMOVGPRNoX0:
  case RISCV::PseudoCCMOVGPR:
    // Operands 4 and 5 are commutable.
    return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 4, CommutableOpIdx2: 5)1;
  case CASE_RVV_OPCODE(VADD_VV):
  case CASE_RVV_OPCODE(VAND_VV):
  case CASE_RVV_OPCODE(VOR_VV):
  case CASE_RVV_OPCODE(VXOR_VV):
  case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
  case CASE_RVV_OPCODE_MASK(VMSNE_VV):
  case CASE_RVV_OPCODE(VMIN_VV):
  case CASE_RVV_OPCODE(VMINU_VV):
  case CASE_RVV_OPCODE(VMAX_VV):
  case CASE_RVV_OPCODE(VMAXU_VV):
  case CASE_RVV_OPCODE(VMUL_VV):
  case CASE_RVV_OPCODE(VMULH_VV):
  case CASE_RVV_OPCODE(VMULHU_VV):
  case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
  case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
  case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
  case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
  case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
  case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
  case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
  case CASE_RVV_OPCODE(VSADD_VV):
  case CASE_RVV_OPCODE(VSADDU_VV):
  case CASE_RVV_OPCODE(VAADD_VV):
  case CASE_RVV_OPCODE(VAADDU_VV):
  case CASE_RVV_OPCODE(VSMUL_VV):
    // Operands 2 and 3 are commutable.
    return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: 2, CommutableOpIdx2: 3);
  case CASE_VFMA_SPLATS(FMADD):
  case CASE_VFMA_SPLATS(FMSUB):
  case CASE_VFMA_SPLATS(FMACC):
  case CASE_VFMA_SPLATS(FMSAC):
  case CASE_VFMA_SPLATS(FNMADD):
  case CASE_VFMA_SPLATS(FNMSUB):
  case CASE_VFMA_SPLATS(FNMACC):
  case CASE_VFMA_SPLATS(FNMSAC):
  case CASE_VFMA_OPCODE_VV(FMACC):
  case CASE_VFMA_OPCODE_VV(FMSAC):
  case CASE_VFMA_OPCODE_VV(FNMACC):
  case CASE_VFMA_OPCODE_VV(FNMSAC):
  case CASE_VMA_OPCODE_LMULS(MADD, VX):
  case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
  case CASE_VMA_OPCODE_LMULS(MACC, VX):
  case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
  case CASE_VMA_OPCODE_LMULS(MACC, VV):
  case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
    // If the tail policy is undisturbed we can't commute.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
    if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
         1) == 0)
      return false;

    // For these instructions we can only swap operand 1 and operand 3 by
    // changing the opcode.
    unsigned CommutableOpIdx1 = 1;
    unsigned CommutableOpIdx2 = 3;
    if (!fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1,
                              CommutableOpIdx2))
      return false;
    return true;
  }
  case CASE_VFMA_OPCODE_VV(FMADD):
  case CASE_VFMA_OPCODE_VV(FMSUB):
  case CASE_VFMA_OPCODE_VV(FNMADD):
  case CASE_VFMA_OPCODE_VV(FNMSUB):
  case CASE_VMA_OPCODE_LMULS(MADD, VV):
  case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
    // If the tail policy is undisturbed we can't commute.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
    if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
         1) == 0)
      return false;

    // For these instructions we have more freedom. We can commute with the
    // other multiplicand or with the addend/subtrahend/minuend.

    // Any fixed operand must be from source 1, 2 or 3.
    if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
      return false;
    if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
      return false;

    // It both ops are fixed one must be the tied source.
    if (SrcOpIdx1 != CommuteAnyOperandIndex &&
        SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
      return false;

    // Look for two different register operands assumed to be commutable
    // regardless of the FMA opcode. The FMA opcode is adjusted later if
    // needed.
    if (SrcOpIdx1 == CommuteAnyOperandIndex ||
        SrcOpIdx2 == CommuteAnyOperandIndex) {
      // At least one of operands to be commuted is not specified and
      // this method is free to choose appropriate commutable operands.
      unsigned CommutableOpIdx1 = SrcOpIdx1;
      if (SrcOpIdx1 == SrcOpIdx2) {
        // Both of operands are not fixed. Set one of commutable
        // operands to the tied source.
        CommutableOpIdx1 = 1;
      } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
        // Only one of the operands is not fixed.
        CommutableOpIdx1 = SrcOpIdx2;
      }

      // CommutableOpIdx1 is well defined now. Let's choose another commutable
      // operand and assign its index to CommutableOpIdx2.
      unsigned CommutableOpIdx2;
      if (CommutableOpIdx1 != 1) {
        // If we haven't already used the tied source, we must use it now.
        CommutableOpIdx2 = 1;
      } else {
        Register Op1Reg = MI.getOperand(i: CommutableOpIdx1).getReg();

        // The commuted operands should have different registers.
        // Otherwise, the commute transformation does not change anything and
        // is useless. We use this as a hint to make our decision.
        if (Op1Reg != MI.getOperand(i: 2).getReg())
          CommutableOpIdx2 = 2;
        else
          CommutableOpIdx2 = 3;
      }

      // Assign the found pair of commutable indices to SrcOpIdx1 and
      // SrcOpIdx2 to return those values.
      if (!fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1,
                                CommutableOpIdx2))
        return false;
    }

    return true;
  }
  }

  // Fall back to the generic commutability check.
  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
4182
// clang-format off
// The macros below expand to "case OLD: Opc = NEW; break;" sequences used by
// commuteInstructionImpl to translate an MA pseudo into its counterpart when
// commuting requires an opcode change (e.g. MACC <-> MADD).

#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
  case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
    Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
    break;

// Integer MA opcode swap across all LMULs.
#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
  CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
  CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
  CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
  CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
  CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
  CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
  CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)

// VFMA depends on SEW.
#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
  case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
    Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
    break;

// FP MA opcode swap for LMUL M1..M8 (E64).
#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)

// FP MA opcode swap for LMUL MF2..M8 (E32).
#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)

// FP MA opcode swap for LMUL MF4..M8 (E16).
#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
  CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)

// Vector-vector FP MA swap, including the _ALT E16 variants.
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VV, E16) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)

// Scalar-splat FP MA swap across all operand widths.
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VFPR16, E16) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
  CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
// clang-format on
4230
// Commutes operands OpIdx1/OpIdx2 of \p MI (on a clone when NewMI is set).
// For several opcode families the swap is only legal together with an opcode
// change (e.g. toggling MACC <-> MADD or inverting a condition); that change
// is applied here before delegating the operand swap itself to the generic
// TargetInstrInfo implementation.
MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
                                                     bool NewMI,
                                                     unsigned OpIdx1,
                                                     unsigned OpIdx2) const {
  // Honor NewMI: either mutate MI in place or work on a fresh clone.
  auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
    if (NewMI)
      return *MI.getParent()->getParent()->CloneMachineInstr(Orig: &MI);
    return MI;
  };

  switch (MI.getOpcode()) {
  case RISCV::TH_MVEQZ:
  case RISCV::TH_MVNEZ: {
    // Swapping rs1/rs2 requires inverting the condition: mveqz <-> mvnez.
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opcode: MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
                                                              : RISCV::TH_MVEQZ));
    return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, NewMI: false, OpIdx1,
                                                   OpIdx2);
  }
  case RISCV::QC_SELECTIEQ:
  case RISCV::QC_SELECTINE:
  case RISCV::QC_SELECTIIEQ:
  case RISCV::QC_SELECTIINE:
    // These commute directly, without an opcode change.
    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  case RISCV::QC_MVEQ:
  case RISCV::QC_MVNE:
  case RISCV::QC_MVLT:
  case RISCV::QC_MVGE:
  case RISCV::QC_MVLTU:
  case RISCV::QC_MVGEU:
  case RISCV::QC_MVEQI:
  case RISCV::QC_MVNEI:
  case RISCV::QC_MVLTI:
  case RISCV::QC_MVGEI:
  case RISCV::QC_MVLTUI:
  case RISCV::QC_MVGEUI: {
    // Conditional-move: swap operands and invert the condition opcode.
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opcode: getInverseXqcicmOpcode(Opcode: MI.getOpcode())));
    return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, NewMI: false, OpIdx1,
                                                   OpIdx2);
  }
  case RISCV::PseudoCCMOVGPRNoX0:
  case RISCV::PseudoCCMOVGPR: {
    // CCMOV can be commuted by inverting the condition.
    auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
    CC = RISCVCC::getInverseBranchCondition(CC);
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(i: 3).setImm(CC);
    return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, /*NewMI*/ false,
                                                   OpIdx1, OpIdx2);
  }
  case CASE_VFMA_SPLATS(FMACC):
  case CASE_VFMA_SPLATS(FMADD):
  case CASE_VFMA_SPLATS(FMSAC):
  case CASE_VFMA_SPLATS(FMSUB):
  case CASE_VFMA_SPLATS(FNMACC):
  case CASE_VFMA_SPLATS(FNMADD):
  case CASE_VFMA_SPLATS(FNMSAC):
  case CASE_VFMA_SPLATS(FNMSUB):
  case CASE_VFMA_OPCODE_VV(FMACC):
  case CASE_VFMA_OPCODE_VV(FMSAC):
  case CASE_VFMA_OPCODE_VV(FNMACC):
  case CASE_VFMA_OPCODE_VV(FNMSAC):
  case CASE_VMA_OPCODE_LMULS(MADD, VX):
  case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
  case CASE_VMA_OPCODE_LMULS(MACC, VX):
  case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
  case CASE_VMA_OPCODE_LMULS(MACC, VV):
  case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
    // It only make sense to toggle these between clobbering the
    // addend/subtrahend/minuend one of the multiplicands.
    assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
    assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
    unsigned Opc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
    CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
    CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
    CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
    CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
    CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
    CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
    CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
    CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
    CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
    CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
    }

    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opcode: Opc));
    return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case CASE_VFMA_OPCODE_VV(FMADD):
  case CASE_VFMA_OPCODE_VV(FMSUB):
  case CASE_VFMA_OPCODE_VV(FNMADD):
  case CASE_VFMA_OPCODE_VV(FNMSUB):
  case CASE_VMA_OPCODE_LMULS(MADD, VV):
  case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
    assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
    // If one of the operands, is the addend we need to change opcode.
    // Otherwise we're just swapping 2 of the multiplicands.
    if (OpIdx1 == 3 || OpIdx2 == 3) {
      unsigned Opc;
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
      CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
      CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
      CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
      CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
      CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
      }

      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opcode: Opc));
      return TargetInstrInfo::commuteInstructionImpl(MI&: WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }
    // Let the default code handle it.
    break;
  }
  }

  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
4367
// The commuting case-label macros are local to the section above; undefine
// them so they cannot leak into the rest of the file.
#undef CASE_VMA_CHANGE_OPCODE_COMMON
#undef CASE_VMA_CHANGE_OPCODE_LMULS
#undef CASE_VFMA_CHANGE_OPCODE_COMMON
#undef CASE_VFMA_CHANGE_OPCODE_LMULS_M1
#undef CASE_VFMA_CHANGE_OPCODE_LMULS_MF2
#undef CASE_VFMA_CHANGE_OPCODE_LMULS_MF4
#undef CASE_VFMA_CHANGE_OPCODE_VV
#undef CASE_VFMA_CHANGE_OPCODE_SPLATS

#undef CASE_RVV_OPCODE_UNMASK_LMUL
#undef CASE_RVV_OPCODE_MASK_LMUL
#undef CASE_RVV_OPCODE_LMUL
#undef CASE_RVV_OPCODE_UNMASK_WIDEN
#undef CASE_RVV_OPCODE_UNMASK
#undef CASE_RVV_OPCODE_MASK_WIDEN
#undef CASE_RVV_OPCODE_MASK
#undef CASE_RVV_OPCODE_WIDEN
#undef CASE_RVV_OPCODE

#undef CASE_VMA_OPCODE_COMMON
#undef CASE_VMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
#undef CASE_VFMA_OPCODE_LMULS_M1
#undef CASE_VFMA_OPCODE_LMULS_MF2
#undef CASE_VFMA_OPCODE_LMULS_MF4
#undef CASE_VFMA_OPCODE_VV
#undef CASE_VFMA_SPLATS
4395
4396bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const {
4397 switch (MI.getOpcode()) {
4398 default:
4399 break;
4400 case RISCV::ADD:
4401 case RISCV::OR:
4402 case RISCV::XOR:
4403 // Normalize (so we hit the next if clause).
4404 // add/[x]or rd, zero, rs => add/[x]or rd, rs, zero
4405 if (MI.getOperand(i: 1).getReg() == RISCV::X0)
4406 commuteInstruction(MI);
4407 // add/[x]or rd, rs, zero => addi rd, rs, 0
4408 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4409 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4410 MI.setDesc(get(Opcode: RISCV::ADDI));
4411 return true;
4412 }
4413 // xor rd, rs, rs => addi rd, zero, 0
4414 if (MI.getOpcode() == RISCV::XOR &&
4415 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg()) {
4416 MI.getOperand(i: 1).setReg(RISCV::X0);
4417 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4418 MI.setDesc(get(Opcode: RISCV::ADDI));
4419 return true;
4420 }
4421 break;
4422 case RISCV::ORI:
4423 case RISCV::XORI:
4424 // [x]ori rd, zero, N => addi rd, zero, N
4425 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4426 MI.setDesc(get(Opcode: RISCV::ADDI));
4427 return true;
4428 }
4429 break;
4430 case RISCV::SUB:
4431 // sub rd, rs, zero => addi rd, rs, 0
4432 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4433 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4434 MI.setDesc(get(Opcode: RISCV::ADDI));
4435 return true;
4436 }
4437 break;
4438 case RISCV::SUBW:
4439 // subw rd, rs, zero => addiw rd, rs, 0
4440 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4441 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4442 MI.setDesc(get(Opcode: RISCV::ADDIW));
4443 return true;
4444 }
4445 break;
4446 case RISCV::ADDW:
4447 // Normalize (so we hit the next if clause).
4448 // addw rd, zero, rs => addw rd, rs, zero
4449 if (MI.getOperand(i: 1).getReg() == RISCV::X0)
4450 commuteInstruction(MI);
4451 // addw rd, rs, zero => addiw rd, rs, 0
4452 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4453 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4454 MI.setDesc(get(Opcode: RISCV::ADDIW));
4455 return true;
4456 }
4457 break;
4458 case RISCV::SH1ADD:
4459 case RISCV::SH1ADD_UW:
4460 case RISCV::SH2ADD:
4461 case RISCV::SH2ADD_UW:
4462 case RISCV::SH3ADD:
4463 case RISCV::SH3ADD_UW:
4464 // shNadd[.uw] rd, zero, rs => addi rd, rs, 0
4465 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4466 MI.removeOperand(OpNo: 1);
4467 MI.addOperand(Op: MachineOperand::CreateImm(Val: 0));
4468 MI.setDesc(get(Opcode: RISCV::ADDI));
4469 return true;
4470 }
4471 // shNadd[.uw] rd, rs, zero => slli[.uw] rd, rs, N
4472 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4473 MI.removeOperand(OpNo: 2);
4474 unsigned Opc = MI.getOpcode();
4475 if (Opc == RISCV::SH1ADD_UW || Opc == RISCV::SH2ADD_UW ||
4476 Opc == RISCV::SH3ADD_UW) {
4477 MI.addOperand(Op: MachineOperand::CreateImm(Val: getSHXADDUWShiftAmount(Opc)));
4478 MI.setDesc(get(Opcode: RISCV::SLLI_UW));
4479 return true;
4480 }
4481 MI.addOperand(Op: MachineOperand::CreateImm(Val: getSHXADDShiftAmount(Opc)));
4482 MI.setDesc(get(Opcode: RISCV::SLLI));
4483 return true;
4484 }
4485 break;
4486 case RISCV::AND:
4487 case RISCV::MUL:
4488 case RISCV::MULH:
4489 case RISCV::MULHSU:
4490 case RISCV::MULHU:
4491 case RISCV::MULW:
4492 // and rd, zero, rs => addi rd, zero, 0
4493 // mul* rd, zero, rs => addi rd, zero, 0
4494 // and rd, rs, zero => addi rd, zero, 0
4495 // mul* rd, rs, zero => addi rd, zero, 0
4496 if (MI.getOperand(i: 1).getReg() == RISCV::X0 ||
4497 MI.getOperand(i: 2).getReg() == RISCV::X0) {
4498 MI.getOperand(i: 1).setReg(RISCV::X0);
4499 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4500 MI.setDesc(get(Opcode: RISCV::ADDI));
4501 return true;
4502 }
4503 break;
4504 case RISCV::ANDI:
4505 // andi rd, zero, C => addi rd, zero, 0
4506 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4507 MI.getOperand(i: 2).setImm(0);
4508 MI.setDesc(get(Opcode: RISCV::ADDI));
4509 return true;
4510 }
4511 break;
4512 case RISCV::SLL:
4513 case RISCV::SRL:
4514 case RISCV::SRA:
4515 // shift rd, zero, rs => addi rd, zero, 0
4516 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4517 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4518 MI.setDesc(get(Opcode: RISCV::ADDI));
4519 return true;
4520 }
4521 // shift rd, rs, zero => addi rd, rs, 0
4522 if (MI.getOperand(i: 2).getReg() == RISCV::X0) {
4523 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4524 MI.setDesc(get(Opcode: RISCV::ADDI));
4525 return true;
4526 }
4527 break;
4528 case RISCV::SLLW:
4529 case RISCV::SRLW:
4530 case RISCV::SRAW:
4531 // shiftw rd, zero, rs => addi rd, zero, 0
4532 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4533 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4534 MI.setDesc(get(Opcode: RISCV::ADDI));
4535 return true;
4536 }
4537 break;
4538 case RISCV::SLLI:
4539 case RISCV::SRLI:
4540 case RISCV::SRAI:
4541 case RISCV::SLLIW:
4542 case RISCV::SRLIW:
4543 case RISCV::SRAIW:
4544 case RISCV::SLLI_UW:
4545 // shiftimm rd, zero, N => addi rd, zero, 0
4546 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4547 MI.getOperand(i: 2).setImm(0);
4548 MI.setDesc(get(Opcode: RISCV::ADDI));
4549 return true;
4550 }
4551 break;
4552 case RISCV::SLTU:
4553 case RISCV::ADD_UW:
4554 // sltu rd, zero, zero => addi rd, zero, 0
4555 // add.uw rd, zero, zero => addi rd, zero, 0
4556 if (MI.getOperand(i: 1).getReg() == RISCV::X0 &&
4557 MI.getOperand(i: 2).getReg() == RISCV::X0) {
4558 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4559 MI.setDesc(get(Opcode: RISCV::ADDI));
4560 return true;
4561 }
4562 // add.uw rd, zero, rs => addi rd, rs, 0
4563 if (MI.getOpcode() == RISCV::ADD_UW &&
4564 MI.getOperand(i: 1).getReg() == RISCV::X0) {
4565 MI.removeOperand(OpNo: 1);
4566 MI.addOperand(Op: MachineOperand::CreateImm(Val: 0));
4567 MI.setDesc(get(Opcode: RISCV::ADDI));
4568 }
4569 break;
4570 case RISCV::SLTIU:
4571 // sltiu rd, zero, NZC => addi rd, zero, 1
4572 // sltiu rd, zero, 0 => addi rd, zero, 0
4573 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4574 MI.getOperand(i: 2).setImm(MI.getOperand(i: 2).getImm() != 0);
4575 MI.setDesc(get(Opcode: RISCV::ADDI));
4576 return true;
4577 }
4578 break;
4579 case RISCV::SEXT_H:
4580 case RISCV::SEXT_B:
4581 case RISCV::ZEXT_H_RV32:
4582 case RISCV::ZEXT_H_RV64:
4583 // sext.[hb] rd, zero => addi rd, zero, 0
4584 // zext.h rd, zero => addi rd, zero, 0
4585 if (MI.getOperand(i: 1).getReg() == RISCV::X0) {
4586 MI.addOperand(Op: MachineOperand::CreateImm(Val: 0));
4587 MI.setDesc(get(Opcode: RISCV::ADDI));
4588 return true;
4589 }
4590 break;
4591 case RISCV::MIN:
4592 case RISCV::MINU:
4593 case RISCV::MAX:
4594 case RISCV::MAXU:
4595 // min|max rd, rs, rs => addi rd, rs, 0
4596 if (MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg()) {
4597 MI.getOperand(i: 2).ChangeToImmediate(ImmVal: 0);
4598 MI.setDesc(get(Opcode: RISCV::ADDI));
4599 return true;
4600 }
4601 break;
4602 case RISCV::BEQ:
4603 case RISCV::BNE:
4604 // b{eq,ne} zero, rs, imm => b{eq,ne} rs, zero, imm
4605 if (MI.getOperand(i: 0).getReg() == RISCV::X0) {
4606 MachineOperand MO0 = MI.getOperand(i: 0);
4607 MI.removeOperand(OpNo: 0);
4608 MI.insert(InsertBefore: MI.operands_begin() + 1, Ops: {MO0});
4609 }
4610 break;
4611 case RISCV::BLTU:
4612 // bltu zero, rs, imm => bne rs, zero, imm
4613 if (MI.getOperand(i: 0).getReg() == RISCV::X0) {
4614 MachineOperand MO0 = MI.getOperand(i: 0);
4615 MI.removeOperand(OpNo: 0);
4616 MI.insert(InsertBefore: MI.operands_begin() + 1, Ops: {MO0});
4617 MI.setDesc(get(Opcode: RISCV::BNE));
4618 }
4619 break;
4620 case RISCV::BGEU:
4621 // bgeu zero, rs, imm => beq rs, zero, imm
4622 if (MI.getOperand(i: 0).getReg() == RISCV::X0) {
4623 MachineOperand MO0 = MI.getOperand(i: 0);
4624 MI.removeOperand(OpNo: 0);
4625 MI.insert(InsertBefore: MI.operands_begin() + 1, Ops: {MO0});
4626 MI.setDesc(get(Opcode: RISCV::BEQ));
4627 }
4628 break;
4629 }
4630 return false;
4631}
4632
// clang-format off
// Helper macros used by convertToThreeAddress() below. The "_TIED" widening
// pseudos tie the destination to the wide source operand; these macros build
// the case lists for every LMUL (and, for FP, SEW) variant and map each TIED
// pseudo to its untied three-address counterpart.

// Opcode enumerator for one TIED integer widening pseudo at a given LMUL.
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL)                                    \
  RISCV::PseudoV##OP##_##LMUL##_TIED

// Case-label list covering every LMUL of a TIED integer widening pseudo.
#define CASE_WIDEOP_OPCODE_LMULS(OP)                                           \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF8):                                          \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF4):                                     \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF2):                                     \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M1):                                      \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M2):                                      \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M4)

// One switch case rewriting a TIED pseudo into its untied form (sets NewOpc).
#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL)                             \
  case RISCV::PseudoV##OP##_##LMUL##_TIED:                                     \
    NewOpc = RISCV::PseudoV##OP##_##LMUL;                                      \
    break;

// TIED -> untied rewrite cases for every LMUL of an integer widening pseudo.
#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1)                                     \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2)                                     \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)

// FP Widening Ops may be SEW aware. Create SEW aware cases for these cases.
#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW)                            \
  RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED

// Case-label list for every legal (LMUL, SEW) pair of an FP widening pseudo.
#define CASE_FP_WIDEOP_OPCODE_LMULS(OP)                                        \
  CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16):                                  \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16):                             \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32):                             \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32)                               \

// One switch case rewriting an FP TIED pseudo to its untied form.
#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW)                     \
  case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED:                             \
    NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW;                              \
    break;

// TIED -> untied rewrite cases for every (LMUL, SEW) of an FP widening pseudo.
#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP)                                 \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16)                            \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16)                            \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32)                            \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32)                             \

// "_ALT" variants only exist for E16 — list those cases separately.
#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP)                                    \
  CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16):                                  \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16):                             \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16)

// TIED -> untied rewrite cases for the E16-only "_ALT" variants.
#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16)                            \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16)                            \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)
// clang-format on
4703
/// Convert a two-address TIED widening vector pseudo (destination tied to the
/// wide source) into its untied three-address form, giving the register
/// allocator more freedom. Returns the replacement instruction, or nullptr if
/// the conversion is not possible (unhandled opcode or tail-undisturbed
/// policy). Updates LiveVariables / LiveIntervals when provided.
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
                                                    LiveVariables *LV,
                                                    LiveIntervals *LIS) const {
  MachineInstrBuilder MIB;
  switch (MI.getOpcode()) {
  default:
    return nullptr;
  case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV):
  case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWSUB_ALT_WV):
  case CASE_FP_WIDEOP_OPCODE_LMULS(FWADD_WV):
  case CASE_FP_WIDEOP_OPCODE_LMULS(FWSUB_WV): {
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 7 &&
           "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
    // If the tail policy is undisturbed we can't convert.
    if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
         1) == 0)
      return nullptr;
    // clang-format off
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
    CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
    CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWADD_ALT_WV)
    CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWSUB_ALT_WV)
    }
    // clang-format on

    // Rebuild with an explicit undef passthru (the extra addReg of the dest)
    // in place of the tied-def constraint; remaining operands carry over.
    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
              .add(MO: MI.getOperand(i: 0))
              .addReg(RegNo: MI.getOperand(i: 0).getReg(), Flags: RegState::Undef)
              .add(MO: MI.getOperand(i: 1))
              .add(MO: MI.getOperand(i: 2))
              .add(MO: MI.getOperand(i: 3))
              .add(MO: MI.getOperand(i: 4))
              .add(MO: MI.getOperand(i: 5))
              .add(MO: MI.getOperand(i: 6));
    break;
  }
  case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
    // If the tail policy is undisturbed we can't convert.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 6);
    if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
         1) == 0)
      return nullptr;

    // clang-format off
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
    }
    // clang-format on

    // Same rebuild as above, but integer widening pseudos have no rounding
    // mode operand (6 explicit operands instead of 7).
    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: NewOpc))
              .add(MO: MI.getOperand(i: 0))
              .addReg(RegNo: MI.getOperand(i: 0).getReg(), Flags: RegState::Undef)
              .add(MO: MI.getOperand(i: 1))
              .add(MO: MI.getOperand(i: 2))
              .add(MO: MI.getOperand(i: 3))
              .add(MO: MI.getOperand(i: 4))
              .add(MO: MI.getOperand(i: 5));
    break;
  }
  }
  MIB.copyImplicitOps(OtherMI: MI);

  // Transfer kill flags from the old instruction to the new one.
  if (LV) {
    unsigned NumOps = MI.getNumOperands();
    for (unsigned I = 1; I < NumOps; ++I) {
      MachineOperand &Op = MI.getOperand(i: I);
      if (Op.isReg() && Op.isKill())
        LV->replaceKillInstruction(Reg: Op.getReg(), OldMI&: MI, NewMI&: *MIB);
    }
  }

  if (LIS) {
    SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, NewMI&: *MIB);

    if (MI.getOperand(i: 0).isEarlyClobber()) {
      // Use operand 1 was tied to early-clobber def operand 0, so its live
      // interval could have ended at an early-clobber slot. Now they are not
      // tied we need to update it to the normal register slot.
      LiveInterval &LI = LIS->getInterval(Reg: MI.getOperand(i: 1).getReg());
      LiveRange::Segment *S = LI.getSegmentContaining(Idx);
      if (S->end == Idx.getRegSlot(EC: true))
        S->end = Idx.getRegSlot();
    }
  }

  return MIB;
}
4808
4809#undef CASE_WIDEOP_OPCODE_COMMON
4810#undef CASE_WIDEOP_OPCODE_LMULS
4811#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
4812#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
4813#undef CASE_FP_WIDEOP_OPCODE_COMMON
4814#undef CASE_FP_WIDEOP_OPCODE_LMULS
4815#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
4816#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
4817
/// Multiply DestReg by the constant Amount in place (DestReg *= Amount),
/// inserting instructions before II. Strategies, in order of preference:
/// power of two -> single SLLI; 3/5/9 times a power of two -> SHXADD (+SLLI)
/// when Zba is available; 2^k +/- 1 -> SLLI then ADD/SUB; otherwise MUL when
/// Zmmul is available, or a final shift-and-add expansion.
void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator II, const DebugLoc &DL,
                            Register DestReg, uint32_t Amount,
                            MachineInstr::MIFlag Flag) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  if (llvm::has_single_bit<uint32_t>(Value: Amount)) {
    // Amount == 2^k: a single shift (or nothing at all for Amount == 1).
    uint32_t ShiftAmount = Log2_32(Value: Amount);
    if (ShiftAmount == 0)
      return;
    BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg)
        .addReg(RegNo: DestReg, Flags: RegState::Kill)
        .addImm(Val: ShiftAmount)
        .setMIFlag(Flag);
  } else if (int ShXAmount, ShiftAmount;
             STI.hasShlAdd(ShAmt: 3) &&
             (ShXAmount = isShifted359(Value: Amount, Shift&: ShiftAmount)) != 0) {
    // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
    // isShifted359 matched Amount == (3|5|9) << ShiftAmount; ShXAmount picks
    // SH1ADD (x3), SH2ADD (x5) or SH3ADD (x9).
    unsigned Opc;
    switch (ShXAmount) {
    case 1:
      Opc = RISCV::SH1ADD;
      break;
    case 2:
      Opc = RISCV::SH2ADD;
      break;
    case 3:
      Opc = RISCV::SH3ADD;
      break;
    default:
      llvm_unreachable("unexpected result of isShifted359");
    }
    if (ShiftAmount)
      BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg)
          .addReg(RegNo: DestReg, Flags: RegState::Kill)
          .addImm(Val: ShiftAmount)
          .setMIFlag(Flag)
    BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: Opc), DestReg)
        .addReg(RegNo: DestReg, Flags: RegState::Kill)
        .addReg(RegNo: DestReg)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(Value: Amount - 1)) {
    // Amount == 2^k + 1: DestReg = (DestReg << k) + DestReg.
    Register ScaledRegister = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(Value: Amount - 1);
    BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg: ScaledRegister)
        .addReg(RegNo: DestReg)
        .addImm(Val: ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::ADD), DestReg)
        .addReg(RegNo: ScaledRegister, Flags: RegState::Kill)
        .addReg(RegNo: DestReg, Flags: RegState::Kill)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(Value: Amount + 1)) {
    // Amount == 2^k - 1: DestReg = (DestReg << k) - DestReg.
    Register ScaledRegister = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(Value: Amount + 1);
    BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg: ScaledRegister)
        .addReg(RegNo: DestReg)
        .addImm(Val: ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SUB), DestReg)
        .addReg(RegNo: ScaledRegister, Flags: RegState::Kill)
        .addReg(RegNo: DestReg, Flags: RegState::Kill)
        .setMIFlag(Flag);
  } else if (STI.hasStdExtZmmul()) {
    // General case with a hardware multiplier: materialize Amount and MUL.
    Register N = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
    movImm(MBB, MBBI: II, DL, DstReg: N, Val: Amount, Flag);
    BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::MUL), DestReg)
        .addReg(RegNo: DestReg, Flags: RegState::Kill)
        .addReg(RegNo: N, Flags: RegState::Kill)
        .setMIFlag(Flag);
  } else {
    // No multiplier: shift-and-add over the set bits of Amount. DestReg is
    // repeatedly shifted left by the distance to the next set bit, while Acc
    // accumulates the partial products of all but the topmost set bit.
    Register Acc;
    uint32_t PrevShiftAmount = 0;
    for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
      if (Amount & (1U << ShiftAmount)) {
        if (ShiftAmount)
          BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::SLLI), DestReg)
              .addReg(RegNo: DestReg, Flags: RegState::Kill)
              .addImm(Val: ShiftAmount - PrevShiftAmount)
              .setMIFlag(Flag);
        if (Amount >> (ShiftAmount + 1)) {
          // If we don't have an accumulator yet, create it and copy DestReg.
          if (!Acc) {
            Acc = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
            BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: TargetOpcode::COPY), DestReg: Acc)
                .addReg(RegNo: DestReg)
                .setMIFlag(Flag);
          } else {
            BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::ADD), DestReg: Acc)
                .addReg(RegNo: Acc, Flags: RegState::Kill)
                .addReg(RegNo: DestReg)
                .setMIFlag(Flag);
          }
        }
        PrevShiftAmount = ShiftAmount;
      }
    }
    // Amount is not a power of two here, so at least two bits were set and
    // the accumulator must exist.
    assert(Acc && "Expected valid accumulator");
    BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: get(Opcode: RISCV::ADD), DestReg)
        .addReg(RegNo: DestReg, Flags: RegState::Kill)
        .addReg(RegNo: Acc, Flags: RegState::Kill)
        .setMIFlag(Flag);
  }
}
4921
4922ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4923RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4924 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4925 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
4926 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
4927 return ArrayRef(TargetFlags);
4928}
4929
4930unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
4931 return OptLevel >= CodeGenOptLevel::Aggressive
4932 ? STI.getTailDupAggressiveThreshold()
4933 : 2;
4934}
4935
4936bool RISCV::isRVVSpill(const MachineInstr &MI) {
4937 // RVV lacks any support for immediate addressing for stack addresses, so be
4938 // conservative.
4939 unsigned Opcode = MI.getOpcode();
4940 if (!RISCVVPseudosTable::getPseudoInfo(Pseudo: Opcode) &&
4941 !getLMULForRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
4942 return false;
4943 return true;
4944}
4945
4946/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
4947bool RISCV::isVectorCopy(const TargetRegisterInfo *TRI,
4948 const MachineInstr &MI) {
4949 return MI.isCopy() && MI.getOperand(i: 0).getReg().isPhysical() &&
4950 RISCVRegisterInfo::isRVVRegClass(
4951 RC: TRI->getMinimalPhysRegClass(Reg: MI.getOperand(i: 0).getReg()));
4952}
4953
4954std::optional<std::pair<unsigned, unsigned>>
4955RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
4956 switch (Opcode) {
4957 default:
4958 return std::nullopt;
4959 case RISCV::PseudoVSPILL2_M1:
4960 case RISCV::PseudoVRELOAD2_M1:
4961 return std::make_pair(x: 2u, y: 1u);
4962 case RISCV::PseudoVSPILL2_M2:
4963 case RISCV::PseudoVRELOAD2_M2:
4964 return std::make_pair(x: 2u, y: 2u);
4965 case RISCV::PseudoVSPILL2_M4:
4966 case RISCV::PseudoVRELOAD2_M4:
4967 return std::make_pair(x: 2u, y: 4u);
4968 case RISCV::PseudoVSPILL3_M1:
4969 case RISCV::PseudoVRELOAD3_M1:
4970 return std::make_pair(x: 3u, y: 1u);
4971 case RISCV::PseudoVSPILL3_M2:
4972 case RISCV::PseudoVRELOAD3_M2:
4973 return std::make_pair(x: 3u, y: 2u);
4974 case RISCV::PseudoVSPILL4_M1:
4975 case RISCV::PseudoVRELOAD4_M1:
4976 return std::make_pair(x: 4u, y: 1u);
4977 case RISCV::PseudoVSPILL4_M2:
4978 case RISCV::PseudoVRELOAD4_M2:
4979 return std::make_pair(x: 4u, y: 2u);
4980 case RISCV::PseudoVSPILL5_M1:
4981 case RISCV::PseudoVRELOAD5_M1:
4982 return std::make_pair(x: 5u, y: 1u);
4983 case RISCV::PseudoVSPILL6_M1:
4984 case RISCV::PseudoVRELOAD6_M1:
4985 return std::make_pair(x: 6u, y: 1u);
4986 case RISCV::PseudoVSPILL7_M1:
4987 case RISCV::PseudoVRELOAD7_M1:
4988 return std::make_pair(x: 7u, y: 1u);
4989 case RISCV::PseudoVSPILL8_M1:
4990 case RISCV::PseudoVRELOAD8_M1:
4991 return std::make_pair(x: 8u, y: 1u);
4992 }
4993}
4994
4995bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
4996 int16_t MI1FrmOpIdx =
4997 RISCV::getNamedOperandIdx(Opcode: MI1.getOpcode(), Name: RISCV::OpName::frm);
4998 int16_t MI2FrmOpIdx =
4999 RISCV::getNamedOperandIdx(Opcode: MI2.getOpcode(), Name: RISCV::OpName::frm);
5000 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
5001 return false;
5002 MachineOperand FrmOp1 = MI1.getOperand(i: MI1FrmOpIdx);
5003 MachineOperand FrmOp2 = MI2.getOperand(i: MI2FrmOpIdx);
5004 return FrmOp1.getImm() == FrmOp2.getImm();
5005}
5006
/// For a vector instruction with a GPR scalar operand (vx / wx / s.x forms),
/// return how many low bits of that scalar actually affect the result, given
/// the element width Log2SEW. Returns std::nullopt for unhandled opcodes.
std::optional<unsigned>
RISCV::getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW) {
  switch (Opcode) {
  default:
    return std::nullopt;

  // 11.6. Vector Single-Width Shift Instructions
  case RISCV::VSLL_VX:
  case RISCV::VSRL_VX:
  case RISCV::VSRA_VX:
  // 12.4. Vector Single-Width Scaling Shift Instructions
  case RISCV::VSSRL_VX:
  case RISCV::VSSRA_VX:
  // Zvbb
  case RISCV::VROL_VX:
  case RISCV::VROR_VX:
    // Only the low lg2(SEW) bits of the shift-amount value are used.
    return Log2SEW;

  // 11.7 Vector Narrowing Integer Right Shift Instructions
  case RISCV::VNSRL_WX:
  case RISCV::VNSRA_WX:
  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
  case RISCV::VNCLIPU_WX:
  case RISCV::VNCLIP_WX:
  // Zvbb
  case RISCV::VWSLL_VX:
    // Only the low lg2(2*SEW) bits of the shift-amount value are used.
    return Log2SEW + 1;

  // All remaining cases use the full SEW bits of the scalar operand.
  // 11.1. Vector Single-Width Integer Add and Subtract
  case RISCV::VADD_VX:
  case RISCV::VSUB_VX:
  case RISCV::VRSUB_VX:
  // 11.2. Vector Widening Integer Add/Subtract
  case RISCV::VWADDU_VX:
  case RISCV::VWSUBU_VX:
  case RISCV::VWADD_VX:
  case RISCV::VWSUB_VX:
  case RISCV::VWADDU_WX:
  case RISCV::VWSUBU_WX:
  case RISCV::VWADD_WX:
  case RISCV::VWSUB_WX:
  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
  case RISCV::VADC_VXM:
  case RISCV::VADC_VIM:
  case RISCV::VMADC_VXM:
  case RISCV::VMADC_VIM:
  case RISCV::VMADC_VX:
  case RISCV::VSBC_VXM:
  case RISCV::VMSBC_VXM:
  case RISCV::VMSBC_VX:
  // 11.5 Vector Bitwise Logical Instructions
  case RISCV::VAND_VX:
  case RISCV::VOR_VX:
  case RISCV::VXOR_VX:
  // 11.8. Vector Integer Compare Instructions
  case RISCV::VMSEQ_VX:
  case RISCV::VMSNE_VX:
  case RISCV::VMSLTU_VX:
  case RISCV::VMSLT_VX:
  case RISCV::VMSLEU_VX:
  case RISCV::VMSLE_VX:
  case RISCV::VMSGTU_VX:
  case RISCV::VMSGT_VX:
  // 11.9. Vector Integer Min/Max Instructions
  case RISCV::VMINU_VX:
  case RISCV::VMIN_VX:
  case RISCV::VMAXU_VX:
  case RISCV::VMAX_VX:
  // 11.10. Vector Single-Width Integer Multiply Instructions
  case RISCV::VMUL_VX:
  case RISCV::VMULH_VX:
  case RISCV::VMULHU_VX:
  case RISCV::VMULHSU_VX:
  // 11.11. Vector Integer Divide Instructions
  case RISCV::VDIVU_VX:
  case RISCV::VDIV_VX:
  case RISCV::VREMU_VX:
  case RISCV::VREM_VX:
  // 11.12. Vector Widening Integer Multiply Instructions
  case RISCV::VWMUL_VX:
  case RISCV::VWMULU_VX:
  case RISCV::VWMULSU_VX:
  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
  case RISCV::VMACC_VX:
  case RISCV::VNMSAC_VX:
  case RISCV::VMADD_VX:
  case RISCV::VNMSUB_VX:
  // 11.14. Vector Widening Integer Multiply-Add Instructions
  case RISCV::VWMACCU_VX:
  case RISCV::VWMACC_VX:
  case RISCV::VWMACCSU_VX:
  case RISCV::VWMACCUS_VX:
  // 11.15. Vector Integer Merge Instructions
  case RISCV::VMERGE_VXM:
  // 11.16. Vector Integer Move Instructions
  case RISCV::VMV_V_X:
  // 12.1. Vector Single-Width Saturating Add and Subtract
  case RISCV::VSADDU_VX:
  case RISCV::VSADD_VX:
  case RISCV::VSSUBU_VX:
  case RISCV::VSSUB_VX:
  // 12.2. Vector Single-Width Averaging Add and Subtract
  case RISCV::VAADDU_VX:
  case RISCV::VAADD_VX:
  case RISCV::VASUBU_VX:
  case RISCV::VASUB_VX:
  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
  case RISCV::VSMUL_VX:
  // 16.1. Integer Scalar Move Instructions
  case RISCV::VMV_S_X:
  // Zvbb
  case RISCV::VANDN_VX:
    return 1U << Log2SEW;
  }
}
5124
5125unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
5126 const RISCVVPseudosTable::PseudoInfo *RVV =
5127 RISCVVPseudosTable::getPseudoInfo(Pseudo: RVVPseudoOpcode);
5128 if (!RVV)
5129 return 0;
5130 return RVV->BaseInstr;
5131}
5132
5133unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) {
5134 unsigned DestEEW =
5135 (Desc.TSFlags & RISCVII::DestEEWMask) >> RISCVII::DestEEWShift;
5136 // EEW = 1
5137 if (DestEEW == 0)
5138 return 0;
5139 // EEW = SEW * n
5140 unsigned Scaled = Log2SEW + (DestEEW - 1);
5141 assert(Scaled >= 3 && Scaled <= 6);
5142 return Scaled;
5143}
5144
5145static std::optional<int64_t> getEffectiveImm(const MachineOperand &MO) {
5146 assert(MO.isImm() || MO.getReg().isVirtual());
5147 if (MO.isImm())
5148 return MO.getImm();
5149 const MachineInstr *Def =
5150 MO.getParent()->getMF()->getRegInfo().getVRegDef(Reg: MO.getReg());
5151 int64_t Imm;
5152 if (isLoadImm(MI: Def, Imm))
5153 return Imm;
5154 return std::nullopt;
5155}
5156
/// Given two VL operands, do we know that LHS <= RHS? Must be used in SSA form.
bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
  assert((LHS.isImm() || LHS.getParent()->getMF()->getRegInfo().isSSA()) &&
         (RHS.isImm() || RHS.getParent()->getMF()->getRegInfo().isSSA()));
  // The same virtual register trivially satisfies LHS <= RHS.
  if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
      LHS.getReg() == RHS.getReg())
    return true;
  // Every VL is <= VLMAX.
  if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
    return true;
  // 0 is <= every VL.
  if (LHS.isImm() && LHS.getImm() == 0)
    return true;
  // VLMAX on the left is only <= an RHS of VLMAX, already handled above.
  if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
    return false;
  // Fall back to comparing known constants (immediates, or registers defined
  // by constant-materializing instructions).
  std::optional<int64_t> LHSImm = getEffectiveImm(MO: LHS),
                         RHSImm = getEffectiveImm(MO: RHS);
  if (!LHSImm || !RHSImm)
    return false;
  return LHSImm <= RHSImm;
}
5176
namespace {
/// Loop-pipelining hooks for RISC-V single-basic-block loops. LHS/RHS are the
/// defining instructions of the two branch-condition operands (or nullptr);
/// Cond is the normalized branch condition from analyzeBranch().
class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  const MachineInstr *LHS; // Definition of the first condition operand.
  const MachineInstr *RHS; // Definition of the second condition operand.
  SmallVector<MachineOperand, 3> Cond; // Normalized loop-exit condition.

public:
  RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
                         const SmallVectorImpl<MachineOperand> &Cond)
      : LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Make the instructions for loop control be placed in stage 0.
    // The predecessors of LHS/RHS are considered by the caller.
    if (LHS && MI == LHS)
      return true;
    if (RHS && MI == RHS)
      return true;
    return false;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &CondParam) override {
    // A branch instruction will be inserted as "if (Cond) goto epilogue".
    // Cond is normalized for such use.
    // The predecessors of the branch are assumed to have already been inserted.
    CondParam = Cond;
    return {};
  }

  // No preheader bookkeeping or trip-count adjustment is needed on RISC-V.
  void setPreheader(MachineBasicBlock *NewPreheader) override {}

  void adjustTripCount(int TripCountAdjust) override {}
};
} // namespace
5213
5214std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5215RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
5216 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
5217 SmallVector<MachineOperand, 4> Cond;
5218 if (analyzeBranch(MBB&: *LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
5219 return nullptr;
5220
5221 // Infinite loops are not supported
5222 if (TBB == LoopBB && FBB == LoopBB)
5223 return nullptr;
5224
5225 // Must be conditional branch
5226 if (FBB == nullptr)
5227 return nullptr;
5228
5229 assert((TBB == LoopBB || FBB == LoopBB) &&
5230 "The Loop must be a single-basic-block loop");
5231
5232 // Normalization for createTripCountGreaterCondition()
5233 if (TBB == LoopBB)
5234 reverseBranchCondition(Cond);
5235
5236 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
5237 auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * {
5238 if (!Op.isReg())
5239 return nullptr;
5240 Register Reg = Op.getReg();
5241 if (!Reg.isVirtual())
5242 return nullptr;
5243 return MRI.getVRegDef(Reg);
5244 };
5245
5246 const MachineInstr *LHS = FindRegDef(Cond[1]);
5247 const MachineInstr *RHS = FindRegDef(Cond[2]);
5248 if (LHS && LHS->isPHI())
5249 return nullptr;
5250 if (RHS && RHS->isPHI())
5251 return nullptr;
5252
5253 return std::make_unique<RISCVPipelinerLoopInfo>(args&: LHS, args&: RHS, args&: Cond);
5254}
5255
// FIXME: We should remove this if we have a default generic scheduling model.
/// Return true for opcodes (or RVV pseudos mapping to opcodes) that are
/// expected to be high-latency: integer and FP divide/remainder/sqrt.
bool RISCVInstrInfo::isHighLatencyDef(int Opc) const {
  // Map RVV pseudos to their underlying MC opcode first so vector pseudos are
  // classified the same as the real instruction.
  unsigned RVVMCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: Opc);
  Opc = RVVMCOpcode ? RVVMCOpcode : Opc;
  switch (Opc) {
  default:
    return false;
  // Integer div/rem.
  case RISCV::DIV:
  case RISCV::DIVW:
  case RISCV::DIVU:
  case RISCV::DIVUW:
  case RISCV::REM:
  case RISCV::REMW:
  case RISCV::REMU:
  case RISCV::REMUW:
  // Floating-point div/sqrt.
  case RISCV::FDIV_H:
  case RISCV::FDIV_S:
  case RISCV::FDIV_D:
  case RISCV::FDIV_H_INX:
  case RISCV::FDIV_S_INX:
  case RISCV::FDIV_D_INX:
  case RISCV::FDIV_D_IN32X:
  case RISCV::FSQRT_H:
  case RISCV::FSQRT_S:
  case RISCV::FSQRT_D:
  case RISCV::FSQRT_H_INX:
  case RISCV::FSQRT_S_INX:
  case RISCV::FSQRT_D_INX:
  case RISCV::FSQRT_D_IN32X:
  // Vector integer div/rem
  case RISCV::VDIV_VV:
  case RISCV::VDIV_VX:
  case RISCV::VDIVU_VV:
  case RISCV::VDIVU_VX:
  case RISCV::VREM_VV:
  case RISCV::VREM_VX:
  case RISCV::VREMU_VV:
  case RISCV::VREMU_VX:
  // Vector floating-point div/sqrt.
  case RISCV::VFDIV_VV:
  case RISCV::VFDIV_VF:
  case RISCV::VFRDIV_VF:
  case RISCV::VFSQRT_V:
  case RISCV::VFRSQRT7_V:
    return true;
  }
}
5305
5306bool RISCVInstrInfo::isVRegCopy(const MachineInstr *MI, unsigned LMul) const {
5307 if (MI->getOpcode() != TargetOpcode::COPY)
5308 return false;
5309 const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
5310 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
5311
5312 Register DstReg = MI->getOperand(i: 0).getReg();
5313 const TargetRegisterClass *RC = DstReg.isVirtual()
5314 ? MRI.getRegClass(Reg: DstReg)
5315 : TRI->getMinimalPhysRegClass(Reg: DstReg);
5316
5317 if (!RISCVRegisterInfo::isRVVRegClass(RC))
5318 return false;
5319
5320 if (!LMul)
5321 return true;
5322
5323 // TODO: Perhaps we could distinguish segment register classes (e.g. VRN3M2)
5324 // in the future.
5325 auto [RCLMul, RCFractional] =
5326 RISCVVType::decodeVLMUL(VLMul: RISCVRI::getLMul(TSFlags: RC->TSFlags));
5327 return (!RCFractional && LMul == RCLMul) || (RCFractional && LMul == 1);
5328}
5329