//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "../Target.h"
#include "../ParallelSnippetGenerator.h"
#include "../SerialSnippetGenerator.h"
#include "../SnippetGenerator.h"

#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVExegesisPasses.h"
#include "RISCVInstrInfo.h"
#include "RISCVRegisterInfo.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"

// Include computeAvailableFeatures and computeRequiredFeatures.
#define GET_AVAILABLE_OPCODE_CHECKER
#include "RISCVGenInstrInfo.inc"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include <vector>
namespace llvm {
namespace exegesis {

static cl::opt<bool>
    OnlyUsesVLMAXForVL("riscv-vlmax-for-vl",
                       cl::desc("Only enumerate VLMAX for VL operand"),
                       cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnumerateRoundingModes("riscv-enumerate-rounding-modes",
                           cl::desc("Enumerate different FRM and VXRM"),
                           cl::init(true), cl::Hidden);

static cl::opt<std::string>
    FilterConfig("riscv-filter-config",
                 cl::desc("Show only the configs matching this regex"),
                 cl::init(""), cl::Hidden);

#include "RISCVGenExegesis.inc"

namespace {

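// Snippet generator that wraps either the serial or the parallel snippet
// generator (as BaseT) and expands each RVV pseudo instruction into one code
// template per supported vtype configuration (SEW, policy, AVL, and rounding
// mode).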
template <class BaseT> class RISCVSnippetGenerator : public BaseT {
  static void printRoundingMode(raw_ostream &OS, unsigned Val, bool UsesVXRM) {
    if (UsesVXRM) {
      assert(RISCVVXRndMode::isValidRoundingMode(Val));
      OS << RISCVVXRndMode::roundingModeToString(
          static_cast<RISCVVXRndMode::RoundingMode>(Val));
    } else {
      assert(RISCVFPRndMode::isValidRoundingMode(Val));
      OS << RISCVFPRndMode::roundingModeToString(
          static_cast<RISCVFPRndMode::RoundingMode>(Val));
    }
  }

  static constexpr unsigned MinSEW = 8;
  // ELEN is basically SEW_max.
  unsigned ELEN = 64;

  // We can't know the real min/max VLEN without a Function, so we're using
  // the VLEN from the Zvl extensions.
  unsigned ZvlVLen = 32;

  /// Mask for registers that are NOT standalone registers like X0 and V0.
  BitVector AggregateRegisters;

  // Returns true when the opcode is available in any of the FBs.
  static bool
  isOpcodeAvailableIn(unsigned Opcode,
                      ArrayRef<RISCV_MC::SubtargetFeatureBits> FBs) {
    FeatureBitset RequiredFeatures = RISCV_MC::computeRequiredFeatures(Opcode);
    for (uint8_t FB : FBs) {
      if (RequiredFeatures[FB])
        return true;
    }
    return false;
  }

  static bool isRVVFloatingPointOp(unsigned Opcode) {
    return isOpcodeAvailableIn(Opcode,
                               {RISCV_MC::Feature_HasVInstructionsAnyFBit});
  }

  // Get the element group width (EGW) used by each vector crypto extension.
  static unsigned getZvkEGWSize(unsigned Opcode, unsigned SEW) {
    using namespace RISCV_MC;
    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkgBit,
                                     Feature_HasStdExtZvknedBit,
                                     Feature_HasStdExtZvksedBit}))
      return 128U;
    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkshBit}))
      return 256U;
    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvknhaOrZvknhbBit}))
      // In Zvknh[ab], when SEW=64 is used (i.e. Zvknhb), EGW is 256.
      // Otherwise it's 128.
      return SEW == 64 ? 256U : 128U;

    llvm_unreachable("Unsupported opcode");
  }

  // A handy utility to multiply or divide an integer by LMUL.
  template <typename T> static T multiplyLMul(T Val, RISCVVType::VLMUL VLMul) {
    auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
    return IsFractional ? Val / LMul : Val * LMul;
  }

  /// Return the denominator of the fraction (i.e. the `x` in the .vfx suffix),
  /// or nullopt if BaseOpcode is not a vector sext/zext.
  static std::optional<unsigned> isRVVSignZeroExtend(unsigned BaseOpcode) {
    switch (BaseOpcode) {
    case RISCV::VSEXT_VF2:
    case RISCV::VZEXT_VF2:
      return 2;
    case RISCV::VSEXT_VF4:
    case RISCV::VZEXT_VF4:
      return 4;
    case RISCV::VSEXT_VF8:
    case RISCV::VZEXT_VF8:
      return 8;
    default:
      return std::nullopt;
    }
  }

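  // Clones CT once per enumerated vtype configuration, assigns concrete
  // values to the SEW, policy, AVL, and rounding mode operands, and appends
  // the results to Result.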
  void annotateWithVType(const CodeTemplate &CT, const Instruction &Instr,
                         unsigned BaseOpcode,
                         const BitVector &ForbiddenRegisters,
                         std::vector<CodeTemplate> &Result) const;

public:
  RISCVSnippetGenerator(const LLVMState &State,
                        const SnippetGenerator::Options &Opts)
      : BaseT(State, Opts),
        AggregateRegisters(State.getRegInfo().getNumRegs(), /*initVal=*/true) {
    // Initialize the standalone registers mask.
    const MCRegisterInfo &RegInfo = State.getRegInfo();
    const unsigned StandaloneRegClasses[] = {
        RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};

    for (unsigned RegClassID : StandaloneRegClasses)
      for (unsigned Reg : RegInfo.getRegClass(RegClassID))
        AggregateRegisters.reset(Reg);

    // Initialize ELEN and VLEN.
    // FIXME: We could have obtained these two constants from RISCVSubtarget,
    // but in order to get that from TargetMachine, we need a Function.
    const MCSubtargetInfo &STI = State.getSubtargetInfo();
    ELEN = STI.hasFeature(RISCV::FeatureStdExtZve64x) ? 64 : 32;

    const unsigned ZvlFeatures[] = {
        RISCV::FeatureStdExtZvl32b,    RISCV::FeatureStdExtZvl64b,
        RISCV::FeatureStdExtZvl128b,   RISCV::FeatureStdExtZvl256b,
        RISCV::FeatureStdExtZvl512b,   RISCV::FeatureStdExtZvl1024b,
        RISCV::FeatureStdExtZvl2048b,  RISCV::FeatureStdExtZvl4096b,
        RISCV::FeatureStdExtZvl8192b,  RISCV::FeatureStdExtZvl16384b,
        RISCV::FeatureStdExtZvl32768b, RISCV::FeatureStdExtZvl65536b};
    for (auto [Idx, Feature] : enumerate(ZvlFeatures)) {
      if (STI.hasFeature(Feature))
        ZvlVLen = std::max(ZvlVLen, 1u << (Idx + 5));
    }
  }

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(InstructionTemplate Variant,
                        const BitVector &ForbiddenRegisters) const override;
};

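// Returns true if MaskedOp is a masked pseudo whose unmasked counterpart is
// UnmaskedOp.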
static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) {
  const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp);
  return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp;
}

// There are primarily two kinds of opcodes that are not eligible for a
// serial snippet:
// (1) Those with a use operand that cannot overlap with the def operand
//     (i.e. early clobber).
// (2) Those where the register file of the only use operand is different
//     from that of the def operand. For instance, the use operand is a
//     vector while the result is a scalar.
static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
                                         const Instruction &I) {
  if (llvm::any_of(I.Operands,
                   [](const Operand &Op) { return Op.isEarlyClobber(); }))
    return true;

  switch (BaseOpcode) {
  case RISCV::VCOMPRESS_VM:
  case RISCV::VCPOP_M:
  case RISCV::VCPOP_V:
  // The permutation instructions listed below cannot have the destination
  // overlapping with the source.
  case RISCV::VRGATHEREI16_VV:
  case RISCV::VRGATHER_VI:
  case RISCV::VRGATHER_VV:
  case RISCV::VRGATHER_VX:
  case RISCV::VSLIDE1UP_VX:
  case RISCV::VSLIDEUP_VI:
  case RISCV::VSLIDEUP_VX:
    return true;
  default:
    return false;
  }
}

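// Returns true for the FP conversion opcodes provided by Zvfhmin/Zvfbfmin,
// for which SEW=16 is valid even without full Zvfh.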
static bool isZvfhminZvfbfminOpcodes(unsigned BaseOpcode) {
  switch (BaseOpcode) {
  case RISCV::VFNCVT_F_F_W:
  case RISCV::VFWCVT_F_F_V:
  case RISCV::VFNCVTBF16_F_F_W:
  case RISCV::VFWCVTBF16_F_F_V:
    return true;
  default:
    return false;
  }
}

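// Returns true if BaseOpcode is a vector reduction, including widening and
// floating-point reductions.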
static bool isVectorReduction(unsigned BaseOpcode) {
  switch (BaseOpcode) {
  case RISCV::VREDAND_VS:
  case RISCV::VREDMAXU_VS:
  case RISCV::VREDMAX_VS:
  case RISCV::VREDMINU_VS:
  case RISCV::VREDMIN_VS:
  case RISCV::VREDOR_VS:
  case RISCV::VREDSUM_VS:
  case RISCV::VREDXOR_VS:
  case RISCV::VWREDSUMU_VS:
  case RISCV::VWREDSUM_VS:
  case RISCV::VFREDMAX_VS:
  case RISCV::VFREDMIN_VS:
  case RISCV::VFREDOSUM_VS:
  case RISCV::VFREDUSUM_VS:
    return true;
  default:
    return false;
  }
}

template <class BaseT>
void RISCVSnippetGenerator<BaseT>::annotateWithVType(
    const CodeTemplate &OrigCT, const Instruction &Instr, unsigned BaseOpcode,
    const BitVector &ForbiddenRegisters,
    std::vector<CodeTemplate> &Result) const {
  const MCSubtargetInfo &STI = SnippetGenerator::State.getSubtargetInfo();
  unsigned VPseudoOpcode = Instr.getOpcode();

  bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;

  const MCInstrDesc &MIDesc = Instr.Description;
  const uint64_t TSFlags = MIDesc.TSFlags;

  RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  const size_t StartingResultSize = Result.size();

  SmallPtrSet<const Operand *, 4> VTypeOperands;
  std::optional<AliasingConfigurations> SelfAliasing;
  // Exegesis sees instructions with tied operands as inherently serial.
  // But for RVV instructions, those tied operands are passthru rather
  // than real read operands. So we manually put a dependency between the
  // destination (i.e. def) and any of the non-tied/SEW/policy/AVL/RM
  // operands.
  auto assignSerialRVVOperands = [&, this](InstructionTemplate &IT) {
    // Initialize SelfAliasing on first use.
    if (!SelfAliasing.has_value()) {
      BitVector ExcludeRegs = ForbiddenRegisters;
      ExcludeRegs |= AggregateRegisters;
      SelfAliasing = AliasingConfigurations(Instr, Instr, ExcludeRegs);
      bool EmptyUses = false;
      for (auto &ARO : SelfAliasing->Configurations) {
        auto &Uses = ARO.Uses;
        for (auto ROA = Uses.begin(); ROA != Uses.end();) {
          const Operand *Op = ROA->Op;
          // Exclude tied operand(s).
          if (Op->isTied()) {
            ROA = Uses.erase(ROA);
            continue;
          }

          // Special handling for reduction operations: for a given reduction
          // `vredop vd, vs2, vs1`, we don't want vd to be aliased with vs1
          // since we're only reading `vs1[0]` and many implementations
          // optimize for this case (e.g. chaining). Instead, we're forcing
          // it to create an alias between vd and vs2.
          if (isVectorReduction(BaseOpcode) &&
              // vs1's operand index is always 3.
              Op->getIndex() == 3) {
            ROA = Uses.erase(ROA);
            continue;
          }

          // Exclude any special operands like SEW and VL -- we've already
          // assigned values to them.
          if (VTypeOperands.count(Op)) {
            ROA = Uses.erase(ROA);
            continue;
          }
          ++ROA;
        }

        // If any of the use operand candidate lists is empty, there is no
        // point in assigning self-aliasing registers.
        if (Uses.empty()) {
          EmptyUses = true;
          break;
        }
      }
      if (EmptyUses)
        SelfAliasing->Configurations.clear();
    }

    // This is a self-aliasing instruction, so defs and uses come from the
    // same instance, hence IT appears twice in the following call.
    if (!SelfAliasing->empty() && !SelfAliasing->hasImplicitAliasing())
      setRandomAliasing(*SelfAliasing, IT, IT);
  };

  // We are going to create a CodeTemplate (configuration) for each supported
  // SEW, policy, and VL.
  // FIXME: Account for EEW and EMUL.
  SmallVector<std::optional<unsigned>, 4> Log2SEWs;
  SmallVector<std::optional<unsigned>, 4> Policies;
  SmallVector<std::optional<int>, 3> AVLs;
  SmallVector<std::optional<unsigned>, 8> RoundingModes;

  bool HasSEWOp = RISCVII::hasSEWOp(TSFlags);
  bool HasPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
  bool HasVLOp = RISCVII::hasVLOp(TSFlags);
  bool HasRMOp = RISCVII::hasRoundModeOp(TSFlags);
  bool UsesVXRM = RISCVII::usesVXRM(TSFlags);

  if (HasSEWOp) {
    const Operand &SEWOp = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
    VTypeOperands.insert(&SEWOp);

    if (SEWOp.Info->OperandType == RISCVOp::OPERAND_SEW_MASK) {
      // If it's a mask-producing instruction, the SEW operand is always zero.
      Log2SEWs.push_back(0);
    } else {
      SmallVector<unsigned, 4> SEWCandidates;

      // (RVV spec 3.4.2) For fractional LMUL, the supported SEWs are between
      // [SEW_min, LMUL * ELEN].
      unsigned SEWUpperBound =
          VLMul >= RISCVVType::LMUL_F8 ? multiplyLMul(ELEN, VLMul) : ELEN;
      for (unsigned SEW = MinSEW; SEW <= SEWUpperBound; SEW <<= 1) {
        SEWCandidates.push_back(SEW);

        // Some scheduling classes already integrate SEW; only put their
        // corresponding SEW values at the SEW operands.
        // NOTE: It is imperative to check this condition first; otherwise it
        // is difficult to tell whether there is an integrated SEW after the
        // other rules have filtered the candidates.
        const auto *RVVBase =
            RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
        if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
                        isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) ||
                        isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) {
          // There is an integrated SEW; remove all but the SEW pushed last.
          SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
          break;
        }
      }

      // Filter out some candidates.
      for (auto SEW = SEWCandidates.begin(); SEW != SEWCandidates.end();) {
        // For floating-point operations, only select SEWs supported by the
        // available FP extensions.
        if (isRVVFloatingPointOp(VPseudoOpcode)) {
          bool Supported = false;
          Supported |= isZvfhminZvfbfminOpcodes(BaseOpcode) && *SEW == 16;
          Supported |= STI.hasFeature(RISCV::FeatureStdExtZvfh) && *SEW == 16;
          Supported |= STI.hasFeature(RISCV::FeatureStdExtF) && *SEW == 32;
          Supported |= STI.hasFeature(RISCV::FeatureStdExtD) && *SEW == 64;
          if (!Supported) {
            SEW = SEWCandidates.erase(SEW);
            continue;
          }
        }

        // The EEW of the source operand in VSEXT and VZEXT is a fraction of
        // the SEW, hence only SEWs that will lead to a valid EEW are allowed.
        if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
          if (*SEW / *Frac < MinSEW) {
            SEW = SEWCandidates.erase(SEW);
            continue;
          }

        // Most vector crypto 1.0 instructions only work on SEW=32.
        using namespace RISCV_MC;
        if (isOpcodeAvailableIn(BaseOpcode, {Feature_HasStdExtZvkgBit,
                                             Feature_HasStdExtZvknedBit,
                                             Feature_HasStdExtZvknhaOrZvknhbBit,
                                             Feature_HasStdExtZvksedBit,
                                             Feature_HasStdExtZvkshBit})) {
          if (*SEW != 32)
            // Zvknhb supports SEW=64 as well.
            if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
                !isOpcodeAvailableIn(BaseOpcode,
                                     {Feature_HasStdExtZvknhaOrZvknhbBit})) {
              SEW = SEWCandidates.erase(SEW);
              continue;
            }

          // We're also enforcing the requirement `LMUL * VLEN >= EGW` here,
          // because some of the extensions have a SEW-dependent EGW.
          unsigned EGW = getZvkEGWSize(BaseOpcode, *SEW);
          if (multiplyLMul(ZvlVLen, VLMul) < EGW) {
            SEW = SEWCandidates.erase(SEW);
            continue;
          }
        }

        ++SEW;
      }

      // We're not going to produce any result with no SEW candidates.
      if (SEWCandidates.empty())
        return;

      for (unsigned SEW : SEWCandidates)
        Log2SEWs.push_back(Log2_32(SEW));
    }
  } else {
    Log2SEWs.push_back(std::nullopt);
  }

  if (HasPolicyOp) {
    VTypeOperands.insert(&Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]);

    Policies = {0, RISCVVType::TAIL_AGNOSTIC, RISCVVType::MASK_AGNOSTIC,
                (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)};
  } else {
    Policies.push_back(std::nullopt);
  }

  if (HasVLOp) {
    VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc)]);

    if (OnlyUsesVLMAXForVL)
      AVLs.push_back(-1);
    else
      AVLs = {// 5-bit immediate value
              1,
              // VLMAX
              -1,
              // Non-X0 register
              0};
  } else {
    AVLs.push_back(std::nullopt);
  }

  if (HasRMOp) {
    VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]);

    if (UsesVXRM) {
      // Use RNU as the default VXRM.
      RoundingModes = {RISCVVXRndMode::RNU};
      if (EnumerateRoundingModes)
        RoundingModes.append(
            {RISCVVXRndMode::RNE, RISCVVXRndMode::RDN, RISCVVXRndMode::ROD});
    } else {
      if (EnumerateRoundingModes)
        RoundingModes = {RISCVFPRndMode::RNE, RISCVFPRndMode::RTZ,
                         RISCVFPRndMode::RDN, RISCVFPRndMode::RUP,
                         RISCVFPRndMode::RMM};
      else
        // If we're not enumerating FRM, use DYN to instruct
        // RISCVInsertReadWriteCSRPass to insert nothing.
        RoundingModes = {RISCVFPRndMode::DYN};
    }
  } else {
    RoundingModes = {std::nullopt};
  }

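  // Enumerate all combinations of the candidate values; the std::set removes
  // duplicates and yields a deterministic order.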
  std::set<std::tuple<std::optional<unsigned>, std::optional<int>,
                      std::optional<unsigned>, std::optional<unsigned>>>
      Combinations;
  for (auto AVL : AVLs) {
    for (auto Log2SEW : Log2SEWs)
      for (auto Policy : Policies) {
        for (auto RM : RoundingModes)
          Combinations.insert(std::make_tuple(RM, AVL, Log2SEW, Policy));
      }
  }

  std::string ConfigStr;
  SmallVector<std::pair<const Operand *, MCOperand>, 4> ValueAssignments;
  for (const auto &[RM, AVL, Log2SEW, Policy] : Combinations) {
    InstructionTemplate IT(&Instr);

    ListSeparator LS;
    ConfigStr = "vtype = {";
    raw_string_ostream SS(ConfigStr);

    ValueAssignments.clear();

    if (RM) {
      const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1];
      ValueAssignments.push_back({&Op, MCOperand::createImm(*RM)});
      printRoundingMode(SS << LS << (UsesVXRM ? "VXRM" : "FRM") << ": ", *RM,
                        UsesVXRM);
    }

    if (AVL) {
      MCOperand OpVal;
      if (*AVL < 0) {
        // VLMAX
        OpVal = MCOperand::createImm(-1);
        SS << LS << "AVL: VLMAX";
      } else if (*AVL == 0) {
        // A register holding the AVL.
        // TODO: Generate a random register.
        OpVal = MCOperand::createReg(RISCV::X5);
        OpVal.print(SS << LS << "AVL: ");
      } else {
        // A 5-bit immediate.
        // The actual value assignment is deferred to
        // RISCVExegesisTarget::randomizeTargetMCOperand.
        SS << LS << "AVL: simm5";
      }
      if (OpVal.isValid()) {
        const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc)];
        ValueAssignments.push_back({&Op, OpVal});
      }
    }

    if (Log2SEW) {
      const Operand &Op = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
      ValueAssignments.push_back({&Op, MCOperand::createImm(*Log2SEW)});
      SS << LS << "SEW: e" << (*Log2SEW ? 1 << *Log2SEW : 8);
    }

    if (Policy) {
      const Operand &Op = Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)];
      ValueAssignments.push_back({&Op, MCOperand::createImm(*Policy)});
      SS << LS
         << "Policy: " << (*Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu")
         << "/" << (*Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu");
    }

    SS << "}";

    // Filter out some configurations, if needed.
    if (!FilterConfig.empty()) {
      if (!Regex(FilterConfig).match(ConfigStr))
        continue;
    }

    CodeTemplate CT = OrigCT.clone();
    CT.Config = std::move(ConfigStr);
    for (InstructionTemplate &IT : CT.Instructions) {
      if (IsSerial) {
        // Reset this template's value assignments and do it
        // ourselves.
        IT = InstructionTemplate(&Instr);
        assignSerialRVVOperands(IT);
      }

      for (const auto &[Op, OpVal] : ValueAssignments)
        IT.getValueFor(*Op) = OpVal;
    }
    Result.push_back(std::move(CT));
    if (Result.size() - StartingResultSize >=
        SnippetGenerator::Opts.MaxConfigsPerOpcode)
      return;
  }
}

template <class BaseT>
Expected<std::vector<CodeTemplate>>
RISCVSnippetGenerator<BaseT>::generateCodeTemplates(
    InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
  const Instruction &Instr = Variant.getInstr();

  bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;

  unsigned BaseOpcode = RISCV::getRVVMCOpcode(Instr.getOpcode());

  // Bail out on ineligible opcodes before generating base code templates,
  // since the latter is quite expensive.
  if (IsSerial && BaseOpcode && isIneligibleOfSerialSnippets(BaseOpcode, Instr))
    return std::vector<CodeTemplate>{};

  auto BaseCodeTemplates =
      BaseT::generateCodeTemplates(Variant, ForbiddenRegisters);
  if (!BaseCodeTemplates)
    return BaseCodeTemplates.takeError();

  if (!BaseOpcode)
    return BaseCodeTemplates;

  // Specialize for RVV pseudos.
  std::vector<CodeTemplate> ExpandedTemplates;
  for (const auto &BaseCT : *BaseCodeTemplates)
    annotateWithVType(BaseCT, Instr, BaseOpcode, ForbiddenRegisters,
                      ExpandedTemplates);

  return ExpandedTemplates;
}

// Stores a constant value to a general-purpose (integer) register.
static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI,
                                      MCRegister Reg, const APInt &Value) {
  SmallVector<MCInst, 8> MCInstSeq;
  MCRegister DestReg = Reg;

  RISCVMatInt::generateMCInstSeq(Value.getSExtValue(), STI, DestReg, MCInstSeq);

  std::vector<MCInst> MatIntInstrs(MCInstSeq.begin(), MCInstSeq.end());
  return MatIntInstrs;
}

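// Scratch register used to materialize integer bit patterns before moving
// them into floating-point registers.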
const MCPhysReg ScratchIntReg = RISCV::X30; // t5

// Stores constant bits to a floating-point register.
static std::vector<MCInst> loadFPRegBits(const MCSubtargetInfo &STI,
                                         MCRegister Reg, const APInt &Bits,
                                         unsigned FmvOpcode) {
  std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits);
  Instrs.push_back(MCInstBuilder(FmvOpcode).addReg(Reg).addReg(ScratchIntReg));
  return Instrs;
}

// The main idea: we support an APInt only if, represented as a double, it has
// a zero fractional part: 1.0, 2.0, 3.0, etc. Then we can do the trick: write
// the integer to the temporary register t5 and then do FCVT. This is the only
// reliable approach in 32-bit mode; otherwise we would need to use
// __floatsidf.
static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI,
                                             MCRegister Reg,
                                             const APInt &Bits) {
  double D = Bits.bitsToDouble();
  double IPart;
  double FPart = std::modf(D, &IPart);

  if (std::abs(FPart) > std::numeric_limits<double>::epsilon()) {
    errs() << "loadFP64RegBits32 is not implemented for doubles like " << D
           << ", please remove the fractional part\n";
    return {};
  }

  std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits);
  Instrs.push_back(
      MCInstBuilder(RISCV::FCVT_D_W).addReg(Reg).addReg(ScratchIntReg));
  return Instrs;
}

class ExegesisRISCVTarget : public ExegesisTarget {
  // NOTE: Alternatively, we could use a BitVector here, but the number of RVV
  // MC opcodes is just a small portion of the entire opcode space, so it
  // would be a waste of space to use a BitVector.
  mutable SmallSet<unsigned, 16> RVVMCOpcodesWithPseudos;

public:
  ExegesisRISCVTarget();

  bool matchesArch(Triple::ArchType Arch) const override;

  std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
                               const APInt &Value) const override;

  const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
                                           unsigned Opcode) const override {
    // We don't want to support RVV instructions that depend on VTYPE, because
    // those instructions by themselves don't carry any additional information
    // for us to set up the proper VTYPE environment via VSETVL instructions.
    // FIXME: Ideally, we should use RISCVVInversePseudosTable, but it requires
    // LMUL and SEW, and enumerating those combinations is hardly any better
    // than the ugly trick here that memorizes the corresponding MC opcodes of
    // the RVV pseudos we have processed previously. This works most of the
    // time because RVV pseudo opcodes are placed before any other RVV opcodes.
    // Of course, this doesn't work if we're asked to benchmark only a certain
    // subset of opcodes.
    if (RVVMCOpcodesWithPseudos.count(Opcode))
      return "The MC opcodes of RVV instructions are ignored";

    // We want to support all RVV pseudos.
    if (unsigned MCOpcode = RISCV::getRVVMCOpcode(Opcode)) {
      RVVMCOpcodesWithPseudos.insert(MCOpcode);
      return nullptr;
    }

    return ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode);
  }

  MCRegister getDefaultLoopCounterRegister(const Triple &) const override;

  void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
                                   MachineBasicBlock &TargetMBB,
                                   const MCInstrInfo &MII,
                                   MCRegister LoopRegister) const override;

  MCRegister getScratchMemoryRegister(const Triple &TT) const override;

  void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
                          unsigned Offset) const override;

  ArrayRef<MCPhysReg> getUnavailableRegisters() const override;

  bool allowAsBackToBack(const Instruction &Instr) const override {
    return !Instr.Description.isPseudo();
  }

  Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
                                 MCOperand &AssignedValue,
                                 const BitVector &ForbiddenRegs) const override;

  std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<RISCVSnippetGenerator<SerialSnippetGenerator>>(
        State, Opts);
  }

  std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<RISCVSnippetGenerator<ParallelSnippetGenerator>>(
        State, Opts);
  }

  std::vector<InstructionTemplate>
  generateInstructionVariants(const Instruction &Instr,
                              unsigned MaxConfigsPerOpcode) const override;

  void addTargetSpecificPasses(PassManagerBase &PM) const override {
    // Turn AVL operands holding physical registers into virtual registers.
    PM.add(exegesis::createRISCVPreprocessingPass());
    PM.add(createRISCVInsertVSETVLIPass());
    // Set up the correct FRM.
    PM.add(createRISCVInsertReadWriteCSRPass());
    PM.add(createRISCVInsertWriteVXRMPass());
    // This will assign a physical register to the result of VSETVLI
    // instructions that produce VLMAX.
    PM.add(exegesis::createRISCVPostprocessingPass());
    // PseudoRET will be expanded by RISCVAsmPrinter; we have to expand
    // PseudoMovImm with RISCVPostRAExpandPseudoPass though.
    PM.add(createRISCVPostRAExpandPseudoPass());
  }
};

ExegesisRISCVTarget::ExegesisRISCVTarget()
    : ExegesisTarget(RISCVCpuPfmCounters, RISCV_MC::isOpcodeAvailable) {}

bool ExegesisRISCVTarget::matchesArch(Triple::ArchType Arch) const {
  return Arch == Triple::riscv32 || Arch == Triple::riscv64;
}

std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI,
                                                  MCRegister Reg,
                                                  const APInt &Value) const {
  if (RISCV::GPRRegClass.contains(Reg))
    return loadIntReg(STI, Reg, Value);
  if (RISCV::FPR16RegClass.contains(Reg))
    return loadFPRegBits(STI, Reg, Value, RISCV::FMV_H_X);
  if (RISCV::FPR32RegClass.contains(Reg))
    return loadFPRegBits(STI, Reg, Value, RISCV::FMV_W_X);
  if (RISCV::FPR64RegClass.contains(Reg)) {
    if (STI.hasFeature(RISCV::Feature64Bit))
      return loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X);
    return loadFP64RegBits32(STI, Reg, Value);
  }
  // TODO: Emit proper code to initialize other kinds of registers.
  return {};
}

const MCPhysReg DefaultLoopCounterReg = RISCV::X31; // t6
const MCPhysReg ScratchMemoryReg = RISCV::X10;      // a0

MCRegister
ExegesisRISCVTarget::getDefaultLoopCounterRegister(const Triple &) const {
  return DefaultLoopCounterReg;
}

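// Emits `addi LoopRegister, LoopRegister, -1` followed by
// `bne LoopRegister, x0, TargetMBB`.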
void ExegesisRISCVTarget::decrementLoopCounterAndJump(
    MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
    const MCInstrInfo &MII, MCRegister LoopRegister) const {
  BuildMI(&MBB, DebugLoc(), MII.get(RISCV::ADDI))
      .addDef(LoopRegister)
      .addUse(LoopRegister)
      .addImm(-1);
  BuildMI(&MBB, DebugLoc(), MII.get(RISCV::BNE))
      .addUse(LoopRegister)
      .addUse(RISCV::X0)
      .addMBB(&TargetMBB);
}

MCRegister
ExegesisRISCVTarget::getScratchMemoryRegister(const Triple &TT) const {
  return ScratchMemoryReg; // a0
}

void ExegesisRISCVTarget::fillMemoryOperands(InstructionTemplate &IT,
                                             MCRegister Reg,
                                             unsigned Offset) const {
  // TODO: For now we ignore Offset because we have no way to detect it in the
  // instruction.
  auto &I = IT.getInstr();

  auto MemOpIt =
      find_if(I.Operands, [](const Operand &Op) { return Op.isMemory(); });
  assert(MemOpIt != I.Operands.end() &&
         "Instruction must have memory operands");

  const Operand &MemOp = *MemOpIt;

  assert(MemOp.isReg() && "Memory operand expected to be a register");

  IT.getValueFor(MemOp) = MCOperand::createReg(Reg);
}

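// X0 is hardwired to zero; the other three are reserved by exegesis itself
// as the loop counter and the integer/memory scratch registers.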
const MCPhysReg UnavailableRegisters[4] = {RISCV::X0, DefaultLoopCounterReg,
                                           ScratchIntReg, ScratchMemoryReg};

ArrayRef<MCPhysReg> ExegesisRISCVTarget::getUnavailableRegisters() const {
  return UnavailableRegisters;
}

Error ExegesisRISCVTarget::randomizeTargetMCOperand(
    const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
    const BitVector &ForbiddenRegs) const {
  uint8_t OperandType =
      Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType;

  switch (OperandType) {
  case RISCVOp::OPERAND_FRMARG:
    AssignedValue = MCOperand::createImm(RISCVFPRndMode::DYN);
    break;
  case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
    AssignedValue = MCOperand::createImm(0b1 << 4);
    break;
  case RISCVOp::OPERAND_SIMM6_NONZERO:
  case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
    AssignedValue = MCOperand::createImm(1);
    break;
  case RISCVOp::OPERAND_SIMM5:
    // 5-bit signed immediate value.
    AssignedValue = MCOperand::createImm(randomIndex(31) - 16);
    break;
  case RISCVOp::OPERAND_AVL:
  case RISCVOp::OPERAND_UIMM5:
    // 5-bit unsigned immediate value.
    AssignedValue = MCOperand::createImm(randomIndex(31));
    break;
  default:
    if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
        OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM)
      AssignedValue = MCOperand::createImm(0);
  }
  return Error::success();
}

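// Produces a single variant with all memory operands pointing at the scratch
// memory register; RVV-specific expansion happens later in the snippet
// generators.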
std::vector<InstructionTemplate>
ExegesisRISCVTarget::generateInstructionVariants(
    const Instruction &Instr, unsigned int MaxConfigsPerOpcode) const {
  InstructionTemplate IT{&Instr};
  for (const Operand &Op : Instr.Operands)
    if (Op.isMemory()) {
      IT.getValueFor(Op) = MCOperand::createReg(ScratchMemoryReg);
    }
  return {IT};
}

} // anonymous namespace

static ExegesisTarget *getTheRISCVExegesisTarget() {
  static ExegesisRISCVTarget Target;
  return &Target;
}

void InitializeRISCVExegesisTarget() {
  ExegesisTarget::registerTarget(getTheRISCVExegesisTarget());
}

} // namespace exegesis
} // namespace llvm