1 | //===-- Target.cpp ----------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "../Target.h" |
10 | #include "../ParallelSnippetGenerator.h" |
11 | #include "../SerialSnippetGenerator.h" |
12 | #include "../SnippetGenerator.h" |
13 | |
14 | #include "MCTargetDesc/RISCVBaseInfo.h" |
15 | #include "MCTargetDesc/RISCVMCTargetDesc.h" |
16 | #include "MCTargetDesc/RISCVMatInt.h" |
17 | #include "RISCV.h" |
18 | #include "RISCVExegesisPasses.h" |
19 | #include "RISCVInstrInfo.h" |
20 | #include "RISCVRegisterInfo.h" |
21 | #include "llvm/Support/Regex.h" |
22 | #include "llvm/Support/raw_ostream.h" |
23 | |
// Include computeAvailableFeatures and computeRequiredFeatures.
25 | #define GET_AVAILABLE_OPCODE_CHECKER |
26 | #include "RISCVGenInstrInfo.inc" |
27 | |
28 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
29 | |
30 | #include <vector> |
31 | |
32 | namespace llvm { |
33 | namespace exegesis { |
34 | |
35 | static cl::opt<bool> |
36 | OnlyUsesVLMAXForVL("riscv-vlmax-for-vl" , |
37 | cl::desc("Only enumerate VLMAX for VL operand" ), |
38 | cl::init(Val: false), cl::Hidden); |
39 | |
40 | static cl::opt<bool> |
41 | EnumerateRoundingModes("riscv-enumerate-rounding-modes" , |
42 | cl::desc("Enumerate different FRM and VXRM" ), |
43 | cl::init(Val: true), cl::Hidden); |
44 | |
45 | static cl::opt<std::string> |
46 | FilterConfig("riscv-filter-config" , |
47 | cl::desc("Show only the configs matching this regex" ), |
48 | cl::init(Val: "" ), cl::Hidden); |
49 | |
50 | #include "RISCVGenExegesis.inc" |
51 | |
52 | namespace { |
53 | |
54 | template <class BaseT> class RISCVSnippetGenerator : public BaseT { |
55 | static void printRoundingMode(raw_ostream &OS, unsigned Val, bool UsesVXRM) { |
56 | if (UsesVXRM) { |
57 | assert(RISCVVXRndMode::isValidRoundingMode(Val)); |
58 | OS << RISCVVXRndMode::roundingModeToString( |
59 | RndMode: static_cast<RISCVVXRndMode::RoundingMode>(Val)); |
60 | } else { |
61 | assert(RISCVFPRndMode::isValidRoundingMode(Val)); |
62 | OS << RISCVFPRndMode::roundingModeToString( |
63 | RndMode: static_cast<RISCVFPRndMode::RoundingMode>(Val)); |
64 | } |
65 | } |
66 | |
67 | static constexpr unsigned MinSEW = 8; |
68 | // ELEN is basically SEW_max. |
69 | unsigned ELEN = 64; |
70 | |
71 | // We can't know the real min/max VLEN w/o a Function, so we're |
72 | // using the VLen from Zvl. |
73 | unsigned ZvlVLen = 32; |
74 | |
75 | /// Mask for registers that are NOT standalone registers like X0 and V0 |
76 | BitVector AggregateRegisters; |
77 | |
78 | // Returns true when opcode is available in any of the FBs. |
79 | static bool |
80 | isOpcodeAvailableIn(unsigned Opcode, |
81 | ArrayRef<RISCV_MC::SubtargetFeatureBits> FBs) { |
82 | FeatureBitset RequiredFeatures = RISCV_MC::computeRequiredFeatures(Opcode); |
83 | for (uint8_t FB : FBs) { |
84 | if (RequiredFeatures[FB]) |
85 | return true; |
86 | } |
87 | return false; |
88 | } |
89 | |
90 | static bool isRVVFloatingPointOp(unsigned Opcode) { |
91 | return isOpcodeAvailableIn(Opcode, |
92 | FBs: {RISCV_MC::Feature_HasVInstructionsAnyFBit}); |
93 | } |
94 | |
95 | // Get the element group width of each vector cryptor extension. |
96 | static unsigned getZvkEGWSize(unsigned Opcode, unsigned SEW) { |
97 | using namespace RISCV_MC; |
98 | if (isOpcodeAvailableIn(Opcode, FBs: {Feature_HasStdExtZvkgBit, |
99 | Feature_HasStdExtZvknedBit, |
100 | Feature_HasStdExtZvksedBit})) |
101 | return 128U; |
102 | if (isOpcodeAvailableIn(Opcode, FBs: {Feature_HasStdExtZvkshBit})) |
103 | return 256U; |
104 | if (isOpcodeAvailableIn(Opcode, FBs: {Feature_HasStdExtZvknhaOrZvknhbBit})) |
105 | // In Zvknh[ab], when SEW=64 is used (i.e. Zvknhb), EGW is 256. |
106 | // Otherwise it's 128. |
107 | return SEW == 64 ? 256U : 128U; |
108 | |
109 | llvm_unreachable("Unsupported opcode" ); |
110 | } |
111 | |
112 | // A handy utility to multiply or divide an integer by LMUL. |
113 | template <typename T> static T multiplyLMul(T Val, RISCVVType::VLMUL VLMul) { |
114 | auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul); |
115 | return IsFractional ? Val / LMul : Val * LMul; |
116 | } |
117 | |
118 | /// Return the denominator of the fractional (i.e. the `x` in .vfx suffix) or |
119 | /// nullopt if BaseOpcode is not a vector sext/zext. |
120 | static std::optional<unsigned> isRVVSignZeroExtend(unsigned BaseOpcode) { |
121 | switch (BaseOpcode) { |
122 | case RISCV::VSEXT_VF2: |
123 | case RISCV::VZEXT_VF2: |
124 | return 2; |
125 | case RISCV::VSEXT_VF4: |
126 | case RISCV::VZEXT_VF4: |
127 | return 4; |
128 | case RISCV::VSEXT_VF8: |
129 | case RISCV::VZEXT_VF8: |
130 | return 8; |
131 | default: |
132 | return std::nullopt; |
133 | } |
134 | } |
135 | |
136 | void annotateWithVType(const CodeTemplate &CT, const Instruction &Instr, |
137 | unsigned BaseOpcode, |
138 | const BitVector &ForbiddenRegisters, |
139 | std::vector<CodeTemplate> &Result) const; |
140 | |
141 | public: |
142 | RISCVSnippetGenerator(const LLVMState &State, |
143 | const SnippetGenerator::Options &Opts) |
144 | : BaseT(State, Opts), |
145 | AggregateRegisters(State.getRegInfo().getNumRegs(), /*initVal=*/true) { |
146 | // Initialize standalone registers mask. |
147 | const MCRegisterInfo &RegInfo = State.getRegInfo(); |
148 | const unsigned StandaloneRegClasses[] = { |
149 | RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID}; |
150 | |
151 | for (unsigned RegClassID : StandaloneRegClasses) |
152 | for (unsigned Reg : RegInfo.getRegClass(i: RegClassID)) |
153 | AggregateRegisters.reset(Idx: Reg); |
154 | |
155 | // Initialize ELEN and VLEN. |
156 | // FIXME: We could have obtained these two constants from RISCVSubtarget |
157 | // but in order to get that from TargetMachine, we need a Function. |
158 | const MCSubtargetInfo &STI = State.getSubtargetInfo(); |
159 | ELEN = STI.hasFeature(Feature: RISCV::FeatureStdExtZve64x) ? 64 : 32; |
160 | |
161 | const unsigned ZvlFeatures[] = { |
162 | RISCV::FeatureStdExtZvl32b, RISCV::FeatureStdExtZvl64b, |
163 | RISCV::FeatureStdExtZvl128b, RISCV::FeatureStdExtZvl256b, |
164 | RISCV::FeatureStdExtZvl512b, RISCV::FeatureStdExtZvl1024b, |
165 | RISCV::FeatureStdExtZvl2048b, RISCV::FeatureStdExtZvl4096b, |
166 | RISCV::FeatureStdExtZvl8192b, RISCV::FeatureStdExtZvl16384b, |
167 | RISCV::FeatureStdExtZvl32768b, RISCV::FeatureStdExtZvl65536b}; |
168 | for (auto [Idx, Feature] : enumerate(First: ZvlFeatures)) { |
169 | if (STI.hasFeature(Feature)) |
170 | ZvlVLen = std::max(a: ZvlVLen, b: 1u << (Idx + 5)); |
171 | } |
172 | } |
173 | |
174 | Expected<std::vector<CodeTemplate>> |
175 | generateCodeTemplates(InstructionTemplate Variant, |
176 | const BitVector &ForbiddenRegisters) const override; |
177 | }; |
178 | |
179 | static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) { |
180 | const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedPseudo: MaskedOp); |
181 | return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp; |
182 | } |
183 | |
184 | // There are primarily two kinds of opcodes that are not eligible |
185 | // in a serial snippet: |
186 | // (1) Has a use operand that can not overlap with the def operand |
187 | // (i.e. early clobber). |
188 | // (2) The register file of the only use operand is different from |
189 | // that of the def operand. For instance, use operand is vector and |
190 | // the result is a scalar. |
191 | static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode, |
192 | const Instruction &I) { |
193 | if (llvm::any_of(Range: I.Operands, |
194 | P: [](const Operand &Op) { return Op.isEarlyClobber(); })) |
195 | return true; |
196 | |
197 | switch (BaseOpcode) { |
198 | case RISCV::VCOMPRESS_VM: |
199 | case RISCV::VCPOP_M: |
200 | case RISCV::VCPOP_V: |
201 | // The permutation instructions listed below cannot have destination |
202 | // overlapping with the source. |
203 | case RISCV::VRGATHEREI16_VV: |
204 | case RISCV::VRGATHER_VI: |
205 | case RISCV::VRGATHER_VV: |
206 | case RISCV::VRGATHER_VX: |
207 | case RISCV::VSLIDE1UP_VX: |
208 | case RISCV::VSLIDEUP_VI: |
209 | case RISCV::VSLIDEUP_VX: |
210 | return true; |
211 | default: |
212 | return false; |
213 | } |
214 | } |
215 | |
216 | static bool isZvfhminZvfbfminOpcodes(unsigned BaseOpcode) { |
217 | switch (BaseOpcode) { |
218 | case RISCV::VFNCVT_F_F_W: |
219 | case RISCV::VFWCVT_F_F_V: |
220 | case RISCV::VFNCVTBF16_F_F_W: |
221 | case RISCV::VFWCVTBF16_F_F_V: |
222 | return true; |
223 | default: |
224 | return false; |
225 | } |
226 | } |
227 | |
228 | static bool isVectorReduction(unsigned BaseOpcode) { |
229 | switch (BaseOpcode) { |
230 | case RISCV::VREDAND_VS: |
231 | case RISCV::VREDMAXU_VS: |
232 | case RISCV::VREDMAX_VS: |
233 | case RISCV::VREDMINU_VS: |
234 | case RISCV::VREDMIN_VS: |
235 | case RISCV::VREDOR_VS: |
236 | case RISCV::VREDSUM_VS: |
237 | case RISCV::VREDXOR_VS: |
238 | case RISCV::VWREDSUMU_VS: |
239 | case RISCV::VWREDSUM_VS: |
240 | case RISCV::VFREDMAX_VS: |
241 | case RISCV::VFREDMIN_VS: |
242 | case RISCV::VFREDOSUM_VS: |
243 | case RISCV::VFREDUSUM_VS: |
244 | return true; |
245 | default: |
246 | return false; |
247 | } |
248 | } |
249 | |
250 | template <class BaseT> |
251 | void RISCVSnippetGenerator<BaseT>::annotateWithVType( |
252 | const CodeTemplate &OrigCT, const Instruction &Instr, unsigned BaseOpcode, |
253 | const BitVector &ForbiddenRegisters, |
254 | std::vector<CodeTemplate> &Result) const { |
255 | const MCSubtargetInfo &STI = SnippetGenerator::State.getSubtargetInfo(); |
256 | unsigned VPseudoOpcode = Instr.getOpcode(); |
257 | |
258 | bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>; |
259 | |
260 | const MCInstrDesc &MIDesc = Instr.Description; |
261 | const uint64_t TSFlags = MIDesc.TSFlags; |
262 | |
263 | RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags); |
264 | |
265 | const size_t StartingResultSize = Result.size(); |
266 | |
267 | SmallPtrSet<const Operand *, 4> VTypeOperands; |
268 | std::optional<AliasingConfigurations> SelfAliasing; |
269 | // Exegesis see instructions with tied operands being inherently serial. |
270 | // But for RVV instructions, those tied operands are passthru rather |
271 | // than real read operands. So we manually put dependency between |
272 | // destination (i.e. def) and any of the non-tied/SEW/policy/AVL/RM |
273 | // operands. |
274 | auto assignSerialRVVOperands = [&, this](InstructionTemplate &IT) { |
275 | // Initialize SelfAliasing on first use. |
276 | if (!SelfAliasing.has_value()) { |
277 | BitVector ExcludeRegs = ForbiddenRegisters; |
278 | ExcludeRegs |= AggregateRegisters; |
279 | SelfAliasing = AliasingConfigurations(Instr, Instr, ExcludeRegs); |
280 | bool EmptyUses = false; |
281 | for (auto &ARO : SelfAliasing->Configurations) { |
282 | auto &Uses = ARO.Uses; |
283 | for (auto ROA = Uses.begin(); ROA != Uses.end();) { |
284 | const Operand *Op = ROA->Op; |
285 | // Exclude tied operand(s). |
286 | if (Op->isTied()) { |
287 | ROA = Uses.erase(CI: ROA); |
288 | continue; |
289 | } |
290 | |
291 | // Special handling for reduction operations: for a given reduction |
292 | // `vredop vd, vs2, vs1`, we don't want vd to be aliased with vs1 |
293 | // since we're only reading `vs1[0]` and many implementations |
294 | // optimize for this case (e.g. chaining). Instead, we're forcing |
295 | // it to create alias between vd and vs2. |
296 | if (isVectorReduction(BaseOpcode) && |
297 | // vs1's operand index is always 3. |
298 | Op->getIndex() == 3) { |
299 | ROA = Uses.erase(CI: ROA); |
300 | continue; |
301 | } |
302 | |
303 | // Exclude any special operands like SEW and VL -- we've already |
304 | // assigned values to them. |
305 | if (VTypeOperands.count(Ptr: Op)) { |
306 | ROA = Uses.erase(CI: ROA); |
307 | continue; |
308 | } |
309 | ++ROA; |
310 | } |
311 | |
312 | // If any of the use operand candidate lists is empty, there is |
313 | // no point to assign self aliasing registers. |
314 | if (Uses.empty()) { |
315 | EmptyUses = true; |
316 | break; |
317 | } |
318 | } |
319 | if (EmptyUses) |
320 | SelfAliasing->Configurations.clear(); |
321 | } |
322 | |
323 | // This is a self aliasing instruction so defs and uses are from the same |
324 | // instance, hence twice IT in the following call. |
325 | if (!SelfAliasing->empty() && !SelfAliasing->hasImplicitAliasing()) |
326 | setRandomAliasing(AliasingConfigurations: *SelfAliasing, DefIB&: IT, UseIB&: IT); |
327 | }; |
328 | |
329 | // We are going to create a CodeTemplate (configuration) for each supported |
330 | // SEW, policy, and VL. |
331 | // FIXME: Account for EEW and EMUL. |
332 | SmallVector<std::optional<unsigned>, 4> Log2SEWs; |
333 | SmallVector<std::optional<unsigned>, 4> Policies; |
334 | SmallVector<std::optional<int>, 3> AVLs; |
335 | SmallVector<std::optional<unsigned>, 8> RoundingModes; |
336 | |
337 | bool HasSEWOp = RISCVII::hasSEWOp(TSFlags); |
338 | bool HasPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); |
339 | bool HasVLOp = RISCVII::hasVLOp(TSFlags); |
340 | bool HasRMOp = RISCVII::hasRoundModeOp(TSFlags); |
341 | bool UsesVXRM = RISCVII::usesVXRM(TSFlags); |
342 | |
343 | if (HasSEWOp) { |
344 | const Operand &SEWOp = Instr.Operands[RISCVII::getSEWOpNum(Desc: MIDesc)]; |
345 | VTypeOperands.insert(Ptr: &SEWOp); |
346 | |
347 | if (SEWOp.Info->OperandType == RISCVOp::OPERAND_SEW_MASK) { |
348 | // If it's a mask-producing instruction, the SEW operand is always zero. |
349 | Log2SEWs.push_back(Elt: 0); |
350 | } else { |
351 | SmallVector<unsigned, 4> SEWCandidates; |
352 | |
353 | // (RVV spec 3.4.2) For fractional LMUL, the supported SEW are between |
354 | // [SEW_min, LMUL * ELEN]. |
355 | unsigned SEWUpperBound = |
356 | VLMul >= RISCVVType::LMUL_F8 ? multiplyLMul(ELEN, VLMul) : ELEN; |
357 | for (unsigned SEW = MinSEW; SEW <= SEWUpperBound; SEW <<= 1) { |
358 | SEWCandidates.push_back(Elt: SEW); |
359 | |
360 | // Some scheduling classes already integrate SEW; only put |
361 | // their corresponding SEW values at the SEW operands. |
362 | // NOTE: It is imperative to put this condition in the front, otherwise |
363 | // it is tricky and difficult to know if there is an integrated |
364 | // SEW after other rules are applied to filter the candidates. |
365 | const auto *RVVBase = |
366 | RISCVVInversePseudosTable::getBaseInfo(BaseInstr: BaseOpcode, VLMul, SEW); |
367 | if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode || |
368 | isMaskedSibling(MaskedOp: VPseudoOpcode, UnmaskedOp: RVVBase->Pseudo) || |
369 | isMaskedSibling(MaskedOp: RVVBase->Pseudo, UnmaskedOp: VPseudoOpcode))) { |
370 | // There is an integrated SEW, remove all but the SEW pushed last. |
371 | SEWCandidates.erase(CS: SEWCandidates.begin(), CE: SEWCandidates.end() - 1); |
372 | break; |
373 | } |
374 | } |
375 | |
376 | // Filter out some candidates. |
377 | for (auto SEW = SEWCandidates.begin(); SEW != SEWCandidates.end();) { |
378 | // For floating point operations, only select SEW of the supported FLEN. |
379 | if (isRVVFloatingPointOp(Opcode: VPseudoOpcode)) { |
380 | bool Supported = false; |
381 | Supported |= isZvfhminZvfbfminOpcodes(BaseOpcode) && *SEW == 16; |
382 | Supported |= STI.hasFeature(Feature: RISCV::FeatureStdExtZvfh) && *SEW == 16; |
383 | Supported |= STI.hasFeature(Feature: RISCV::FeatureStdExtF) && *SEW == 32; |
384 | Supported |= STI.hasFeature(Feature: RISCV::FeatureStdExtD) && *SEW == 64; |
385 | if (!Supported) { |
386 | SEW = SEWCandidates.erase(CI: SEW); |
387 | continue; |
388 | } |
389 | } |
390 | |
391 | // The EEW for source operand in VSEXT and VZEXT is a fraction |
392 | // of the SEW, hence only SEWs that will lead to valid EEW are allowed. |
393 | if (auto Frac = isRVVSignZeroExtend(BaseOpcode)) |
394 | if (*SEW / *Frac < MinSEW) { |
395 | SEW = SEWCandidates.erase(CI: SEW); |
396 | continue; |
397 | } |
398 | |
399 | // Most vector crypto 1.0 instructions only work on SEW=32. |
400 | using namespace RISCV_MC; |
401 | if (isOpcodeAvailableIn(Opcode: BaseOpcode, FBs: {Feature_HasStdExtZvkgBit, |
402 | Feature_HasStdExtZvknedBit, |
403 | Feature_HasStdExtZvknhaOrZvknhbBit, |
404 | Feature_HasStdExtZvksedBit, |
405 | Feature_HasStdExtZvkshBit})) { |
406 | if (*SEW != 32) |
407 | // Zvknhb supports SEW=64 as well. |
408 | if (*SEW != 64 || !STI.hasFeature(Feature: RISCV::FeatureStdExtZvknhb) || |
409 | !isOpcodeAvailableIn(Opcode: BaseOpcode, |
410 | FBs: {Feature_HasStdExtZvknhaOrZvknhbBit})) { |
411 | SEW = SEWCandidates.erase(CI: SEW); |
412 | continue; |
413 | } |
414 | |
415 | // We're also enforcing the requirement of `LMUL * VLEN >= EGW` here, |
416 | // because some of the extensions have SEW-dependant EGW. |
417 | unsigned EGW = getZvkEGWSize(Opcode: BaseOpcode, SEW: *SEW); |
418 | if (multiplyLMul(ZvlVLen, VLMul) < EGW) { |
419 | SEW = SEWCandidates.erase(CI: SEW); |
420 | continue; |
421 | } |
422 | } |
423 | |
424 | ++SEW; |
425 | } |
426 | |
427 | // We're not going to produce any result with zero SEW candidate. |
428 | if (SEWCandidates.empty()) |
429 | return; |
430 | |
431 | for (unsigned SEW : SEWCandidates) |
432 | Log2SEWs.push_back(Elt: Log2_32(Value: SEW)); |
433 | } |
434 | } else { |
435 | Log2SEWs.push_back(Elt: std::nullopt); |
436 | } |
437 | |
438 | if (HasPolicyOp) { |
439 | VTypeOperands.insert(Ptr: &Instr.Operands[RISCVII::getVecPolicyOpNum(Desc: MIDesc)]); |
440 | |
441 | Policies = {0, RISCVVType::TAIL_AGNOSTIC, RISCVVType::MASK_AGNOSTIC, |
442 | (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)}; |
443 | } else { |
444 | Policies.push_back(Elt: std::nullopt); |
445 | } |
446 | |
447 | if (HasVLOp) { |
448 | VTypeOperands.insert(Ptr: &Instr.Operands[RISCVII::getVLOpNum(Desc: MIDesc)]); |
449 | |
450 | if (OnlyUsesVLMAXForVL) |
451 | AVLs.push_back(Elt: -1); |
452 | else |
453 | AVLs = {// 5-bit immediate value |
454 | 1, |
455 | // VLMAX |
456 | -1, |
457 | // Non-X0 register |
458 | 0}; |
459 | } else { |
460 | AVLs.push_back(Elt: std::nullopt); |
461 | } |
462 | |
463 | if (HasRMOp) { |
464 | VTypeOperands.insert(Ptr: &Instr.Operands[RISCVII::getVLOpNum(Desc: MIDesc) - 1]); |
465 | |
466 | if (UsesVXRM) { |
467 | // Use RNU as the default VXRM. |
468 | RoundingModes = {RISCVVXRndMode::RNU}; |
469 | if (EnumerateRoundingModes) |
470 | RoundingModes.append( |
471 | IL: {RISCVVXRndMode::RNE, RISCVVXRndMode::RDN, RISCVVXRndMode::ROD}); |
472 | } else { |
473 | if (EnumerateRoundingModes) |
474 | RoundingModes = {RISCVFPRndMode::RNE, RISCVFPRndMode::RTZ, |
475 | RISCVFPRndMode::RDN, RISCVFPRndMode::RUP, |
476 | RISCVFPRndMode::RMM}; |
477 | else |
478 | // If we're not enumerating FRM, use DYN to instruct |
479 | // RISCVInsertReadWriteCSRPass to insert nothing. |
480 | RoundingModes = {RISCVFPRndMode::DYN}; |
481 | } |
482 | } else { |
483 | RoundingModes = {std::nullopt}; |
484 | } |
485 | |
486 | std::set<std::tuple<std::optional<unsigned>, std::optional<int>, |
487 | std::optional<unsigned>, std::optional<unsigned>>> |
488 | Combinations; |
489 | for (auto AVL : AVLs) { |
490 | for (auto Log2SEW : Log2SEWs) |
491 | for (auto Policy : Policies) { |
492 | for (auto RM : RoundingModes) |
493 | Combinations.insert(x: std::make_tuple(args&: RM, args&: AVL, args&: Log2SEW, args&: Policy)); |
494 | } |
495 | } |
496 | |
497 | std::string ConfigStr; |
498 | SmallVector<std::pair<const Operand *, MCOperand>, 4> ValueAssignments; |
499 | for (const auto &[RM, AVL, Log2SEW, Policy] : Combinations) { |
500 | InstructionTemplate IT(&Instr); |
501 | |
502 | ListSeparator LS; |
503 | ConfigStr = "vtype = {" ; |
504 | raw_string_ostream SS(ConfigStr); |
505 | |
506 | ValueAssignments.clear(); |
507 | |
508 | if (RM) { |
509 | const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(Desc: MIDesc) - 1]; |
510 | ValueAssignments.push_back(Elt: {&Op, MCOperand::createImm(Val: *RM)}); |
511 | printRoundingMode(OS&: SS << LS << (UsesVXRM ? "VXRM" : "FRM" ) << ": " , Val: *RM, |
512 | UsesVXRM); |
513 | } |
514 | |
515 | if (AVL) { |
516 | MCOperand OpVal; |
517 | if (*AVL < 0) { |
518 | // VLMAX |
519 | OpVal = MCOperand::createImm(Val: -1); |
520 | SS << LS << "AVL: VLMAX" ; |
521 | } else if (*AVL == 0) { |
522 | // A register holding AVL. |
523 | // TODO: Generate a random register. |
524 | OpVal = MCOperand::createReg(Reg: RISCV::X5); |
525 | OpVal.print(OS&: SS << LS << "AVL: " ); |
526 | } else { |
527 | // A 5-bit immediate. |
528 | // The actual value assignment is deferred to |
529 | // RISCVExegesisTarget::randomizeTargetMCOperand. |
530 | SS << LS << "AVL: simm5" ; |
531 | } |
532 | if (OpVal.isValid()) { |
533 | const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(Desc: MIDesc)]; |
534 | ValueAssignments.push_back(Elt: {&Op, OpVal}); |
535 | } |
536 | } |
537 | |
538 | if (Log2SEW) { |
539 | const Operand &Op = Instr.Operands[RISCVII::getSEWOpNum(Desc: MIDesc)]; |
540 | ValueAssignments.push_back(Elt: {&Op, MCOperand::createImm(Val: *Log2SEW)}); |
541 | SS << LS << "SEW: e" << (*Log2SEW ? 1 << *Log2SEW : 8); |
542 | } |
543 | |
544 | if (Policy) { |
545 | const Operand &Op = Instr.Operands[RISCVII::getVecPolicyOpNum(Desc: MIDesc)]; |
546 | ValueAssignments.push_back(Elt: {&Op, MCOperand::createImm(Val: *Policy)}); |
547 | SS << LS |
548 | << "Policy: " << (*Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu" ) |
549 | << "/" << (*Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu" ); |
550 | } |
551 | |
552 | SS << "}" ; |
553 | |
554 | // Filter out some configurations, if needed. |
555 | if (!FilterConfig.empty()) { |
556 | if (!Regex(FilterConfig).match(String: ConfigStr)) |
557 | continue; |
558 | } |
559 | |
560 | CodeTemplate CT = OrigCT.clone(); |
561 | CT.Config = std::move(ConfigStr); |
562 | for (InstructionTemplate &IT : CT.Instructions) { |
563 | if (IsSerial) { |
564 | // Reset this template's value assignments and do it |
565 | // ourselves. |
566 | IT = InstructionTemplate(&Instr); |
567 | assignSerialRVVOperands(IT); |
568 | } |
569 | |
570 | for (const auto &[Op, OpVal] : ValueAssignments) |
571 | IT.getValueFor(Op: *Op) = OpVal; |
572 | } |
573 | Result.push_back(x: std::move(CT)); |
574 | if (Result.size() - StartingResultSize >= |
575 | SnippetGenerator::Opts.MaxConfigsPerOpcode) |
576 | return; |
577 | } |
578 | } |
579 | |
580 | template <class BaseT> |
581 | Expected<std::vector<CodeTemplate>> |
582 | RISCVSnippetGenerator<BaseT>::generateCodeTemplates( |
583 | InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { |
584 | const Instruction &Instr = Variant.getInstr(); |
585 | |
586 | bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>; |
587 | |
588 | unsigned BaseOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: Instr.getOpcode()); |
589 | |
590 | // Bail out ineligible opcodes before generating base code templates since |
591 | // the latter is quite expensive. |
592 | if (IsSerial && BaseOpcode && isIneligibleOfSerialSnippets(BaseOpcode, I: Instr)) |
593 | return std::vector<CodeTemplate>{}; |
594 | |
595 | auto BaseCodeTemplates = |
596 | BaseT::generateCodeTemplates(Variant, ForbiddenRegisters); |
597 | if (!BaseCodeTemplates) |
598 | return BaseCodeTemplates.takeError(); |
599 | |
600 | if (!BaseOpcode) |
601 | return BaseCodeTemplates; |
602 | |
603 | // Specialize for RVV pseudo. |
604 | std::vector<CodeTemplate> ExpandedTemplates; |
605 | for (const auto &BaseCT : *BaseCodeTemplates) |
606 | annotateWithVType(OrigCT: BaseCT, Instr, BaseOpcode, ForbiddenRegisters, |
607 | Result&: ExpandedTemplates); |
608 | |
609 | return ExpandedTemplates; |
610 | } |
611 | |
612 | // Stores constant value to a general-purpose (integer) register. |
613 | static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI, |
614 | MCRegister Reg, const APInt &Value) { |
615 | SmallVector<MCInst, 8> MCInstSeq; |
616 | MCRegister DestReg = Reg; |
617 | |
618 | RISCVMatInt::generateMCInstSeq(Val: Value.getSExtValue(), STI, DestReg, Insts&: MCInstSeq); |
619 | |
620 | std::vector<MCInst> MatIntInstrs(MCInstSeq.begin(), MCInstSeq.end()); |
621 | return MatIntInstrs; |
622 | } |
623 | |
624 | const MCPhysReg ScratchIntReg = RISCV::X30; // t5 |
625 | |
626 | // Stores constant bits to a floating-point register. |
627 | static std::vector<MCInst> loadFPRegBits(const MCSubtargetInfo &STI, |
628 | MCRegister Reg, const APInt &Bits, |
629 | unsigned FmvOpcode) { |
630 | std::vector<MCInst> Instrs = loadIntReg(STI, Reg: ScratchIntReg, Value: Bits); |
631 | Instrs.push_back(x: MCInstBuilder(FmvOpcode).addReg(Reg).addReg(Reg: ScratchIntReg)); |
632 | return Instrs; |
633 | } |
634 | |
635 | // main idea is: |
636 | // we support APInt only if (represented as double) it has zero fractional |
637 | // part: 1.0, 2.0, 3.0, etc... then we can do the trick: write int to tmp reg t5 |
638 | // and then do FCVT this is only reliable thing in 32-bit mode, otherwise we |
639 | // need to use __floatsidf |
640 | static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI, |
641 | MCRegister Reg, |
642 | const APInt &Bits) { |
643 | double D = Bits.bitsToDouble(); |
644 | double IPart; |
645 | double FPart = std::modf(x: D, iptr: &IPart); |
646 | |
647 | if (std::abs(x: FPart) > std::numeric_limits<double>::epsilon()) { |
648 | errs() << "loadFP64RegBits32 is not implemented for doubles like " << D |
649 | << ", please remove fractional part\n" ; |
650 | return {}; |
651 | } |
652 | |
653 | std::vector<MCInst> Instrs = loadIntReg(STI, Reg: ScratchIntReg, Value: Bits); |
654 | Instrs.push_back( |
655 | x: MCInstBuilder(RISCV::FCVT_D_W).addReg(Reg).addReg(Reg: ScratchIntReg)); |
656 | return Instrs; |
657 | } |
658 | |
659 | class ExegesisRISCVTarget : public ExegesisTarget { |
660 | // NOTE: Alternatively, we can use BitVector here, but the number of RVV MC |
661 | // opcodes is just a small portion of the entire opcode space, so I thought it |
662 | // would be a waste of space to use BitVector. |
663 | mutable SmallSet<unsigned, 16> RVVMCOpcodesWithPseudos; |
664 | |
665 | public: |
666 | ExegesisRISCVTarget(); |
667 | |
668 | bool matchesArch(Triple::ArchType Arch) const override; |
669 | |
670 | std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg, |
671 | const APInt &Value) const override; |
672 | |
673 | const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, |
674 | unsigned Opcode) const override { |
675 | // We don't want to support RVV instructions that depend on VTYPE, because |
676 | // those instructions by themselves don't carry any additional information |
677 | // for us to setup the proper VTYPE environment via VSETVL instructions. |
678 | // FIXME: Ideally, we should use RISCVVInversePseudosTable, but it requires |
679 | // LMUL and SEW and I don't think enumerating those combinations is any |
680 | // better than the ugly trick here that memorizes the corresponding MC |
681 | // opcodes of the RVV pseudo we have processed previously. This works most |
682 | // of the time because RVV pseudo opcodes are placed before any other RVV |
683 | // opcodes. Of course this doesn't work if we're asked to benchmark only a |
684 | // certain subset of opcodes. |
685 | if (RVVMCOpcodesWithPseudos.count(V: Opcode)) |
686 | return "The MC opcode of RVV instructions are ignored" ; |
687 | |
688 | // We want to support all RVV pseudos. |
689 | if (unsigned MCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: Opcode)) { |
690 | RVVMCOpcodesWithPseudos.insert(V: MCOpcode); |
691 | return nullptr; |
692 | } |
693 | |
694 | return ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode); |
695 | } |
696 | |
697 | MCRegister getDefaultLoopCounterRegister(const Triple &) const override; |
698 | |
699 | void decrementLoopCounterAndJump(MachineBasicBlock &MBB, |
700 | MachineBasicBlock &TargetMBB, |
701 | const MCInstrInfo &MII, |
702 | MCRegister LoopRegister) const override; |
703 | |
704 | MCRegister getScratchMemoryRegister(const Triple &TT) const override; |
705 | |
706 | void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg, |
707 | unsigned Offset) const override; |
708 | |
709 | ArrayRef<MCPhysReg> getUnavailableRegisters() const override; |
710 | |
711 | bool allowAsBackToBack(const Instruction &Instr) const override { |
712 | return !Instr.Description.isPseudo(); |
713 | } |
714 | |
715 | Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var, |
716 | MCOperand &AssignedValue, |
717 | const BitVector &ForbiddenRegs) const override; |
718 | |
719 | std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator( |
720 | const LLVMState &State, |
721 | const SnippetGenerator::Options &Opts) const override { |
722 | return std::make_unique<RISCVSnippetGenerator<SerialSnippetGenerator>>( |
723 | args: State, args: Opts); |
724 | } |
725 | |
726 | std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator( |
727 | const LLVMState &State, |
728 | const SnippetGenerator::Options &Opts) const override { |
729 | return std::make_unique<RISCVSnippetGenerator<ParallelSnippetGenerator>>( |
730 | args: State, args: Opts); |
731 | } |
732 | |
733 | std::vector<InstructionTemplate> |
734 | generateInstructionVariants(const Instruction &Instr, |
735 | unsigned MaxConfigsPerOpcode) const override; |
736 | |
737 | void addTargetSpecificPasses(PassManagerBase &PM) const override { |
738 | // Turn AVL operand of physical registers into virtual registers. |
739 | PM.add(P: exegesis::createRISCVPreprocessingPass()); |
740 | PM.add(P: createRISCVInsertVSETVLIPass()); |
741 | // Setting up the correct FRM. |
742 | PM.add(P: createRISCVInsertReadWriteCSRPass()); |
743 | PM.add(P: createRISCVInsertWriteVXRMPass()); |
744 | // This will assign physical register to the result of VSETVLI instructions |
745 | // that produce VLMAX. |
746 | PM.add(P: exegesis::createRISCVPostprocessingPass()); |
747 | // PseudoRET will be expanded by RISCVAsmPrinter; we have to expand |
748 | // PseudoMovImm with RISCVPostRAExpandPseudoPass though. |
749 | PM.add(P: createRISCVPostRAExpandPseudoPass()); |
750 | } |
751 | }; |
752 | |
753 | ExegesisRISCVTarget::ExegesisRISCVTarget() |
754 | : ExegesisTarget(RISCVCpuPfmCounters, RISCV_MC::isOpcodeAvailable) {} |
755 | |
756 | bool ExegesisRISCVTarget::matchesArch(Triple::ArchType Arch) const { |
757 | return Arch == Triple::riscv32 || Arch == Triple::riscv64; |
758 | } |
759 | |
760 | std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI, |
761 | MCRegister Reg, |
762 | const APInt &Value) const { |
763 | if (RISCV::GPRRegClass.contains(Reg)) |
764 | return loadIntReg(STI, Reg, Value); |
765 | if (RISCV::FPR16RegClass.contains(Reg)) |
766 | return loadFPRegBits(STI, Reg, Bits: Value, FmvOpcode: RISCV::FMV_H_X); |
767 | if (RISCV::FPR32RegClass.contains(Reg)) |
768 | return loadFPRegBits(STI, Reg, Bits: Value, FmvOpcode: RISCV::FMV_W_X); |
769 | if (RISCV::FPR64RegClass.contains(Reg)) { |
770 | if (STI.hasFeature(Feature: RISCV::Feature64Bit)) |
771 | return loadFPRegBits(STI, Reg, Bits: Value, FmvOpcode: RISCV::FMV_D_X); |
772 | return loadFP64RegBits32(STI, Reg, Bits: Value); |
773 | } |
774 | // TODO: Emit proper code to initialize other kinds of registers. |
775 | return {}; |
776 | } |
777 | |
778 | const MCPhysReg DefaultLoopCounterReg = RISCV::X31; // t6 |
779 | const MCPhysReg ScratchMemoryReg = RISCV::X10; // a0 |
780 | |
781 | MCRegister |
782 | ExegesisRISCVTarget::getDefaultLoopCounterRegister(const Triple &) const { |
783 | return DefaultLoopCounterReg; |
784 | } |
785 | |
786 | void ExegesisRISCVTarget::decrementLoopCounterAndJump( |
787 | MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB, |
788 | const MCInstrInfo &MII, MCRegister LoopRegister) const { |
789 | BuildMI(BB: &MBB, MIMD: DebugLoc(), MCID: MII.get(Opcode: RISCV::ADDI)) |
790 | .addDef(RegNo: LoopRegister) |
791 | .addUse(RegNo: LoopRegister) |
792 | .addImm(Val: -1); |
793 | BuildMI(BB: &MBB, MIMD: DebugLoc(), MCID: MII.get(Opcode: RISCV::BNE)) |
794 | .addUse(RegNo: LoopRegister) |
795 | .addUse(RegNo: RISCV::X0) |
796 | .addMBB(MBB: &TargetMBB); |
797 | } |
798 | |
799 | MCRegister |
800 | ExegesisRISCVTarget::getScratchMemoryRegister(const Triple &TT) const { |
801 | return ScratchMemoryReg; // a0 |
802 | } |
803 | |
804 | void ExegesisRISCVTarget::fillMemoryOperands(InstructionTemplate &IT, |
805 | MCRegister Reg, |
806 | unsigned Offset) const { |
807 | // TODO: for now we ignore Offset because have no way |
808 | // to detect it in instruction. |
809 | auto &I = IT.getInstr(); |
810 | |
811 | auto MemOpIt = |
812 | find_if(Range: I.Operands, P: [](const Operand &Op) { return Op.isMemory(); }); |
813 | assert(MemOpIt != I.Operands.end() && |
814 | "Instruction must have memory operands" ); |
815 | |
816 | const Operand &MemOp = *MemOpIt; |
817 | |
818 | assert(MemOp.isReg() && "Memory operand expected to be register" ); |
819 | |
820 | IT.getValueFor(Op: MemOp) = MCOperand::createReg(Reg); |
821 | } |
822 | |
823 | const MCPhysReg UnavailableRegisters[4] = {RISCV::X0, DefaultLoopCounterReg, |
824 | ScratchIntReg, ScratchMemoryReg}; |
825 | |
826 | ArrayRef<MCPhysReg> ExegesisRISCVTarget::getUnavailableRegisters() const { |
827 | return UnavailableRegisters; |
828 | } |
829 | |
830 | Error ExegesisRISCVTarget::randomizeTargetMCOperand( |
831 | const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue, |
832 | const BitVector &ForbiddenRegs) const { |
833 | uint8_t OperandType = |
834 | Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType; |
835 | |
836 | switch (OperandType) { |
837 | case RISCVOp::OPERAND_FRMARG: |
838 | AssignedValue = MCOperand::createImm(Val: RISCVFPRndMode::DYN); |
839 | break; |
840 | case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: |
841 | AssignedValue = MCOperand::createImm(Val: 0b1 << 4); |
842 | break; |
843 | case RISCVOp::OPERAND_SIMM6_NONZERO: |
844 | case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: |
845 | AssignedValue = MCOperand::createImm(Val: 1); |
846 | break; |
847 | case RISCVOp::OPERAND_SIMM5: |
848 | // 5-bit signed immediate value. |
849 | AssignedValue = MCOperand::createImm(Val: randomIndex(Max: 31) - 16); |
850 | break; |
851 | case RISCVOp::OPERAND_AVL: |
852 | case RISCVOp::OPERAND_UIMM5: |
853 | // 5-bit unsigned immediate value. |
854 | AssignedValue = MCOperand::createImm(Val: randomIndex(Max: 31)); |
855 | break; |
856 | default: |
857 | if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && |
858 | OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM) |
859 | AssignedValue = MCOperand::createImm(Val: 0); |
860 | } |
861 | return Error::success(); |
862 | } |
863 | |
864 | std::vector<InstructionTemplate> |
865 | ExegesisRISCVTarget::generateInstructionVariants( |
866 | const Instruction &Instr, unsigned int MaxConfigsPerOpcode) const { |
867 | InstructionTemplate IT{&Instr}; |
868 | for (const Operand &Op : Instr.Operands) |
869 | if (Op.isMemory()) { |
870 | IT.getValueFor(Op) = MCOperand::createReg(Reg: ScratchMemoryReg); |
871 | } |
872 | return {IT}; |
873 | } |
874 | |
875 | } // anonymous namespace |
876 | |
877 | static ExegesisTarget *getTheRISCVExegesisTarget() { |
878 | static ExegesisRISCVTarget Target; |
879 | return &Target; |
880 | } |
881 | |
882 | void InitializeRISCVExegesisTarget() { |
883 | ExegesisTarget::registerTarget(T: getTheRISCVExegesisTarget()); |
884 | } |
885 | |
886 | } // namespace exegesis |
887 | } // namespace llvm |
888 | |