1//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The CodeEmitterGen component for variable-length instructions.
10//
// The basic CodeEmitterGen is almost exclusively designed for fixed-
// length instructions. A good analogy for its encoding scheme is how printf
// works: The (immutable) formatting string represents the fixed values in the
// encoded instruction. Placeholders (i.e. %something), on the other hand,
// represent the encoding of instruction operands.
16// ```
17// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
18// <encoded value for operand `dst`>);
19// ```
20// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
21// that works more like a C++ stream operator:
22// ```
23// OS << 0b1101;
24// if (Cond)
25// OS << OperandEncoding0;
26// OS << 0b1001 << OperandEncoding1;
27// ```
// You are free to concatenate arbitrary types (and sizes) of encoding
// fragments at any bit position, which gives more flexibility when defining
// encodings for variable-length instructions.
31//
32// In a more specific way, instruction encoding is represented by a DAG type
33// `Inst` field. Here is an example:
34// ```
35// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
36// (operand "$dst", 4));
37// ```
38// It represents the following instruction encoding:
39// ```
40// MSB LSB
41// 1101<encoding for operand src>1001<encoding for operand dst>
42// ```
43// For more details about DAG operators in the above snippet, please
44// refer to \file include/llvm/Target/Target.td.
45//
// VarLenCodeEmitter will convert the above DAG into the same helper function
// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
// for a few details).
49//
50//===----------------------------------------------------------------------===//
51
52#include "VarLenCodeEmitterGen.h"
53#include "CodeGenHwModes.h"
54#include "CodeGenInstruction.h"
55#include "CodeGenTarget.h"
56#include "InfoByHwMode.h"
57#include "llvm/ADT/ArrayRef.h"
58#include "llvm/ADT/DenseMap.h"
59#include "llvm/Support/raw_ostream.h"
60#include "llvm/TableGen/Error.h"
61#include "llvm/TableGen/Record.h"
62
63#include <algorithm>
64
65using namespace llvm;
66
67namespace {
68
69class VarLenCodeEmitterGen {
70 const RecordKeeper &Records;
71
72 // Representaton of alternative encodings used for HwModes.
73 using AltEncodingTy = int;
74 // Mode identifier when only one encoding is defined.
75 const AltEncodingTy Universal = -1;
76 // The set of alternative instruction encodings with a descriptive
77 // name suffix to improve readability of the generated code.
78 std::map<AltEncodingTy, std::string> Modes;
79
80 DenseMap<const Record *, DenseMap<AltEncodingTy, VarLenInst>> VarLenInsts;
81
82 // Emit based values (i.e. fixed bits in the encoded instructions)
83 void emitInstructionBaseValues(
84 raw_ostream &OS,
85 ArrayRef<const CodeGenInstruction *> NumberedInstructions,
86 const CodeGenTarget &Target, AltEncodingTy Mode);
87
88 std::string getInstructionCases(const Record *R, const CodeGenTarget &Target);
89 std::string getInstructionCaseForEncoding(const Record *R, AltEncodingTy Mode,
90 const VarLenInst &VLI,
91 const CodeGenTarget &Target,
92 int Indent);
93
94public:
95 explicit VarLenCodeEmitterGen(const RecordKeeper &R) : Records(R) {}
96
97 void run(raw_ostream &OS);
98};
99} // end anonymous namespace
100
101// Get the name of custom encoder or decoder, if there is any.
102// Returns `{encoder name, decoder name}`.
103static std::pair<StringRef, StringRef>
104getCustomCoders(ArrayRef<const Init *> Args) {
105 std::pair<StringRef, StringRef> Result;
106 for (const auto *Arg : Args) {
107 const auto *DI = dyn_cast<DagInit>(Val: Arg);
108 if (!DI)
109 continue;
110 const Init *Op = DI->getOperator();
111 if (!isa<DefInit>(Val: Op))
112 continue;
113 // syntax: `(<encoder | decoder> "function name")`
114 StringRef OpName = cast<DefInit>(Val: Op)->getDef()->getName();
115 if (OpName != "encoder" && OpName != "decoder")
116 continue;
117 if (!DI->getNumArgs() || !isa<StringInit>(Val: DI->getArg(Num: 0)))
118 PrintFatalError(Msg: "expected '" + OpName +
119 "' directive to be followed by a custom function name.");
120 StringRef FuncName = cast<StringInit>(Val: DI->getArg(Num: 0))->getValue();
121 if (OpName == "encoder")
122 Result.first = FuncName;
123 else
124 Result.second = FuncName;
125 }
126 return Result;
127}
128
129VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
130 : TheDef(TheDef), NumBits(0U), HasDynamicSegment(false) {
131 buildRec(DI);
132 for (const auto &S : Segments)
133 NumBits += S.BitWidth;
134}
135
136void VarLenInst::buildRec(const DagInit *DI) {
137 assert(TheDef && "The def record is nullptr ?");
138
139 std::string Op = DI->getOperator()->getAsString();
140
141 if (Op == "ascend" || Op == "descend") {
142 bool Reverse = Op == "descend";
143 int i = Reverse ? DI->getNumArgs() - 1 : 0;
144 int e = Reverse ? -1 : DI->getNumArgs();
145 int s = Reverse ? -1 : 1;
146 for (; i != e; i += s) {
147 const Init *Arg = DI->getArg(Num: i);
148 if (const auto *BI = dyn_cast<BitsInit>(Val: Arg)) {
149 if (!BI->isComplete())
150 PrintFatalError(ErrorLoc: TheDef->getLoc(),
151 Msg: "Expecting complete bits init in `" + Op + "`");
152 Segments.push_back(Elt: {.BitWidth: BI->getNumBits(), .Value: BI});
153 } else if (const auto *BI = dyn_cast<BitInit>(Val: Arg)) {
154 if (!BI->isConcrete())
155 PrintFatalError(ErrorLoc: TheDef->getLoc(),
156 Msg: "Expecting concrete bit init in `" + Op + "`");
157 Segments.push_back(Elt: {.BitWidth: 1, .Value: BI});
158 } else if (const auto *SubDI = dyn_cast<DagInit>(Val: Arg)) {
159 buildRec(DI: SubDI);
160 } else {
161 PrintFatalError(ErrorLoc: TheDef->getLoc(), Msg: "Unrecognized type of argument in `" +
162 Op + "`: " + Arg->getAsString());
163 }
164 }
165 } else if (Op == "operand") {
166 // (operand <operand name>, <# of bits>,
167 // [(encoder <custom encoder>)][, (decoder <custom decoder>)])
168 if (DI->getNumArgs() < 2)
169 PrintFatalError(ErrorLoc: TheDef->getLoc(),
170 Msg: "Expecting at least 2 arguments for `operand`");
171 HasDynamicSegment = true;
172 const Init *OperandName = DI->getArg(Num: 0), *NumBits = DI->getArg(Num: 1);
173 if (!isa<StringInit>(Val: OperandName) || !isa<IntInit>(Val: NumBits))
174 PrintFatalError(ErrorLoc: TheDef->getLoc(), Msg: "Invalid argument types for `operand`");
175
176 auto NumBitsVal = cast<IntInit>(Val: NumBits)->getValue();
177 if (NumBitsVal <= 0)
178 PrintFatalError(ErrorLoc: TheDef->getLoc(), Msg: "Invalid number of bits for `operand`");
179
180 auto [CustomEncoder, CustomDecoder] =
181 getCustomCoders(Args: DI->getArgs().slice(N: 2));
182 Segments.push_back(Elt: {.BitWidth: static_cast<unsigned>(NumBitsVal), .Value: OperandName,
183 .CustomEncoder: CustomEncoder, .CustomDecoder: CustomDecoder});
184 } else if (Op == "slice") {
185 // (slice <operand name>, <high / low bit>, <low / high bit>,
186 // [(encoder <custom encoder>)][, (decoder <custom decoder>)])
187 if (DI->getNumArgs() < 3)
188 PrintFatalError(ErrorLoc: TheDef->getLoc(),
189 Msg: "Expecting at least 3 arguments for `slice`");
190 HasDynamicSegment = true;
191 const Init *OperandName = DI->getArg(Num: 0), *HiBit = DI->getArg(Num: 1),
192 *LoBit = DI->getArg(Num: 2);
193 if (!isa<StringInit>(Val: OperandName) || !isa<IntInit>(Val: HiBit) ||
194 !isa<IntInit>(Val: LoBit))
195 PrintFatalError(ErrorLoc: TheDef->getLoc(), Msg: "Invalid argument types for `slice`");
196
197 auto HiBitVal = cast<IntInit>(Val: HiBit)->getValue(),
198 LoBitVal = cast<IntInit>(Val: LoBit)->getValue();
199 if (HiBitVal < 0 || LoBitVal < 0)
200 PrintFatalError(ErrorLoc: TheDef->getLoc(), Msg: "Invalid bit range for `slice`");
201 bool NeedSwap = false;
202 unsigned NumBits = 0U;
203 if (HiBitVal < LoBitVal) {
204 NeedSwap = true;
205 NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
206 } else {
207 NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
208 }
209
210 auto [CustomEncoder, CustomDecoder] =
211 getCustomCoders(Args: DI->getArgs().slice(N: 3));
212
213 if (NeedSwap) {
214 // Normalization: Hi bit should always be the second argument.
215 SmallVector<std::pair<const Init *, const StringInit *>> NewArgs(
216 DI->getArgAndNames());
217 std::swap(x&: NewArgs[1], y&: NewArgs[2]);
218 Segments.push_back(Elt: {.BitWidth: NumBits, .Value: DagInit::get(V: DI->getOperator(), ArgAndNames: NewArgs),
219 .CustomEncoder: CustomEncoder, .CustomDecoder: CustomDecoder});
220 } else {
221 Segments.push_back(Elt: {.BitWidth: NumBits, .Value: DI, .CustomEncoder: CustomEncoder, .CustomDecoder: CustomDecoder});
222 }
223 }
224}
225
226void VarLenCodeEmitterGen::run(raw_ostream &OS) {
227 CodeGenTarget Target(Records);
228
229 auto NumberedInstructions = Target.getInstructionsByEnumValue();
230
231 for (const CodeGenInstruction *CGI : NumberedInstructions) {
232 const Record *R = CGI->TheDef;
233 // Create the corresponding VarLenInst instance.
234 if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" ||
235 R->getValueAsBit(FieldName: "isPseudo"))
236 continue;
237
238 // Setup alternative encodings according to HwModes
239 if (const RecordVal *RV = R->getValue(Name: "EncodingInfos")) {
240 if (auto *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
241 const CodeGenHwModes &HWM = Target.getHwModes();
242 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
243 for (const auto [Mode, EncodingDef] : EBM) {
244 Modes.try_emplace(k: Mode, args: "_" + HWM.getMode(Id: Mode).Name.str());
245 const RecordVal *RV = EncodingDef->getValue(Name: "Inst");
246 const DagInit *DI = cast<DagInit>(Val: RV->getValue());
247 VarLenInsts[R].try_emplace(Key: Mode, Args: VarLenInst(DI, RV));
248 }
249 continue;
250 }
251 }
252 const RecordVal *RV = R->getValue(Name: "Inst");
253 const DagInit *DI = cast<DagInit>(Val: RV->getValue());
254 VarLenInsts[R].try_emplace(Key: Universal, Args: VarLenInst(DI, RV));
255 }
256
257 if (Modes.empty())
258 Modes.try_emplace(k: Universal, args: ""); // Base case, skip suffix.
259
260 // Emit function declaration
261 OS << "void " << Target.getName()
262 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
263 << " SmallVectorImpl<MCFixup> &Fixups,\n"
264 << " APInt &Inst,\n"
265 << " APInt &Scratch,\n"
266 << " const MCSubtargetInfo &STI) const {\n";
267
268 // Emit instruction base values
269 for (const auto &Mode : Modes)
270 emitInstructionBaseValues(OS, NumberedInstructions, Target, Mode: Mode.first);
271
272 if (Modes.size() > 1) {
273 OS << " unsigned Mode = STI.getHwMode();\n";
274 }
275
276 for (const auto &Mode : Modes) {
277 // Emit helper function to retrieve base values.
278 OS << " auto getInstBits" << Mode.second
279 << " = [&](unsigned Opcode) -> APInt {\n"
280 << " unsigned NumBits = Index" << Mode.second << "[Opcode][0];\n"
281 << " if (!NumBits)\n"
282 << " return APInt::getZeroWidth();\n"
283 << " unsigned Idx = Index" << Mode.second << "[Opcode][1];\n"
284 << " ArrayRef<uint64_t> Data(&InstBits" << Mode.second << "[Idx], "
285 << "APInt::getNumWords(NumBits));\n"
286 << " return APInt(NumBits, Data);\n"
287 << " };\n";
288 }
289
290 // Map to accumulate all the cases.
291 std::map<std::string, std::vector<std::string>> CaseMap;
292
293 // Construct all cases statement for each opcode
294 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Instruction")) {
295 if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" ||
296 R->getValueAsBit(FieldName: "isPseudo"))
297 continue;
298 std::string InstName =
299 (R->getValueAsString(FieldName: "Namespace") + "::" + R->getName()).str();
300 std::string Case = getInstructionCases(R, Target);
301
302 CaseMap[Case].push_back(x: std::move(InstName));
303 }
304
305 // Emit initial function code
306 OS << " const unsigned opcode = MI.getOpcode();\n"
307 << " switch (opcode) {\n";
308
309 // Emit each case statement
310 for (const auto &C : CaseMap) {
311 const std::string &Case = C.first;
312 const auto &InstList = C.second;
313
314 ListSeparator LS("\n");
315 for (const auto &InstName : InstList)
316 OS << LS << " case " << InstName << ":";
317
318 OS << " {\n";
319 OS << Case;
320 OS << " break;\n"
321 << " }\n";
322 }
323 // Default case: unhandled opcode
324 OS << " default:\n"
325 << " std::string msg;\n"
326 << " raw_string_ostream Msg(msg);\n"
327 << " Msg << \"Not supported instr: \" << MI;\n"
328 << " report_fatal_error(Msg.str().c_str());\n"
329 << " }\n";
330 OS << "}\n\n";
331}
332
333static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
334 unsigned &Index) {
335 if (!Bits.getNumWords()) {
336 IS.indent(NumSpaces: 4) << "{/*NumBits*/0, /*Index*/0},";
337 return;
338 }
339
340 IS.indent(NumSpaces: 4) << "{/*NumBits*/" << Bits.getBitWidth() << ", " << "/*Index*/"
341 << Index << "},";
342
343 SS.indent(NumSpaces: 4);
344 for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
345 SS << "UINT64_C(" << utostr(X: Bits.getRawData()[I]) << "),";
346}
347
348void VarLenCodeEmitterGen::emitInstructionBaseValues(
349 raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
350 const CodeGenTarget &Target, AltEncodingTy Mode) {
351 std::string IndexArray, StorageArray;
352 raw_string_ostream IS(IndexArray), SS(StorageArray);
353
354 IS << " static const unsigned Index" << Modes[Mode] << "[][2] = {\n";
355 SS << " static const uint64_t InstBits" << Modes[Mode] << "[] = {\n";
356
357 unsigned NumFixedValueWords = 0U;
358 for (const CodeGenInstruction *CGI : NumberedInstructions) {
359 const Record *R = CGI->TheDef;
360
361 if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" ||
362 R->getValueAsBit(FieldName: "isPseudo")) {
363 IS.indent(NumSpaces: 4) << "{/*NumBits*/0, /*Index*/0},\n";
364 continue;
365 }
366
367 const auto InstIt = VarLenInsts.find(Val: R);
368 if (InstIt == VarLenInsts.end())
369 PrintFatalError(Rec: R, Msg: "VarLenInst not found for this record");
370 auto ModeIt = InstIt->second.find(Val: Mode);
371 if (ModeIt == InstIt->second.end())
372 ModeIt = InstIt->second.find(Val: Universal);
373 if (ModeIt == InstIt->second.end()) {
374 IS.indent(NumSpaces: 4) << "{/*NumBits*/0, /*Index*/0},\t" << "// " << R->getName()
375 << " no encoding\n";
376 continue;
377 }
378 const VarLenInst &VLI = ModeIt->second;
379 unsigned i = 0U, BitWidth = VLI.size();
380
381 // Start by filling in fixed values.
382 APInt Value(BitWidth, 0);
383 auto SI = VLI.begin(), SE = VLI.end();
384 // Scan through all the segments that have fixed-bits values.
385 while (i < BitWidth && SI != SE) {
386 unsigned SegmentNumBits = SI->BitWidth;
387 if (const auto *BI = dyn_cast<BitsInit>(Val: SI->Value)) {
388 for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
389 auto *B = cast<BitInit>(Val: BI->getBit(Bit: Idx));
390 Value.setBitVal(BitPosition: i + Idx, BitValue: B->getValue());
391 }
392 }
393 if (const auto *BI = dyn_cast<BitInit>(Val: SI->Value))
394 Value.setBitVal(BitPosition: i, BitValue: BI->getValue());
395
396 i += SegmentNumBits;
397 ++SI;
398 }
399
400 emitInstBits(IS, SS, Bits: Value, Index&: NumFixedValueWords);
401 IS << '\t' << "// " << R->getName() << "\n";
402 if (Value.getNumWords())
403 SS << '\t' << "// " << R->getName() << "\n";
404 }
405 IS.indent(NumSpaces: 4) << "{/*NumBits*/0, /*Index*/0}\n };\n";
406 SS.indent(NumSpaces: 4) << "UINT64_C(0)\n };\n";
407
408 OS << IndexArray << StorageArray;
409}
410
411std::string
412VarLenCodeEmitterGen::getInstructionCases(const Record *R,
413 const CodeGenTarget &Target) {
414 auto It = VarLenInsts.find(Val: R);
415 if (It == VarLenInsts.end())
416 PrintFatalError(Rec: R, Msg: "Parsed encoding record not found");
417 const auto &Map = It->second;
418
419 // Is this instructions encoding universal (same for all modes)?
420 // Allways true if there is only one mode.
421 if (Map.size() == 1 && Map.begin()->first == Universal) {
422 // Universal, just pick the first mode.
423 AltEncodingTy Mode = Modes.begin()->first;
424 const auto &Encoding = Map.begin()->second;
425 return getInstructionCaseForEncoding(R, Mode, VLI: Encoding, Target,
426 /*Indent=*/6);
427 }
428
429 std::string Case;
430 Case += " switch (Mode) {\n";
431 Case += " default: llvm_unreachable(\"Unhandled Mode\");\n";
432 for (const auto &Mode : Modes) {
433 Case += " case " + itostr(X: Mode.first) + ": {\n";
434 const auto &It = Map.find(Val: Mode.first);
435 if (It == Map.end()) {
436 Case +=
437 " llvm_unreachable(\"Undefined encoding in this mode\");\n";
438 } else {
439 Case += getInstructionCaseForEncoding(R, Mode: It->first, VLI: It->second, Target,
440 /*Indent=*/8);
441 }
442 Case += " break;\n";
443 Case += " }\n";
444 }
445 Case += " }\n";
446 return Case;
447}
448
449std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
450 const Record *R, AltEncodingTy Mode, const VarLenInst &VLI,
451 const CodeGenTarget &Target, int Indent) {
452 CodeGenInstruction &CGI = Target.getInstruction(InstRec: R);
453
454 std::string Case;
455 raw_string_ostream SS(Case);
456 // Populate based value.
457 SS.indent(NumSpaces: Indent) << "Inst = getInstBits" << Modes[Mode] << "(opcode);\n";
458
459 // Process each segment in VLI.
460 size_t Offset = 0U;
461 unsigned HighScratchAccess = 0U;
462 for (const auto &ES : VLI) {
463 unsigned NumBits = ES.BitWidth;
464 const Init *Val = ES.Value;
465 // If it's a StringInit or DagInit, it's a reference to an operand
466 // or part of an operand.
467 if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
468 StringRef OperandName;
469 unsigned LoBit = 0U;
470 if (const auto *SV = dyn_cast<StringInit>(Val)) {
471 OperandName = SV->getValue();
472 } else {
473 // Normalized: (slice <operand name>, <high bit>, <low bit>)
474 const auto *DV = cast<DagInit>(Val);
475 OperandName = cast<StringInit>(Val: DV->getArg(Num: 0))->getValue();
476 LoBit = static_cast<unsigned>(cast<IntInit>(Val: DV->getArg(Num: 2))->getValue());
477 }
478
479 auto OpIdx = CGI.Operands.ParseOperandName(Op: OperandName);
480 unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(Op: OpIdx);
481 StringRef CustomEncoder =
482 CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];
483 if (ES.CustomEncoder.size())
484 CustomEncoder = ES.CustomEncoder;
485
486 SS.indent(NumSpaces: Indent) << "Scratch.clearAllBits();\n";
487 SS.indent(NumSpaces: Indent) << "// op: " << OperandName.drop_front(N: 1) << "\n";
488 if (CustomEncoder.empty())
489 SS.indent(NumSpaces: Indent) << "getMachineOpValue(MI, MI.getOperand("
490 << utostr(X: FlatOpIdx) << ")";
491 else
492 SS.indent(NumSpaces: Indent) << CustomEncoder << "(MI, /*OpIdx=*/"
493 << utostr(X: FlatOpIdx);
494
495 SS << ", /*Pos=*/" << utostr(X: Offset) << ", Scratch, Fixups, STI);\n";
496
497 SS.indent(NumSpaces: Indent) << "Inst.insertBits("
498 << "Scratch.extractBits(" << utostr(X: NumBits) << ", "
499 << utostr(X: LoBit) << ")"
500 << ", " << Offset << ");\n";
501
502 HighScratchAccess = std::max(a: HighScratchAccess, b: NumBits + LoBit);
503 }
504 Offset += NumBits;
505 }
506
507 StringRef PostEmitter = R->getValueAsString(FieldName: "PostEncoderMethod");
508 if (!PostEmitter.empty())
509 SS.indent(NumSpaces: Indent) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
510
511 // Resize the scratch buffer if it's to small.
512 std::string ScratchResizeStr;
513 if (VLI.size() && !VLI.isFixedValueOnly()) {
514 raw_string_ostream RS(ScratchResizeStr);
515 RS.indent(NumSpaces: Indent) << "if (Scratch.getBitWidth() < " << HighScratchAccess
516 << ") { Scratch = Scratch.zext(" << HighScratchAccess
517 << "); }\n";
518 }
519
520 return ScratchResizeStr + Case;
521}
522
523void llvm::emitVarLenCodeEmitter(const RecordKeeper &R, raw_ostream &OS) {
524 VarLenCodeEmitterGen(R).run(OS);
525}
526