1//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// It contains the tablegen backend that emits the decoder functions for
10// targets with fixed/variable length instruction set.
11//
12//===----------------------------------------------------------------------===//
13
14#include "Common/CodeGenHwModes.h"
15#include "Common/CodeGenInstruction.h"
16#include "Common/CodeGenRegisters.h"
17#include "Common/CodeGenTarget.h"
18#include "Common/InfoByHwMode.h"
19#include "Common/InstructionEncoding.h"
20#include "Common/SubtargetFeatureInfo.h"
21#include "Common/VarLenCodeEmitterGen.h"
22#include "DecoderTableEmitter.h"
23#include "DecoderTree.h"
24#include "TableGenBackends.h"
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/STLExtras.h"
28#include "llvm/ADT/SetVector.h"
29#include "llvm/ADT/SmallBitVector.h"
30#include "llvm/ADT/SmallString.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/ADT/StringExtras.h"
33#include "llvm/ADT/StringRef.h"
34#include "llvm/Support/Casting.h"
35#include "llvm/Support/CommandLine.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Support/Format.h"
39#include "llvm/Support/FormatVariadic.h"
40#include "llvm/Support/FormattedStream.h"
41#include "llvm/Support/KnownBits.h"
42#include "llvm/Support/MathExtras.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/TableGen/Error.h"
45#include "llvm/TableGen/Record.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <cstdint>
50#include <map>
51#include <memory>
52#include <set>
53#include <string>
54#include <utility>
55#include <vector>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "decoder-emitter"
60
61extern cl::OptionCategory DisassemblerEmitterCat;
62
63enum SuppressLevel {
64 SUPPRESSION_DISABLE,
65 SUPPRESSION_LEVEL1,
66 SUPPRESSION_LEVEL2
67};
68
69static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
70 "suppress-per-hwmode-duplicates",
71 cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"),
72 cl::values(
73 clEnumValN(
74 SUPPRESSION_DISABLE, "O0",
75 "Do not prevent DecoderTable duplications caused by HwModes"),
76 clEnumValN(
77 SUPPRESSION_LEVEL1, "O1",
78 "Remove duplicate DecoderTable entries generated due to HwModes"),
79 clEnumValN(
80 SUPPRESSION_LEVEL2, "O2",
81 "Extract HwModes-specific instructions into new DecoderTables, "
82 "significantly reducing Table Duplications")),
83 cl::init(Val: SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
84
85static cl::opt<bool> UseFnTableInDecodeToMCInst(
86 "use-fn-table-in-decode-to-mcinst",
87 cl::desc(
88 "Use a table of function pointers instead of a switch case in the\n"
89 "generated `decodeToMCInst` function. Helps improve compile time\n"
90 "of the generated code."),
91 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
92
93// Enabling this option requires use of different `InsnType` for different
94// bitwidths and defining `InsnBitWidth` template specialization for the
95// `InsnType` types used. Some common specializations are already defined in
96// MCDecoder.h.
97static cl::opt<bool> SpecializeDecodersPerBitwidth(
98 "specialize-decoders-per-bitwidth",
99 cl::desc("Specialize the generated `decodeToMCInst` function per bitwidth. "
100 "Helps reduce the code size."),
101 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
102
103static cl::opt<bool> IgnoreNonDecodableOperands(
104 "ignore-non-decodable-operands",
105 cl::desc(
106 "Do not issue an error if an operand cannot be decoded automatically."),
107 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
108
109static cl::opt<bool> IgnoreFullyDefinedOperands(
110 "ignore-fully-defined-operands",
111 cl::desc(
112 "Do not automatically decode operands with no '?' in their encoding."),
113 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
114
115STATISTIC(NumEncodings, "Number of encodings considered");
116STATISTIC(NumEncodingsLackingDisasm,
117 "Number of encodings without disassembler info");
118STATISTIC(NumInstructions, "Number of instructions considered");
119STATISTIC(NumEncodingsSupported, "Number of encodings supported");
120STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
121
122/// Similar to KnownBits::print(), but allows you to specify a character to use
123/// to print unknown bits.
124static void printKnownBits(raw_ostream &OS, const KnownBits &Bits,
125 char Unknown) {
126 for (unsigned I = Bits.getBitWidth(); I--;) {
127 if (Bits.Zero[I] && Bits.One[I])
128 OS << '!';
129 else if (Bits.Zero[I])
130 OS << '0';
131 else if (Bits.One[I])
132 OS << '1';
133 else
134 OS << Unknown;
135 }
136}
137
138namespace {
139
140/// Sorting predicate to sort encoding IDs by encoding width.
141class LessEncodingIDByWidth {
142 ArrayRef<InstructionEncoding> Encodings;
143
144public:
145 explicit LessEncodingIDByWidth(ArrayRef<InstructionEncoding> Encodings)
146 : Encodings(Encodings) {}
147
148 bool operator()(unsigned ID1, unsigned ID2) const {
149 return Encodings[ID1].getBitWidth() < Encodings[ID2].getBitWidth();
150 }
151};
152
153using NamespacesHwModesMap = std::map<StringRef, std::set<unsigned>>;
154
155class DecoderEmitter {
156 const RecordKeeper &RK;
157 CodeGenTarget Target;
158 const CodeGenHwModes &CGH;
159
160 /// All parsed encodings.
161 std::vector<InstructionEncoding> Encodings;
162
163 /// Encodings IDs for each HwMode. An ID is an index into Encodings.
164 SmallDenseMap<unsigned, std::vector<unsigned>> EncodingIDsByHwMode;
165
166public:
167 explicit DecoderEmitter(const RecordKeeper &RK);
168
169 const CodeGenTarget &getTarget() const { return Target; }
170
171 void emitInstrLenTable(formatted_raw_ostream &OS,
172 ArrayRef<unsigned> InstrLen) const;
173 void emitPredicateFunction(formatted_raw_ostream &OS,
174 const PredicateSet &Predicates) const;
175
176 void emitRegClassByHwModeDecoders(formatted_raw_ostream &OS) const;
177 void emitDecoderFunction(formatted_raw_ostream &OS,
178 const DecoderSet &Decoders,
179 unsigned BucketBitWidth) const;
180
181 // run - Output the code emitter
182 void run(raw_ostream &o) const;
183
184private:
185 void collectHwModesReferencedForEncodings(
186 std::vector<unsigned> &HwModeIDs,
187 NamespacesHwModesMap &NamespacesWithHwModes) const;
188
189 void
190 handleHwModesUnrelatedEncodings(unsigned EncodingID,
191 ArrayRef<unsigned> HwModeIDs,
192 NamespacesHwModesMap &NamespacesWithHwModes);
193
194 void parseInstructionEncodings();
195};
196
197struct EncodingIsland {
198 unsigned StartBit;
199 unsigned NumBits;
200 uint64_t FieldVal;
201};
202
203/// Filter - Filter works with FilterChooser to produce the decoding tree for
204/// the ISA.
205///
206/// It is useful to think of a Filter as governing the switch stmts of the
207/// decoding tree in a certain level. Each case stmt delegates to an inferior
208/// FilterChooser to decide what further decoding logic to employ, or in another
209/// words, what other remaining bits to look at. The FilterChooser eventually
210/// chooses a best Filter to do its job.
211///
212/// This recursive scheme ends when the number of Opcodes assigned to the
213/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
214/// the Filter/FilterChooser combo does not know how to distinguish among the
215/// Opcodes assigned.
216///
217/// An example of a conflict is
218///
219/// Decoding Conflict:
220/// ................................
221/// 1111............................
222/// 1111010.........................
223/// 1111010...00....................
224/// 1111010...00........0001........
225/// 111101000.00........0001........
226/// 111101000.00........00010000....
227/// 111101000_00________00010000____ VST4q8a
228/// 111101000_00________00010000____ VST4q8b
229///
230/// The Debug output shows the path that the decoding tree follows to reach the
231/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
232/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
233///
234/// The encoding info in the .td files does not specify this meta information,
235/// which could have been used by the decoder to resolve the conflict. The
236/// decoder could try to decode the even/odd register numbering and assign to
237/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
238/// version and return the Opcode since the two have the same Asm format string.
239struct Filter {
240 unsigned StartBit; // the starting bit position
241 unsigned NumBits; // number of bits to filter
242
243 // Map of well-known segment value to the set of uid's with that value.
244 std::map<uint64_t, std::vector<unsigned>> FilteredIDs;
245
246 // Set of uid's with non-constant segment values.
247 std::vector<unsigned> VariableIDs;
248
249 Filter(ArrayRef<InstructionEncoding> Encodings,
250 ArrayRef<unsigned> EncodingIDs, unsigned StartBit, unsigned NumBits);
251
252 // Returns the number of fanout produced by the filter. More fanout implies
253 // the filter distinguishes more categories of instructions.
254 unsigned usefulness() const;
255}; // end class Filter
256
257// These are states of our finite state machines used in FilterChooser's
258// filterProcessor() which produces the filter candidates to use.
259enum bitAttr_t {
260 ATTR_NONE,
261 ATTR_FILTERED,
262 ATTR_ALL_SET,
263 ATTR_ALL_UNSET,
264 ATTR_MIXED
265};
266
267/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
268/// in order to perform the decoding of instructions at the current level.
269///
270/// Decoding proceeds from the top down. Based on the well-known encoding bits
271/// of instructions available, FilterChooser builds up the possible Filters that
272/// can further the task of decoding by distinguishing among the remaining
273/// candidate instructions.
274///
275/// Once a filter has been chosen, it is called upon to divide the decoding task
276/// into sub-tasks and delegates them to its inferior FilterChoosers for further
277/// processings.
278///
279/// It is useful to think of a Filter as governing the switch stmts of the
280/// decoding tree. And each case is delegated to an inferior FilterChooser to
281/// decide what further remaining bits to look at.
282
283class FilterChooser {
284 // TODO: Unfriend by providing the necessary accessors.
285 friend class DecoderTreeBuilder;
286
287 // Vector of encodings to choose our filter.
288 ArrayRef<InstructionEncoding> Encodings;
289
290 /// Encoding IDs for this filter chooser to work on.
291 /// Sorted by non-decreasing encoding width.
292 SmallVector<unsigned, 0> EncodingIDs;
293
294 // Array of bit values passed down from our parent.
295 // Set to all unknown for Parent == nullptr.
296 KnownBits FilterBits;
297
298 // Links to the FilterChooser above us in the decoding tree.
299 const FilterChooser *Parent;
300
301 /// If the selected filter matches multiple encodings, then this is the
302 /// starting position and the width of the filtered range.
303 unsigned StartBit;
304 unsigned NumBits;
305
306 /// If the selected filter matches multiple encodings, and there is
307 /// *exactly one* encoding in which all bits are known in the filtered range,
308 /// then this is the ID of that encoding.
309 /// Also used when there is only one encoding.
310 std::optional<unsigned> SingletonEncodingID;
311
312 /// If the selected filter matches multiple encodings, and there is
313 /// *at least one* encoding in which all bits are known in the filtered range,
314 /// then this is the FilterChooser created for the subset of encodings that
315 /// contain some unknown bits in the filtered range.
316 std::unique_ptr<const FilterChooser> VariableFC;
317
318 /// If the selected filter matches multiple encodings, and there is
319 /// *more than one* encoding in which all bits are known in the filtered
320 /// range, then this is a map of field values to FilterChoosers created for
321 /// the subset of encodings sharing that field value.
322 /// The "field value" here refers to the encoding bits in the filtered range.
323 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
324
325 /// Set to true if decoding conflict was encountered.
326 bool HasConflict = false;
327
328public:
329 /// Constructs a top-level filter chooser.
330 FilterChooser(ArrayRef<InstructionEncoding> Encodings,
331 ArrayRef<unsigned> EncodingIDs)
332 : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(nullptr) {
333 // Sort encoding IDs once.
334 stable_sort(Range&: this->EncodingIDs, C: LessEncodingIDByWidth(Encodings));
335 // Filter width is the width of the smallest encoding.
336 unsigned FilterWidth = Encodings[this->EncodingIDs.front()].getBitWidth();
337 FilterBits = KnownBits(FilterWidth);
338 doFilter();
339 }
340
341 /// Constructs an inferior filter chooser.
342 FilterChooser(ArrayRef<InstructionEncoding> Encodings,
343 ArrayRef<unsigned> EncodingIDs, const KnownBits &FilterBits,
344 const FilterChooser &Parent)
345 : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(&Parent) {
346 // Inferior filter choosers are created from sorted array of encoding IDs.
347 assert(is_sorted(EncodingIDs, LessEncodingIDByWidth(Encodings)));
348 assert(!FilterBits.hasConflict() && "Broken filter");
349 // Filter width is the width of the smallest encoding.
350 unsigned FilterWidth = Encodings[EncodingIDs.front()].getBitWidth();
351 this->FilterBits = FilterBits.anyext(BitWidth: FilterWidth);
352 doFilter();
353 }
354
355 FilterChooser(const FilterChooser &) = delete;
356 void operator=(const FilterChooser &) = delete;
357
358 /// Returns the width of the largest encoding.
359 unsigned getMaxEncodingWidth() const {
360 // The last encoding ID is the ID of an encoding with the largest width.
361 return Encodings[EncodingIDs.back()].getBitWidth();
362 }
363
364 /// Returns true if any decoding conflicts were encountered.
365 bool hasConflict() const { return HasConflict; }
366
367private:
368 /// Applies the given filter to the set of encodings this FilterChooser
369 /// works with, creating inferior FilterChoosers as necessary.
370 void applyFilter(const Filter &F);
371
372 /// dumpStack - dumpStack traverses the filter chooser chain and calls
373 /// dumpFilterArray on each filter chooser up to the top level one.
374 void dumpStack(raw_ostream &OS, indent Indent, unsigned PadToWidth) const;
375
376 bool isPositionFiltered(unsigned Idx) const {
377 return FilterBits.Zero[Idx] || FilterBits.One[Idx];
378 }
379
380 /// Scans the well-known encoding bits of the encodings and, builds up a list
381 /// of candidate filters, and then returns the best one, if any.
382 std::unique_ptr<Filter> findBestFilter(ArrayRef<bitAttr_t> BitAttrs,
383 bool AllowMixed,
384 bool Greedy = true) const;
385
386 std::unique_ptr<Filter> findBestFilter() const;
387
388 // Decides on the best configuration of filter(s) to use in order to decode
389 // the instructions. A conflict of instructions may occur, in which case we
390 // dump the conflict set to the standard error.
391 void doFilter();
392
393public:
394 void dump() const;
395};
396
397} // end anonymous namespace
398
399///////////////////////////
400// //
401// Filter Implementation //
402// //
403///////////////////////////
404
405Filter::Filter(ArrayRef<InstructionEncoding> Encodings,
406 ArrayRef<unsigned> EncodingIDs, unsigned StartBit,
407 unsigned NumBits)
408 : StartBit(StartBit), NumBits(NumBits) {
409 for (unsigned EncodingID : EncodingIDs) {
410 const InstructionEncoding &Encoding = Encodings[EncodingID];
411 KnownBits EncodingBits = Encoding.getMandatoryBits();
412
413 // Scans the segment for possibly well-specified encoding bits.
414 KnownBits FieldBits = EncodingBits.extractBits(NumBits, BitPosition: StartBit);
415
416 if (FieldBits.isConstant()) {
417 // The encoding bits are well-known. Lets add the uid of the
418 // instruction into the bucket keyed off the constant field value.
419 FilteredIDs[FieldBits.getConstant().getZExtValue()].push_back(x: EncodingID);
420 } else {
421 // Some of the encoding bit(s) are unspecified. This contributes to
422 // one additional member of "Variable" instructions.
423 VariableIDs.push_back(x: EncodingID);
424 }
425 }
426
427 assert((FilteredIDs.size() + VariableIDs.size() > 0) &&
428 "Filter returns no instruction categories");
429}
430
431void FilterChooser::applyFilter(const Filter &F) {
432 StartBit = F.StartBit;
433 NumBits = F.NumBits;
434 assert(FilterBits.extractBits(NumBits, StartBit).isUnknown());
435
436 if (!F.VariableIDs.empty()) {
437 // Delegates to an inferior filter chooser for further processing on this
438 // group of instructions whose segment values are variable.
439 VariableFC = std::make_unique<FilterChooser>(args&: Encodings, args: F.VariableIDs,
440 args&: FilterBits, args&: *this);
441 HasConflict |= VariableFC->HasConflict;
442 }
443
444 // Otherwise, create sub choosers.
445 for (const auto &[FilterVal, InferiorEncodingIDs] : F.FilteredIDs) {
446 // Create a new filter by inserting the field bits into the parent filter.
447 APInt FieldBits(NumBits, FilterVal);
448 KnownBits InferiorFilterBits = FilterBits;
449 InferiorFilterBits.insertBits(SubBits: KnownBits::makeConstant(C: FieldBits), BitPosition: StartBit);
450
451 // Delegates to an inferior filter chooser for further processing on this
452 // category of instructions.
453 auto [It, _] = FilterChooserMap.try_emplace(
454 k: FilterVal,
455 args: std::make_unique<FilterChooser>(args&: Encodings, args: InferiorEncodingIDs,
456 args&: InferiorFilterBits, args&: *this));
457 HasConflict |= It->second->HasConflict;
458 }
459}
460
461// Returns the number of fanout produced by the filter. More fanout implies
462// the filter distinguishes more categories of instructions.
463unsigned Filter::usefulness() const {
464 return FilteredIDs.size() + VariableIDs.empty();
465}
466
467//////////////////////////////////
468// //
469// Filterchooser Implementation //
470// //
471//////////////////////////////////
472
473void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
474 ArrayRef<unsigned> InstrLen) const {
475 OS << "static const uint8_t InstrLenTable[] = {\n";
476 for (unsigned Len : InstrLen)
477 OS << Len << ",\n";
478 OS << "};\n\n";
479}
480
481void DecoderEmitter::emitPredicateFunction(
482 formatted_raw_ostream &OS, const PredicateSet &Predicates) const {
483 // The predicate function is just a big switch statement based on the
484 // input predicate index.
485 OS << "static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset "
486 "&FB) {\n";
487 OS << " switch (Idx) {\n";
488 OS << " default: llvm_unreachable(\"Invalid index!\");\n";
489 for (const auto &[Index, Predicate] : enumerate(First: Predicates)) {
490 OS << " case " << Index << ":\n";
491 OS << " return " << Predicate << ";\n";
492 }
493 OS << " }\n";
494 OS << "}\n\n";
495}
496
497/// Emit a default implementation of a decoder for all RegClassByHwModes which
498/// do not have an explicit DecoderMethodSet, which dispatches over the decoder
499/// methods for the member classes.
500void DecoderEmitter::emitRegClassByHwModeDecoders(
501 formatted_raw_ostream &OS) const {
502 const CodeGenHwModes &CGH = Target.getHwModes();
503 if (CGH.getNumModeIds() == 1)
504 return;
505
506 ArrayRef<const Record *> RegClassByHwMode = Target.getAllRegClassByHwMode();
507 if (RegClassByHwMode.empty())
508 return;
509
510 for (const Record *ClassByHwMode : RegClassByHwMode) {
511 // Ignore cases that had an explicit DecoderMethod set.
512 if (!InstructionEncoding::findOperandDecoderMethod(Record: ClassByHwMode).second)
513 continue;
514
515 const HwModeSelect &ModeSelect = CGH.getHwModeSelect(R: ClassByHwMode);
516
517 // Mips has a system where this is only used by compound operands with
518 // custom decoders, and we don't try to detect if this decoder is really
519 // needed.
520 OS << "[[maybe_unused]]\n";
521
522 OS << "static DecodeStatus Decode" << ClassByHwMode->getName()
523 << "RegClassByHwMode";
524 OS << R"((MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) {
525 switch (Decoder->getSubtargetInfo().getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
526)";
527 for (auto [ModeID, RegClassRec] : ModeSelect.Items) {
528 OS << indent(2) << "case " << ModeID << ": // "
529 << CGH.getModeName(Id: ModeID, /*IncludeDefault=*/true) << '\n'
530 << indent(4) << "return "
531 << InstructionEncoding::findOperandDecoderMethod(Record: RegClassRec).first
532 << "(Inst, Imm, Addr, Decoder);\n";
533 }
534 OS << indent(2) << R"(default:
535 llvm_unreachable("no decoder for hwmode");
536 }
537}
538
539)";
540 }
541
542 OS << '\n';
543}
544
545void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
546 const DecoderSet &Decoders,
547 unsigned BucketBitWidth) const {
548 // The decoder function is just a big switch statement or a table of function
549 // pointers based on the input decoder index.
550
551 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
552 // It would be better for emitBinaryParser to use a 64-bit tmp whenever
553 // possible but fall back to an InsnType-sized tmp for truly large fields.
554 StringRef TmpTypeDecl =
555 "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
556 "InsnType, uint64_t>;\n";
557 StringRef DecodeParams =
558 "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
559 "MCDisassembler *Decoder, bool &DecodeComplete";
560
561 // Print the name of the decode function to OS.
562 auto PrintDecodeFnName = [&OS, BucketBitWidth](unsigned DecodeIdx) {
563 OS << "decodeFn";
564 if (BucketBitWidth != 0) {
565 OS << '_' << BucketBitWidth << "bit";
566 }
567 OS << '_' << DecodeIdx;
568 };
569
570 // Print the template statement.
571 auto PrintTemplate = [&OS, BucketBitWidth]() {
572 OS << "template <typename InsnType>\n";
573 OS << "static ";
574 if (BucketBitWidth != 0)
575 OS << "std::enable_if_t<InsnBitWidth<InsnType> == " << BucketBitWidth
576 << ", DecodeStatus>\n";
577 else
578 OS << "DecodeStatus ";
579 };
580
581 if (UseFnTableInDecodeToMCInst) {
582 // Emit a function for each case first.
583 for (const auto &[Index, Decoder] : enumerate(First: Decoders)) {
584 PrintTemplate();
585 PrintDecodeFnName(Index);
586 OS << "(" << DecodeParams << ") {\n";
587 OS << " " << TmpTypeDecl;
588 OS << " [[maybe_unused]] TmpType tmp;\n";
589 OS << Decoder;
590 OS << " return S;\n";
591 OS << "}\n\n";
592 }
593 }
594
595 OS << "// Handling " << Decoders.size() << " cases.\n";
596 PrintTemplate();
597 OS << "decodeToMCInst(unsigned Idx, " << DecodeParams << ") {\n";
598 OS << " DecodeComplete = true;\n";
599
600 if (UseFnTableInDecodeToMCInst) {
601 // Build a table of function pointers
602 OS << " using DecodeFnTy = DecodeStatus (*)(" << DecodeParams << ");\n";
603 OS << " static constexpr DecodeFnTy decodeFnTable[] = {\n";
604 for (size_t Index : llvm::seq(Size: Decoders.size())) {
605 OS << " ";
606 PrintDecodeFnName(Index);
607 OS << ",\n";
608 }
609 OS << " };\n";
610 OS << " if (Idx >= " << Decoders.size() << ")\n";
611 OS << " llvm_unreachable(\"Invalid decoder index!\");\n";
612 OS << " return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
613 "DecodeComplete);\n";
614 } else {
615 OS << " " << TmpTypeDecl;
616 OS << " TmpType tmp;\n";
617 OS << " switch (Idx) {\n";
618 OS << " default: llvm_unreachable(\"Invalid decoder index!\");\n";
619 for (const auto &[Index, Decoder] : enumerate(First: Decoders)) {
620 OS << " case " << Index << ":\n";
621 OS << Decoder;
622 OS << " return S;\n";
623 }
624 OS << " }\n";
625 }
626 OS << "}\n";
627}
628
629/// dumpStack - dumpStack traverses the filter chooser chain and calls
630/// dumpFilterArray on each filter chooser up to the top level one.
631void FilterChooser::dumpStack(raw_ostream &OS, indent Indent,
632 unsigned PadToWidth) const {
633 if (Parent)
634 Parent->dumpStack(OS, Indent, PadToWidth);
635 assert(PadToWidth >= FilterBits.getBitWidth());
636 OS << Indent << indent(PadToWidth - FilterBits.getBitWidth());
637 printKnownBits(OS, Bits: FilterBits, Unknown: '.');
638 OS << '\n';
639}
640
641// Calculates the island(s) needed to decode the instruction.
642// This returns a list of undecoded bits of an instructions, for example,
643// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
644// decoded bits in order to verify that the instruction matches the Opcode.
645static std::vector<EncodingIsland> getIslands(const KnownBits &EncodingBits,
646 const KnownBits &FilterBits) {
647 std::vector<EncodingIsland> Islands;
648 uint64_t FieldVal;
649 unsigned StartBit;
650
651 bool OnIsland = false;
652 unsigned FilterWidth = FilterBits.getBitWidth();
653 for (unsigned I = 0; I != FilterWidth; ++I) {
654 bool IsKnown = EncodingBits.Zero[I] || EncodingBits.One[I];
655 bool IsFiltered = FilterBits.Zero[I] || FilterBits.One[I];
656 if (!IsFiltered && IsKnown) {
657 if (OnIsland) {
658 // Accumulate island bits.
659 const unsigned BitNo = I - StartBit;
660 FieldVal |= static_cast<uint64_t>(EncodingBits.One[I]) << (BitNo);
661 // If island becomes larger than 64-bits complete the island and start a
662 // new one
663 if (BitNo >= 63) {
664 Islands.push_back(x: {.StartBit: StartBit, .NumBits: 64, .FieldVal: FieldVal});
665 OnIsland = false;
666 }
667 } else {
668 // Onto an island.
669 StartBit = I;
670 FieldVal = static_cast<uint64_t>(EncodingBits.One[I]);
671 OnIsland = true;
672 }
673 } else if (OnIsland) {
674 // Into the water.
675 Islands.push_back(x: {.StartBit: StartBit, .NumBits: I - StartBit, .FieldVal: FieldVal});
676 OnIsland = false;
677 }
678 }
679
680 if (OnIsland)
681 Islands.push_back(x: {.StartBit: StartBit, .NumBits: FilterWidth - StartBit, .FieldVal: FieldVal});
682
683 return Islands;
684}
685
686static void emitBinaryParser(raw_ostream &OS, indent Indent,
687 const InstructionEncoding &Encoding,
688 const OperandInfo &OpInfo) {
689 if (OpInfo.HasNoEncoding) {
690 // If an operand has no encoding, the old behavior is to not decode it
691 // automatically and let the target do it. This is error-prone, so the
692 // new behavior is to report an error.
693 if (!IgnoreNonDecodableOperands)
694 PrintError(ErrorLoc: Encoding.getRecord()->getLoc(),
695 Msg: "could not find field for operand '" + OpInfo.Name + "'");
696 return;
697 }
698
699 // Special case for 'bits<0>'.
700 if (OpInfo.Fields.empty() && !OpInfo.InitValue) {
701 assert(!OpInfo.Decoder.empty());
702 // The operand has no encoding, so the corresponding argument is omitted.
703 // This avoids confusion and allows the function to be overloaded if the
704 // operand does have an encoding in other instructions.
705 OS << Indent << "if (!Check(S, " << OpInfo.Decoder << "(MI, Decoder)))\n"
706 << Indent << " return MCDisassembler::Fail;\n";
707 return;
708 }
709
710 if (OpInfo.fields().empty()) {
711 // Only a constant part. The old behavior is to not decode this operand.
712 if (IgnoreFullyDefinedOperands)
713 return;
714 // Initialize `tmp` with the constant part.
715 OS << Indent << "tmp = " << format_hex(N: *OpInfo.InitValue, Width: 0) << ";\n";
716 } else if (OpInfo.fields().size() == 1 && !OpInfo.InitValue.value_or(u: 0)) {
717 // One variable part and no/zero constant part. Initialize `tmp` with the
718 // variable part.
719 auto [Base, Width, Offset] = OpInfo.fields().front();
720 OS << Indent << "tmp = fieldFromInstruction(insn, " << Base << ", " << Width
721 << ')';
722 if (Offset)
723 OS << " << " << Offset;
724 OS << ";\n";
725 } else {
726 // General case. Initialize `tmp` with the constant part, if any, and
727 // insert the variable parts into it.
728 OS << Indent << "tmp = " << format_hex(N: OpInfo.InitValue.value_or(u: 0), Width: 0)
729 << ";\n";
730 for (auto [Base, Width, Offset] : OpInfo.fields()) {
731 OS << Indent << "tmp |= fieldFromInstruction(insn, " << Base << ", "
732 << Width << ')';
733 if (Offset)
734 OS << " << " << Offset;
735 OS << ";\n";
736 }
737 }
738
739 StringRef Decoder = OpInfo.Decoder;
740 if (!Decoder.empty()) {
741 OS << Indent << "if (!Check(S, " << Decoder
742 << "(MI, tmp, Address, Decoder))) { "
743 << (OpInfo.HasCompleteDecoder ? "" : "DecodeComplete = false; ")
744 << "return MCDisassembler::Fail; }\n";
745 } else {
746 OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
747 }
748}
749
750static std::string getDecoderString(const InstructionEncoding &Encoding) {
751 std::string Decoder;
752 raw_string_ostream OS(Decoder);
753 indent Indent(UseFnTableInDecodeToMCInst ? 2 : 4);
754
755 // If a custom instruction decoder was specified, use that.
756 StringRef DecoderMethod = Encoding.getDecoderMethod();
757 if (!DecoderMethod.empty()) {
758 OS << Indent << "if (!Check(S, " << DecoderMethod
759 << "(MI, insn, Address, Decoder))) { "
760 << (Encoding.hasCompleteDecoder() ? "" : "DecodeComplete = false; ")
761 << "return MCDisassembler::Fail; }\n";
762 } else {
763 for (const OperandInfo &Op : Encoding.getOperands())
764 emitBinaryParser(OS, Indent, Encoding, OpInfo: Op);
765 }
766 return Decoder;
767}
768
769static std::string getPredicateString(const InstructionEncoding &Encoding,
770 StringRef TargetName) {
771 std::vector<const Record *> Predicates =
772 Encoding.getRecord()->getValueAsListOfDefs(FieldName: "Predicates");
773 auto It = llvm::find_if(Range&: Predicates, P: [](const Record *R) {
774 return R->getValueAsBit(FieldName: "AssemblerMatcherPredicate");
775 });
776 if (It == Predicates.end())
777 return std::string();
778
779 std::string Predicate;
780 raw_string_ostream OS(Predicate);
781 SubtargetFeatureInfo::emitMCPredicateCheck(OS, TargetName, Predicates);
782 return Predicate;
783}
784
785std::unique_ptr<Filter>
786FilterChooser::findBestFilter(ArrayRef<bitAttr_t> BitAttrs, bool AllowMixed,
787 bool Greedy) const {
788 assert(EncodingIDs.size() >= 2 && "Nothing to filter");
789
790 // Heuristics. See also doFilter()'s "Heuristics" comment when num of
791 // instructions is 3.
792 if (AllowMixed && !Greedy) {
793 assert(EncodingIDs.size() == 3);
794
795 for (unsigned EncodingID : EncodingIDs) {
796 const InstructionEncoding &Encoding = Encodings[EncodingID];
797 KnownBits EncodingBits = Encoding.getMandatoryBits();
798
799 // Look for islands of undecoded bits of any instruction.
800 std::vector<EncodingIsland> Islands =
801 getIslands(EncodingBits, FilterBits);
802 if (!Islands.empty()) {
803 // Found an instruction with island(s). Now just assign a filter.
804 return std::make_unique<Filter>(
805 args: Encodings, args: EncodingIDs, args&: Islands[0].StartBit, args&: Islands[0].NumBits);
806 }
807 }
808 }
809
810 // The regionAttr automaton consumes the bitAttrs automatons' state,
811 // lowest-to-highest.
812 //
813 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
814 // States: NONE, ALL_SET, MIXED
815 // Initial state: NONE
816 //
817 // (NONE) ----- F --> (NONE)
818 // (NONE) ----- S --> (ALL_SET) ; and set region start
819 // (NONE) ----- U --> (NONE)
820 // (NONE) ----- M --> (MIXED) ; and set region start
821 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
822 // (ALL_SET) -- S --> (ALL_SET)
823 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
824 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
825 // (MIXED) ---- F --> (NONE) ; and report a MIXED region
826 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
827 // (MIXED) ---- U --> (NONE) ; and report a MIXED region
828 // (MIXED) ---- M --> (MIXED)
829
830 bitAttr_t RA = ATTR_NONE;
831 unsigned StartBit = 0;
832
833 std::vector<std::unique_ptr<Filter>> Filters;
834
835 auto addCandidateFilter = [&](unsigned StartBit, unsigned EndBit) {
836 Filters.push_back(x: std::make_unique<Filter>(args: Encodings, args: EncodingIDs, args&: StartBit,
837 args: EndBit - StartBit));
838 };
839
840 unsigned FilterWidth = FilterBits.getBitWidth();
841 for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
842 bitAttr_t bitAttr = BitAttrs[BitIndex];
843
844 assert(bitAttr != ATTR_NONE && "Bit without attributes");
845
846 switch (RA) {
847 case ATTR_NONE:
848 switch (bitAttr) {
849 case ATTR_FILTERED:
850 break;
851 case ATTR_ALL_SET:
852 StartBit = BitIndex;
853 RA = ATTR_ALL_SET;
854 break;
855 case ATTR_ALL_UNSET:
856 break;
857 case ATTR_MIXED:
858 StartBit = BitIndex;
859 RA = ATTR_MIXED;
860 break;
861 default:
862 llvm_unreachable("Unexpected bitAttr!");
863 }
864 break;
865 case ATTR_ALL_SET:
866 if (!AllowMixed && bitAttr != ATTR_ALL_SET)
867 addCandidateFilter(StartBit, BitIndex);
868 switch (bitAttr) {
869 case ATTR_FILTERED:
870 RA = ATTR_NONE;
871 break;
872 case ATTR_ALL_SET:
873 break;
874 case ATTR_ALL_UNSET:
875 RA = ATTR_NONE;
876 break;
877 case ATTR_MIXED:
878 StartBit = BitIndex;
879 RA = ATTR_MIXED;
880 break;
881 default:
882 llvm_unreachable("Unexpected bitAttr!");
883 }
884 break;
885 case ATTR_MIXED:
886 if (AllowMixed && bitAttr != ATTR_MIXED)
887 addCandidateFilter(StartBit, BitIndex);
888 switch (bitAttr) {
889 case ATTR_FILTERED:
890 StartBit = BitIndex;
891 RA = ATTR_NONE;
892 break;
893 case ATTR_ALL_SET:
894 StartBit = BitIndex;
895 RA = ATTR_ALL_SET;
896 break;
897 case ATTR_ALL_UNSET:
898 RA = ATTR_NONE;
899 break;
900 case ATTR_MIXED:
901 break;
902 default:
903 llvm_unreachable("Unexpected bitAttr!");
904 }
905 break;
906 case ATTR_ALL_UNSET:
907 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
908 case ATTR_FILTERED:
909 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
910 }
911 }
912
913 // At the end, if we're still in ALL_SET or MIXED states, report a region
914 switch (RA) {
915 case ATTR_NONE:
916 break;
917 case ATTR_FILTERED:
918 break;
919 case ATTR_ALL_SET:
920 if (!AllowMixed)
921 addCandidateFilter(StartBit, FilterWidth);
922 break;
923 case ATTR_ALL_UNSET:
924 break;
925 case ATTR_MIXED:
926 if (AllowMixed)
927 addCandidateFilter(StartBit, FilterWidth);
928 break;
929 }
930
931 // We have finished with the filter processings. Now it's time to choose
932 // the best performing filter.
933 auto MaxIt = llvm::max_element(Range&: Filters, C: [](const std::unique_ptr<Filter> &A,
934 const std::unique_ptr<Filter> &B) {
935 return A->usefulness() < B->usefulness();
936 });
937 if (MaxIt == Filters.end() || (*MaxIt)->usefulness() == 0)
938 return nullptr;
939 return std::move(*MaxIt);
940}
941
942std::unique_ptr<Filter> FilterChooser::findBestFilter() const {
943 // We maintain BIT_WIDTH copies of the bitAttrs automaton.
944 // The automaton consumes the corresponding bit from each
945 // instruction.
946 //
947 // Input symbols: 0, 1, _ (unset), and . (any of the above).
948 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
949 // Initial state: NONE.
950 //
951 // (NONE) ------- [01] -> (ALL_SET)
952 // (NONE) ------- _ ----> (ALL_UNSET)
953 // (ALL_SET) ---- [01] -> (ALL_SET)
954 // (ALL_SET) ---- _ ----> (MIXED)
955 // (ALL_UNSET) -- [01] -> (MIXED)
956 // (ALL_UNSET) -- _ ----> (ALL_UNSET)
957 // (MIXED) ------ . ----> (MIXED)
958 // (FILTERED)---- . ----> (FILTERED)
959
960 unsigned FilterWidth = FilterBits.getBitWidth();
961 SmallVector<bitAttr_t, 128> BitAttrs(FilterWidth, ATTR_NONE);
962
963 // FILTERED bit positions provide no entropy and are not worthy of pursuing.
964 // Filter::recurse() set either 1 or 0 for each position.
965 for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex)
966 if (isPositionFiltered(Idx: BitIndex))
967 BitAttrs[BitIndex] = ATTR_FILTERED;
968
969 for (unsigned EncodingID : EncodingIDs) {
970 const InstructionEncoding &Encoding = Encodings[EncodingID];
971 KnownBits EncodingBits = Encoding.getMandatoryBits();
972
973 for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
974 bool IsKnown = EncodingBits.Zero[BitIndex] || EncodingBits.One[BitIndex];
975 switch (BitAttrs[BitIndex]) {
976 case ATTR_NONE:
977 if (IsKnown)
978 BitAttrs[BitIndex] = ATTR_ALL_SET;
979 else
980 BitAttrs[BitIndex] = ATTR_ALL_UNSET;
981 break;
982 case ATTR_ALL_SET:
983 if (!IsKnown)
984 BitAttrs[BitIndex] = ATTR_MIXED;
985 break;
986 case ATTR_ALL_UNSET:
987 if (IsKnown)
988 BitAttrs[BitIndex] = ATTR_MIXED;
989 break;
990 case ATTR_MIXED:
991 case ATTR_FILTERED:
992 break;
993 }
994 }
995 }
996
997 // Try regions of consecutive known bit values first.
998 if (std::unique_ptr<Filter> F =
999 findBestFilter(BitAttrs, /*AllowMixed=*/false))
1000 return F;
1001
1002 // Then regions of mixed bits (both known and unitialized bit values allowed).
1003 if (std::unique_ptr<Filter> F = findBestFilter(BitAttrs, /*AllowMixed=*/true))
1004 return F;
1005
1006 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1007 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1008 // well-known encoding pattern. In such case, we backtrack and scan for the
1009 // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1010 if (EncodingIDs.size() == 3) {
1011 if (std::unique_ptr<Filter> F =
1012 findBestFilter(BitAttrs, /*AllowMixed=*/true, /*Greedy=*/false))
1013 return F;
1014 }
1015
1016 // There is a conflict we could not resolve.
1017 return nullptr;
1018}
1019
1020// Decides on the best configuration of filter(s) to use in order to decode
1021// the instructions. A conflict of instructions may occur, in which case we
1022// dump the conflict set to the standard error.
1023void FilterChooser::doFilter() {
1024 assert(!EncodingIDs.empty() && "FilterChooser created with no instructions");
1025
1026 // No filter needed.
1027 if (EncodingIDs.size() == 1) {
1028 SingletonEncodingID = EncodingIDs.front();
1029 return;
1030 }
1031
1032 std::unique_ptr<Filter> BestFilter = findBestFilter();
1033 if (BestFilter) {
1034 applyFilter(F: *BestFilter);
1035 return;
1036 }
1037
1038 // Print out useful conflict information for postmortem analysis.
1039 errs() << "Decoding Conflict:\n";
1040 dump();
1041 HasConflict = true;
1042}
1043
1044void FilterChooser::dump() const {
1045 indent Indent(4);
1046 // Helps to keep the output right-justified.
1047 unsigned PadToWidth = getMaxEncodingWidth();
1048
1049 // Dump filter stack.
1050 dumpStack(OS&: errs(), Indent, PadToWidth);
1051
1052 // Dump encodings.
1053 for (unsigned EncodingID : EncodingIDs) {
1054 const InstructionEncoding &Encoding = Encodings[EncodingID];
1055 errs() << Indent << indent(PadToWidth - Encoding.getBitWidth());
1056 printKnownBits(OS&: errs(), Bits: Encoding.getMandatoryBits(), Unknown: '_');
1057 errs() << " " << Encoding.getName() << '\n';
1058 }
1059}
1060
1061// emitDecodeInstruction - Emit the templated helper function
1062// decodeInstruction().
1063static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
1064 const DecoderTableInfo &TableInfo) {
1065 OS << R"(
1066template <typename InsnType>
1067static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
1068 InsnType insn, uint64_t Address,
1069 const MCDisassembler *DisAsm,
1070 const MCSubtargetInfo &STI)";
1071 if (IsVarLenInst) {
1072 OS << ",\n "
1073 "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
1074 }
1075 OS << ") {\n";
1076 if (TableInfo.HasCheckPredicate)
1077 OS << " const FeatureBitset &Bits = STI.getFeatureBits();\n";
1078 OS << " const uint8_t *Ptr = DecodeTable;\n";
1079
1080 if (SpecializeDecodersPerBitwidth) {
1081 // Fail with a fatal error if decoder table's bitwidth does not match
1082 // `InsnType` bitwidth.
1083 OS << R"(
1084 [[maybe_unused]] uint32_t BitWidth = decodeULEB128AndIncUnsafe(Ptr);
1085 assert(InsnBitWidth<InsnType> == BitWidth &&
1086 "Table and instruction bitwidth mismatch");
1087)";
1088 }
1089
1090 OS << R"(
1091 SmallVector<const uint8_t *, 8> ScopeStack;
1092 DecodeStatus S = MCDisassembler::Success;
1093 while (true) {
1094 ptrdiff_t Loc = Ptr - DecodeTable;
1095 const uint8_t DecoderOp = *Ptr++;
1096 switch (DecoderOp) {
1097 default:
1098 errs() << Loc << ": Unexpected decode table opcode: "
1099 << (int)DecoderOp << '\n';
1100 return MCDisassembler::Fail;
1101 case OPC_Scope: {
1102 unsigned NumToSkip = decodeULEB128AndIncUnsafe(Ptr);
1103 const uint8_t *SkipTo = Ptr + NumToSkip;
1104 ScopeStack.push_back(SkipTo);
1105 LLVM_DEBUG(dbgs() << Loc << ": OPC_Scope(" << SkipTo - DecodeTable
1106 << ")\n");
1107 continue;
1108 }
1109 case OPC_SwitchField: {
1110 // Decode the start value.
1111 unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
1112 unsigned Len = *Ptr++;)";
1113 if (IsVarLenInst)
1114 OS << "\n makeUp(insn, Start + Len);";
1115 OS << R"(
1116 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
1117 uint64_t CaseValue;
1118 unsigned CaseSize;
1119 while (true) {
1120 CaseValue = decodeULEB128AndIncUnsafe(Ptr);
1121 CaseSize = decodeULEB128AndIncUnsafe(Ptr);
1122 if (FieldValue == CaseValue || !CaseSize)
1123 break;
1124 Ptr += CaseSize;
1125 }
1126 if (FieldValue == CaseValue) {
1127 LLVM_DEBUG(dbgs() << Loc << ": OPC_SwitchField(" << Start << ", " << Len
1128 << "): " << FieldValue << '\n');
1129 continue;
1130 }
1131 break;
1132 }
1133 case OPC_CheckField: {
1134 // Decode the start value.
1135 unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
1136 unsigned Len = *Ptr;)";
1137 if (IsVarLenInst)
1138 OS << "\n makeUp(insn, Start + Len);";
1139 OS << R"(
1140 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
1141 // Decode the field value.
1142 unsigned PtrLen = 0;
1143 uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
1144 Ptr += PtrLen;
1145 bool Failed = ExpectedValue != FieldValue;
1146
1147 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", " << Len
1148 << ", " << ExpectedValue << "): FieldValue = "
1149 << FieldValue << ", ExpectedValue = " << ExpectedValue
1150 << ": " << (Failed ? "FAIL, " : "PASS\n"););
1151 if (!Failed)
1152 continue;
1153 break;
1154 })";
1155 if (TableInfo.HasCheckPredicate) {
1156 OS << R"(
1157 case OPC_CheckPredicate: {
1158 // Decode the Predicate Index value.
1159 unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr);
1160 // Check the predicate.
1161 bool Failed = !checkDecoderPredicate(PIdx, Bits);
1162
1163 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): "
1164 << (Failed ? "FAIL, " : "PASS\n"););
1165 if (!Failed)
1166 continue;
1167 break;
1168 })";
1169 }
1170 OS << R"(
1171 case OPC_Decode: {
1172 // Decode the Opcode value.
1173 unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
1174 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
1175
1176 MI.clear();
1177 MI.setOpcode(Opc);
1178 bool DecodeComplete;)";
1179 if (IsVarLenInst) {
1180 OS << "\n unsigned Len = InstrLenTable[Opc];\n"
1181 << " makeUp(insn, Len);";
1182 }
1183 OS << R"(
1184 S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm,
1185 DecodeComplete);
1186 LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
1187 << ", using decoder " << DecodeIdx << ": "
1188 << (S ? "PASS, " : "FAIL, "));
1189
1190 if (DecodeComplete) {
1191 LLVM_DEBUG(dbgs() << "decoding complete\n");
1192 return S;
1193 }
1194 assert(S == MCDisassembler::Fail);
1195 // Reset decode status. This also drops a SoftFail status that could be
1196 // set before the decode attempt.
1197 S = MCDisassembler::Success;
1198 break;
1199 })";
1200 if (TableInfo.HasSoftFail) {
1201 OS << R"(
1202 case OPC_SoftFail: {
1203 // Decode the mask values.
1204 uint64_t PositiveMask = decodeULEB128AndIncUnsafe(Ptr);
1205 uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
1206 bool Failed = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
1207 if (Failed)
1208 S = MCDisassembler::SoftFail;
1209 LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Failed ? "FAIL\n" : "PASS\n"));
1210 continue;
1211 })";
1212 }
1213 OS << R"(
1214 }
1215 if (ScopeStack.empty()) {
1216 LLVM_DEBUG(dbgs() << "returning Fail\n");
1217 return MCDisassembler::Fail;
1218 }
1219 Ptr = ScopeStack.pop_back_val();
1220 LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
1221 }
1222 llvm_unreachable("bogosity detected in disassembler state machine!");
1223}
1224
1225)";
1226}
1227
1228namespace {
1229
1230class DecoderTreeBuilder {
1231 DecoderContext &Ctx;
1232 const CodeGenTarget &Target;
1233 ArrayRef<InstructionEncoding> Encodings;
1234
1235public:
1236 DecoderTreeBuilder(DecoderContext &Ctx, const CodeGenTarget &Target,
1237 ArrayRef<InstructionEncoding> Encodings)
1238 : Ctx(Ctx), Target(Target), Encodings(Encodings) {}
1239
1240 std::unique_ptr<DecoderTreeNode> buildTree(ArrayRef<unsigned> EncodingIDs);
1241
1242private:
1243 std::unique_ptr<DecoderTreeNode>
1244 convertSingleton(unsigned EncodingID, const KnownBits &FilterBits);
1245
1246 std::unique_ptr<DecoderTreeNode> convertFilterChooserMap(
1247 unsigned StartBit, unsigned NumBits,
1248 const std::map<uint64_t, std::unique_ptr<const FilterChooser>> &FCMap);
1249
1250 std::unique_ptr<DecoderTreeNode>
1251 convertFilterChooser(const FilterChooser *FC);
1252};
1253
1254} // namespace
1255
1256std::unique_ptr<DecoderTreeNode>
1257DecoderTreeBuilder::convertSingleton(unsigned EncodingID,
1258 const KnownBits &FilterBits) {
1259 const InstructionEncoding &Encoding = Encodings[EncodingID];
1260 auto N = std::make_unique<CheckAllNode>();
1261
1262 std::string Predicate = getPredicateString(Encoding, TargetName: Target.getName());
1263 if (!Predicate.empty()) {
1264 unsigned PredicateIndex = Ctx.getPredicateIndex(Predicate);
1265 N->addChild(Child: std::make_unique<CheckPredicateNode>(args&: PredicateIndex));
1266 }
1267
1268 std::vector<EncodingIsland> Islands =
1269 getIslands(EncodingBits: Encoding.getMandatoryBits(), FilterBits);
1270 for (const EncodingIsland &Island : reverse(C&: Islands)) {
1271 N->addChild(Child: std::make_unique<CheckFieldNode>(
1272 args: Island.StartBit, args: Island.NumBits, args: Island.FieldVal));
1273 }
1274
1275 const KnownBits &InstBits = Encoding.getInstBits();
1276 const APInt &SoftFailMask = Encoding.getSoftFailMask();
1277 if (!SoftFailMask.isZero()) {
1278 APInt PositiveMask = InstBits.Zero & SoftFailMask;
1279 APInt NegativeMask = InstBits.One & SoftFailMask;
1280 N->addChild(Child: std::make_unique<SoftFailNode>(args: PositiveMask.getZExtValue(),
1281 args: NegativeMask.getZExtValue()));
1282 }
1283
1284 unsigned DecoderIndex = Ctx.getDecoderIndex(Decoder: getDecoderString(Encoding));
1285 N->addChild(Child: std::make_unique<DecodeNode>(
1286 args: Encoding.getName(), args&: Encoding.getInstruction()->EnumVal, args&: DecoderIndex));
1287
1288 return N;
1289}
1290
1291std::unique_ptr<DecoderTreeNode> DecoderTreeBuilder::convertFilterChooserMap(
1292 unsigned StartBit, unsigned NumBits,
1293 const std::map<uint64_t, std::unique_ptr<const FilterChooser>> &FCMap) {
1294 if (FCMap.size() == 1) {
1295 const auto &[FieldVal, ChildFC] = *FCMap.begin();
1296 auto N = std::make_unique<CheckAllNode>();
1297 N->addChild(Child: std::make_unique<CheckFieldNode>(args&: StartBit, args&: NumBits, args: FieldVal));
1298 N->addChild(Child: convertFilterChooser(FC: ChildFC.get()));
1299 return N;
1300 }
1301 auto N = std::make_unique<SwitchFieldNode>(args&: StartBit, args&: NumBits);
1302 for (const auto &[FieldVal, ChildFC] : FCMap)
1303 N->addCase(Value: FieldVal, N: convertFilterChooser(FC: ChildFC.get()));
1304 return N;
1305}
1306
1307std::unique_ptr<DecoderTreeNode>
1308DecoderTreeBuilder::convertFilterChooser(const FilterChooser *FC) {
1309 auto N = std::make_unique<CheckAnyNode>();
1310
1311 do {
1312 if (FC->SingletonEncodingID)
1313 N->addChild(Child: convertSingleton(EncodingID: *FC->SingletonEncodingID, FilterBits: FC->FilterBits));
1314 else
1315 N->addChild(Child: convertFilterChooserMap(StartBit: FC->StartBit, NumBits: FC->NumBits,
1316 FCMap: FC->FilterChooserMap));
1317 FC = FC->VariableFC.get();
1318 } while (FC);
1319
1320 return N;
1321}
1322
1323std::unique_ptr<DecoderTreeNode>
1324DecoderTreeBuilder::buildTree(ArrayRef<unsigned> EncodingIDs) {
1325 FilterChooser FC(Encodings, EncodingIDs);
1326 if (FC.hasConflict())
1327 return nullptr;
1328 return convertFilterChooser(FC: &FC);
1329}
1330
1331/// Collects all HwModes referenced by the target for encoding purposes.
1332void DecoderEmitter::collectHwModesReferencedForEncodings(
1333 std::vector<unsigned> &HwModeIDs,
1334 NamespacesHwModesMap &NamespacesWithHwModes) const {
1335 SmallBitVector BV(CGH.getNumModeIds());
1336 for (const auto &MS : CGH.getHwModeSelects()) {
1337 for (auto [HwModeID, EncodingDef] : MS.second.Items) {
1338 if (EncodingDef->isSubClassOf(Name: "InstructionEncoding")) {
1339 StringRef DecoderNamespace =
1340 EncodingDef->getValueAsString(FieldName: "DecoderNamespace");
1341 NamespacesWithHwModes[DecoderNamespace].insert(x: HwModeID);
1342 BV.set(HwModeID);
1343 }
1344 }
1345 }
1346 // FIXME: Can't do `HwModeIDs.assign(BV.set_bits_begin(), BV.set_bits_end())`
1347 // because const_set_bits_iterator_impl is not copy-assignable.
1348 // This breaks some MacOS builds.
1349 llvm::copy(Range: BV.set_bits(), Out: std::back_inserter(x&: HwModeIDs));
1350}
1351
1352void DecoderEmitter::handleHwModesUnrelatedEncodings(
1353 unsigned EncodingID, ArrayRef<unsigned> HwModeIDs,
1354 NamespacesHwModesMap &NamespacesWithHwModes) {
1355 switch (DecoderEmitterSuppressDuplicates) {
1356 case SUPPRESSION_DISABLE: {
1357 for (unsigned HwModeID : HwModeIDs)
1358 EncodingIDsByHwMode[HwModeID].push_back(x: EncodingID);
1359 break;
1360 }
1361 case SUPPRESSION_LEVEL1: {
1362 StringRef DecoderNamespace = Encodings[EncodingID].getDecoderNamespace();
1363 auto It = NamespacesWithHwModes.find(x: DecoderNamespace);
1364 if (It != NamespacesWithHwModes.end()) {
1365 for (unsigned HwModeID : It->second)
1366 EncodingIDsByHwMode[HwModeID].push_back(x: EncodingID);
1367 } else {
1368 // Only emit the encoding once, as it's DecoderNamespace doesn't
1369 // contain any HwModes.
1370 EncodingIDsByHwMode[DefaultMode].push_back(x: EncodingID);
1371 }
1372 break;
1373 }
1374 case SUPPRESSION_LEVEL2:
1375 EncodingIDsByHwMode[DefaultMode].push_back(x: EncodingID);
1376 break;
1377 }
1378}
1379
1380/// Checks if the given target-specific non-pseudo instruction
1381/// is a candidate for decoding.
1382static bool isDecodableInstruction(const Record *InstDef) {
1383 return !InstDef->getValueAsBit(FieldName: "isAsmParserOnly") &&
1384 !InstDef->getValueAsBit(FieldName: "isCodeGenOnly");
1385}
1386
1387/// Checks if the given encoding is valid.
1388static bool isValidEncoding(const Record *EncodingDef) {
1389 const RecordVal *InstField = EncodingDef->getValue(Name: "Inst");
1390 if (!InstField)
1391 return false;
1392
1393 if (const auto *InstInit = dyn_cast<BitsInit>(Val: InstField->getValue())) {
1394 // Fixed-length encoding. Size must be non-zero.
1395 if (!EncodingDef->getValueAsInt(FieldName: "Size"))
1396 return false;
1397
1398 // At least one of the encoding bits must be complete (not '?').
1399 // FIXME: This should take SoftFail field into account.
1400 return !InstInit->allInComplete();
1401 }
1402
1403 if (const auto *InstInit = dyn_cast<DagInit>(Val: InstField->getValue())) {
1404 // Variable-length encoding.
1405 // At least one of the encoding bits must be complete (not '?').
1406 VarLenInst VLI(InstInit, InstField);
1407 return !all_of(Range&: VLI, P: [](const EncodingSegment &Segment) {
1408 return isa<UnsetInit>(Val: Segment.Value);
1409 });
1410 }
1411
1412 // Inst field is neither BitsInit nor DagInit. This is something unsupported.
1413 return false;
1414}
1415
1416/// Parses all InstructionEncoding instances and fills internal data structures.
1417void DecoderEmitter::parseInstructionEncodings() {
1418 // First, collect all encoding-related HwModes referenced by the target.
1419 // And establish a mapping table between DecoderNamespace and HwMode.
1420 // If HwModeNames is empty, add the default mode so we always have one HwMode.
1421 std::vector<unsigned> HwModeIDs;
1422 NamespacesHwModesMap NamespacesWithHwModes;
1423 collectHwModesReferencedForEncodings(HwModeIDs, NamespacesWithHwModes);
1424 if (HwModeIDs.empty())
1425 HwModeIDs.push_back(x: DefaultMode);
1426
1427 ArrayRef<const CodeGenInstruction *> Instructions =
1428 Target.getTargetNonPseudoInstructions();
1429 Encodings.reserve(n: Instructions.size());
1430
1431 for (const CodeGenInstruction *Inst : Instructions) {
1432 const Record *InstDef = Inst->TheDef;
1433 if (!isDecodableInstruction(InstDef)) {
1434 ++NumEncodingsLackingDisasm;
1435 continue;
1436 }
1437
1438 if (const Record *RV = InstDef->getValueAsOptionalDef(FieldName: "EncodingInfos")) {
1439 EncodingInfoByHwMode EBM(RV, CGH);
1440 for (auto [HwModeID, EncodingDef] : EBM) {
1441 if (!isValidEncoding(EncodingDef)) {
1442 // TODO: Should probably give a warning.
1443 ++NumEncodingsOmitted;
1444 continue;
1445 }
1446 unsigned EncodingID = Encodings.size();
1447 Encodings.emplace_back(args&: EncodingDef, args&: Inst);
1448 EncodingIDsByHwMode[HwModeID].push_back(x: EncodingID);
1449 }
1450 continue; // Ignore encoding specified by Instruction itself.
1451 }
1452
1453 if (!isValidEncoding(EncodingDef: InstDef)) {
1454 ++NumEncodingsOmitted;
1455 continue;
1456 }
1457
1458 unsigned EncodingID = Encodings.size();
1459 Encodings.emplace_back(args&: InstDef, args&: Inst);
1460
1461 // This instruction is encoded the same on all HwModes.
1462 // According to user needs, add it to all, some, or only the default HwMode.
1463 handleHwModesUnrelatedEncodings(EncodingID, HwModeIDs,
1464 NamespacesWithHwModes);
1465 }
1466
1467 for (const Record *EncodingDef :
1468 RK.getAllDerivedDefinitions(ClassName: "AdditionalEncoding")) {
1469 const Record *InstDef = EncodingDef->getValueAsDef(FieldName: "AliasOf");
1470 // TODO: Should probably give a warning in these cases.
1471 // What's the point of specifying an additional encoding
1472 // if it is invalid or if the instruction is not decodable?
1473 if (!isDecodableInstruction(InstDef)) {
1474 ++NumEncodingsLackingDisasm;
1475 continue;
1476 }
1477 if (!isValidEncoding(EncodingDef)) {
1478 ++NumEncodingsOmitted;
1479 continue;
1480 }
1481 unsigned EncodingID = Encodings.size();
1482 Encodings.emplace_back(args&: EncodingDef, args: &Target.getInstruction(InstRec: InstDef));
1483 EncodingIDsByHwMode[DefaultMode].push_back(x: EncodingID);
1484 }
1485
1486 // Do some statistics.
1487 NumInstructions = Instructions.size();
1488 NumEncodingsSupported = Encodings.size();
1489 NumEncodings = NumEncodingsSupported + NumEncodingsOmitted;
1490}
1491
1492DecoderEmitter::DecoderEmitter(const RecordKeeper &RK)
1493 : RK(RK), Target(RK), CGH(Target.getHwModes()) {
1494 Target.reverseBitsForLittleEndianEncoding();
1495 parseInstructionEncodings();
1496}
1497
1498// Emits disassembler code for instruction decoding.
1499void DecoderEmitter::run(raw_ostream &o) const {
1500 formatted_raw_ostream OS(o);
1501 OS << R"(
1502#include "llvm/MC/MCInst.h"
1503#include "llvm/MC/MCSubtargetInfo.h"
1504#include "llvm/Support/DataTypes.h"
1505#include "llvm/Support/Debug.h"
1506#include "llvm/Support/LEB128.h"
1507#include "llvm/Support/raw_ostream.h"
1508#include "llvm/TargetParser/SubtargetFeature.h"
1509#include <assert.h>
1510
1511namespace {
1512
1513// InsnBitWidth is essentially a type trait used by the decoder emitter to query
1514// the supported bitwidth for a given type. But default, the value is 0, making
1515// it an invalid type for use as `InsnType` when instantiating the decoder.
1516// Individual targets are expected to provide specializations for these based
1517// on their usage.
1518template <typename T> constexpr uint32_t InsnBitWidth = 0;
1519
1520)";
1521
1522 // Do extra bookkeeping for variable-length encodings.
1523 bool IsVarLenInst = Target.hasVariableLengthEncodings();
1524 unsigned MaxInstLen = 0;
1525 if (IsVarLenInst) {
1526 std::vector<unsigned> InstrLen(Target.getInstructions().size(), 0);
1527 for (const InstructionEncoding &Encoding : Encodings) {
1528 MaxInstLen = std::max(a: MaxInstLen, b: Encoding.getBitWidth());
1529 InstrLen[Target.getInstrIntValue(R: Encoding.getInstruction()->TheDef)] =
1530 Encoding.getBitWidth();
1531 }
1532
1533 // For variable instruction, we emit an instruction length table to let the
1534 // decoder know how long the instructions are. You can see example usage in
1535 // M68k's disassembler.
1536 emitInstrLenTable(OS, InstrLen);
1537 }
1538
1539 emitRegClassByHwModeDecoders(OS);
1540
1541 // Map of (bitwidth, namespace, hwmode) tuple to encoding IDs.
1542 // Its organized as a nested map, with the (namespace, hwmode) as the key for
1543 // the inner map and bitwidth as the key for the outer map. We use std::map
1544 // for deterministic iteration order so that the code emitted is also
1545 // deterministic.
1546 using InnerKeyTy = std::pair<StringRef, unsigned>;
1547 using InnerMapTy = std::map<InnerKeyTy, std::vector<unsigned>>;
1548 std::map<unsigned, InnerMapTy> EncMap;
1549
1550 for (const auto &[HwModeID, EncodingIDs] : EncodingIDsByHwMode) {
1551 for (unsigned EncodingID : EncodingIDs) {
1552 const InstructionEncoding &Encoding = Encodings[EncodingID];
1553 const unsigned BitWidth =
1554 IsVarLenInst ? MaxInstLen : Encoding.getBitWidth();
1555 StringRef DecoderNamespace = Encoding.getDecoderNamespace();
1556 EncMap[BitWidth][{DecoderNamespace, HwModeID}].push_back(x: EncodingID);
1557 }
1558 }
1559
1560 // Variable length instructions use the same `APInt` type for all instructions
1561 // so we cannot specialize decoders based on instruction bitwidths (which
1562 // requires using different `InstType` for differet bitwidths for the correct
1563 // template specialization to kick in).
1564 if (IsVarLenInst && SpecializeDecodersPerBitwidth)
1565 PrintFatalError(
1566 Msg: "Cannot specialize decoders for variable length instuctions");
1567
1568 DecoderContext Ctx;
1569 DecoderTreeBuilder TreeBuilder(Ctx, Target, Encodings);
1570
1571 DecoderTableInfo TableInfo;
1572 DecoderTableEmitter TableEmitter(TableInfo, OS);
1573
1574 // Emit a table for each (namespace, hwmode, bitwidth) combination.
1575 // Entries in `EncMap` are already sorted by bitwidth. So bucketing per
1576 // bitwidth can be done on-the-fly as we iterate over the map.
1577 bool HasConflict = false;
1578 for (const auto &[BitWidth, BWMap] : EncMap) {
1579 for (const auto &[Key, EncodingIDs] : BWMap) {
1580 auto [DecoderNamespace, HwModeID] = Key;
1581
1582 std::unique_ptr<DecoderTreeNode> Tree =
1583 TreeBuilder.buildTree(EncodingIDs);
1584
1585 // Skip emitting the table if a conflict has been detected.
1586 if (!Tree) {
1587 HasConflict = true;
1588 continue;
1589 }
1590
1591 // Form the table name.
1592 SmallString<32> TableName({"DecoderTable", DecoderNamespace});
1593 if (HwModeID != DefaultMode)
1594 TableName.append(Refs: {"_", Target.getHwModes().getModeName(Id: HwModeID)});
1595 TableName.append(RHS: utostr(X: BitWidth));
1596
1597 TableEmitter.emitTable(
1598 TableName, BitWidth: SpecializeDecodersPerBitwidth ? BitWidth : 0, Root: Tree.get());
1599 }
1600
1601 // Each BitWidth get's its own decoders and decoder function if
1602 // SpecializeDecodersPerBitwidth is enabled.
1603 if (SpecializeDecodersPerBitwidth) {
1604 emitDecoderFunction(OS, Decoders: Ctx.Decoders, BucketBitWidth: BitWidth);
1605 Ctx.Decoders.clear();
1606 }
1607 }
1608
1609 if (HasConflict)
1610 PrintFatalError(Msg: "Decoding conflict encountered");
1611
1612 // Emit the decoder function for the last bucket. This will also emit the
1613 // single decoder function if SpecializeDecodersPerBitwidth = false.
1614 if (!SpecializeDecodersPerBitwidth)
1615 emitDecoderFunction(OS, Decoders: Ctx.Decoders, BucketBitWidth: 0);
1616
1617 // Emit the predicate function.
1618 if (TableInfo.HasCheckPredicate)
1619 emitPredicateFunction(OS, Predicates: Ctx.Predicates);
1620
1621 // Emit the main entry point for the decoder, decodeInstruction().
1622 emitDecodeInstruction(OS, IsVarLenInst, TableInfo);
1623
1624 OS << "\n} // namespace\n";
1625}
1626
1627void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS) {
1628 DecoderEmitter(RK).run(o&: OS);
1629}
1630