1//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// It contains the tablegen backend that emits the decoder functions for
10// targets with fixed/variable length instruction set.
11//
12//===----------------------------------------------------------------------===//
13
14#include "Common/CodeGenHwModes.h"
15#include "Common/CodeGenInstruction.h"
16#include "Common/CodeGenRegisters.h"
17#include "Common/CodeGenTarget.h"
18#include "Common/InfoByHwMode.h"
19#include "Common/InstructionEncoding.h"
20#include "Common/SubtargetFeatureInfo.h"
21#include "Common/VarLenCodeEmitterGen.h"
22#include "DecoderTableEmitter.h"
23#include "DecoderTree.h"
24#include "TableGenBackends.h"
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/STLExtras.h"
28#include "llvm/ADT/SetVector.h"
29#include "llvm/ADT/SmallBitVector.h"
30#include "llvm/ADT/SmallString.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/ADT/StringExtras.h"
33#include "llvm/ADT/StringRef.h"
34#include "llvm/Support/Casting.h"
35#include "llvm/Support/CommandLine.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Support/Format.h"
39#include "llvm/Support/FormatVariadic.h"
40#include "llvm/Support/FormattedStream.h"
41#include "llvm/Support/KnownBits.h"
42#include "llvm/Support/MathExtras.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/TableGen/Error.h"
45#include "llvm/TableGen/Record.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <cstdint>
50#include <map>
51#include <memory>
52#include <set>
53#include <string>
54#include <utility>
55#include <vector>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "decoder-emitter"
60
61extern cl::OptionCategory DisassemblerEmitterCat;
62
63enum SuppressLevel {
64 SUPPRESSION_DISABLE,
65 SUPPRESSION_LEVEL1,
66 SUPPRESSION_LEVEL2
67};
68
69static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
70 "suppress-per-hwmode-duplicates",
71 cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"),
72 cl::values(
73 clEnumValN(
74 SUPPRESSION_DISABLE, "O0",
75 "Do not prevent DecoderTable duplications caused by HwModes"),
76 clEnumValN(
77 SUPPRESSION_LEVEL1, "O1",
78 "Remove duplicate DecoderTable entries generated due to HwModes"),
79 clEnumValN(
80 SUPPRESSION_LEVEL2, "O2",
81 "Extract HwModes-specific instructions into new DecoderTables, "
82 "significantly reducing Table Duplications")),
83 cl::init(Val: SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
84
85static cl::opt<bool> UseFnTableInDecodeToMCInst(
86 "use-fn-table-in-decode-to-mcinst",
87 cl::desc(
88 "Use a table of function pointers instead of a switch case in the\n"
89 "generated `decodeToMCInst` function. Helps improve compile time\n"
90 "of the generated code."),
91 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
92
93// Enabling this option requires use of different `InsnType` for different
94// bitwidths and defining `InsnBitWidth` template specialization for the
95// `InsnType` types used. Some common specializations are already defined in
96// MCDecoder.h.
97static cl::opt<bool> SpecializeDecodersPerBitwidth(
98 "specialize-decoders-per-bitwidth",
99 cl::desc("Specialize the generated `decodeToMCInst` function per bitwidth. "
100 "Helps reduce the code size."),
101 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
102
103static cl::opt<bool> IgnoreNonDecodableOperands(
104 "ignore-non-decodable-operands",
105 cl::desc(
106 "Do not issue an error if an operand cannot be decoded automatically."),
107 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
108
109static cl::opt<bool> IgnoreFullyDefinedOperands(
110 "ignore-fully-defined-operands",
111 cl::desc(
112 "Do not automatically decode operands with no '?' in their encoding."),
113 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
114
115STATISTIC(NumEncodings, "Number of encodings considered");
116STATISTIC(NumEncodingsLackingDisasm,
117 "Number of encodings without disassembler info");
118STATISTIC(NumInstructions, "Number of instructions considered");
119STATISTIC(NumEncodingsSupported, "Number of encodings supported");
120STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
121
122/// Similar to KnownBits::print(), but allows you to specify a character to use
123/// to print unknown bits.
124static void printKnownBits(raw_ostream &OS, const KnownBits &Bits,
125 char Unknown) {
126 for (unsigned I = Bits.getBitWidth(); I--;) {
127 if (Bits.Zero[I] && Bits.One[I])
128 OS << '!';
129 else if (Bits.Zero[I])
130 OS << '0';
131 else if (Bits.One[I])
132 OS << '1';
133 else
134 OS << Unknown;
135 }
136}
137
138namespace {
139
140/// Sorting predicate to sort encoding IDs by encoding width.
141class LessEncodingIDByWidth {
142 ArrayRef<InstructionEncoding> Encodings;
143
144public:
145 explicit LessEncodingIDByWidth(ArrayRef<InstructionEncoding> Encodings)
146 : Encodings(Encodings) {}
147
148 bool operator()(unsigned ID1, unsigned ID2) const {
149 return Encodings[ID1].getBitWidth() < Encodings[ID2].getBitWidth();
150 }
151};
152
153using NamespacesHwModesMap = std::map<StringRef, std::set<unsigned>>;
154
155class DecoderEmitter {
156 const RecordKeeper &RK;
157 CodeGenTarget Target;
158 const CodeGenHwModes &CGH;
159
160 /// All parsed encodings.
161 std::vector<InstructionEncoding> Encodings;
162
163 /// Encodings IDs for each HwMode. An ID is an index into Encodings.
164 SmallDenseMap<unsigned, std::vector<unsigned>> EncodingIDsByHwMode;
165
166public:
167 explicit DecoderEmitter(const RecordKeeper &RK);
168
169 const CodeGenTarget &getTarget() const { return Target; }
170
171 void emitInstrLenTable(formatted_raw_ostream &OS,
172 ArrayRef<unsigned> InstrLen) const;
173 void emitPredicateFunction(formatted_raw_ostream &OS,
174 const PredicateSet &Predicates) const;
175
176 void emitRegClassByHwModeDecoders(formatted_raw_ostream &OS) const;
177 void emitDecoderFunction(formatted_raw_ostream &OS,
178 const DecoderSet &Decoders,
179 unsigned BucketBitWidth) const;
180
181 // run - Output the code emitter
182 void run(raw_ostream &o) const;
183
184private:
185 void collectHwModesReferencedForEncodings(
186 std::vector<unsigned> &HwModeIDs,
187 NamespacesHwModesMap &NamespacesWithHwModes) const;
188
189 void
190 handleHwModesUnrelatedEncodings(unsigned EncodingID,
191 ArrayRef<unsigned> HwModeIDs,
192 NamespacesHwModesMap &NamespacesWithHwModes);
193
194 void parseInstructionEncodings();
195};
196
197struct EncodingIsland {
198 unsigned StartBit;
199 unsigned NumBits;
200 uint64_t FieldVal;
201};
202
203/// Filter - Filter works with FilterChooser to produce the decoding tree for
204/// the ISA.
205///
206/// It is useful to think of a Filter as governing the switch stmts of the
207/// decoding tree in a certain level. Each case stmt delegates to an inferior
208/// FilterChooser to decide what further decoding logic to employ, or in another
209/// words, what other remaining bits to look at. The FilterChooser eventually
210/// chooses a best Filter to do its job.
211///
212/// This recursive scheme ends when the number of Opcodes assigned to the
213/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
214/// the Filter/FilterChooser combo does not know how to distinguish among the
215/// Opcodes assigned.
216///
217/// An example of a conflict is
218///
219/// Decoding Conflict:
220/// ................................
221/// 1111............................
222/// 1111010.........................
223/// 1111010...00....................
224/// 1111010...00........0001........
225/// 111101000.00........0001........
226/// 111101000.00........00010000....
227/// 111101000_00________00010000____ VST4q8a
228/// 111101000_00________00010000____ VST4q8b
229///
230/// The Debug output shows the path that the decoding tree follows to reach the
231/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
232/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
233///
234/// The encoding info in the .td files does not specify this meta information,
235/// which could have been used by the decoder to resolve the conflict. The
236/// decoder could try to decode the even/odd register numbering and assign to
237/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
238/// version and return the Opcode since the two have the same Asm format string.
239struct Filter {
240 unsigned StartBit; // the starting bit position
241 unsigned NumBits; // number of bits to filter
242
243 // Map of well-known segment value to the set of uid's with that value.
244 std::map<uint64_t, std::vector<unsigned>> FilteredIDs;
245
246 // Set of uid's with non-constant segment values.
247 std::vector<unsigned> VariableIDs;
248
249 Filter(ArrayRef<InstructionEncoding> Encodings,
250 ArrayRef<unsigned> EncodingIDs, unsigned StartBit, unsigned NumBits);
251
252 // Returns the number of fanout produced by the filter. More fanout implies
253 // the filter distinguishes more categories of instructions.
254 unsigned usefulness() const;
255}; // end class Filter
256
257// These are states of our finite state machines used in FilterChooser's
258// filterProcessor() which produces the filter candidates to use.
259enum bitAttr_t {
260 ATTR_NONE,
261 ATTR_FILTERED,
262 ATTR_ALL_SET,
263 ATTR_ALL_UNSET,
264 ATTR_MIXED
265};
266
267/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
268/// in order to perform the decoding of instructions at the current level.
269///
270/// Decoding proceeds from the top down. Based on the well-known encoding bits
271/// of instructions available, FilterChooser builds up the possible Filters that
272/// can further the task of decoding by distinguishing among the remaining
273/// candidate instructions.
274///
275/// Once a filter has been chosen, it is called upon to divide the decoding task
276/// into sub-tasks and delegates them to its inferior FilterChoosers for further
277/// processings.
278///
279/// It is useful to think of a Filter as governing the switch stmts of the
280/// decoding tree. And each case is delegated to an inferior FilterChooser to
281/// decide what further remaining bits to look at.
282
283class FilterChooser {
284 // TODO: Unfriend by providing the necessary accessors.
285 friend class DecoderTreeBuilder;
286
287 // Vector of encodings to choose our filter.
288 ArrayRef<InstructionEncoding> Encodings;
289
290 /// Encoding IDs for this filter chooser to work on.
291 /// Sorted by non-decreasing encoding width.
292 SmallVector<unsigned, 0> EncodingIDs;
293
294 // Array of bit values passed down from our parent.
295 // Set to all unknown for Parent == nullptr.
296 KnownBits FilterBits;
297
298 // Links to the FilterChooser above us in the decoding tree.
299 const FilterChooser *Parent;
300
301 /// If the selected filter matches multiple encodings, then this is the
302 /// starting position and the width of the filtered range.
303 unsigned StartBit;
304 unsigned NumBits;
305
306 /// If the selected filter matches multiple encodings, and there is
307 /// *exactly one* encoding in which all bits are known in the filtered range,
308 /// then this is the ID of that encoding.
309 /// Also used when there is only one encoding.
310 std::optional<unsigned> SingletonEncodingID;
311
312 /// If the selected filter matches multiple encodings, and there is
313 /// *at least one* encoding in which all bits are known in the filtered range,
314 /// then this is the FilterChooser created for the subset of encodings that
315 /// contain some unknown bits in the filtered range.
316 std::unique_ptr<const FilterChooser> VariableFC;
317
318 /// If the selected filter matches multiple encodings, and there is
319 /// *more than one* encoding in which all bits are known in the filtered
320 /// range, then this is a map of field values to FilterChoosers created for
321 /// the subset of encodings sharing that field value.
322 /// The "field value" here refers to the encoding bits in the filtered range.
323 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
324
325 /// Set to true if decoding conflict was encountered.
326 bool HasConflict = false;
327
328public:
329 /// Constructs a top-level filter chooser.
330 FilterChooser(ArrayRef<InstructionEncoding> Encodings,
331 ArrayRef<unsigned> EncodingIDs)
332 : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(nullptr) {
333 // Sort encoding IDs once.
334 stable_sort(Range&: this->EncodingIDs, C: LessEncodingIDByWidth(Encodings));
335 // Filter width is the width of the smallest encoding.
336 unsigned FilterWidth = Encodings[this->EncodingIDs.front()].getBitWidth();
337 FilterBits = KnownBits(FilterWidth);
338 doFilter();
339 }
340
341 /// Constructs an inferior filter chooser.
342 FilterChooser(ArrayRef<InstructionEncoding> Encodings,
343 ArrayRef<unsigned> EncodingIDs, const KnownBits &FilterBits,
344 const FilterChooser &Parent)
345 : Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(&Parent) {
346 // Inferior filter choosers are created from sorted array of encoding IDs.
347 assert(is_sorted(EncodingIDs, LessEncodingIDByWidth(Encodings)));
348 assert(!FilterBits.hasConflict() && "Broken filter");
349 // Filter width is the width of the smallest encoding.
350 unsigned FilterWidth = Encodings[EncodingIDs.front()].getBitWidth();
351 this->FilterBits = FilterBits.anyext(BitWidth: FilterWidth);
352 doFilter();
353 }
354
355 FilterChooser(const FilterChooser &) = delete;
356 void operator=(const FilterChooser &) = delete;
357
358 /// Returns the width of the largest encoding.
359 unsigned getMaxEncodingWidth() const {
360 // The last encoding ID is the ID of an encoding with the largest width.
361 return Encodings[EncodingIDs.back()].getBitWidth();
362 }
363
364 /// Returns true if any decoding conflicts were encountered.
365 bool hasConflict() const { return HasConflict; }
366
367private:
368 /// Applies the given filter to the set of encodings this FilterChooser
369 /// works with, creating inferior FilterChoosers as necessary.
370 void applyFilter(const Filter &F);
371
372 /// dumpStack - dumpStack traverses the filter chooser chain and calls
373 /// dumpFilterArray on each filter chooser up to the top level one.
374 void dumpStack(raw_ostream &OS, indent Indent, unsigned PadToWidth) const;
375
376 bool isPositionFiltered(unsigned Idx) const {
377 return FilterBits.Zero[Idx] || FilterBits.One[Idx];
378 }
379
380 /// Scans the well-known encoding bits of the encodings and, builds up a list
381 /// of candidate filters, and then returns the best one, if any.
382 std::unique_ptr<Filter> findBestFilter(ArrayRef<bitAttr_t> BitAttrs,
383 bool AllowMixed,
384 bool Greedy = true) const;
385
386 std::unique_ptr<Filter> findBestFilter() const;
387
388 // Decides on the best configuration of filter(s) to use in order to decode
389 // the instructions. A conflict of instructions may occur, in which case we
390 // dump the conflict set to the standard error.
391 void doFilter();
392
393public:
394 void dump() const;
395};
396
397} // end anonymous namespace
398
399///////////////////////////
400// //
401// Filter Implementation //
402// //
403///////////////////////////
404
405Filter::Filter(ArrayRef<InstructionEncoding> Encodings,
406 ArrayRef<unsigned> EncodingIDs, unsigned StartBit,
407 unsigned NumBits)
408 : StartBit(StartBit), NumBits(NumBits) {
409 for (unsigned EncodingID : EncodingIDs) {
410 const InstructionEncoding &Encoding = Encodings[EncodingID];
411 KnownBits EncodingBits = Encoding.getMandatoryBits();
412
413 // Scans the segment for possibly well-specified encoding bits.
414 KnownBits FieldBits = EncodingBits.extractBits(NumBits, BitPosition: StartBit);
415
416 if (FieldBits.isConstant()) {
417 // The encoding bits are well-known. Lets add the uid of the
418 // instruction into the bucket keyed off the constant field value.
419 FilteredIDs[FieldBits.getConstant().getZExtValue()].push_back(x: EncodingID);
420 } else {
421 // Some of the encoding bit(s) are unspecified. This contributes to
422 // one additional member of "Variable" instructions.
423 VariableIDs.push_back(x: EncodingID);
424 }
425 }
426
427 assert((FilteredIDs.size() + VariableIDs.size() > 0) &&
428 "Filter returns no instruction categories");
429}
430
431void FilterChooser::applyFilter(const Filter &F) {
432 StartBit = F.StartBit;
433 NumBits = F.NumBits;
434 assert(FilterBits.extractBits(NumBits, StartBit).isUnknown());
435
436 if (!F.VariableIDs.empty()) {
437 // Delegates to an inferior filter chooser for further processing on this
438 // group of instructions whose segment values are variable.
439 VariableFC = std::make_unique<FilterChooser>(args&: Encodings, args: F.VariableIDs,
440 args&: FilterBits, args&: *this);
441 HasConflict |= VariableFC->HasConflict;
442 }
443
444 // Otherwise, create sub choosers.
445 for (const auto &[FilterVal, InferiorEncodingIDs] : F.FilteredIDs) {
446 // Create a new filter by inserting the field bits into the parent filter.
447 APInt FieldBits(NumBits, FilterVal);
448 KnownBits InferiorFilterBits = FilterBits;
449 InferiorFilterBits.insertBits(SubBits: KnownBits::makeConstant(C: FieldBits), BitPosition: StartBit);
450
451 // Delegates to an inferior filter chooser for further processing on this
452 // category of instructions.
453 auto [It, _] = FilterChooserMap.try_emplace(
454 k: FilterVal,
455 args: std::make_unique<FilterChooser>(args&: Encodings, args: InferiorEncodingIDs,
456 args&: InferiorFilterBits, args&: *this));
457 HasConflict |= It->second->HasConflict;
458 }
459}
460
461// Returns the number of fanout produced by the filter. More fanout implies
462// the filter distinguishes more categories of instructions.
463unsigned Filter::usefulness() const {
464 return FilteredIDs.size() + VariableIDs.empty();
465}
466
467//////////////////////////////////
468// //
469// Filterchooser Implementation //
470// //
471//////////////////////////////////
472
473void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
474 ArrayRef<unsigned> InstrLen) const {
475 OS << "static const uint8_t InstrLenTable[] = {\n";
476 for (unsigned Len : InstrLen)
477 OS << Len << ",\n";
478 OS << "};\n\n";
479}
480
481void DecoderEmitter::emitPredicateFunction(
482 formatted_raw_ostream &OS, const PredicateSet &Predicates) const {
483 // The predicate function is just a big switch statement based on the
484 // input predicate index.
485 OS << "static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset "
486 "&FB) {\n";
487 OS << " switch (Idx) {\n";
488 OS << " default: llvm_unreachable(\"Invalid index!\");\n";
489 for (const auto &[Index, Predicate] : enumerate(First: Predicates)) {
490 OS << " case " << Index << ":\n";
491 OS << " return " << Predicate << ";\n";
492 }
493 OS << " }\n";
494 OS << "}\n\n";
495}
496
497/// Emit a default implementation of a decoder for all RegClassByHwModes which
498/// do not have an explicit DecoderMethodSet, which dispatches over the decoder
499/// methods for the member classes.
500void DecoderEmitter::emitRegClassByHwModeDecoders(
501 formatted_raw_ostream &OS) const {
502 const CodeGenHwModes &CGH = Target.getHwModes();
503 if (CGH.getNumModeIds() == 1)
504 return;
505
506 ArrayRef<const Record *> RegClassByHwMode = Target.getAllRegClassByHwMode();
507 if (RegClassByHwMode.empty())
508 return;
509
510 for (const Record *ClassByHwMode : RegClassByHwMode) {
511 // Ignore cases that had an explicit DecoderMethod set.
512 if (!InstructionEncoding::findOperandDecoderMethod(Record: ClassByHwMode).second)
513 continue;
514
515 const HwModeSelect &ModeSelect = CGH.getHwModeSelect(R: ClassByHwMode);
516
517 // Mips has a system where this is only used by compound operands with
518 // custom decoders, and we don't try to detect if this decoder is really
519 // needed.
520 OS << "[[maybe_unused]]\n";
521
522 OS << "static DecodeStatus Decode" << ClassByHwMode->getName()
523 << "RegClassByHwMode";
524 OS << R"((MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) {
525 switch (Decoder->getSubtargetInfo().getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
526)";
527 for (auto [ModeID, RegClassRec] : ModeSelect.Items) {
528 OS << indent(2) << "case " << ModeID << ": // "
529 << CGH.getModeName(Id: ModeID, /*IncludeDefault=*/true) << '\n'
530 << indent(4) << "return "
531 << InstructionEncoding::findOperandDecoderMethod(Record: RegClassRec).first
532 << "(Inst, Imm, Addr, Decoder);\n";
533 }
534 OS << indent(2) << R"(default:
535 llvm_unreachable("no decoder for hwmode");
536 }
537}
538
539)";
540 }
541
542 OS << '\n';
543}
544
545void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
546 const DecoderSet &Decoders,
547 unsigned BucketBitWidth) const {
548 // The decoder function is just a big switch statement or a table of function
549 // pointers based on the input decoder index.
550
551 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
552 // It would be better for emitBinaryParser to use a 64-bit tmp whenever
553 // possible but fall back to an InsnType-sized tmp for truly large fields.
554 StringRef TmpTypeDecl =
555 "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
556 "InsnType, uint64_t>;\n";
557 StringRef DecodeParams =
558 "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
559 "MCDisassembler *Decoder, bool &DecodeComplete";
560
561 // Print the name of the decode function to OS.
562 auto PrintDecodeFnName = [&OS, BucketBitWidth](unsigned DecodeIdx) {
563 OS << "decodeFn";
564 if (BucketBitWidth != 0) {
565 OS << '_' << BucketBitWidth << "bit";
566 }
567 OS << '_' << DecodeIdx;
568 };
569
570 // Print the template statement.
571 auto PrintTemplate = [&OS, BucketBitWidth]() {
572 OS << "template <typename InsnType>\n";
573 OS << "static ";
574 if (BucketBitWidth != 0)
575 OS << "std::enable_if_t<InsnBitWidth<InsnType> == " << BucketBitWidth
576 << ", DecodeStatus>\n";
577 else
578 OS << "DecodeStatus ";
579 };
580
581 if (UseFnTableInDecodeToMCInst) {
582 // Emit a function for each case first.
583 for (const auto &[Index, Decoder] : enumerate(First: Decoders)) {
584 PrintTemplate();
585 PrintDecodeFnName(Index);
586 OS << "(" << DecodeParams << ") {\n";
587 OS << " " << TmpTypeDecl;
588 OS << " [[maybe_unused]] TmpType tmp;\n";
589 OS << Decoder;
590 OS << " return S;\n";
591 OS << "}\n\n";
592 }
593 }
594
595 OS << "// Handling " << Decoders.size() << " cases.\n";
596 PrintTemplate();
597 OS << "decodeToMCInst(unsigned Idx, " << DecodeParams << ") {\n";
598 OS << " DecodeComplete = true;\n";
599
600 if (UseFnTableInDecodeToMCInst) {
601 // Build a table of function pointers
602 OS << " using DecodeFnTy = DecodeStatus (*)(" << DecodeParams << ");\n";
603 OS << " static constexpr DecodeFnTy decodeFnTable[] = {\n";
604 for (size_t Index : llvm::seq(Size: Decoders.size())) {
605 OS << " ";
606 PrintDecodeFnName(Index);
607 OS << ",\n";
608 }
609 OS << " };\n";
610 OS << " if (Idx >= " << Decoders.size() << ")\n";
611 OS << " llvm_unreachable(\"Invalid decoder index!\");\n";
612 OS << " return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
613 "DecodeComplete);\n";
614 } else {
615 OS << " " << TmpTypeDecl;
616 OS << " TmpType tmp;\n";
617 OS << " switch (Idx) {\n";
618 OS << " default: llvm_unreachable(\"Invalid decoder index!\");\n";
619 for (const auto &[Index, Decoder] : enumerate(First: Decoders)) {
620 OS << " case " << Index << ":\n";
621 OS << Decoder;
622 OS << " return S;\n";
623 }
624 OS << " }\n";
625 }
626 OS << "}\n";
627}
628
629/// dumpStack - dumpStack traverses the filter chooser chain and calls
630/// dumpFilterArray on each filter chooser up to the top level one.
631void FilterChooser::dumpStack(raw_ostream &OS, indent Indent,
632 unsigned PadToWidth) const {
633 if (Parent)
634 Parent->dumpStack(OS, Indent, PadToWidth);
635 assert(PadToWidth >= FilterBits.getBitWidth());
636 OS << Indent << indent(PadToWidth - FilterBits.getBitWidth());
637 printKnownBits(OS, Bits: FilterBits, Unknown: '.');
638 OS << '\n';
639}
640
641// Calculates the island(s) needed to decode the instruction.
642// This returns a list of undecoded bits of an instructions, for example,
643// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
644// decoded bits in order to verify that the instruction matches the Opcode.
645static std::vector<EncodingIsland> getIslands(const KnownBits &EncodingBits,
646 const KnownBits &FilterBits) {
647 std::vector<EncodingIsland> Islands;
648 uint64_t FieldVal;
649 unsigned StartBit;
650
651 bool OnIsland = false;
652 unsigned FilterWidth = FilterBits.getBitWidth();
653 for (unsigned I = 0; I != FilterWidth; ++I) {
654 bool IsKnown = EncodingBits.Zero[I] || EncodingBits.One[I];
655 bool IsFiltered = FilterBits.Zero[I] || FilterBits.One[I];
656 if (!IsFiltered && IsKnown) {
657 if (OnIsland) {
658 // Accumulate island bits.
659 FieldVal |= static_cast<uint64_t>(EncodingBits.One[I])
660 << (I - StartBit);
661 } else {
662 // Onto an island.
663 StartBit = I;
664 FieldVal = static_cast<uint64_t>(EncodingBits.One[I]);
665 OnIsland = true;
666 }
667 } else if (OnIsland) {
668 // Into the water.
669 Islands.push_back(x: {.StartBit: StartBit, .NumBits: I - StartBit, .FieldVal: FieldVal});
670 OnIsland = false;
671 }
672 }
673
674 if (OnIsland)
675 Islands.push_back(x: {.StartBit: StartBit, .NumBits: FilterWidth - StartBit, .FieldVal: FieldVal});
676
677 return Islands;
678}
679
680static void emitBinaryParser(raw_ostream &OS, indent Indent,
681 const InstructionEncoding &Encoding,
682 const OperandInfo &OpInfo) {
683 if (OpInfo.HasNoEncoding) {
684 // If an operand has no encoding, the old behavior is to not decode it
685 // automatically and let the target do it. This is error-prone, so the
686 // new behavior is to report an error.
687 if (!IgnoreNonDecodableOperands)
688 PrintError(ErrorLoc: Encoding.getRecord()->getLoc(),
689 Msg: "could not find field for operand '" + OpInfo.Name + "'");
690 return;
691 }
692
693 // Special case for 'bits<0>'.
694 if (OpInfo.Fields.empty() && !OpInfo.InitValue) {
695 assert(!OpInfo.Decoder.empty());
696 // The operand has no encoding, so the corresponding argument is omitted.
697 // This avoids confusion and allows the function to be overloaded if the
698 // operand does have an encoding in other instructions.
699 OS << Indent << "if (!Check(S, " << OpInfo.Decoder << "(MI, Decoder)))\n"
700 << Indent << " return MCDisassembler::Fail;\n";
701 return;
702 }
703
704 if (OpInfo.fields().empty()) {
705 // Only a constant part. The old behavior is to not decode this operand.
706 if (IgnoreFullyDefinedOperands)
707 return;
708 // Initialize `tmp` with the constant part.
709 OS << Indent << "tmp = " << format_hex(N: *OpInfo.InitValue, Width: 0) << ";\n";
710 } else if (OpInfo.fields().size() == 1 && !OpInfo.InitValue.value_or(u: 0)) {
711 // One variable part and no/zero constant part. Initialize `tmp` with the
712 // variable part.
713 auto [Base, Width, Offset] = OpInfo.fields().front();
714 OS << Indent << "tmp = fieldFromInstruction(insn, " << Base << ", " << Width
715 << ')';
716 if (Offset)
717 OS << " << " << Offset;
718 OS << ";\n";
719 } else {
720 // General case. Initialize `tmp` with the constant part, if any, and
721 // insert the variable parts into it.
722 OS << Indent << "tmp = " << format_hex(N: OpInfo.InitValue.value_or(u: 0), Width: 0)
723 << ";\n";
724 for (auto [Base, Width, Offset] : OpInfo.fields()) {
725 OS << Indent << "tmp |= fieldFromInstruction(insn, " << Base << ", "
726 << Width << ')';
727 if (Offset)
728 OS << " << " << Offset;
729 OS << ";\n";
730 }
731 }
732
733 StringRef Decoder = OpInfo.Decoder;
734 if (!Decoder.empty()) {
735 OS << Indent << "if (!Check(S, " << Decoder
736 << "(MI, tmp, Address, Decoder))) { "
737 << (OpInfo.HasCompleteDecoder ? "" : "DecodeComplete = false; ")
738 << "return MCDisassembler::Fail; }\n";
739 } else {
740 OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
741 }
742}
743
744static std::string getDecoderString(const InstructionEncoding &Encoding) {
745 std::string Decoder;
746 raw_string_ostream OS(Decoder);
747 indent Indent(UseFnTableInDecodeToMCInst ? 2 : 4);
748
749 // If a custom instruction decoder was specified, use that.
750 StringRef DecoderMethod = Encoding.getDecoderMethod();
751 if (!DecoderMethod.empty()) {
752 OS << Indent << "if (!Check(S, " << DecoderMethod
753 << "(MI, insn, Address, Decoder))) { "
754 << (Encoding.hasCompleteDecoder() ? "" : "DecodeComplete = false; ")
755 << "return MCDisassembler::Fail; }\n";
756 } else {
757 for (const OperandInfo &Op : Encoding.getOperands())
758 emitBinaryParser(OS, Indent, Encoding, OpInfo: Op);
759 }
760 return Decoder;
761}
762
763static std::string getPredicateString(const InstructionEncoding &Encoding,
764 StringRef TargetName) {
765 std::vector<const Record *> Predicates =
766 Encoding.getRecord()->getValueAsListOfDefs(FieldName: "Predicates");
767 auto It = llvm::find_if(Range&: Predicates, P: [](const Record *R) {
768 return R->getValueAsBit(FieldName: "AssemblerMatcherPredicate");
769 });
770 if (It == Predicates.end())
771 return std::string();
772
773 std::string Predicate;
774 raw_string_ostream OS(Predicate);
775 SubtargetFeatureInfo::emitMCPredicateCheck(OS, TargetName, Predicates);
776 return Predicate;
777}
778
779std::unique_ptr<Filter>
780FilterChooser::findBestFilter(ArrayRef<bitAttr_t> BitAttrs, bool AllowMixed,
781 bool Greedy) const {
782 assert(EncodingIDs.size() >= 2 && "Nothing to filter");
783
784 // Heuristics. See also doFilter()'s "Heuristics" comment when num of
785 // instructions is 3.
786 if (AllowMixed && !Greedy) {
787 assert(EncodingIDs.size() == 3);
788
789 for (unsigned EncodingID : EncodingIDs) {
790 const InstructionEncoding &Encoding = Encodings[EncodingID];
791 KnownBits EncodingBits = Encoding.getMandatoryBits();
792
793 // Look for islands of undecoded bits of any instruction.
794 std::vector<EncodingIsland> Islands =
795 getIslands(EncodingBits, FilterBits);
796 if (!Islands.empty()) {
797 // Found an instruction with island(s). Now just assign a filter.
798 return std::make_unique<Filter>(
799 args: Encodings, args: EncodingIDs, args&: Islands[0].StartBit, args&: Islands[0].NumBits);
800 }
801 }
802 }
803
804 // The regionAttr automaton consumes the bitAttrs automatons' state,
805 // lowest-to-highest.
806 //
807 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
808 // States: NONE, ALL_SET, MIXED
809 // Initial state: NONE
810 //
811 // (NONE) ----- F --> (NONE)
812 // (NONE) ----- S --> (ALL_SET) ; and set region start
813 // (NONE) ----- U --> (NONE)
814 // (NONE) ----- M --> (MIXED) ; and set region start
815 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
816 // (ALL_SET) -- S --> (ALL_SET)
817 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
818 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
819 // (MIXED) ---- F --> (NONE) ; and report a MIXED region
820 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
821 // (MIXED) ---- U --> (NONE) ; and report a MIXED region
822 // (MIXED) ---- M --> (MIXED)
823
824 bitAttr_t RA = ATTR_NONE;
825 unsigned StartBit = 0;
826
827 std::vector<std::unique_ptr<Filter>> Filters;
828
829 auto addCandidateFilter = [&](unsigned StartBit, unsigned EndBit) {
830 Filters.push_back(x: std::make_unique<Filter>(args: Encodings, args: EncodingIDs, args&: StartBit,
831 args: EndBit - StartBit));
832 };
833
834 unsigned FilterWidth = FilterBits.getBitWidth();
835 for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
836 bitAttr_t bitAttr = BitAttrs[BitIndex];
837
838 assert(bitAttr != ATTR_NONE && "Bit without attributes");
839
840 switch (RA) {
841 case ATTR_NONE:
842 switch (bitAttr) {
843 case ATTR_FILTERED:
844 break;
845 case ATTR_ALL_SET:
846 StartBit = BitIndex;
847 RA = ATTR_ALL_SET;
848 break;
849 case ATTR_ALL_UNSET:
850 break;
851 case ATTR_MIXED:
852 StartBit = BitIndex;
853 RA = ATTR_MIXED;
854 break;
855 default:
856 llvm_unreachable("Unexpected bitAttr!");
857 }
858 break;
859 case ATTR_ALL_SET:
860 if (!AllowMixed && bitAttr != ATTR_ALL_SET)
861 addCandidateFilter(StartBit, BitIndex);
862 switch (bitAttr) {
863 case ATTR_FILTERED:
864 RA = ATTR_NONE;
865 break;
866 case ATTR_ALL_SET:
867 break;
868 case ATTR_ALL_UNSET:
869 RA = ATTR_NONE;
870 break;
871 case ATTR_MIXED:
872 StartBit = BitIndex;
873 RA = ATTR_MIXED;
874 break;
875 default:
876 llvm_unreachable("Unexpected bitAttr!");
877 }
878 break;
879 case ATTR_MIXED:
880 if (AllowMixed && bitAttr != ATTR_MIXED)
881 addCandidateFilter(StartBit, BitIndex);
882 switch (bitAttr) {
883 case ATTR_FILTERED:
884 StartBit = BitIndex;
885 RA = ATTR_NONE;
886 break;
887 case ATTR_ALL_SET:
888 StartBit = BitIndex;
889 RA = ATTR_ALL_SET;
890 break;
891 case ATTR_ALL_UNSET:
892 RA = ATTR_NONE;
893 break;
894 case ATTR_MIXED:
895 break;
896 default:
897 llvm_unreachable("Unexpected bitAttr!");
898 }
899 break;
900 case ATTR_ALL_UNSET:
901 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
902 case ATTR_FILTERED:
903 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
904 }
905 }
906
907 // At the end, if we're still in ALL_SET or MIXED states, report a region
908 switch (RA) {
909 case ATTR_NONE:
910 break;
911 case ATTR_FILTERED:
912 break;
913 case ATTR_ALL_SET:
914 if (!AllowMixed)
915 addCandidateFilter(StartBit, FilterWidth);
916 break;
917 case ATTR_ALL_UNSET:
918 break;
919 case ATTR_MIXED:
920 if (AllowMixed)
921 addCandidateFilter(StartBit, FilterWidth);
922 break;
923 }
924
925 // We have finished with the filter processings. Now it's time to choose
926 // the best performing filter.
927 auto MaxIt = llvm::max_element(Range&: Filters, C: [](const std::unique_ptr<Filter> &A,
928 const std::unique_ptr<Filter> &B) {
929 return A->usefulness() < B->usefulness();
930 });
931 if (MaxIt == Filters.end() || (*MaxIt)->usefulness() == 0)
932 return nullptr;
933 return std::move(*MaxIt);
934}
935
936std::unique_ptr<Filter> FilterChooser::findBestFilter() const {
937 // We maintain BIT_WIDTH copies of the bitAttrs automaton.
938 // The automaton consumes the corresponding bit from each
939 // instruction.
940 //
941 // Input symbols: 0, 1, _ (unset), and . (any of the above).
942 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
943 // Initial state: NONE.
944 //
945 // (NONE) ------- [01] -> (ALL_SET)
946 // (NONE) ------- _ ----> (ALL_UNSET)
947 // (ALL_SET) ---- [01] -> (ALL_SET)
948 // (ALL_SET) ---- _ ----> (MIXED)
949 // (ALL_UNSET) -- [01] -> (MIXED)
950 // (ALL_UNSET) -- _ ----> (ALL_UNSET)
951 // (MIXED) ------ . ----> (MIXED)
952 // (FILTERED)---- . ----> (FILTERED)
953
954 unsigned FilterWidth = FilterBits.getBitWidth();
955 SmallVector<bitAttr_t, 128> BitAttrs(FilterWidth, ATTR_NONE);
956
957 // FILTERED bit positions provide no entropy and are not worthy of pursuing.
958 // Filter::recurse() set either 1 or 0 for each position.
959 for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex)
960 if (isPositionFiltered(Idx: BitIndex))
961 BitAttrs[BitIndex] = ATTR_FILTERED;
962
963 for (unsigned EncodingID : EncodingIDs) {
964 const InstructionEncoding &Encoding = Encodings[EncodingID];
965 KnownBits EncodingBits = Encoding.getMandatoryBits();
966
967 for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
968 bool IsKnown = EncodingBits.Zero[BitIndex] || EncodingBits.One[BitIndex];
969 switch (BitAttrs[BitIndex]) {
970 case ATTR_NONE:
971 if (IsKnown)
972 BitAttrs[BitIndex] = ATTR_ALL_SET;
973 else
974 BitAttrs[BitIndex] = ATTR_ALL_UNSET;
975 break;
976 case ATTR_ALL_SET:
977 if (!IsKnown)
978 BitAttrs[BitIndex] = ATTR_MIXED;
979 break;
980 case ATTR_ALL_UNSET:
981 if (IsKnown)
982 BitAttrs[BitIndex] = ATTR_MIXED;
983 break;
984 case ATTR_MIXED:
985 case ATTR_FILTERED:
986 break;
987 }
988 }
989 }
990
991 // Try regions of consecutive known bit values first.
992 if (std::unique_ptr<Filter> F =
993 findBestFilter(BitAttrs, /*AllowMixed=*/false))
994 return F;
995
996 // Then regions of mixed bits (both known and unitialized bit values allowed).
997 if (std::unique_ptr<Filter> F = findBestFilter(BitAttrs, /*AllowMixed=*/true))
998 return F;
999
1000 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1001 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1002 // well-known encoding pattern. In such case, we backtrack and scan for the
1003 // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1004 if (EncodingIDs.size() == 3) {
1005 if (std::unique_ptr<Filter> F =
1006 findBestFilter(BitAttrs, /*AllowMixed=*/true, /*Greedy=*/false))
1007 return F;
1008 }
1009
1010 // There is a conflict we could not resolve.
1011 return nullptr;
1012}
1013
1014// Decides on the best configuration of filter(s) to use in order to decode
1015// the instructions. A conflict of instructions may occur, in which case we
1016// dump the conflict set to the standard error.
1017void FilterChooser::doFilter() {
1018 assert(!EncodingIDs.empty() && "FilterChooser created with no instructions");
1019
1020 // No filter needed.
1021 if (EncodingIDs.size() == 1) {
1022 SingletonEncodingID = EncodingIDs.front();
1023 return;
1024 }
1025
1026 std::unique_ptr<Filter> BestFilter = findBestFilter();
1027 if (BestFilter) {
1028 applyFilter(F: *BestFilter);
1029 return;
1030 }
1031
1032 // Print out useful conflict information for postmortem analysis.
1033 errs() << "Decoding Conflict:\n";
1034 dump();
1035 HasConflict = true;
1036}
1037
1038void FilterChooser::dump() const {
1039 indent Indent(4);
1040 // Helps to keep the output right-justified.
1041 unsigned PadToWidth = getMaxEncodingWidth();
1042
1043 // Dump filter stack.
1044 dumpStack(OS&: errs(), Indent, PadToWidth);
1045
1046 // Dump encodings.
1047 for (unsigned EncodingID : EncodingIDs) {
1048 const InstructionEncoding &Encoding = Encodings[EncodingID];
1049 errs() << Indent << indent(PadToWidth - Encoding.getBitWidth());
1050 printKnownBits(OS&: errs(), Bits: Encoding.getMandatoryBits(), Unknown: '_');
1051 errs() << " " << Encoding.getName() << '\n';
1052 }
1053}
1054
1055// emitDecodeInstruction - Emit the templated helper function
1056// decodeInstruction().
1057static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
1058 const DecoderTableInfo &TableInfo) {
1059 OS << R"(
1060template <typename InsnType>
1061static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
1062 InsnType insn, uint64_t Address,
1063 const MCDisassembler *DisAsm,
1064 const MCSubtargetInfo &STI)";
1065 if (IsVarLenInst) {
1066 OS << ",\n "
1067 "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
1068 }
1069 OS << ") {\n";
1070 if (TableInfo.HasCheckPredicate)
1071 OS << " const FeatureBitset &Bits = STI.getFeatureBits();\n";
1072 OS << " const uint8_t *Ptr = DecodeTable;\n";
1073
1074 if (SpecializeDecodersPerBitwidth) {
1075 // Fail with a fatal error if decoder table's bitwidth does not match
1076 // `InsnType` bitwidth.
1077 OS << R"(
1078 [[maybe_unused]] uint32_t BitWidth = decodeULEB128AndIncUnsafe(Ptr);
1079 assert(InsnBitWidth<InsnType> == BitWidth &&
1080 "Table and instruction bitwidth mismatch");
1081)";
1082 }
1083
1084 OS << R"(
1085 SmallVector<const uint8_t *, 8> ScopeStack;
1086 DecodeStatus S = MCDisassembler::Success;
1087 while (true) {
1088 ptrdiff_t Loc = Ptr - DecodeTable;
1089 const uint8_t DecoderOp = *Ptr++;
1090 switch (DecoderOp) {
1091 default:
1092 errs() << Loc << ": Unexpected decode table opcode: "
1093 << (int)DecoderOp << '\n';
1094 return MCDisassembler::Fail;
1095 case OPC_Scope: {
1096 unsigned NumToSkip = decodeULEB128AndIncUnsafe(Ptr);
1097 const uint8_t *SkipTo = Ptr + NumToSkip;
1098 ScopeStack.push_back(SkipTo);
1099 LLVM_DEBUG(dbgs() << Loc << ": OPC_Scope(" << SkipTo - DecodeTable
1100 << ")\n");
1101 continue;
1102 }
1103 case OPC_SwitchField: {
1104 // Decode the start value.
1105 unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
1106 unsigned Len = *Ptr++;)";
1107 if (IsVarLenInst)
1108 OS << "\n makeUp(insn, Start + Len);";
1109 OS << R"(
1110 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
1111 uint64_t CaseValue;
1112 unsigned CaseSize;
1113 while (true) {
1114 CaseValue = decodeULEB128AndIncUnsafe(Ptr);
1115 CaseSize = decodeULEB128AndIncUnsafe(Ptr);
1116 if (FieldValue == CaseValue || !CaseSize)
1117 break;
1118 Ptr += CaseSize;
1119 }
1120 if (FieldValue == CaseValue) {
1121 LLVM_DEBUG(dbgs() << Loc << ": OPC_SwitchField(" << Start << ", " << Len
1122 << "): " << FieldValue << '\n');
1123 continue;
1124 }
1125 break;
1126 }
1127 case OPC_CheckField: {
1128 // Decode the start value.
1129 unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
1130 unsigned Len = *Ptr;)";
1131 if (IsVarLenInst)
1132 OS << "\n makeUp(insn, Start + Len);";
1133 OS << R"(
1134 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
1135 // Decode the field value.
1136 unsigned PtrLen = 0;
1137 uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
1138 Ptr += PtrLen;
1139 bool Failed = ExpectedValue != FieldValue;
1140
1141 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", " << Len
1142 << ", " << ExpectedValue << "): FieldValue = "
1143 << FieldValue << ", ExpectedValue = " << ExpectedValue
1144 << ": " << (Failed ? "FAIL, " : "PASS\n"););
1145 if (!Failed)
1146 continue;
1147 break;
1148 })";
1149 if (TableInfo.HasCheckPredicate) {
1150 OS << R"(
1151 case OPC_CheckPredicate: {
1152 // Decode the Predicate Index value.
1153 unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr);
1154 // Check the predicate.
1155 bool Failed = !checkDecoderPredicate(PIdx, Bits);
1156
1157 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): "
1158 << (Failed ? "FAIL, " : "PASS\n"););
1159 if (!Failed)
1160 continue;
1161 break;
1162 })";
1163 }
1164 OS << R"(
1165 case OPC_Decode: {
1166 // Decode the Opcode value.
1167 unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
1168 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
1169
1170 MI.clear();
1171 MI.setOpcode(Opc);
1172 bool DecodeComplete;)";
1173 if (IsVarLenInst) {
1174 OS << "\n unsigned Len = InstrLenTable[Opc];\n"
1175 << " makeUp(insn, Len);";
1176 }
1177 OS << R"(
1178 S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm,
1179 DecodeComplete);
1180 LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
1181 << ", using decoder " << DecodeIdx << ": "
1182 << (S ? "PASS, " : "FAIL, "));
1183
1184 if (DecodeComplete) {
1185 LLVM_DEBUG(dbgs() << "decoding complete\n");
1186 return S;
1187 }
1188 assert(S == MCDisassembler::Fail);
1189 // Reset decode status. This also drops a SoftFail status that could be
1190 // set before the decode attempt.
1191 S = MCDisassembler::Success;
1192 break;
1193 })";
1194 if (TableInfo.HasSoftFail) {
1195 OS << R"(
1196 case OPC_SoftFail: {
1197 // Decode the mask values.
1198 uint64_t PositiveMask = decodeULEB128AndIncUnsafe(Ptr);
1199 uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
1200 bool Failed = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
1201 if (Failed)
1202 S = MCDisassembler::SoftFail;
1203 LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Failed ? "FAIL\n" : "PASS\n"));
1204 continue;
1205 })";
1206 }
1207 OS << R"(
1208 }
1209 if (ScopeStack.empty()) {
1210 LLVM_DEBUG(dbgs() << "returning Fail\n");
1211 return MCDisassembler::Fail;
1212 }
1213 Ptr = ScopeStack.pop_back_val();
1214 LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
1215 }
1216 llvm_unreachable("bogosity detected in disassembler state machine!");
1217}
1218
1219)";
1220}
1221
1222namespace {
1223
1224class DecoderTreeBuilder {
1225 DecoderContext &Ctx;
1226 const CodeGenTarget &Target;
1227 ArrayRef<InstructionEncoding> Encodings;
1228
1229public:
1230 DecoderTreeBuilder(DecoderContext &Ctx, const CodeGenTarget &Target,
1231 ArrayRef<InstructionEncoding> Encodings)
1232 : Ctx(Ctx), Target(Target), Encodings(Encodings) {}
1233
1234 std::unique_ptr<DecoderTreeNode> buildTree(ArrayRef<unsigned> EncodingIDs);
1235
1236private:
1237 std::unique_ptr<DecoderTreeNode>
1238 convertSingleton(unsigned EncodingID, const KnownBits &FilterBits);
1239
1240 std::unique_ptr<DecoderTreeNode> convertFilterChooserMap(
1241 unsigned StartBit, unsigned NumBits,
1242 const std::map<uint64_t, std::unique_ptr<const FilterChooser>> &FCMap);
1243
1244 std::unique_ptr<DecoderTreeNode>
1245 convertFilterChooser(const FilterChooser *FC);
1246};
1247
1248} // namespace
1249
1250std::unique_ptr<DecoderTreeNode>
1251DecoderTreeBuilder::convertSingleton(unsigned EncodingID,
1252 const KnownBits &FilterBits) {
1253 const InstructionEncoding &Encoding = Encodings[EncodingID];
1254 auto N = std::make_unique<CheckAllNode>();
1255
1256 std::string Predicate = getPredicateString(Encoding, TargetName: Target.getName());
1257 if (!Predicate.empty()) {
1258 unsigned PredicateIndex = Ctx.getPredicateIndex(Predicate);
1259 N->addChild(Child: std::make_unique<CheckPredicateNode>(args&: PredicateIndex));
1260 }
1261
1262 std::vector<EncodingIsland> Islands =
1263 getIslands(EncodingBits: Encoding.getMandatoryBits(), FilterBits);
1264 for (const EncodingIsland &Island : reverse(C&: Islands)) {
1265 N->addChild(Child: std::make_unique<CheckFieldNode>(
1266 args: Island.StartBit, args: Island.NumBits, args: Island.FieldVal));
1267 }
1268
1269 const KnownBits &InstBits = Encoding.getInstBits();
1270 const APInt &SoftFailMask = Encoding.getSoftFailMask();
1271 if (!SoftFailMask.isZero()) {
1272 APInt PositiveMask = InstBits.Zero & SoftFailMask;
1273 APInt NegativeMask = InstBits.One & SoftFailMask;
1274 N->addChild(Child: std::make_unique<SoftFailNode>(args: PositiveMask.getZExtValue(),
1275 args: NegativeMask.getZExtValue()));
1276 }
1277
1278 unsigned DecoderIndex = Ctx.getDecoderIndex(Decoder: getDecoderString(Encoding));
1279 N->addChild(Child: std::make_unique<DecodeNode>(
1280 args: Encoding.getName(), args&: Encoding.getInstruction()->EnumVal, args&: DecoderIndex));
1281
1282 return N;
1283}
1284
1285std::unique_ptr<DecoderTreeNode> DecoderTreeBuilder::convertFilterChooserMap(
1286 unsigned StartBit, unsigned NumBits,
1287 const std::map<uint64_t, std::unique_ptr<const FilterChooser>> &FCMap) {
1288 if (FCMap.size() == 1) {
1289 const auto &[FieldVal, ChildFC] = *FCMap.begin();
1290 auto N = std::make_unique<CheckAllNode>();
1291 N->addChild(Child: std::make_unique<CheckFieldNode>(args&: StartBit, args&: NumBits, args: FieldVal));
1292 N->addChild(Child: convertFilterChooser(FC: ChildFC.get()));
1293 return N;
1294 }
1295 auto N = std::make_unique<SwitchFieldNode>(args&: StartBit, args&: NumBits);
1296 for (const auto &[FieldVal, ChildFC] : FCMap)
1297 N->addCase(Value: FieldVal, N: convertFilterChooser(FC: ChildFC.get()));
1298 return N;
1299}
1300
1301std::unique_ptr<DecoderTreeNode>
1302DecoderTreeBuilder::convertFilterChooser(const FilterChooser *FC) {
1303 auto N = std::make_unique<CheckAnyNode>();
1304
1305 do {
1306 if (FC->SingletonEncodingID)
1307 N->addChild(Child: convertSingleton(EncodingID: *FC->SingletonEncodingID, FilterBits: FC->FilterBits));
1308 else
1309 N->addChild(Child: convertFilterChooserMap(StartBit: FC->StartBit, NumBits: FC->NumBits,
1310 FCMap: FC->FilterChooserMap));
1311 FC = FC->VariableFC.get();
1312 } while (FC);
1313
1314 return N;
1315}
1316
1317std::unique_ptr<DecoderTreeNode>
1318DecoderTreeBuilder::buildTree(ArrayRef<unsigned> EncodingIDs) {
1319 FilterChooser FC(Encodings, EncodingIDs);
1320 if (FC.hasConflict())
1321 return nullptr;
1322 return convertFilterChooser(FC: &FC);
1323}
1324
1325/// Collects all HwModes referenced by the target for encoding purposes.
1326void DecoderEmitter::collectHwModesReferencedForEncodings(
1327 std::vector<unsigned> &HwModeIDs,
1328 NamespacesHwModesMap &NamespacesWithHwModes) const {
1329 SmallBitVector BV(CGH.getNumModeIds());
1330 for (const auto &MS : CGH.getHwModeSelects()) {
1331 for (auto [HwModeID, EncodingDef] : MS.second.Items) {
1332 if (EncodingDef->isSubClassOf(Name: "InstructionEncoding")) {
1333 StringRef DecoderNamespace =
1334 EncodingDef->getValueAsString(FieldName: "DecoderNamespace");
1335 NamespacesWithHwModes[DecoderNamespace].insert(x: HwModeID);
1336 BV.set(HwModeID);
1337 }
1338 }
1339 }
1340 // FIXME: Can't do `HwModeIDs.assign(BV.set_bits_begin(), BV.set_bits_end())`
1341 // because const_set_bits_iterator_impl is not copy-assignable.
1342 // This breaks some MacOS builds.
1343 llvm::copy(Range: BV.set_bits(), Out: std::back_inserter(x&: HwModeIDs));
1344}
1345
1346void DecoderEmitter::handleHwModesUnrelatedEncodings(
1347 unsigned EncodingID, ArrayRef<unsigned> HwModeIDs,
1348 NamespacesHwModesMap &NamespacesWithHwModes) {
1349 switch (DecoderEmitterSuppressDuplicates) {
1350 case SUPPRESSION_DISABLE: {
1351 for (unsigned HwModeID : HwModeIDs)
1352 EncodingIDsByHwMode[HwModeID].push_back(x: EncodingID);
1353 break;
1354 }
1355 case SUPPRESSION_LEVEL1: {
1356 StringRef DecoderNamespace = Encodings[EncodingID].getDecoderNamespace();
1357 auto It = NamespacesWithHwModes.find(x: DecoderNamespace);
1358 if (It != NamespacesWithHwModes.end()) {
1359 for (unsigned HwModeID : It->second)
1360 EncodingIDsByHwMode[HwModeID].push_back(x: EncodingID);
1361 } else {
1362 // Only emit the encoding once, as it's DecoderNamespace doesn't
1363 // contain any HwModes.
1364 EncodingIDsByHwMode[DefaultMode].push_back(x: EncodingID);
1365 }
1366 break;
1367 }
1368 case SUPPRESSION_LEVEL2:
1369 EncodingIDsByHwMode[DefaultMode].push_back(x: EncodingID);
1370 break;
1371 }
1372}
1373
1374/// Checks if the given target-specific non-pseudo instruction
1375/// is a candidate for decoding.
1376static bool isDecodableInstruction(const Record *InstDef) {
1377 return !InstDef->getValueAsBit(FieldName: "isAsmParserOnly") &&
1378 !InstDef->getValueAsBit(FieldName: "isCodeGenOnly");
1379}
1380
1381/// Checks if the given encoding is valid.
1382static bool isValidEncoding(const Record *EncodingDef) {
1383 const RecordVal *InstField = EncodingDef->getValue(Name: "Inst");
1384 if (!InstField)
1385 return false;
1386
1387 if (const auto *InstInit = dyn_cast<BitsInit>(Val: InstField->getValue())) {
1388 // Fixed-length encoding. Size must be non-zero.
1389 if (!EncodingDef->getValueAsInt(FieldName: "Size"))
1390 return false;
1391
1392 // At least one of the encoding bits must be complete (not '?').
1393 // FIXME: This should take SoftFail field into account.
1394 return !InstInit->allInComplete();
1395 }
1396
1397 if (const auto *InstInit = dyn_cast<DagInit>(Val: InstField->getValue())) {
1398 // Variable-length encoding.
1399 // At least one of the encoding bits must be complete (not '?').
1400 VarLenInst VLI(InstInit, InstField);
1401 return !all_of(Range&: VLI, P: [](const EncodingSegment &Segment) {
1402 return isa<UnsetInit>(Val: Segment.Value);
1403 });
1404 }
1405
1406 // Inst field is neither BitsInit nor DagInit. This is something unsupported.
1407 return false;
1408}
1409
1410/// Parses all InstructionEncoding instances and fills internal data structures.
1411void DecoderEmitter::parseInstructionEncodings() {
1412 // First, collect all encoding-related HwModes referenced by the target.
1413 // And establish a mapping table between DecoderNamespace and HwMode.
1414 // If HwModeNames is empty, add the default mode so we always have one HwMode.
1415 std::vector<unsigned> HwModeIDs;
1416 NamespacesHwModesMap NamespacesWithHwModes;
1417 collectHwModesReferencedForEncodings(HwModeIDs, NamespacesWithHwModes);
1418 if (HwModeIDs.empty())
1419 HwModeIDs.push_back(x: DefaultMode);
1420
1421 ArrayRef<const CodeGenInstruction *> Instructions =
1422 Target.getTargetNonPseudoInstructions();
1423 Encodings.reserve(n: Instructions.size());
1424
1425 for (const CodeGenInstruction *Inst : Instructions) {
1426 const Record *InstDef = Inst->TheDef;
1427 if (!isDecodableInstruction(InstDef)) {
1428 ++NumEncodingsLackingDisasm;
1429 continue;
1430 }
1431
1432 if (const Record *RV = InstDef->getValueAsOptionalDef(FieldName: "EncodingInfos")) {
1433 EncodingInfoByHwMode EBM(RV, CGH);
1434 for (auto [HwModeID, EncodingDef] : EBM) {
1435 if (!isValidEncoding(EncodingDef)) {
1436 // TODO: Should probably give a warning.
1437 ++NumEncodingsOmitted;
1438 continue;
1439 }
1440 unsigned EncodingID = Encodings.size();
1441 Encodings.emplace_back(args&: EncodingDef, args&: Inst);
1442 EncodingIDsByHwMode[HwModeID].push_back(x: EncodingID);
1443 }
1444 continue; // Ignore encoding specified by Instruction itself.
1445 }
1446
1447 if (!isValidEncoding(EncodingDef: InstDef)) {
1448 ++NumEncodingsOmitted;
1449 continue;
1450 }
1451
1452 unsigned EncodingID = Encodings.size();
1453 Encodings.emplace_back(args&: InstDef, args&: Inst);
1454
1455 // This instruction is encoded the same on all HwModes.
1456 // According to user needs, add it to all, some, or only the default HwMode.
1457 handleHwModesUnrelatedEncodings(EncodingID, HwModeIDs,
1458 NamespacesWithHwModes);
1459 }
1460
1461 for (const Record *EncodingDef :
1462 RK.getAllDerivedDefinitions(ClassName: "AdditionalEncoding")) {
1463 const Record *InstDef = EncodingDef->getValueAsDef(FieldName: "AliasOf");
1464 // TODO: Should probably give a warning in these cases.
1465 // What's the point of specifying an additional encoding
1466 // if it is invalid or if the instruction is not decodable?
1467 if (!isDecodableInstruction(InstDef)) {
1468 ++NumEncodingsLackingDisasm;
1469 continue;
1470 }
1471 if (!isValidEncoding(EncodingDef)) {
1472 ++NumEncodingsOmitted;
1473 continue;
1474 }
1475 unsigned EncodingID = Encodings.size();
1476 Encodings.emplace_back(args&: EncodingDef, args: &Target.getInstruction(InstRec: InstDef));
1477 EncodingIDsByHwMode[DefaultMode].push_back(x: EncodingID);
1478 }
1479
1480 // Do some statistics.
1481 NumInstructions = Instructions.size();
1482 NumEncodingsSupported = Encodings.size();
1483 NumEncodings = NumEncodingsSupported + NumEncodingsOmitted;
1484}
1485
1486DecoderEmitter::DecoderEmitter(const RecordKeeper &RK)
1487 : RK(RK), Target(RK), CGH(Target.getHwModes()) {
1488 Target.reverseBitsForLittleEndianEncoding();
1489 parseInstructionEncodings();
1490}
1491
1492// Emits disassembler code for instruction decoding.
1493void DecoderEmitter::run(raw_ostream &o) const {
1494 formatted_raw_ostream OS(o);
1495 OS << R"(
1496#include "llvm/MC/MCInst.h"
1497#include "llvm/MC/MCSubtargetInfo.h"
1498#include "llvm/Support/DataTypes.h"
1499#include "llvm/Support/Debug.h"
1500#include "llvm/Support/LEB128.h"
1501#include "llvm/Support/raw_ostream.h"
1502#include "llvm/TargetParser/SubtargetFeature.h"
1503#include <assert.h>
1504
1505namespace {
1506
1507// InsnBitWidth is essentially a type trait used by the decoder emitter to query
1508// the supported bitwidth for a given type. But default, the value is 0, making
1509// it an invalid type for use as `InsnType` when instantiating the decoder.
1510// Individual targets are expected to provide specializations for these based
1511// on their usage.
1512template <typename T> constexpr uint32_t InsnBitWidth = 0;
1513
1514)";
1515
1516 // Do extra bookkeeping for variable-length encodings.
1517 bool IsVarLenInst = Target.hasVariableLengthEncodings();
1518 unsigned MaxInstLen = 0;
1519 if (IsVarLenInst) {
1520 std::vector<unsigned> InstrLen(Target.getInstructions().size(), 0);
1521 for (const InstructionEncoding &Encoding : Encodings) {
1522 MaxInstLen = std::max(a: MaxInstLen, b: Encoding.getBitWidth());
1523 InstrLen[Target.getInstrIntValue(R: Encoding.getInstruction()->TheDef)] =
1524 Encoding.getBitWidth();
1525 }
1526
1527 // For variable instruction, we emit an instruction length table to let the
1528 // decoder know how long the instructions are. You can see example usage in
1529 // M68k's disassembler.
1530 emitInstrLenTable(OS, InstrLen);
1531 }
1532
1533 emitRegClassByHwModeDecoders(OS);
1534
1535 // Map of (bitwidth, namespace, hwmode) tuple to encoding IDs.
1536 // Its organized as a nested map, with the (namespace, hwmode) as the key for
1537 // the inner map and bitwidth as the key for the outer map. We use std::map
1538 // for deterministic iteration order so that the code emitted is also
1539 // deterministic.
1540 using InnerKeyTy = std::pair<StringRef, unsigned>;
1541 using InnerMapTy = std::map<InnerKeyTy, std::vector<unsigned>>;
1542 std::map<unsigned, InnerMapTy> EncMap;
1543
1544 for (const auto &[HwModeID, EncodingIDs] : EncodingIDsByHwMode) {
1545 for (unsigned EncodingID : EncodingIDs) {
1546 const InstructionEncoding &Encoding = Encodings[EncodingID];
1547 const unsigned BitWidth =
1548 IsVarLenInst ? MaxInstLen : Encoding.getBitWidth();
1549 StringRef DecoderNamespace = Encoding.getDecoderNamespace();
1550 EncMap[BitWidth][{DecoderNamespace, HwModeID}].push_back(x: EncodingID);
1551 }
1552 }
1553
1554 // Variable length instructions use the same `APInt` type for all instructions
1555 // so we cannot specialize decoders based on instruction bitwidths (which
1556 // requires using different `InstType` for differet bitwidths for the correct
1557 // template specialization to kick in).
1558 if (IsVarLenInst && SpecializeDecodersPerBitwidth)
1559 PrintFatalError(
1560 Msg: "Cannot specialize decoders for variable length instuctions");
1561
1562 DecoderContext Ctx;
1563 DecoderTreeBuilder TreeBuilder(Ctx, Target, Encodings);
1564
1565 DecoderTableInfo TableInfo;
1566 DecoderTableEmitter TableEmitter(TableInfo, OS);
1567
1568 // Emit a table for each (namespace, hwmode, bitwidth) combination.
1569 // Entries in `EncMap` are already sorted by bitwidth. So bucketing per
1570 // bitwidth can be done on-the-fly as we iterate over the map.
1571 bool HasConflict = false;
1572 for (const auto &[BitWidth, BWMap] : EncMap) {
1573 for (const auto &[Key, EncodingIDs] : BWMap) {
1574 auto [DecoderNamespace, HwModeID] = Key;
1575
1576 std::unique_ptr<DecoderTreeNode> Tree =
1577 TreeBuilder.buildTree(EncodingIDs);
1578
1579 // Skip emitting the table if a conflict has been detected.
1580 if (!Tree) {
1581 HasConflict = true;
1582 continue;
1583 }
1584
1585 // Form the table name.
1586 SmallString<32> TableName({"DecoderTable", DecoderNamespace});
1587 if (HwModeID != DefaultMode)
1588 TableName.append(Refs: {"_", Target.getHwModes().getModeName(Id: HwModeID)});
1589 TableName.append(RHS: utostr(X: BitWidth));
1590
1591 TableEmitter.emitTable(
1592 TableName, BitWidth: SpecializeDecodersPerBitwidth ? BitWidth : 0, Root: Tree.get());
1593 }
1594
1595 // Each BitWidth get's its own decoders and decoder function if
1596 // SpecializeDecodersPerBitwidth is enabled.
1597 if (SpecializeDecodersPerBitwidth) {
1598 emitDecoderFunction(OS, Decoders: Ctx.Decoders, BucketBitWidth: BitWidth);
1599 Ctx.Decoders.clear();
1600 }
1601 }
1602
1603 if (HasConflict)
1604 PrintFatalError(Msg: "Decoding conflict encountered");
1605
1606 // Emit the decoder function for the last bucket. This will also emit the
1607 // single decoder function if SpecializeDecodersPerBitwidth = false.
1608 if (!SpecializeDecodersPerBitwidth)
1609 emitDecoderFunction(OS, Decoders: Ctx.Decoders, BucketBitWidth: 0);
1610
1611 // Emit the predicate function.
1612 if (TableInfo.HasCheckPredicate)
1613 emitPredicateFunction(OS, Predicates: Ctx.Predicates);
1614
1615 // Emit the main entry point for the decoder, decodeInstruction().
1616 emitDecodeInstruction(OS, IsVarLenInst, TableInfo);
1617
1618 OS << "\n} // namespace\n";
1619}
1620
1621void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS) {
1622 DecoderEmitter(RK).run(o&: OS);
1623}
1624