1//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// It contains the tablegen backend that emits the decoder functions for
10// targets with fixed/variable length instruction set.
11//
12//===----------------------------------------------------------------------===//
13
14#include "Common/CodeGenHwModes.h"
15#include "Common/CodeGenInstruction.h"
16#include "Common/CodeGenTarget.h"
17#include "Common/InfoByHwMode.h"
18#include "Common/VarLenCodeEmitterGen.h"
19#include "TableGenBackends.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/CachedHashString.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallString.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/MC/MCDecoderOps.h"
31#include "llvm/Support/Casting.h"
32#include "llvm/Support/CommandLine.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/FormatVariadic.h"
36#include "llvm/Support/FormattedStream.h"
37#include "llvm/Support/LEB128.h"
38#include "llvm/Support/MathExtras.h"
39#include "llvm/Support/raw_ostream.h"
40#include "llvm/TableGen/Error.h"
41#include "llvm/TableGen/Record.h"
42#include <algorithm>
43#include <cassert>
44#include <cstddef>
45#include <cstdint>
46#include <map>
47#include <memory>
48#include <set>
49#include <string>
50#include <utility>
51#include <vector>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "decoder-emitter"
56
57extern cl::OptionCategory DisassemblerEmitterCat;
58
/// Levels accepted by the -suppress-per-hwmode-duplicates option. The
/// enumerators are ordered from "no suppression" to the most aggressive
/// setting.
enum SuppressLevel {
  SUPPRESSION_DISABLE = 0, // Command-line spelling "O0".
  SUPPRESSION_LEVEL1 = 1,  // Command-line spelling "O1".
  SUPPRESSION_LEVEL2 = 2   // Command-line spelling "O2".
};
64
65static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
66 "suppress-per-hwmode-duplicates",
67 cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"),
68 cl::values(
69 clEnumValN(
70 SUPPRESSION_DISABLE, "O0",
71 "Do not prevent DecoderTable duplications caused by HwModes"),
72 clEnumValN(
73 SUPPRESSION_LEVEL1, "O1",
74 "Remove duplicate DecoderTable entries generated due to HwModes"),
75 clEnumValN(
76 SUPPRESSION_LEVEL2, "O2",
77 "Extract HwModes-specific instructions into new DecoderTables, "
78 "significantly reducing Table Duplications")),
79 cl::init(Val: SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
80
81static cl::opt<bool> LargeTable(
82 "large-decoder-table",
83 cl::desc("Use large decoder table format. This uses 24 bits for offset\n"
84 "in the table instead of the default 16 bits."),
85 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
86
87static cl::opt<bool> UseFnTableInDecodeToMCInst(
88 "use-fn-table-in-decode-to-mcinst",
89 cl::desc(
90 "Use a table of function pointers instead of a switch case in the\n"
91 "generated `decodeToMCInst` function. Helps improve compile time\n"
92 "of the generated code."),
93 cl::init(Val: false), cl::cat(DisassemblerEmitterCat));
94
// Counters declared with the STATISTIC macro under DEBUG_TYPE
// "decoder-emitter"; the string is the description attached to each counter.
STATISTIC(NumEncodings, "Number of encodings considered");
STATISTIC(NumEncodingsLackingDisasm,
          "Number of encodings without disassembler info");
STATISTIC(NumInstructions, "Number of instructions considered");
STATISTIC(NumEncodingsSupported, "Number of encodings supported");
STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
101
102static unsigned getNumToSkipInBytes() { return LargeTable ? 3 : 2; }
103
104namespace {
105
/// A (Base, Width, Offset) triple describing one piece of an operand's
/// encoding.
// NOTE(review): presumably Base is the first bit inside the instruction
// encoding, Width the number of bits, and Offset the bit position inside the
// operand value -- confirm against the code that consumes these fields.
struct EncodingField {
  unsigned Base, Width, Offset;
  EncodingField(unsigned B, unsigned W, unsigned O)
      : Base(B), Width(W), Offset(O) {}
};

/// Decoding information for a single operand: the name of its decoder, whether
/// that decoder is complete, an initial value, and the encoding fields that
/// make up the operand.
struct OperandInfo {
  std::vector<EncodingField> Fields;
  std::string Decoder;
  bool HasCompleteDecoder;
  uint64_t InitValue = 0;

  /// \param D   decoder name; taken by value and moved into place so callers
  ///            passing a temporary do not pay for an extra copy.
  /// \param HCD value stored in HasCompleteDecoder.
  OperandInfo(std::string D, bool HCD)
      : Decoder(std::move(D)), HasCompleteDecoder(HCD) {}

  /// Appends a new encoding field, constructed in place.
  void addField(unsigned Base, unsigned Width, unsigned Offset) {
    Fields.emplace_back(Base, Width, Offset);
  }

  /// Number of encoding fields recorded so far.
  unsigned numFields() const { return Fields.size(); }

  using const_iterator = std::vector<EncodingField>::const_iterator;

  // Read-only iteration over the recorded fields, in insertion order.
  const_iterator begin() const { return Fields.begin(); }
  const_iterator end() const { return Fields.end(); }
};
131
132typedef std::vector<uint32_t> FixupList;
133typedef std::vector<FixupList> FixupScopeList;
134typedef SmallSetVector<CachedHashString, 16> PredicateSet;
135typedef SmallSetVector<CachedHashString, 16> DecoderSet;
136
137class DecoderTable {
138public:
139 DecoderTable() { Data.reserve(n: 16384); }
140
141 void clear() { Data.clear(); }
142 void push_back(uint8_t Item) { Data.push_back(x: Item); }
143 size_t size() const { return Data.size(); }
144 const uint8_t *data() const { return Data.data(); }
145
146 using const_iterator = std::vector<uint8_t>::const_iterator;
147 const_iterator begin() const { return Data.begin(); }
148 const_iterator end() const { return Data.end(); }
149
150 // Insert a ULEB128 encoded value into the table.
151 void insertULEB128(uint64_t Value) {
152 // Encode and emit the value to filter against.
153 uint8_t Buffer[16];
154 unsigned Len = encodeULEB128(Value, p: Buffer);
155 Data.insert(position: Data.end(), first: Buffer, last: Buffer + Len);
156 }
157
158 // Insert space for `NumToSkip` and return the position
159 // in the table for patching.
160 size_t insertNumToSkip() {
161 size_t Size = Data.size();
162 Data.insert(position: Data.end(), n: getNumToSkipInBytes(), x: 0);
163 return Size;
164 }
165
166 void patchNumToSkip(size_t FixupIdx, uint32_t DestIdx) {
167 // Calculate the distance from the byte following the fixup entry byte
168 // to the destination. The Target is calculated from after the
169 // `getNumToSkipInBytes()`-byte NumToSkip entry itself, so subtract
170 // `getNumToSkipInBytes()` from the displacement here to account for that.
171 assert(DestIdx >= FixupIdx + getNumToSkipInBytes() &&
172 "Expecting a forward jump in the decoding table");
173 uint32_t Delta = DestIdx - FixupIdx - getNumToSkipInBytes();
174 if (!isUIntN(N: 8 * getNumToSkipInBytes(), x: Delta))
175 PrintFatalError(
176 Msg: "disassembler decoding table too large, try --large-decoder-table");
177
178 Data[FixupIdx] = static_cast<uint8_t>(Delta);
179 Data[FixupIdx + 1] = static_cast<uint8_t>(Delta >> 8);
180 if (getNumToSkipInBytes() == 3)
181 Data[FixupIdx + 2] = static_cast<uint8_t>(Delta >> 16);
182 }
183
184private:
185 std::vector<uint8_t> Data;
186};
187
/// Bundles the state that is threaded through table emission: the table
/// bytes, the stack of pending NumToSkip fixups, and the predicate and
/// decoder sets.
struct DecoderTableInfo {
  DecoderTable Table;
  FixupScopeList FixupStack;
  PredicateSet Predicates;
  DecoderSet Decoders;

  // True when exactly one fixup scope is on the stack.
  bool isOutermostScope() const { return FixupStack.size() == 1; }
};
196
/// Pairs an encoding record with the instruction it belongs to, plus an
/// optional HwMode name (empty by default).
struct EncodingAndInst {
  const Record *EncodingDef;
  const CodeGenInstruction *Inst;
  // Non-owning view of the HwMode name; the referenced storage is not kept
  // alive by this struct.
  StringRef HwModeName;

  EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
                  StringRef HwModeName = "")
      : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {}
};
206
/// An (EncodingID, Opcode) pair; a default-constructed value has both
/// members set to zero.
struct EncodingIDAndOpcode {
  unsigned EncodingID = 0;
  unsigned Opcode = 0;

  EncodingIDAndOpcode() = default;
  EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
      : EncodingID(EncodingID), Opcode(Opcode) {}
};
215
// A list of (EncodingID, Opcode) pairs.
using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
// Maps a string key to a set of names.
// NOTE(review): going by the alias name, the key is a decoder namespace and
// the values are HwMode names -- confirm at the use sites. The set holds
// non-owning StringRefs, so the referenced strings must outlive the map.
using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
218
/// The decoder generator backend. `run` is the entry point; the emit*
/// helpers write individual pieces of the generated output to a stream.
class DecoderEmitter {
  const RecordKeeper &RK;
  // Encodings under consideration. EncodingID values used elsewhere in this
  // file index into this vector.
  std::vector<EncodingAndInst> NumberedEncodings;

public:
  DecoderEmitter(const RecordKeeper &R, StringRef PredicateNamespace)
      : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {}

  // Emit the decoder state machine table. Returns a mask of MCD decoder ops
  // that were emitted.
  unsigned emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
                     indent Indent, unsigned BitWidth, StringRef Namespace,
                     const EncodingIDsVec &EncodingIDs) const;
  void emitInstrLenTable(formatted_raw_ostream &OS,
                         ArrayRef<unsigned> InstrLen) const;
  void emitPredicateFunction(formatted_raw_ostream &OS,
                             PredicateSet &Predicates, indent Indent) const;
  void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
                           indent Indent) const;

  // run - Output the code emitter
  void run(raw_ostream &o);

private:
  // Declared after RK and NumberedEncodings; the constructor's initializer
  // list relies on this member order.
  CodeGenTarget Target;

public:
  // Non-owning view of the namespace string passed to the constructor.
  StringRef PredicateNamespace;
};
248
249// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
250// for a bit value.
251//
252// BIT_UNFILTERED is used as the init value for a filter position. It is used
253// only for filter processings.
254struct BitValue {
255 enum bit_value_t : uint8_t {
256 BIT_FALSE, // '0'
257 BIT_TRUE, // '1'
258 BIT_UNSET, // '?', printed as '_'
259 BIT_UNFILTERED // unfiltered, printed as '.'
260 };
261
262 BitValue(bit_value_t V) : V(V) {}
263 explicit BitValue(const Init *Init) {
264 if (const auto *Bit = dyn_cast<BitInit>(Val: Init))
265 V = Bit->getValue() ? BIT_TRUE : BIT_FALSE;
266 else
267 V = BIT_UNSET;
268 }
269 BitValue(const BitsInit &Bits, unsigned Idx) : BitValue(Bits.getBit(Bit: Idx)) {}
270
271 bool isSet() const { return V == BIT_TRUE || V == BIT_FALSE; }
272 bool isUnset() const { return V == BIT_UNSET; }
273 std::optional<uint64_t> getValue() const {
274 if (isSet())
275 return static_cast<uint64_t>(V);
276 return std::nullopt;
277 }
278
279 // For printing a bit value.
280 operator StringRef() const {
281 switch (V) {
282 case BIT_FALSE:
283 return "0";
284 case BIT_TRUE:
285 return "1";
286 case BIT_UNSET:
287 return "_";
288 case BIT_UNFILTERED:
289 return ".";
290 }
291 llvm_unreachable("Unknow bit value");
292 }
293
294 bool operator==(bit_value_t Other) const { return Other == V; }
295 bool operator!=(bit_value_t Other) const { return Other != V; }
296
297private:
298 bit_value_t V;
299};
300
301} // end anonymous namespace
302
303static raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
304 if (Value.EncodingDef != Value.Inst->TheDef)
305 OS << Value.EncodingDef->getName() << ":";
306 OS << Value.Inst->TheDef->getName();
307 return OS;
308}
309
310// Prints the bit value for each position.
311static void dumpBits(raw_ostream &OS, const BitsInit &Bits) {
312 for (const Init *Bit : reverse(C: Bits.getBits()))
313 OS << BitValue(Bit);
314}
315
316static const BitsInit &getBitsField(const Record &Def, StringRef FieldName) {
317 const RecordVal *RV = Def.getValue(Name: FieldName);
318 if (const BitsInit *Bits = dyn_cast<BitsInit>(Val: RV->getValue()))
319 return *Bits;
320
321 // Handle variable length instructions.
322 VarLenInst VLI(cast<DagInit>(Val: RV->getValue()), RV);
323 SmallVector<const Init *, 16> Bits;
324
325 for (const auto &SI : VLI) {
326 if (const BitsInit *BI = dyn_cast<BitsInit>(Val: SI.Value))
327 llvm::append_range(C&: Bits, R: BI->getBits());
328 else if (const BitInit *BI = dyn_cast<BitInit>(Val: SI.Value))
329 Bits.push_back(Elt: BI);
330 else
331 Bits.append(NumInputs: SI.BitWidth, Elt: UnsetInit::get(RK&: Def.getRecords()));
332 }
333
334 return *BitsInit::get(RK&: Def.getRecords(), Range: Bits);
335}
336
337// Representation of the instruction to work on.
338typedef std::vector<BitValue> insn_t;
339
340namespace {
341
// Key reserved in the filter maps for the group of instructions whose segment
// value is not constant. It is the largest uint64_t, so in an ordered map it
// sorts after every other key.
static constexpr uint64_t NO_FIXED_SEGMENTS_SENTINEL =
    std::numeric_limits<uint64_t>::max();

// Forward declaration: Filter refers to FilterChooser before it is defined.
class FilterChooser;
346
347/// Filter - Filter works with FilterChooser to produce the decoding tree for
348/// the ISA.
349///
350/// It is useful to think of a Filter as governing the switch stmts of the
351/// decoding tree in a certain level. Each case stmt delegates to an inferior
352/// FilterChooser to decide what further decoding logic to employ, or in another
353/// words, what other remaining bits to look at. The FilterChooser eventually
354/// chooses a best Filter to do its job.
355///
356/// This recursive scheme ends when the number of Opcodes assigned to the
357/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
358/// the Filter/FilterChooser combo does not know how to distinguish among the
359/// Opcodes assigned.
360///
361/// An example of a conflict is
362///
363/// Conflict:
364/// 111101000.00........00010000....
365/// 111101000.00........0001........
366/// 1111010...00........0001........
367/// 1111010...00....................
368/// 1111010.........................
369/// 1111............................
370/// ................................
371/// VST4q8a 111101000_00________00010000____
372/// VST4q8b 111101000_00________00010000____
373///
374/// The Debug output shows the path that the decoding tree follows to reach the
375/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
376/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
377///
378/// The encoding info in the .td files does not specify this meta information,
379/// which could have been used by the decoder to resolve the conflict. The
380/// decoder could try to decode the even/odd register numbering and assign to
381/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
382/// version and return the Opcode since the two have the same Asm format string.
383class Filter {
384protected:
385 const FilterChooser &Owner; // FilterChooser who owns this filter
386 unsigned StartBit; // the starting bit position
387 unsigned NumBits; // number of bits to filter
388 bool Mixed; // a mixed region contains both set and unset bits
389
390 // Map of well-known segment value to the set of uid's with that value.
391 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions;
392
393 // Set of uid's with non-constant segment values.
394 std::vector<EncodingIDAndOpcode> VariableInstructions;
395
396 // Map of well-known segment value to its delegate.
397 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
398
399 // Number of instructions which fall under FilteredInstructions category.
400 unsigned NumFiltered;
401
402 // Keeps track of the last opcode in the filtered bucket.
403 EncodingIDAndOpcode LastOpcFiltered;
404
405public:
406 Filter(Filter &&f);
407 Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits,
408 bool mixed);
409
410 ~Filter() = default;
411
412 unsigned getNumFiltered() const { return NumFiltered; }
413
414 EncodingIDAndOpcode getSingletonOpc() const {
415 assert(NumFiltered == 1);
416 return LastOpcFiltered;
417 }
418
419 // Return the filter chooser for the group of instructions without constant
420 // segment values.
421 const FilterChooser &getVariableFC() const {
422 assert(NumFiltered == 1 && FilterChooserMap.size() == 1);
423 return *(FilterChooserMap.find(x: NO_FIXED_SEGMENTS_SENTINEL)->second);
424 }
425
426 // Divides the decoding task into sub tasks and delegates them to the
427 // inferior FilterChooser's.
428 //
429 // A special case arises when there's only one entry in the filtered
430 // instructions. In order to unambiguously decode the singleton, we need to
431 // match the remaining undecoded encoding bits against the singleton.
432 void recurse();
433
434 // Emit table entries to decode instructions given a segment or segments of
435 // bits.
436 void emitTableEntry(DecoderTableInfo &TableInfo) const;
437
438 // Returns the number of fanout produced by the filter. More fanout implies
439 // the filter distinguishes more categories of instructions.
440 unsigned usefulness() const;
441}; // end class Filter
442
// States of the finite state machine that FilterChooser's filterProcessor()
// runs to produce the candidate filters.
enum bitAttr_t {
  ATTR_NONE = 0,
  ATTR_FILTERED = 1,
  ATTR_ALL_SET = 2,
  ATTR_ALL_UNSET = 3,
  ATTR_MIXED = 4
};
452
453/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
454/// in order to perform the decoding of instructions at the current level.
455///
456/// Decoding proceeds from the top down. Based on the well-known encoding bits
457/// of instructions available, FilterChooser builds up the possible Filters that
458/// can further the task of decoding by distinguishing among the remaining
459/// candidate instructions.
460///
461/// Once a filter has been chosen, it is called upon to divide the decoding task
462/// into sub-tasks and delegates them to its inferior FilterChoosers for further
463/// processings.
464///
465/// It is useful to think of a Filter as governing the switch stmts of the
466/// decoding tree. And each case is delegated to an inferior FilterChooser to
467/// decide what further remaining bits to look at.
468
469class FilterChooser {
470protected:
471 friend class Filter;
472
473 // Vector of codegen instructions to choose our filter.
474 ArrayRef<EncodingAndInst> AllInstructions;
475
476 // Vector of uid's for this filter chooser to work on.
477 // The first member of the pair is the opcode id being decoded, the second is
478 // the opcode id that should be emitted.
479 ArrayRef<EncodingIDAndOpcode> Opcodes;
480
481 // Lookup table for the operand decoding of instructions.
482 const std::map<unsigned, std::vector<OperandInfo>> &Operands;
483
484 // Vector of candidate filters.
485 std::vector<Filter> Filters;
486
487 // Array of bit values passed down from our parent.
488 // Set to all BIT_UNFILTERED's for Parent == NULL.
489 std::vector<BitValue> FilterBitValues;
490
491 // Links to the FilterChooser above us in the decoding tree.
492 const FilterChooser *Parent;
493
494 // Index of the best filter from Filters.
495 int BestIndex;
496
497 // Width of instructions
498 unsigned BitWidth;
499
500 // Parent emitter
501 const DecoderEmitter *Emitter;
502
503 struct Island {
504 unsigned StartBit;
505 unsigned NumBits;
506 uint64_t FieldVal;
507 };
508
509public:
510 FilterChooser(ArrayRef<EncodingAndInst> Insts,
511 ArrayRef<EncodingIDAndOpcode> IDs,
512 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
513 unsigned BW, const DecoderEmitter *E)
514 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
515 FilterBitValues(BW, BitValue::BIT_UNFILTERED), Parent(nullptr),
516 BestIndex(-1), BitWidth(BW), Emitter(E) {
517 doFilter();
518 }
519
520 FilterChooser(ArrayRef<EncodingAndInst> Insts,
521 ArrayRef<EncodingIDAndOpcode> IDs,
522 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
523 const std::vector<BitValue> &ParentFilterBitValues,
524 const FilterChooser &parent)
525 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
526 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
527 BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
528 doFilter();
529 }
530
531 FilterChooser(const FilterChooser &) = delete;
532 void operator=(const FilterChooser &) = delete;
533
534 unsigned getBitWidth() const { return BitWidth; }
535
536protected:
537 // Populates the insn given the uid.
538 void insnWithID(insn_t &Insn, unsigned Opcode) const {
539 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
540 const BitsInit &Bits = getBitsField(Def: *EncodingDef, FieldName: "Inst");
541 Insn.resize(new_size: std::max(a: BitWidth, b: Bits.getNumBits()), x: BitValue::BIT_UNSET);
542 // We may have a SoftFail bitmask, which specifies a mask where an encoding
543 // may differ from the value in "Inst" and yet still be valid, but the
544 // disassembler should return SoftFail instead of Success.
545 //
546 // This is used for marking UNPREDICTABLE instructions in the ARM world.
547 const RecordVal *RV = EncodingDef->getValue(Name: "SoftFail");
548 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(Val: RV->getValue()) : nullptr;
549 for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
550 if (SFBits && BitValue(*SFBits, i) == BitValue::BIT_TRUE)
551 Insn[i] = BitValue::BIT_UNSET;
552 else
553 Insn[i] = BitValue(Bits, i);
554 }
555 }
556
557 // Populates the field of the insn given the start position and the number of
558 // consecutive bits to scan for.
559 //
560 // Returns a pair of values (indicator, field), where the indicator is false
561 // if there exists any uninitialized bit value in the range and true if all
562 // bits are well-known. The second value is the potentially populated field.
563 std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit,
564 unsigned NumBits) const;
565
566 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
567 /// filter array as a series of chars.
568 void dumpFilterArray(raw_ostream &OS, ArrayRef<BitValue> Filter) const;
569
570 /// dumpStack - dumpStack traverses the filter chooser chain and calls
571 /// dumpFilterArray on each filter chooser up to the top level one.
572 void dumpStack(raw_ostream &OS, const char *prefix) const;
573
574 Filter &bestFilter() {
575 assert(BestIndex != -1 && "BestIndex not set");
576 return Filters[BestIndex];
577 }
578
579 bool PositionFiltered(unsigned Idx) const {
580 return FilterBitValues[Idx].isSet();
581 }
582
583 // Calculates the island(s) needed to decode the instruction.
584 // This returns a list of undecoded bits of an instructions, for example,
585 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
586 // decoded bits in order to verify that the instruction matches the Opcode.
587 unsigned getIslands(std::vector<Island> &Islands, const insn_t &Insn) const;
588
589 // Emits code to check the Predicates member of an instruction are true.
590 // Returns true if predicate matches were emitted, false otherwise.
591 bool emitPredicateMatch(raw_ostream &OS, unsigned Opc) const;
592 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
593 raw_ostream &OS) const;
594
595 bool doesOpcodeNeedPredicate(unsigned Opc) const;
596 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
597 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
598
599 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
600
601 // Emits table entries to decode the singleton.
602 void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
603 EncodingIDAndOpcode Opc) const;
604
605 // Emits code to decode the singleton, and then to decode the rest.
606 void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
607 const Filter &Best) const;
608
609 bool emitBinaryParser(raw_ostream &OS, indent Indent,
610 const OperandInfo &OpInfo) const;
611
612 bool emitDecoder(raw_ostream &OS, indent Indent, unsigned Opc) const;
613 std::pair<unsigned, bool> getDecoderIndex(DecoderSet &Decoders,
614 unsigned Opc) const;
615
616 // Assign a single filter and run with it.
617 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
618
619 // reportRegion is a helper function for filterProcessor to mark a region as
620 // eligible for use as a filter region.
621 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
622 bool AllowMixed);
623
624 // FilterProcessor scans the well-known encoding bits of the instructions and
625 // builds up a list of candidate filters. It chooses the best filter and
626 // recursively descends down the decoding tree.
627 bool filterProcessor(bool AllowMixed, bool Greedy = true);
628
629 // Decides on the best configuration of filter(s) to use in order to decode
630 // the instructions. A conflict of instructions may occur, in which case we
631 // dump the conflict set to the standard error.
632 void doFilter();
633
634public:
635 // emitTableEntries - Emit state machine entries to decode our share of
636 // instructions.
637 void emitTableEntries(DecoderTableInfo &TableInfo) const;
638};
639
640} // end anonymous namespace
641
642///////////////////////////
643// //
644// Filter Implementation //
645// //
646///////////////////////////
647
648Filter::Filter(Filter &&f)
649 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
650 FilteredInstructions(std::move(f.FilteredInstructions)),
651 VariableInstructions(std::move(f.VariableInstructions)),
652 FilterChooserMap(std::move(f.FilterChooserMap)),
653 NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {}
654
655Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits,
656 bool mixed)
657 : Owner(owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
658 assert(StartBit + NumBits - 1 < Owner.BitWidth);
659
660 NumFiltered = 0;
661 LastOpcFiltered = {0, 0};
662
663 for (const auto &OpcPair : Owner.Opcodes) {
664 insn_t Insn;
665
666 // Populates the insn given the uid.
667 Owner.insnWithID(Insn, Opcode: OpcPair.EncodingID);
668
669 // Scans the segment for possibly well-specified encoding bits.
670 auto [Ok, Field] = Owner.fieldFromInsn(Insn, StartBit, NumBits);
671
672 if (Ok) {
673 // The encoding bits are well-known. Lets add the uid of the
674 // instruction into the bucket keyed off the constant field value.
675 LastOpcFiltered = OpcPair;
676 FilteredInstructions[Field].push_back(x: LastOpcFiltered);
677 ++NumFiltered;
678 } else {
679 // Some of the encoding bit(s) are unspecified. This contributes to
680 // one additional member of "Variable" instructions.
681 VariableInstructions.push_back(x: OpcPair);
682 }
683 }
684
685 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) &&
686 "Filter returns no instruction categories");
687}
688
689// Divides the decoding task into sub tasks and delegates them to the
690// inferior FilterChooser's.
691//
692// A special case arises when there's only one entry in the filtered
693// instructions. In order to unambiguously decode the singleton, we need to
694// match the remaining undecoded encoding bits against the singleton.
695void Filter::recurse() {
696 // Starts by inheriting our parent filter chooser's filter bit values.
697 std::vector<BitValue> BitValueArray(Owner.FilterBitValues);
698
699 if (!VariableInstructions.empty()) {
700 // Conservatively marks each segment position as BIT_UNSET.
701 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
702 BitValueArray[StartBit + bitIndex] = BitValue::BIT_UNSET;
703
704 // Delegates to an inferior filter chooser for further processing on this
705 // group of instructions whose segment values are variable.
706 FilterChooserMap.try_emplace(
707 k: NO_FIXED_SEGMENTS_SENTINEL,
708 args: std::make_unique<FilterChooser>(args: Owner.AllInstructions,
709 args&: VariableInstructions, args: Owner.Operands,
710 args&: BitValueArray, args: Owner));
711 }
712
713 // No need to recurse for a singleton filtered instruction.
714 // See also Filter::emit*().
715 if (getNumFiltered() == 1) {
716 assert(FilterChooserMap.size() == 1);
717 return;
718 }
719
720 // Otherwise, create sub choosers.
721 for (const auto &Inst : FilteredInstructions) {
722 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
723 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
724 BitValueArray[StartBit + bitIndex] = Inst.first & (1ULL << bitIndex)
725 ? BitValue::BIT_TRUE
726 : BitValue::BIT_FALSE;
727
728 // Delegates to an inferior filter chooser for further processing on this
729 // category of instructions.
730 FilterChooserMap.try_emplace(
731 k: Inst.first,
732 args: std::make_unique<FilterChooser>(args: Owner.AllInstructions, args: Inst.second,
733 args: Owner.Operands, args&: BitValueArray, args: Owner));
734 }
735}
736
737static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
738 uint32_t DestIdx) {
739 // Any NumToSkip fixups in the current scope can resolve to the
740 // current location.
741 for (uint32_t FixupIdx : Fixups)
742 Table.patchNumToSkip(FixupIdx, DestIdx);
743}
744
745// Emit table entries to decode instructions given a segment or segments
746// of bits.
747void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
748 assert(isUInt<8>(NumBits) && "NumBits overflowed uint8 table entry!");
749 TableInfo.Table.push_back(Item: MCD::OPC_ExtractField);
750
751 TableInfo.Table.insertULEB128(Value: StartBit);
752 TableInfo.Table.push_back(Item: NumBits);
753
754 // If the NO_FIXED_SEGMENTS_SENTINEL is present, we need to add a new scope
755 // for this filter. Otherwise, we can skip adding a new scope and any
756 // patching added will automatically be added to the enclosing scope.
757
758 // If NO_FIXED_SEGMENTS_SENTINEL is present, it will be last entry in
759 // FilterChooserMap.
760
761 const uint64_t LastFilter = FilterChooserMap.rbegin()->first;
762 bool HasFallthrough = LastFilter == NO_FIXED_SEGMENTS_SENTINEL;
763 if (HasFallthrough)
764 TableInfo.FixupStack.emplace_back();
765
766 DecoderTable &Table = TableInfo.Table;
767
768 size_t PrevFilter = 0;
769 for (const auto &[FilterVal, Delegate] : FilterChooserMap) {
770 // Field value NO_FIXED_SEGMENTS_SENTINEL implies a non-empty set of
771 // variable instructions. See also recurse().
772 if (FilterVal == NO_FIXED_SEGMENTS_SENTINEL) {
773 // Each scope should always have at least one filter value to check
774 // for.
775 assert(PrevFilter != 0 && "empty filter set!");
776 FixupList &CurScope = TableInfo.FixupStack.back();
777 // Resolve any NumToSkip fixups in the current scope.
778 resolveTableFixups(Table, Fixups: CurScope, DestIdx: Table.size());
779
780 // Delete the scope we have added here.
781 TableInfo.FixupStack.pop_back();
782
783 PrevFilter = 0; // Don't re-process the filter's fallthrough.
784 } else {
785 // The last filtervalue emitted can be OPC_FilterValue if we are at
786 // outermost scope.
787 const uint8_t DecoderOp =
788 FilterVal == LastFilter && TableInfo.isOutermostScope()
789 ? MCD::OPC_FilterValueOrFail
790 : MCD::OPC_FilterValue;
791 Table.push_back(Item: DecoderOp);
792 Table.insertULEB128(Value: FilterVal);
793 if (DecoderOp == MCD::OPC_FilterValue) {
794 // Reserve space for the NumToSkip entry. We'll backpatch the value
795 // later.
796 PrevFilter = Table.insertNumToSkip();
797 } else {
798 PrevFilter = 0;
799 }
800 }
801
802 // We arrive at a category of instructions with the same segment value.
803 // Now delegate to the sub filter chooser for further decodings.
804 // The case may fallthrough, which happens if the remaining well-known
805 // encoding bits do not match exactly.
806 Delegate->emitTableEntries(TableInfo);
807
808 // Now that we've emitted the body of the handler, update the NumToSkip
809 // of the filter itself to be able to skip forward when false.
810 if (PrevFilter)
811 Table.patchNumToSkip(FixupIdx: PrevFilter, DestIdx: Table.size());
812 }
813
814 // If there is no fallthrough and the final filter was not in the outermost
815 // scope, then it must be fixed up according to the enclosing scope rather
816 // than the current position.
817 if (PrevFilter)
818 TableInfo.FixupStack.back().push_back(x: PrevFilter);
819}
820
821// Returns the number of fanout produced by the filter. More fanout implies
822// the filter distinguishes more categories of instructions.
823unsigned Filter::usefulness() const {
824 return FilteredInstructions.size() + VariableInstructions.empty();
825}
826
827//////////////////////////////////
828// //
829// Filterchooser Implementation //
830// //
831//////////////////////////////////
832
833// Emit the decoder state machine table. Returns a mask of MCD decoder ops
834// that were emitted.
835unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
836 DecoderTable &Table, indent Indent,
837 unsigned BitWidth, StringRef Namespace,
838 const EncodingIDsVec &EncodingIDs) const {
839 // We'll need to be able to map from a decoded opcode into the corresponding
840 // EncodingID for this specific combination of BitWidth and Namespace. This
841 // is used below to index into NumberedEncodings.
842 DenseMap<unsigned, unsigned> OpcodeToEncodingID;
843 OpcodeToEncodingID.reserve(NumEntries: EncodingIDs.size());
844 for (const auto &EI : EncodingIDs)
845 OpcodeToEncodingID[EI.Opcode] = EI.EncodingID;
846
847 OS << Indent << "static const uint8_t DecoderTable" << Namespace << BitWidth
848 << "[] = {\n";
849
850 Indent += 2;
851
852 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted.
853 auto emitULEB128 = [](DecoderTable::const_iterator &I,
854 formatted_raw_ostream &OS) {
855 while (*I >= 128)
856 OS << (unsigned)*I++ << ", ";
857 OS << (unsigned)*I++ << ", ";
858 };
859
860 // Emit `getNumToSkipInBytes()`-byte numtoskip value to OS, returning the
861 // NumToSkip value.
862 auto emitNumToSkip = [](DecoderTable::const_iterator &I,
863 formatted_raw_ostream &OS) {
864 uint8_t Byte = *I++;
865 uint32_t NumToSkip = Byte;
866 OS << (unsigned)Byte << ", ";
867 Byte = *I++;
868 OS << (unsigned)Byte << ", ";
869 NumToSkip |= Byte << 8;
870 if (getNumToSkipInBytes() == 3) {
871 Byte = *I++;
872 OS << (unsigned)(Byte) << ", ";
873 NumToSkip |= Byte << 16;
874 }
875 return NumToSkip;
876 };
877
878 // FIXME: We may be able to use the NumToSkip values to recover
879 // appropriate indentation levels.
880 DecoderTable::const_iterator I = Table.begin();
881 DecoderTable::const_iterator E = Table.end();
882 const uint8_t *const EndPtr = Table.data() + Table.size();
883
884 auto emitNumToSkipComment = [&](uint32_t NumToSkip, bool InComment = false) {
885 uint32_t Index = ((I - Table.begin()) + NumToSkip);
886 OS << (InComment ? ", " : "// ");
887 OS << "Skip to: " << Index;
888 if (*(I + NumToSkip) == MCD::OPC_Fail)
889 OS << " (Fail)";
890 };
891
892 unsigned OpcodeMask = 0;
893
894 while (I != E) {
895 assert(I < E && "incomplete decode table entry!");
896
897 uint64_t Pos = I - Table.begin();
898 OS << "/* " << Pos << " */";
899 OS.PadToColumn(NewCol: 12);
900
901 const uint8_t DecoderOp = *I++;
902 OpcodeMask |= (1 << DecoderOp);
903 switch (DecoderOp) {
904 default:
905 PrintFatalError(Msg: "Invalid decode table opcode: " + Twine((int)DecoderOp) +
906 " at index " + Twine(Pos));
907 case MCD::OPC_ExtractField: {
908 OS << Indent << "MCD::OPC_ExtractField, ";
909
910 // ULEB128 encoded start value.
911 const char *ErrMsg = nullptr;
912 unsigned Start = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg);
913 assert(ErrMsg == nullptr && "ULEB128 value too large!");
914 emitULEB128(I, OS);
915
916 unsigned Len = *I++;
917 OS << Len << ", // Inst{";
918 if (Len > 1)
919 OS << (Start + Len - 1) << "-";
920 OS << Start << "} ...\n";
921 break;
922 }
923 case MCD::OPC_FilterValue:
924 case MCD::OPC_FilterValueOrFail: {
925 bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail;
926 OS << Indent << "MCD::OPC_FilterValue" << (IsFail ? "OrFail, " : ", ");
927 // The filter value is ULEB128 encoded.
928 emitULEB128(I, OS);
929
930 if (!IsFail) {
931 uint32_t NumToSkip = emitNumToSkip(I, OS);
932 emitNumToSkipComment(NumToSkip);
933 }
934 OS << '\n';
935 break;
936 }
937 case MCD::OPC_CheckField:
938 case MCD::OPC_CheckFieldOrFail: {
939 bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail;
940 OS << Indent << "MCD::OPC_CheckField" << (IsFail ? "OrFail, " : ", ");
941 // ULEB128 encoded start value.
942 emitULEB128(I, OS);
943 // 8-bit length.
944 unsigned Len = *I++;
945 OS << Len << ", ";
946 // ULEB128 encoded field value.
947 emitULEB128(I, OS);
948
949 if (!IsFail) {
950 uint32_t NumToSkip = emitNumToSkip(I, OS);
951 emitNumToSkipComment(NumToSkip);
952 }
953 OS << '\n';
954 break;
955 }
956 case MCD::OPC_CheckPredicate:
957 case MCD::OPC_CheckPredicateOrFail: {
958 bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail;
959
960 OS << Indent << "MCD::OPC_CheckPredicate" << (IsFail ? "OrFail, " : ", ");
961 emitULEB128(I, OS);
962
963 if (!IsFail) {
964 uint32_t NumToSkip = emitNumToSkip(I, OS);
965 emitNumToSkipComment(NumToSkip);
966 }
967 OS << '\n';
968 break;
969 }
970 case MCD::OPC_Decode:
971 case MCD::OPC_TryDecode:
972 case MCD::OPC_TryDecodeOrFail: {
973 bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail;
974 bool IsTry = DecoderOp == MCD::OPC_TryDecode || IsFail;
975 // Decode the Opcode value.
976 const char *ErrMsg = nullptr;
977 unsigned Opc = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg);
978 assert(ErrMsg == nullptr && "ULEB128 value too large!");
979
980 OS << Indent << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode"
981 << (IsFail ? "OrFail, " : ", ");
982 emitULEB128(I, OS);
983
984 // Decoder index.
985 unsigned DecodeIdx = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg);
986 assert(ErrMsg == nullptr && "ULEB128 value too large!");
987 emitULEB128(I, OS);
988
989 auto EncI = OpcodeToEncodingID.find(Val: Opc);
990 assert(EncI != OpcodeToEncodingID.end() && "no encoding entry");
991 auto EncodingID = EncI->second;
992
993 if (!IsTry) {
994 OS << "// Opcode: " << NumberedEncodings[EncodingID]
995 << ", DecodeIdx: " << DecodeIdx << '\n';
996 break;
997 }
998
999 // Fallthrough for OPC_TryDecode.
1000 if (!IsFail) {
1001 uint32_t NumToSkip = emitNumToSkip(I, OS);
1002 OS << "// Opcode: " << NumberedEncodings[EncodingID]
1003 << ", DecodeIdx: " << DecodeIdx;
1004 emitNumToSkipComment(NumToSkip, /*InComment=*/true);
1005 }
1006 OS << '\n';
1007 break;
1008 }
1009 case MCD::OPC_SoftFail: {
1010 OS << Indent << "MCD::OPC_SoftFail, ";
1011 // Decode the positive mask.
1012 const char *ErrMsg = nullptr;
1013 uint64_t PositiveMask = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg);
1014 assert(ErrMsg == nullptr && "ULEB128 value too large!");
1015 emitULEB128(I, OS);
1016
1017 // Decode the negative mask.
1018 uint64_t NegativeMask = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg);
1019 assert(ErrMsg == nullptr && "ULEB128 value too large!");
1020 emitULEB128(I, OS);
1021 OS << "// +ve mask: 0x";
1022 OS.write_hex(N: PositiveMask);
1023 OS << ", -ve mask: 0x";
1024 OS.write_hex(N: NegativeMask);
1025 OS << '\n';
1026 break;
1027 }
1028 case MCD::OPC_Fail:
1029 OS << Indent << "MCD::OPC_Fail,\n";
1030 break;
1031 }
1032 }
1033 OS << Indent << "0\n";
1034
1035 Indent -= 2;
1036
1037 OS << Indent << "};\n\n";
1038
1039 return OpcodeMask;
1040}
1041
1042void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
1043 ArrayRef<unsigned> InstrLen) const {
1044 OS << "static const uint8_t InstrLenTable[] = {\n";
1045 for (unsigned Len : InstrLen)
1046 OS << Len << ",\n";
1047 OS << "};\n\n";
1048}
1049
1050void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
1051 PredicateSet &Predicates,
1052 indent Indent) const {
1053 // The predicate function is just a big switch statement based on the
1054 // input predicate index.
1055 OS << Indent << "static bool checkDecoderPredicate(unsigned Idx, "
1056 << "const FeatureBitset &Bits) {\n";
1057 Indent += 2;
1058 OS << Indent << "switch (Idx) {\n";
1059 OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
1060 for (const auto &[Index, Predicate] : enumerate(First&: Predicates)) {
1061 OS << Indent << "case " << Index << ":\n";
1062 OS << Indent + 2 << "return (" << Predicate << ");\n";
1063 }
1064 OS << Indent << "}\n";
1065 Indent -= 2;
1066 OS << Indent << "}\n\n";
1067}
1068
1069void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
1070 DecoderSet &Decoders,
1071 indent Indent) const {
1072 // The decoder function is just a big switch statement or a table of function
1073 // pointers based on the input decoder index.
1074
1075 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
1076 // It would be better for emitBinaryParser to use a 64-bit tmp whenever
1077 // possible but fall back to an InsnType-sized tmp for truly large fields.
1078 StringRef TmpTypeDecl =
1079 "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
1080 "InsnType, uint64_t>;\n";
1081 StringRef DecodeParams =
1082 "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
1083 "MCDisassembler *Decoder, bool &DecodeComplete";
1084
1085 if (UseFnTableInDecodeToMCInst) {
1086 // Emit a function for each case first.
1087 for (const auto &[Index, Decoder] : enumerate(First&: Decoders)) {
1088 OS << Indent << "template <typename InsnType>\n";
1089 OS << Indent << "DecodeStatus decodeFn" << Index << "(" << DecodeParams
1090 << ") {\n";
1091 Indent += 2;
1092 OS << Indent << TmpTypeDecl;
1093 OS << Indent << "[[maybe_unused]] TmpType tmp;\n";
1094 OS << Decoder;
1095 OS << Indent << "return S;\n";
1096 Indent -= 2;
1097 OS << Indent << "}\n\n";
1098 }
1099 }
1100
1101 OS << Indent << "// Handling " << Decoders.size() << " cases.\n";
1102 OS << Indent << "template <typename InsnType>\n";
1103 OS << Indent << "static DecodeStatus decodeToMCInst(unsigned Idx, "
1104 << DecodeParams << ") {\n";
1105 Indent += 2;
1106 OS << Indent << "DecodeComplete = true;\n";
1107
1108 if (UseFnTableInDecodeToMCInst) {
1109 // Build a table of function pointers.
1110 OS << Indent << "using DecodeFnTy = DecodeStatus (*)(" << DecodeParams
1111 << ");\n";
1112 OS << Indent << "static constexpr DecodeFnTy decodeFnTable[] = {\n";
1113 for (size_t Index : llvm::seq(Size: Decoders.size()))
1114 OS << Indent + 2 << "decodeFn" << Index << ",\n";
1115 OS << Indent << "};\n";
1116 OS << Indent << "if (Idx >= " << Decoders.size() << ")\n";
1117 OS << Indent + 2 << "llvm_unreachable(\"Invalid index!\");\n";
1118 OS << Indent
1119 << "return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
1120 "DecodeComplete);\n";
1121 } else {
1122 OS << Indent << TmpTypeDecl;
1123 OS << Indent << "TmpType tmp;\n";
1124 OS << Indent << "switch (Idx) {\n";
1125 OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
1126 for (const auto &[Index, Decoder] : enumerate(First&: Decoders)) {
1127 OS << Indent << "case " << Index << ":\n";
1128 OS << Decoder;
1129 OS << Indent + 2 << "return S;\n";
1130 }
1131 OS << Indent << "}\n";
1132 }
1133 Indent -= 2;
1134 OS << Indent << "}\n";
1135}
1136
1137// Populates the field of the insn given the start position and the number of
1138// consecutive bits to scan for.
1139//
1140// Returns a pair of values (indicator, field), where the indicator is false
1141// if there exists any uninitialized bit value in the range and true if all
1142// bits are well-known. The second value is the potentially populated field.
1143std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn,
1144 unsigned StartBit,
1145 unsigned NumBits) const {
1146 uint64_t Field = 0;
1147
1148 for (unsigned i = 0; i < NumBits; ++i) {
1149 if (Insn[StartBit + i] == BitValue::BIT_UNSET)
1150 return {false, Field};
1151
1152 if (Insn[StartBit + i] == BitValue::BIT_TRUE)
1153 Field = Field | (1ULL << i);
1154 }
1155
1156 return {true, Field};
1157}
1158
1159/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
1160/// filter array as a series of chars.
1161void FilterChooser::dumpFilterArray(raw_ostream &OS,
1162 ArrayRef<BitValue> Filter) const {
1163 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--)
1164 OS << Filter[bitIndex - 1];
1165}
1166
1167/// dumpStack - dumpStack traverses the filter chooser chain and calls
1168/// dumpFilterArray on each filter chooser up to the top level one.
1169void FilterChooser::dumpStack(raw_ostream &OS, const char *prefix) const {
1170 const FilterChooser *current = this;
1171
1172 while (current) {
1173 OS << prefix;
1174 dumpFilterArray(OS, Filter: current->FilterBitValues);
1175 OS << '\n';
1176 current = current->Parent;
1177 }
1178}
1179
1180// Calculates the island(s) needed to decode the instruction.
1181// This returns a list of undecoded bits of an instructions, for example,
1182// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
1183// decoded bits in order to verify that the instruction matches the Opcode.
1184unsigned FilterChooser::getIslands(std::vector<Island> &Islands,
1185 const insn_t &Insn) const {
1186 uint64_t FieldVal;
1187 unsigned StartBit;
1188
1189 // 0: Init
1190 // 1: Water (the bit value does not affect decoding)
1191 // 2: Island (well-known bit value needed for decoding)
1192 unsigned State = 0;
1193
1194 for (unsigned i = 0; i < BitWidth; ++i) {
1195 std::optional<uint64_t> Val = Insn[i].getValue();
1196 bool Filtered = PositionFiltered(Idx: i);
1197 switch (State) {
1198 default:
1199 llvm_unreachable("Unreachable code!");
1200 case 0:
1201 case 1:
1202 if (Filtered || !Val) {
1203 State = 1; // Still in Water
1204 } else {
1205 State = 2; // Into the Island
1206 StartBit = i;
1207 FieldVal = *Val;
1208 }
1209 break;
1210 case 2:
1211 if (Filtered || !Val) {
1212 State = 1; // Into the Water
1213 Islands.push_back(x: {.StartBit: StartBit, .NumBits: i - StartBit, .FieldVal: FieldVal});
1214 } else {
1215 State = 2; // Still in Island
1216 FieldVal |= *Val << (i - StartBit);
1217 }
1218 break;
1219 }
1220 }
1221 // If we are still in Island after the loop, do some housekeeping.
1222 if (State == 2)
1223 Islands.push_back(x: {.StartBit: StartBit, .NumBits: BitWidth - StartBit, .FieldVal: FieldVal});
1224
1225 return Islands.size();
1226}
1227
1228bool FilterChooser::emitBinaryParser(raw_ostream &OS, indent Indent,
1229 const OperandInfo &OpInfo) const {
1230 const std::string &Decoder = OpInfo.Decoder;
1231
1232 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0;
1233
1234 if (UseInsertBits) {
1235 OS << Indent << "tmp = 0x";
1236 OS.write_hex(N: OpInfo.InitValue);
1237 OS << ";\n";
1238 }
1239
1240 for (const EncodingField &EF : OpInfo) {
1241 OS << Indent;
1242 if (UseInsertBits)
1243 OS << "insertBits(tmp, ";
1244 else
1245 OS << "tmp = ";
1246 OS << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')';
1247 if (UseInsertBits)
1248 OS << ", " << EF.Offset << ", " << EF.Width << ')';
1249 else if (EF.Offset != 0)
1250 OS << " << " << EF.Offset;
1251 OS << ";\n";
1252 }
1253
1254 bool OpHasCompleteDecoder;
1255 if (!Decoder.empty()) {
1256 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
1257 OS << Indent << "if (!Check(S, " << Decoder
1258 << "(MI, tmp, Address, Decoder))) { "
1259 << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ")
1260 << "return MCDisassembler::Fail; }\n";
1261 } else {
1262 OpHasCompleteDecoder = true;
1263 OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
1264 }
1265 return OpHasCompleteDecoder;
1266}
1267
1268bool FilterChooser::emitDecoder(raw_ostream &OS, indent Indent,
1269 unsigned Opc) const {
1270 bool HasCompleteDecoder = true;
1271
1272 for (const auto &Op : Operands.find(x: Opc)->second) {
1273 // If a custom instruction decoder was specified, use that.
1274 if (Op.numFields() == 0 && !Op.Decoder.empty()) {
1275 HasCompleteDecoder = Op.HasCompleteDecoder;
1276 OS << Indent << "if (!Check(S, " << Op.Decoder
1277 << "(MI, insn, Address, Decoder))) { "
1278 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
1279 << "return MCDisassembler::Fail; }\n";
1280 break;
1281 }
1282
1283 HasCompleteDecoder &= emitBinaryParser(OS, Indent, OpInfo: Op);
1284 }
1285 return HasCompleteDecoder;
1286}
1287
1288std::pair<unsigned, bool> FilterChooser::getDecoderIndex(DecoderSet &Decoders,
1289 unsigned Opc) const {
1290 // Build up the predicate string.
1291 SmallString<256> Decoder;
1292 // FIXME: emitDecoder() function can take a buffer directly rather than
1293 // a stream.
1294 raw_svector_ostream S(Decoder);
1295 indent Indent(UseFnTableInDecodeToMCInst ? 2 : 4);
1296 bool HasCompleteDecoder = emitDecoder(OS&: S, Indent, Opc);
1297
1298 // Using the full decoder string as the key value here is a bit
1299 // heavyweight, but is effective. If the string comparisons become a
1300 // performance concern, we can implement a mangling of the predicate
1301 // data easily enough with a map back to the actual string. That's
1302 // overkill for now, though.
1303
1304 // Make sure the predicate is in the table.
1305 Decoders.insert(X: CachedHashString(Decoder));
1306 // Now figure out the index for when we write out the table.
1307 DecoderSet::const_iterator P = find(Range&: Decoders, Val: Decoder.str());
1308 return {(unsigned)(P - Decoders.begin()), HasCompleteDecoder};
1309}
1310
1311// If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
1312bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
1313 raw_ostream &OS) const {
1314 if (const auto *D = dyn_cast<DefInit>(Val: &Val)) {
1315 if (!D->getDef()->isSubClassOf(Name: "SubtargetFeature"))
1316 return true;
1317 OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
1318 << "]";
1319 return false;
1320 }
1321 if (const auto *D = dyn_cast<DagInit>(Val: &Val)) {
1322 std::string Op = D->getOperator()->getAsString();
1323 if (Op == "not" && D->getNumArgs() == 1) {
1324 OS << '!';
1325 return emitPredicateMatchAux(Val: *D->getArg(Num: 0), ParenIfBinOp: true, OS);
1326 }
1327 if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
1328 bool Paren = D->getNumArgs() > 1 && std::exchange(obj&: ParenIfBinOp, new_val: true);
1329 if (Paren)
1330 OS << '(';
1331 ListSeparator LS(Op == "any_of" ? " || " : " && ");
1332 for (auto *Arg : D->getArgs()) {
1333 OS << LS;
1334 if (emitPredicateMatchAux(Val: *Arg, ParenIfBinOp, OS))
1335 return true;
1336 }
1337 if (Paren)
1338 OS << ')';
1339 return false;
1340 }
1341 }
1342 return true;
1343}
1344
1345bool FilterChooser::emitPredicateMatch(raw_ostream &OS, unsigned Opc) const {
1346 const ListInit *Predicates =
1347 AllInstructions[Opc].EncodingDef->getValueAsListInit(FieldName: "Predicates");
1348 bool IsFirstEmission = true;
1349 for (unsigned i = 0; i < Predicates->size(); ++i) {
1350 const Record *Pred = Predicates->getElementAsRecord(Idx: i);
1351 if (!Pred->getValue(Name: "AssemblerMatcherPredicate"))
1352 continue;
1353
1354 if (!isa<DagInit>(Val: Pred->getValue(Name: "AssemblerCondDag")->getValue()))
1355 continue;
1356
1357 if (!IsFirstEmission)
1358 OS << " && ";
1359 if (emitPredicateMatchAux(Val: *Pred->getValueAsDag(FieldName: "AssemblerCondDag"),
1360 ParenIfBinOp: Predicates->size() > 1, OS))
1361 PrintFatalError(ErrorLoc: Pred->getLoc(), Msg: "Invalid AssemblerCondDag!");
1362 IsFirstEmission = false;
1363 }
1364 return !Predicates->empty();
1365}
1366
1367bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
1368 const ListInit *Predicates =
1369 AllInstructions[Opc].EncodingDef->getValueAsListInit(FieldName: "Predicates");
1370 for (unsigned i = 0; i < Predicates->size(); ++i) {
1371 const Record *Pred = Predicates->getElementAsRecord(Idx: i);
1372 if (!Pred->getValue(Name: "AssemblerMatcherPredicate"))
1373 continue;
1374
1375 if (isa<DagInit>(Val: Pred->getValue(Name: "AssemblerCondDag")->getValue()))
1376 return true;
1377 }
1378 return false;
1379}
1380
1381unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
1382 StringRef Predicate) const {
1383 // Using the full predicate string as the key value here is a bit
1384 // heavyweight, but is effective. If the string comparisons become a
1385 // performance concern, we can implement a mangling of the predicate
1386 // data easily enough with a map back to the actual string. That's
1387 // overkill for now, though.
1388
1389 // Make sure the predicate is in the table.
1390 TableInfo.Predicates.insert(X: CachedHashString(Predicate));
1391 // Now figure out the index for when we write out the table.
1392 PredicateSet::const_iterator P = find(Range&: TableInfo.Predicates, Val: Predicate);
1393 return (unsigned)(P - TableInfo.Predicates.begin());
1394}
1395
1396void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
1397 unsigned Opc) const {
1398 if (!doesOpcodeNeedPredicate(Opc))
1399 return;
1400
1401 // Build up the predicate string.
1402 SmallString<256> Predicate;
1403 // FIXME: emitPredicateMatch() functions can take a buffer directly rather
1404 // than a stream.
1405 raw_svector_ostream PS(Predicate);
1406 emitPredicateMatch(OS&: PS, Opc);
1407
1408 // Figure out the index into the predicate table for the predicate just
1409 // computed.
1410 unsigned PIdx = getPredicateIndex(TableInfo, Predicate: PS.str());
1411
1412 const uint8_t DecoderOp = TableInfo.isOutermostScope()
1413 ? MCD::OPC_CheckPredicateOrFail
1414 : MCD::OPC_CheckPredicate;
1415 TableInfo.Table.push_back(Item: DecoderOp);
1416 TableInfo.Table.insertULEB128(Value: PIdx);
1417
1418 if (DecoderOp == MCD::OPC_CheckPredicate) {
1419 // Push location for NumToSkip backpatching.
1420 TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip());
1421 }
1422}
1423
1424void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
1425 unsigned Opc) const {
1426 const Record *EncodingDef = AllInstructions[Opc].EncodingDef;
1427 const RecordVal *RV = EncodingDef->getValue(Name: "SoftFail");
1428 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(Val: RV->getValue()) : nullptr;
1429
1430 if (!SFBits)
1431 return;
1432 const BitsInit *InstBits = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
1433
1434 APInt PositiveMask(BitWidth, 0ULL);
1435 APInt NegativeMask(BitWidth, 0ULL);
1436 for (unsigned i = 0; i < BitWidth; ++i) {
1437 BitValue B(*SFBits, i);
1438 BitValue IB(*InstBits, i);
1439
1440 if (B != BitValue::BIT_TRUE)
1441 continue;
1442
1443 if (IB == BitValue::BIT_FALSE) {
1444 // The bit is meant to be false, so emit a check to see if it is true.
1445 PositiveMask.setBit(i);
1446 } else if (IB == BitValue::BIT_TRUE) {
1447 // The bit is meant to be true, so emit a check to see if it is false.
1448 NegativeMask.setBit(i);
1449 } else {
1450 // The bit is not set; this must be an error!
1451 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
1452 << AllInstructions[Opc] << " is set but Inst{" << i
1453 << "} is unset!\n"
1454 << " - You can only mark a bit as SoftFail if it is fully defined"
1455 << " (1/0 - not '?') in Inst\n";
1456 return;
1457 }
1458 }
1459
1460 bool NeedPositiveMask = PositiveMask.getBoolValue();
1461 bool NeedNegativeMask = NegativeMask.getBoolValue();
1462
1463 if (!NeedPositiveMask && !NeedNegativeMask)
1464 return;
1465
1466 TableInfo.Table.push_back(Item: MCD::OPC_SoftFail);
1467 TableInfo.Table.insertULEB128(Value: PositiveMask.getZExtValue());
1468 TableInfo.Table.insertULEB128(Value: NegativeMask.getZExtValue());
1469}
1470
1471// Emits table entries to decode the singleton.
1472void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1473 EncodingIDAndOpcode Opc) const {
1474 std::vector<Island> Islands;
1475 insn_t Insn;
1476 insnWithID(Insn, Opcode: Opc.EncodingID);
1477
1478 // Look for islands of undecoded bits of the singleton.
1479 getIslands(Islands, Insn);
1480
1481 // Emit the predicate table entry if one is needed.
1482 emitPredicateTableEntry(TableInfo, Opc: Opc.EncodingID);
1483
1484 // Check any additional encoding fields needed.
1485 for (const Island &Ilnd : reverse(C&: Islands)) {
1486 unsigned NumBits = Ilnd.NumBits;
1487 assert(isUInt<8>(NumBits) && "NumBits overflowed uint8 table entry!");
1488 const uint8_t DecoderOp = TableInfo.isOutermostScope()
1489 ? MCD::OPC_CheckFieldOrFail
1490 : MCD::OPC_CheckField;
1491 TableInfo.Table.push_back(Item: DecoderOp);
1492
1493 TableInfo.Table.insertULEB128(Value: Ilnd.StartBit);
1494 TableInfo.Table.push_back(Item: NumBits);
1495 TableInfo.Table.insertULEB128(Value: Ilnd.FieldVal);
1496
1497 if (DecoderOp == MCD::OPC_CheckField) {
1498 // Allocate space in the table for fixup so all our relative position
1499 // calculations work OK even before we fully resolve the real value here.
1500
1501 // Push location for NumToSkip backpatching.
1502 TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip());
1503 }
1504 }
1505
1506 // Check for soft failure of the match.
1507 emitSoftFailTableEntry(TableInfo, Opc: Opc.EncodingID);
1508
1509 auto [DIdx, HasCompleteDecoder] =
1510 getDecoderIndex(Decoders&: TableInfo.Decoders, Opc: Opc.EncodingID);
1511
1512 // Produce OPC_Decode or OPC_TryDecode opcode based on the information
1513 // whether the instruction decoder is complete or not. If it is complete
1514 // then it handles all possible values of remaining variable/unfiltered bits
1515 // and for any value can determine if the bitpattern is a valid instruction
1516 // or not. This means OPC_Decode will be the final step in the decoding
1517 // process. If it is not complete, then the Fail return code from the
1518 // decoder method indicates that additional processing should be done to see
1519 // if there is any other instruction that also matches the bitpattern and
1520 // can decode it.
1521 const uint8_t DecoderOp = HasCompleteDecoder ? MCD::OPC_Decode
1522 : (TableInfo.isOutermostScope()
1523 ? MCD::OPC_TryDecodeOrFail
1524 : MCD::OPC_TryDecode);
1525 TableInfo.Table.push_back(Item: DecoderOp);
1526 NumEncodingsSupported++;
1527 TableInfo.Table.insertULEB128(Value: Opc.Opcode);
1528 TableInfo.Table.insertULEB128(Value: DIdx);
1529
1530 if (DecoderOp == MCD::OPC_TryDecode) {
1531 // Push location for NumToSkip backpatching.
1532 TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip());
1533 }
1534}
1535
1536// Emits table entries to decode the singleton, and then to decode the rest.
1537void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1538 const Filter &Best) const {
1539 EncodingIDAndOpcode Opc = Best.getSingletonOpc();
1540
1541 // complex singletons need predicate checks from the first singleton
1542 // to refer forward to the variable filterchooser that follows.
1543 TableInfo.FixupStack.emplace_back();
1544
1545 emitSingletonTableEntry(TableInfo, Opc);
1546
1547 resolveTableFixups(Table&: TableInfo.Table, Fixups: TableInfo.FixupStack.back(),
1548 DestIdx: TableInfo.Table.size());
1549 TableInfo.FixupStack.pop_back();
1550
1551 Best.getVariableFC().emitTableEntries(TableInfo);
1552}
1553
1554// Assign a single filter and run with it. Top level API client can initialize
1555// with a single filter to start the filtering process.
1556void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
1557 bool mixed) {
1558 Filters.clear();
1559 Filters.emplace_back(args&: *this, args&: startBit, args&: numBit, args: true);
1560 BestIndex = 0; // Sole Filter instance to choose from.
1561 bestFilter().recurse();
1562}
1563
1564// reportRegion is a helper function for filterProcessor to mark a region as
1565// eligible for use as a filter region.
1566void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
1567 unsigned BitIndex, bool AllowMixed) {
1568 if (RA == ATTR_MIXED && AllowMixed)
1569 Filters.emplace_back(args&: *this, args&: StartBit, args: BitIndex - StartBit, args: true);
1570 else if (RA == ATTR_ALL_SET && !AllowMixed)
1571 Filters.emplace_back(args&: *this, args&: StartBit, args: BitIndex - StartBit, args: false);
1572}
1573
1574// FilterProcessor scans the well-known encoding bits of the instructions and
1575// builds up a list of candidate filters. It chooses the best filter and
1576// recursively descends down the decoding tree.
1577bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
1578 Filters.clear();
1579 BestIndex = -1;
1580 unsigned numInstructions = Opcodes.size();
1581
1582 assert(numInstructions && "Filter created with no instructions");
1583
1584 // No further filtering is necessary.
1585 if (numInstructions == 1)
1586 return true;
1587
1588 // Heuristics. See also doFilter()'s "Heuristics" comment when num of
1589 // instructions is 3.
1590 if (AllowMixed && !Greedy) {
1591 assert(numInstructions == 3);
1592
1593 for (const auto &Opcode : Opcodes) {
1594 std::vector<Island> Islands;
1595 insn_t Insn;
1596
1597 insnWithID(Insn, Opcode: Opcode.EncodingID);
1598
1599 // Look for islands of undecoded bits of any instruction.
1600 if (getIslands(Islands, Insn) > 0) {
1601 // Found an instruction with island(s). Now just assign a filter.
1602 runSingleFilter(startBit: Islands[0].StartBit, numBit: Islands[0].NumBits, mixed: true);
1603 return true;
1604 }
1605 }
1606 }
1607
1608 unsigned BitIndex;
1609
1610 // We maintain BIT_WIDTH copies of the bitAttrs automaton.
1611 // The automaton consumes the corresponding bit from each
1612 // instruction.
1613 //
1614 // Input symbols: 0, 1, and _ (unset).
1615 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
1616 // Initial state: NONE.
1617 //
1618 // (NONE) ------- [01] -> (ALL_SET)
1619 // (NONE) ------- _ ----> (ALL_UNSET)
1620 // (ALL_SET) ---- [01] -> (ALL_SET)
1621 // (ALL_SET) ---- _ ----> (MIXED)
1622 // (ALL_UNSET) -- [01] -> (MIXED)
1623 // (ALL_UNSET) -- _ ----> (ALL_UNSET)
1624 // (MIXED) ------ . ----> (MIXED)
1625 // (FILTERED)---- . ----> (FILTERED)
1626
1627 std::vector<bitAttr_t> bitAttrs(BitWidth, ATTR_NONE);
1628
1629 // FILTERED bit positions provide no entropy and are not worthy of pursuing.
1630 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
1631 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
1632 if (FilterBitValues[BitIndex].isSet())
1633 bitAttrs[BitIndex] = ATTR_FILTERED;
1634
1635 for (const auto &OpcPair : Opcodes) {
1636 insn_t insn;
1637
1638 insnWithID(Insn&: insn, Opcode: OpcPair.EncodingID);
1639
1640 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1641 switch (bitAttrs[BitIndex]) {
1642 case ATTR_NONE:
1643 if (insn[BitIndex] == BitValue::BIT_UNSET)
1644 bitAttrs[BitIndex] = ATTR_ALL_UNSET;
1645 else
1646 bitAttrs[BitIndex] = ATTR_ALL_SET;
1647 break;
1648 case ATTR_ALL_SET:
1649 if (insn[BitIndex] == BitValue::BIT_UNSET)
1650 bitAttrs[BitIndex] = ATTR_MIXED;
1651 break;
1652 case ATTR_ALL_UNSET:
1653 if (insn[BitIndex] != BitValue::BIT_UNSET)
1654 bitAttrs[BitIndex] = ATTR_MIXED;
1655 break;
1656 case ATTR_MIXED:
1657 case ATTR_FILTERED:
1658 break;
1659 }
1660 }
1661 }
1662
1663 // The regionAttr automaton consumes the bitAttrs automatons' state,
1664 // lowest-to-highest.
1665 //
1666 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
1667 // States: NONE, ALL_SET, MIXED
1668 // Initial state: NONE
1669 //
1670 // (NONE) ----- F --> (NONE)
1671 // (NONE) ----- S --> (ALL_SET) ; and set region start
1672 // (NONE) ----- U --> (NONE)
1673 // (NONE) ----- M --> (MIXED) ; and set region start
1674 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
1675 // (ALL_SET) -- S --> (ALL_SET)
1676 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
1677 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
1678 // (MIXED) ---- F --> (NONE) ; and report a MIXED region
1679 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
1680 // (MIXED) ---- U --> (NONE) ; and report a MIXED region
1681 // (MIXED) ---- M --> (MIXED)
1682
1683 bitAttr_t RA = ATTR_NONE;
1684 unsigned StartBit = 0;
1685
1686 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1687 bitAttr_t bitAttr = bitAttrs[BitIndex];
1688
1689 assert(bitAttr != ATTR_NONE && "Bit without attributes");
1690
1691 switch (RA) {
1692 case ATTR_NONE:
1693 switch (bitAttr) {
1694 case ATTR_FILTERED:
1695 break;
1696 case ATTR_ALL_SET:
1697 StartBit = BitIndex;
1698 RA = ATTR_ALL_SET;
1699 break;
1700 case ATTR_ALL_UNSET:
1701 break;
1702 case ATTR_MIXED:
1703 StartBit = BitIndex;
1704 RA = ATTR_MIXED;
1705 break;
1706 default:
1707 llvm_unreachable("Unexpected bitAttr!");
1708 }
1709 break;
1710 case ATTR_ALL_SET:
1711 switch (bitAttr) {
1712 case ATTR_FILTERED:
1713 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1714 RA = ATTR_NONE;
1715 break;
1716 case ATTR_ALL_SET:
1717 break;
1718 case ATTR_ALL_UNSET:
1719 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1720 RA = ATTR_NONE;
1721 break;
1722 case ATTR_MIXED:
1723 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1724 StartBit = BitIndex;
1725 RA = ATTR_MIXED;
1726 break;
1727 default:
1728 llvm_unreachable("Unexpected bitAttr!");
1729 }
1730 break;
1731 case ATTR_MIXED:
1732 switch (bitAttr) {
1733 case ATTR_FILTERED:
1734 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1735 StartBit = BitIndex;
1736 RA = ATTR_NONE;
1737 break;
1738 case ATTR_ALL_SET:
1739 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1740 StartBit = BitIndex;
1741 RA = ATTR_ALL_SET;
1742 break;
1743 case ATTR_ALL_UNSET:
1744 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1745 RA = ATTR_NONE;
1746 break;
1747 case ATTR_MIXED:
1748 break;
1749 default:
1750 llvm_unreachable("Unexpected bitAttr!");
1751 }
1752 break;
1753 case ATTR_ALL_UNSET:
1754 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
1755 case ATTR_FILTERED:
1756 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
1757 }
1758 }
1759
1760 // At the end, if we're still in ALL_SET or MIXED states, report a region
1761 switch (RA) {
1762 case ATTR_NONE:
1763 break;
1764 case ATTR_FILTERED:
1765 break;
1766 case ATTR_ALL_SET:
1767 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1768 break;
1769 case ATTR_ALL_UNSET:
1770 break;
1771 case ATTR_MIXED:
1772 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1773 break;
1774 }
1775
1776 // We have finished with the filter processings. Now it's time to choose
1777 // the best performing filter.
1778 BestIndex = 0;
1779 bool AllUseless = true;
1780 unsigned BestScore = 0;
1781
1782 for (const auto &[Idx, Filter] : enumerate(First&: Filters)) {
1783 unsigned Usefulness = Filter.usefulness();
1784
1785 if (Usefulness)
1786 AllUseless = false;
1787
1788 if (Usefulness > BestScore) {
1789 BestIndex = Idx;
1790 BestScore = Usefulness;
1791 }
1792 }
1793
1794 if (!AllUseless)
1795 bestFilter().recurse();
1796
1797 return !AllUseless;
1798} // end of FilterChooser::filterProcessor(bool)
1799
1800// Decides on the best configuration of filter(s) to use in order to decode
1801// the instructions. A conflict of instructions may occur, in which case we
1802// dump the conflict set to the standard error.
1803void FilterChooser::doFilter() {
1804 unsigned Num = Opcodes.size();
1805 assert(Num && "FilterChooser created with no instructions");
1806
1807 // Try regions of consecutive known bit values first.
1808 if (filterProcessor(AllowMixed: false))
1809 return;
1810
1811 // Then regions of mixed bits (both known and unitialized bit values allowed).
1812 if (filterProcessor(AllowMixed: true))
1813 return;
1814
1815 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1816 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1817 // well-known encoding pattern. In such case, we backtrack and scan for the
1818 // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1819 if (Num == 3 && filterProcessor(AllowMixed: true, Greedy: false))
1820 return;
1821
1822 // If we come to here, the instruction decoding has failed.
1823 // Set the BestIndex to -1 to indicate so.
1824 BestIndex = -1;
1825}
1826
1827// emitTableEntries - Emit state machine entries to decode our share of
1828// instructions.
1829void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
1830 if (Opcodes.size() == 1) {
1831 // There is only one instruction in the set, which is great!
1832 // Call emitSingletonDecoder() to see whether there are any remaining
1833 // encodings bits.
1834 emitSingletonTableEntry(TableInfo, Opc: Opcodes[0]);
1835 return;
1836 }
1837
1838 // Choose the best filter to do the decodings!
1839 if (BestIndex != -1) {
1840 const Filter &Best = Filters[BestIndex];
1841 if (Best.getNumFiltered() == 1)
1842 emitSingletonTableEntry(TableInfo, Best);
1843 else
1844 Best.emitTableEntry(TableInfo);
1845 return;
1846 }
1847
1848 // We don't know how to decode these instructions! Dump the
1849 // conflict set and bail.
1850
1851 // Print out useful conflict information for postmortem analysis.
1852 errs() << "Decoding Conflict:\n";
1853
1854 dumpStack(OS&: errs(), prefix: "\t\t");
1855
1856 for (auto Opcode : Opcodes) {
1857 const EncodingAndInst &Enc = AllInstructions[Opcode.EncodingID];
1858 errs() << '\t' << Enc << ' ';
1859 dumpBits(OS&: errs(), Bits: getBitsField(Def: *Enc.EncodingDef, FieldName: "Inst"));
1860 errs() << '\n';
1861 }
1862 PrintFatalError(Msg: "Decoding conflict encountered");
1863}
1864
1865static std::string findOperandDecoderMethod(const Record *Record) {
1866 std::string Decoder;
1867
1868 const RecordVal *DecoderString = Record->getValue(Name: "DecoderMethod");
1869 const StringInit *String =
1870 DecoderString ? dyn_cast<StringInit>(Val: DecoderString->getValue()) : nullptr;
1871 if (String) {
1872 Decoder = String->getValue().str();
1873 if (!Decoder.empty())
1874 return Decoder;
1875 }
1876
1877 if (Record->isSubClassOf(Name: "RegisterOperand"))
1878 // Allows use of a DecoderMethod in referenced RegisterClass if set.
1879 return findOperandDecoderMethod(Record: Record->getValueAsDef(FieldName: "RegClass"));
1880
1881 if (Record->isSubClassOf(Name: "RegisterClass")) {
1882 Decoder = "Decode" + Record->getName().str() + "RegisterClass";
1883 } else if (Record->isSubClassOf(Name: "PointerLikeRegClass")) {
1884 Decoder = "DecodePointerLikeRegClass" +
1885 utostr(X: Record->getValueAsInt(FieldName: "RegClassKind"));
1886 }
1887
1888 return Decoder;
1889}
1890
1891OperandInfo getOpInfo(const Record *TypeRecord) {
1892 const RecordVal *HasCompleteDecoderVal =
1893 TypeRecord->getValue(Name: "hasCompleteDecoder");
1894 const BitInit *HasCompleteDecoderBit =
1895 HasCompleteDecoderVal
1896 ? dyn_cast<BitInit>(Val: HasCompleteDecoderVal->getValue())
1897 : nullptr;
1898 bool HasCompleteDecoder =
1899 HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
1900
1901 return OperandInfo(findOperandDecoderMethod(Record: TypeRecord), HasCompleteDecoder);
1902}
1903
1904static void parseVarLenInstOperand(const Record &Def,
1905 std::vector<OperandInfo> &Operands,
1906 const CodeGenInstruction &CGI) {
1907
1908 const RecordVal *RV = Def.getValue(Name: "Inst");
1909 VarLenInst VLI(cast<DagInit>(Val: RV->getValue()), RV);
1910 SmallVector<int> TiedTo;
1911
1912 for (const auto &[Idx, Op] : enumerate(First: CGI.Operands)) {
1913 if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
1914 for (auto *Arg : Op.MIOperandInfo->getArgs())
1915 Operands.push_back(x: getOpInfo(TypeRecord: cast<DefInit>(Val: Arg)->getDef()));
1916 else
1917 Operands.push_back(x: getOpInfo(TypeRecord: Op.Rec));
1918
1919 int TiedReg = Op.getTiedRegister();
1920 TiedTo.push_back(Elt: -1);
1921 if (TiedReg != -1) {
1922 TiedTo[Idx] = TiedReg;
1923 TiedTo[TiedReg] = Idx;
1924 }
1925 }
1926
1927 unsigned CurrBitPos = 0;
1928 for (const auto &EncodingSegment : VLI) {
1929 unsigned Offset = 0;
1930 StringRef OpName;
1931
1932 if (const StringInit *SI = dyn_cast<StringInit>(Val: EncodingSegment.Value)) {
1933 OpName = SI->getValue();
1934 } else if (const DagInit *DI = dyn_cast<DagInit>(Val: EncodingSegment.Value)) {
1935 OpName = cast<StringInit>(Val: DI->getArg(Num: 0))->getValue();
1936 Offset = cast<IntInit>(Val: DI->getArg(Num: 2))->getValue();
1937 }
1938
1939 if (!OpName.empty()) {
1940 auto OpSubOpPair =
1941 const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
1942 Op: OpName);
1943 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(Op: OpSubOpPair);
1944 Operands[OpIdx].addField(Base: CurrBitPos, Width: EncodingSegment.BitWidth, Offset);
1945 if (!EncodingSegment.CustomDecoder.empty())
1946 Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();
1947
1948 int TiedReg = TiedTo[OpSubOpPair.first];
1949 if (TiedReg != -1) {
1950 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
1951 Op: {TiedReg, OpSubOpPair.second});
1952 Operands[OpIdx].addField(Base: CurrBitPos, Width: EncodingSegment.BitWidth, Offset);
1953 }
1954 }
1955
1956 CurrBitPos += EncodingSegment.BitWidth;
1957 }
1958}
1959
1960static void debugDumpRecord(const Record &Rec) {
1961 // Dump the record, so we can see what's going on.
1962 PrintNote(PrintMsg: [&Rec](raw_ostream &OS) {
1963 OS << "Dumping record for previous error:\n";
1964 OS << Rec;
1965 });
1966}
1967
1968/// For an operand field named OpName: populate OpInfo.InitValue with the
1969/// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
1970/// insert from the decoded instruction.
1971static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
1972 std::map<StringRef, StringRef> &TiedNames,
1973 StringRef OpName, OperandInfo &OpInfo) {
1974 // Some bits of the operand may be required to be 1 depending on the
1975 // instruction's encoding. Collect those bits.
1976 if (const RecordVal *EncodedValue = EncodingDef.getValue(Name: OpName))
1977 if (const BitsInit *OpBits = dyn_cast<BitsInit>(Val: EncodedValue->getValue()))
1978 for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
1979 if (const BitInit *OpBit = dyn_cast<BitInit>(Val: OpBits->getBit(Bit: I)))
1980 if (OpBit->getValue())
1981 OpInfo.InitValue |= 1ULL << I;
1982
1983 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) {
1984 const VarInit *Var;
1985 unsigned Offset = 0;
1986 for (; J != Bits.getNumBits(); ++J) {
1987 const VarBitInit *BJ = dyn_cast<VarBitInit>(Val: Bits.getBit(Bit: J));
1988 if (BJ) {
1989 Var = dyn_cast<VarInit>(Val: BJ->getBitVar());
1990 if (I == J)
1991 Offset = BJ->getBitNum();
1992 else if (BJ->getBitNum() != Offset + J - I)
1993 break;
1994 } else {
1995 Var = dyn_cast<VarInit>(Val: Bits.getBit(Bit: J));
1996 }
1997 if (!Var ||
1998 (Var->getName() != OpName && Var->getName() != TiedNames[OpName]))
1999 break;
2000 }
2001 if (I == J)
2002 ++J;
2003 else
2004 OpInfo.addField(Base: I, Width: J - I, Offset);
2005 }
2006}
2007
2008static unsigned
2009populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef,
2010 const CodeGenInstruction &CGI, unsigned Opc,
2011 std::map<unsigned, std::vector<OperandInfo>> &Operands,
2012 bool IsVarLenInst) {
2013 const Record &Def = *CGI.TheDef;
2014 // If all the bit positions are not specified; do not decode this instruction.
2015 // We are bound to fail! For proper disassembly, the well-known encoding bits
2016 // of the instruction must be fully specified.
2017
2018 const BitsInit &Bits = getBitsField(Def: EncodingDef, FieldName: "Inst");
2019 if (Bits.allInComplete())
2020 return 0;
2021
2022 std::vector<OperandInfo> InsnOperands;
2023
2024 // If the instruction has specified a custom decoding hook, use that instead
2025 // of trying to auto-generate the decoder.
2026 StringRef InstDecoder = EncodingDef.getValueAsString(FieldName: "DecoderMethod");
2027 if (!InstDecoder.empty()) {
2028 bool HasCompleteInstDecoder =
2029 EncodingDef.getValueAsBit(FieldName: "hasCompleteDecoder");
2030 InsnOperands.push_back(
2031 x: OperandInfo(InstDecoder.str(), HasCompleteInstDecoder));
2032 Operands[Opc] = std::move(InsnOperands);
2033 return Bits.getNumBits();
2034 }
2035
2036 // Generate a description of the operand of the instruction that we know
2037 // how to decode automatically.
2038 // FIXME: We'll need to have a way to manually override this as needed.
2039
2040 // Gather the outputs/inputs of the instruction, so we can find their
2041 // positions in the encoding. This assumes for now that they appear in the
2042 // MCInst in the order that they're listed.
2043 std::vector<std::pair<const Init *, StringRef>> InOutOperands;
2044 const DagInit *Out = Def.getValueAsDag(FieldName: "OutOperandList");
2045 const DagInit *In = Def.getValueAsDag(FieldName: "InOperandList");
2046 for (const auto &[Idx, Arg] : enumerate(First: Out->getArgs()))
2047 InOutOperands.emplace_back(args: Arg, args: Out->getArgNameStr(Num: Idx));
2048 for (const auto &[Idx, Arg] : enumerate(First: In->getArgs()))
2049 InOutOperands.emplace_back(args: Arg, args: In->getArgNameStr(Num: Idx));
2050
2051 // Search for tied operands, so that we can correctly instantiate
2052 // operands that are not explicitly represented in the encoding.
2053 std::map<StringRef, StringRef> TiedNames;
2054 for (const auto &Op : CGI.Operands) {
2055 for (const auto &[J, CI] : enumerate(First: Op.Constraints)) {
2056 if (!CI.isTied())
2057 continue;
2058 std::pair<unsigned, unsigned> SO =
2059 CGI.Operands.getSubOperandNumber(Op: CI.getTiedOperand());
2060 StringRef TiedName = CGI.Operands[SO.first].SubOpNames[SO.second];
2061 if (TiedName.empty())
2062 TiedName = CGI.Operands[SO.first].Name;
2063 StringRef MyName = Op.SubOpNames[J];
2064 if (MyName.empty())
2065 MyName = Op.Name;
2066
2067 TiedNames[MyName] = TiedName;
2068 TiedNames[TiedName] = MyName;
2069 }
2070 }
2071
2072 if (IsVarLenInst) {
2073 parseVarLenInstOperand(Def: EncodingDef, Operands&: InsnOperands, CGI);
2074 } else {
2075 // For each operand, see if we can figure out where it is encoded.
2076 for (const auto &Op : InOutOperands) {
2077 const Init *OpInit = Op.first;
2078 StringRef OpName = Op.second;
2079
2080 // We're ready to find the instruction encoding locations for this
2081 // operand.
2082
2083 // First, find the operand type ("OpInit"), and sub-op names
2084 // ("SubArgDag") if present.
2085 const DagInit *SubArgDag = dyn_cast<DagInit>(Val: OpInit);
2086 if (SubArgDag)
2087 OpInit = SubArgDag->getOperator();
2088 const Record *OpTypeRec = cast<DefInit>(Val: OpInit)->getDef();
2089 // Lookup the sub-operands from the operand type record (note that only
2090 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp).
2091 const DagInit *SubOps = OpTypeRec->isSubClassOf(Name: "Operand")
2092 ? OpTypeRec->getValueAsDag(FieldName: "MIOperandInfo")
2093 : nullptr;
2094
2095 // Lookup the decoder method and construct a new OperandInfo to hold our
2096 // result.
2097 OperandInfo OpInfo = getOpInfo(TypeRecord: OpTypeRec);
2098
2099 // If we have named sub-operands...
2100 if (SubArgDag) {
2101 // Then there should not be a custom decoder specified on the top-level
2102 // type.
2103 if (!OpInfo.Decoder.empty()) {
2104 PrintError(ErrorLoc: EncodingDef.getLoc(),
2105 Msg: "DecoderEmitter: operand \"" + OpName + "\" has type \"" +
2106 OpInit->getAsString() +
2107 "\" with a custom DecoderMethod, but also named "
2108 "sub-operands.");
2109 continue;
2110 }
2111
2112 // Decode each of the sub-ops separately.
2113 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs());
2114 for (const auto &[I, Arg] : enumerate(First: SubOps->getArgs())) {
2115 StringRef SubOpName = SubArgDag->getArgNameStr(Num: I);
2116 OperandInfo SubOpInfo = getOpInfo(TypeRecord: cast<DefInit>(Val: Arg)->getDef());
2117
2118 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName: SubOpName,
2119 OpInfo&: SubOpInfo);
2120 InsnOperands.push_back(x: std::move(SubOpInfo));
2121 }
2122 continue;
2123 }
2124
2125 // Otherwise, if we have an operand with sub-operands, but they aren't
2126 // named...
2127 if (SubOps && OpInfo.Decoder.empty()) {
2128 // If it's a single sub-operand, and no custom decoder, use the decoder
2129 // from the one sub-operand.
2130 if (SubOps->getNumArgs() == 1)
2131 OpInfo = getOpInfo(TypeRecord: cast<DefInit>(Val: SubOps->getArg(Num: 0))->getDef());
2132
2133 // If we have multiple sub-ops, there'd better have a custom
2134 // decoder. (Otherwise we don't know how to populate them properly...)
2135 if (SubOps->getNumArgs() > 1) {
2136 PrintError(ErrorLoc: EncodingDef.getLoc(),
2137 Msg: "DecoderEmitter: operand \"" + OpName +
2138 "\" uses MIOperandInfo with multiple ops, but doesn't "
2139 "have a custom decoder!");
2140 debugDumpRecord(Rec: EncodingDef);
2141 continue;
2142 }
2143 }
2144
2145 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo);
2146 // FIXME: it should be an error not to find a definition for a given
2147 // operand, rather than just failing to add it to the resulting
2148 // instruction! (This is a longstanding bug, which will be addressed in an
2149 // upcoming change.)
2150 if (OpInfo.numFields() > 0)
2151 InsnOperands.push_back(x: std::move(OpInfo));
2152 }
2153 }
2154 Operands[Opc] = std::move(InsnOperands);
2155
2156#if 0
2157 LLVM_DEBUG({
2158 // Dumps the instruction encoding bits.
2159 dumpBits(errs(), Bits);
2160
2161 errs() << '\n';
2162
2163 // Dumps the list of operand info.
2164 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
2165 const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
2166 const std::string &OperandName = Info.Name;
2167 const Record &OperandDef = *Info.Rec;
2168
2169 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
2170 }
2171 });
2172#endif
2173
2174 return Bits.getNumBits();
2175}
2176
// emitFieldFromInstruction - Emit the templated helper function
// fieldFromInstruction().
// Two overloads are emitted, selected with std::enable_if_t on
// std::is_integral<InsnType>: the integral one masks and shifts the value,
// the other one forwards to InsnType::extractBitsAsZExtValue().
// On Windows we make sure that this function is not inlined when
// using the VS compiler. It has a bug which causes the function
// to be optimized out in some circumstances. See llvm.org/pr38292
static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
  // The helpers are written to OS verbatim from a single raw string literal.
  OS << R"(
// Helper functions for extracting fields from encoded instructions.
// InsnType must either be integral or an APInt-like object that must:
// * be default-constructible and copy-constructible
// * be constructible from an APInt (this can be private)
// * Support insertBits(bits, startBit, numBits)
// * Support extractBitsAsZExtValue(numBits, startBit)
// * Support the ~, &, ==, and != operators with other objects of the same type
// * Support the != and bitwise & with uint64_t
// * Support put (<<) to raw_ostream&
template <typename InsnType>
#if defined(_MSC_VER) && !defined(__clang__)
__declspec(noinline)
#endif
static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
fieldFromInstruction(const InsnType &insn, unsigned startBit,
 unsigned numBits) {
 assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
 assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
 "Instruction field out of bounds!");
 InsnType fieldMask;
 if (numBits == sizeof(InsnType) * 8)
 fieldMask = (InsnType)(-1LL);
 else
 fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
 return (insn & fieldMask) >> startBit;
}

template <typename InsnType>
static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
fieldFromInstruction(const InsnType &insn, unsigned startBit,
 unsigned numBits) {
 return insn.extractBitsAsZExtValue(numBits, startBit);
}
)";
}
2219
// emitInsertBits - Emit the templated helper function insertBits().
// The emitted helper ORs the shifted bits into the field when InsnType is
// integral, and otherwise forwards to InsnType::insertBits().
static void emitInsertBits(formatted_raw_ostream &OS) {
  // The helper is written to OS verbatim from a single raw string literal.
  OS << R"(
// Helper function for inserting bits extracted from an encoded instruction into
// a field.
template <typename InsnType>
static void insertBits(InsnType &field, InsnType bits, unsigned startBit,
 unsigned numBits) {
 if constexpr (std::is_integral<InsnType>::value) {
 assert(startBit + numBits <= sizeof field * 8);
 (void)numBits;
 field |= (InsnType)bits << startBit;
 } else {
 field.insertBits(bits, startBit, numBits);
 }
}
)";
}
2238
// emitDecodeInstruction - Emit the templated helper function
// decodeInstruction(), preceded by the small helper decodeNumToSkip() that it
// uses to read skip distances from the decoder table.
//
// IsVarLenInst: when true, the emitted decodeInstruction() takes an extra
// 'makeUp' callback parameter and calls it before extracting or checking a
// field and before decoding.
// OpcodeMask: bit (1 << MCD::OPC_xxx) is tested for the TryDecode,
// CheckPredicate and SoftFail opcodes; the switch cases handling those
// opcodes are only emitted when the corresponding bit is set.
//
// The emitted text is assembled from raw string fragments, so the order of
// the statements below is the order of the generated code.
static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
                                  unsigned OpcodeMask) {
  // Which optional opcode handlers need to be emitted.
  const bool HasTryDecode = OpcodeMask & ((1 << MCD::OPC_TryDecode) |
                                          (1 << MCD::OPC_TryDecodeOrFail));
  const bool HasCheckPredicate =
      OpcodeMask &
      ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
  const bool HasSoftFail = OpcodeMask & (1 << MCD::OPC_SoftFail);

  // decodeNumToSkip(): reads the first two bytes of the skip distance...
  OS << R"(
static unsigned decodeNumToSkip(const uint8_t *&Ptr) {
 unsigned NumToSkip = *Ptr++;
 NumToSkip |= (*Ptr++) << 8;
)";
  // ...plus a third byte when skip distances are encoded in 3 bytes.
  if (getNumToSkipInBytes() == 3)
    OS << " NumToSkip |= (*Ptr++) << 16;\n";
  // End of decodeNumToSkip(), then the start of decodeInstruction()'s
  // signature.
  OS << R"( return NumToSkip;
}

template <typename InsnType>
static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
 InsnType insn, uint64_t Address,
 const MCDisassembler *DisAsm,
 const MCSubtargetInfo &STI)";
  // Variable-length decoders get the additional 'makeUp' parameter.
  if (IsVarLenInst) {
    OS << ",\n "
          "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
  }
  OS << ") {\n";
  // The feature bits are only fetched when a predicate check is emitted.
  if (HasCheckPredicate)
    OS << " const FeatureBitset &Bits = STI.getFeatureBits();\n";

  // Interpreter loop prologue and the OPC_ExtractField case (up to the point
  // where the optional makeUp call is inserted).
  OS << R"(
 const uint8_t *Ptr = DecodeTable;
 uint64_t CurFieldValue = 0;
 DecodeStatus S = MCDisassembler::Success;
 while (true) {
 ptrdiff_t Loc = Ptr - DecodeTable;
 const uint8_t DecoderOp = *Ptr++;
 switch (DecoderOp) {
 default:
 errs() << Loc << ": Unexpected decode table opcode: "
 << (int)DecoderOp << '\n';
 return MCDisassembler::Fail;
 case MCD::OPC_ExtractField: {
 // Decode the start value.
 unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
 unsigned Len = *Ptr++;)";
  if (IsVarLenInst)
    OS << "\n makeUp(insn, Start + Len);";
  // Rest of OPC_ExtractField, the OPC_FilterValue[OrFail] case, and the
  // beginning of the OPC_CheckField[OrFail] case.
  OS << R"(
 CurFieldValue = fieldFromInstruction(insn, Start, Len);
 LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
 << Len << "): " << CurFieldValue << "\n");
 break;
 }
 case MCD::OPC_FilterValue:
 case MCD::OPC_FilterValueOrFail: {
 bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail;
 // Decode the field value.
 uint64_t Val = decodeULEB128AndIncUnsafe(Ptr);
 bool Failed = Val != CurFieldValue;
 unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);

 // Note: Print NumToSkip even for OPC_FilterValueOrFail to simplify debug
 // prints.
 LLVM_DEBUG({
 StringRef OpName = IsFail ? "OPC_FilterValueOrFail" : "OPC_FilterValue";
 dbgs() << Loc << ": " << OpName << '(' << Val << ", " << NumToSkip
 << ") " << (Failed ? "FAIL:" : "PASS:")
 << " continuing at " << (Ptr - DecodeTable) << '\n';
 });

 // Perform the filter operation.
 if (Failed) {
 if (IsFail)
 return MCDisassembler::Fail;
 Ptr += NumToSkip;
 }
 break;
 }
 case MCD::OPC_CheckField:
 case MCD::OPC_CheckFieldOrFail: {
 bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail;
 // Decode the start value.
 unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
 unsigned Len = *Ptr;)";
  if (IsVarLenInst)
    OS << "\n makeUp(insn, Start + Len);";
  // Rest of the OPC_CheckField[OrFail] case.
  OS << R"(
 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
 // Decode the field value.
 unsigned PtrLen = 0;
 uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
 Ptr += PtrLen;
 bool Failed = ExpectedValue != FieldValue;
 unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);

 LLVM_DEBUG({
 StringRef OpName = IsFail ? "OPC_CheckFieldOrFail" : "OPC_CheckField";
 dbgs() << Loc << ": " << OpName << '(' << Start << ", " << Len << ", "
 << ExpectedValue << ", " << NumToSkip << "): FieldValue = "
 << FieldValue << ", ExpectedValue = " << ExpectedValue << ": "
 << (Failed ? "FAIL\n" : "PASS\n");
 });

 // If the actual and expected values don't match, skip or fail.
 if (Failed) {
 if (IsFail)
 return MCDisassembler::Fail;
 Ptr += NumToSkip;
 }
 break;
 })";
  // Optional: OPC_CheckPredicate[OrFail] case.
  if (HasCheckPredicate) {
    OS << R"(
 case MCD::OPC_CheckPredicate:
 case MCD::OPC_CheckPredicateOrFail: {
 bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail;
 // Decode the Predicate Index value.
 unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr);
 unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
 // Check the predicate.
 bool Failed = !checkDecoderPredicate(PIdx, Bits);

 LLVM_DEBUG({
 StringRef OpName = IsFail ? "OPC_CheckPredicateOrFail" : "OPC_CheckPredicate";
 dbgs() << Loc << ": " << OpName << '(' << PIdx << ", " << NumToSkip
 << "): " << (Failed ? "FAIL\n" : "PASS\n");
 });

 if (Failed) {
 if (IsFail)
 return MCDisassembler::Fail;
 Ptr += NumToSkip;
 }
 break;
 })";
  }
  // OPC_Decode case, up to the point where the optional makeUp call is
  // inserted.
  OS << R"(
 case MCD::OPC_Decode: {
 // Decode the Opcode value.
 unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);

 MI.clear();
 MI.setOpcode(Opc);
 bool DecodeComplete;)";
  // Variable-length decoders look up the instruction length in
  // InstrLenTable and pass it to makeUp before decoding.
  if (IsVarLenInst) {
    OS << "\n unsigned Len = InstrLenTable[Opc];\n"
       << " makeUp(insn, Len);";
  }
  // Rest of the OPC_Decode case.
  OS << R"(
 S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm, DecodeComplete);
 assert(DecodeComplete);

 LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
 << ", using decoder " << DecodeIdx << ": "
 << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
 return S;
 })";
  // Optional: OPC_TryDecode[OrFail] case.
  if (HasTryDecode) {
    OS << R"(
 case MCD::OPC_TryDecode:
 case MCD::OPC_TryDecodeOrFail: {
 bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail;
 // Decode the Opcode value.
 unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
 unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);

 // Perform the decode operation.
 MCInst TmpMI;
 TmpMI.setOpcode(Opc);
 bool DecodeComplete;
 S = decodeToMCInst(DecodeIdx, S, insn, TmpMI, Address, DisAsm, DecodeComplete);
 LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
 << ", using decoder " << DecodeIdx << ": ");

 if (DecodeComplete) {
 // Decoding complete.
 LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
 MI = TmpMI;
 return S;
 }
 assert(S == MCDisassembler::Fail);
 if (IsFail) {
 LLVM_DEBUG(dbgs() << "FAIL: returning FAIL\n");
 return MCDisassembler::Fail;
 }
 // If the decoding was incomplete, skip.
 Ptr += NumToSkip;
 LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
 // Reset decode status. This also drops a SoftFail status that could be
 // set before the decode attempt.
 S = MCDisassembler::Success;
 break;
 })";
  }
  // Optional: OPC_SoftFail case.
  if (HasSoftFail) {
    OS << R"(
 case MCD::OPC_SoftFail: {
 // Decode the mask values.
 uint64_t PositiveMask = decodeULEB128AndIncUnsafe(Ptr);
 uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
 bool Failed = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
 if (Failed)
 S = MCDisassembler::SoftFail;
 LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Failed ? "FAIL\n" : "PASS\n"));
 break;
 })";
  }
  // OPC_Fail case and the end of decodeInstruction().
  OS << R"(
 case MCD::OPC_Fail: {
 LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n");
 return MCDisassembler::Fail;
 }
 }
 }
 llvm_unreachable("bogosity detected in disassembler state machine!");
}

)";
}
2465
// emitCheck - Emit the helper function Check().
// Helper to propagate SoftFail status. Returns false if the status is Fail;
// callers are expected to early-exit in that condition. (Note, the '&' operator
// is correct to propagate the values of this enum; see comment on 'enum
// DecodeStatus'.)
static void emitCheck(formatted_raw_ostream &OS) {
  // The helper is written to OS verbatim from a single raw string literal.
  OS << R"(
static bool Check(DecodeStatus &Out, DecodeStatus In) {
 Out = static_cast<DecodeStatus>(Out & In);
 return Out != MCDisassembler::Fail;
}

)";
}
2479
2480// Collect all HwModes referenced by the target for encoding purposes,
2481// returning a vector of corresponding names.
2482static void collectHwModesReferencedForEncodings(
2483 const CodeGenHwModes &HWM, std::vector<StringRef> &Names,
2484 NamespacesHwModesMap &NamespacesWithHwModes) {
2485 SmallBitVector BV(HWM.getNumModeIds());
2486 for (const auto &MS : HWM.getHwModeSelects()) {
2487 for (const HwModeSelect::PairType &P : MS.second.Items) {
2488 if (P.second->isSubClassOf(Name: "InstructionEncoding")) {
2489 std::string DecoderNamespace =
2490 P.second->getValueAsString(FieldName: "DecoderNamespace").str();
2491 if (P.first == DefaultMode) {
2492 NamespacesWithHwModes[DecoderNamespace].insert(x: "");
2493 } else {
2494 NamespacesWithHwModes[DecoderNamespace].insert(
2495 x: HWM.getMode(Id: P.first).Name);
2496 }
2497 BV.set(P.first);
2498 }
2499 }
2500 }
2501 transform(Range: BV.set_bits(), d_first: std::back_inserter(x&: Names), F: [&HWM](const int &M) {
2502 if (M == DefaultMode)
2503 return StringRef("");
2504 return HWM.getModeName(Id: M, /*IncludeDefault=*/true);
2505 });
2506}
2507
2508static void
2509handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
2510 ArrayRef<StringRef> HwModeNames,
2511 NamespacesHwModesMap &NamespacesWithHwModes,
2512 std::vector<EncodingAndInst> &GlobalEncodings) {
2513 const Record *InstDef = Instr->TheDef;
2514
2515 switch (DecoderEmitterSuppressDuplicates) {
2516 case SUPPRESSION_DISABLE: {
2517 for (StringRef HwModeName : HwModeNames)
2518 GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args&: HwModeName);
2519 break;
2520 }
2521 case SUPPRESSION_LEVEL1: {
2522 std::string DecoderNamespace =
2523 InstDef->getValueAsString(FieldName: "DecoderNamespace").str();
2524 auto It = NamespacesWithHwModes.find(x: DecoderNamespace);
2525 if (It != NamespacesWithHwModes.end()) {
2526 for (StringRef HwModeName : It->second)
2527 GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args&: HwModeName);
2528 } else {
2529 // Only emit the encoding once, as it's DecoderNamespace doesn't
2530 // contain any HwModes.
2531 GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args: "");
2532 }
2533 break;
2534 }
2535 case SUPPRESSION_LEVEL2:
2536 GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args: "");
2537 break;
2538 }
2539}
2540
2541// Emits disassembler code for instruction decoding.
2542void DecoderEmitter::run(raw_ostream &o) {
2543 formatted_raw_ostream OS(o);
2544 OS << R"(
2545#include "llvm/MC/MCInst.h"
2546#include "llvm/MC/MCSubtargetInfo.h"
2547#include "llvm/Support/DataTypes.h"
2548#include "llvm/Support/Debug.h"
2549#include "llvm/Support/LEB128.h"
2550#include "llvm/Support/raw_ostream.h"
2551#include "llvm/TargetParser/SubtargetFeature.h"
2552#include <assert.h>
2553
2554namespace {
2555)";
2556
2557 emitFieldFromInstruction(OS);
2558 emitInsertBits(OS);
2559 emitCheck(OS);
2560
2561 Target.reverseBitsForLittleEndianEncoding();
2562
2563 // Parameterize the decoders based on namespace and instruction width.
2564
2565 // First, collect all encoding-related HwModes referenced by the target.
2566 // And establish a mapping table between DecoderNamespace and HwMode.
2567 // If HwModeNames is empty, add the empty string so we always have one HwMode.
2568 const CodeGenHwModes &HWM = Target.getHwModes();
2569 std::vector<StringRef> HwModeNames;
2570 NamespacesHwModesMap NamespacesWithHwModes;
2571 collectHwModesReferencedForEncodings(HWM, Names&: HwModeNames, NamespacesWithHwModes);
2572 if (HwModeNames.empty())
2573 HwModeNames.push_back(x: "");
2574
2575 const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
2576 NumberedEncodings.reserve(n: NumberedInstructions.size());
2577 for (const auto &NumberedInstruction : NumberedInstructions) {
2578 const Record *InstDef = NumberedInstruction->TheDef;
2579 if (const RecordVal *RV = InstDef->getValue(Name: "EncodingInfos")) {
2580 if (const DefInit *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
2581 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
2582 for (auto &[ModeId, Encoding] : EBM) {
2583 // DecoderTables with DefaultMode should not have any suffix.
2584 if (ModeId == DefaultMode) {
2585 NumberedEncodings.emplace_back(args&: Encoding, args: NumberedInstruction, args: "");
2586 } else {
2587 NumberedEncodings.emplace_back(args&: Encoding, args: NumberedInstruction,
2588 args: HWM.getMode(Id: ModeId).Name);
2589 }
2590 }
2591 continue;
2592 }
2593 }
2594 // This instruction is encoded the same on all HwModes.
2595 // According to user needs, provide varying degrees of suppression.
2596 handleHwModesUnrelatedEncodings(Instr: NumberedInstruction, HwModeNames,
2597 NamespacesWithHwModes, GlobalEncodings&: NumberedEncodings);
2598 }
2599 for (const Record *NumberedAlias :
2600 RK.getAllDerivedDefinitions(ClassName: "AdditionalEncoding"))
2601 NumberedEncodings.emplace_back(
2602 args&: NumberedAlias,
2603 args: &Target.getInstruction(InstRec: NumberedAlias->getValueAsDef(FieldName: "AliasOf")));
2604
2605 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
2606 OpcMap;
2607 std::map<unsigned, std::vector<OperandInfo>> Operands;
2608 std::vector<unsigned> InstrLen;
2609 bool IsVarLenInst = Target.hasVariableLengthEncodings();
2610 unsigned MaxInstLen = 0;
2611
2612 for (const auto &[NEI, NumberedEncoding] : enumerate(First&: NumberedEncodings)) {
2613 const Record *EncodingDef = NumberedEncoding.EncodingDef;
2614 const CodeGenInstruction *Inst = NumberedEncoding.Inst;
2615 const Record *Def = Inst->TheDef;
2616 unsigned Size = EncodingDef->getValueAsInt(FieldName: "Size");
2617 if (Def->getValueAsString(FieldName: "Namespace") == "TargetOpcode" ||
2618 Def->getValueAsBit(FieldName: "isPseudo") ||
2619 Def->getValueAsBit(FieldName: "isAsmParserOnly") ||
2620 Def->getValueAsBit(FieldName: "isCodeGenOnly")) {
2621 NumEncodingsLackingDisasm++;
2622 continue;
2623 }
2624
2625 if (NEI < NumberedInstructions.size())
2626 NumInstructions++;
2627 NumEncodings++;
2628
2629 if (!Size && !IsVarLenInst)
2630 continue;
2631
2632 if (IsVarLenInst)
2633 InstrLen.resize(new_size: NumberedInstructions.size(), x: 0);
2634
2635 if (unsigned Len = populateInstruction(Target, EncodingDef: *EncodingDef, CGI: *Inst, Opc: NEI,
2636 Operands, IsVarLenInst)) {
2637 if (IsVarLenInst) {
2638 MaxInstLen = std::max(a: MaxInstLen, b: Len);
2639 InstrLen[NEI] = Len;
2640 }
2641 std::string DecoderNamespace =
2642 EncodingDef->getValueAsString(FieldName: "DecoderNamespace").str();
2643 if (!NumberedEncoding.HwModeName.empty())
2644 DecoderNamespace += "_" + NumberedEncoding.HwModeName.str();
2645 OpcMap[{DecoderNamespace, Size}].emplace_back(
2646 args&: NEI, args: Target.getInstrIntValue(R: Def));
2647 } else {
2648 NumEncodingsOmitted++;
2649 }
2650 }
2651
2652 DecoderTableInfo TableInfo;
2653 unsigned OpcodeMask = 0;
2654 for (const auto &[NSAndByteSize, EncodingIDs] : OpcMap) {
2655 const std::string &DecoderNamespace = NSAndByteSize.first;
2656 const unsigned BitWidth = 8 * NSAndByteSize.second;
2657 // Emit the decoder for this namespace+width combination.
2658 FilterChooser FC(NumberedEncodings, EncodingIDs, Operands,
2659 IsVarLenInst ? MaxInstLen : BitWidth, this);
2660
2661 // The decode table is cleared for each top level decoder function. The
2662 // predicates and decoders themselves, however, are shared across all
2663 // decoders to give more opportunities for uniqueing.
2664 TableInfo.Table.clear();
2665 TableInfo.FixupStack.clear();
2666 TableInfo.FixupStack.emplace_back();
2667 FC.emitTableEntries(TableInfo);
2668 // Any NumToSkip fixups in the top level scope can resolve to the
2669 // OPC_Fail at the end of the table.
2670 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
2671 // Resolve any NumToSkip fixups in the current scope.
2672 resolveTableFixups(Table&: TableInfo.Table, Fixups: TableInfo.FixupStack.back(),
2673 DestIdx: TableInfo.Table.size());
2674 TableInfo.FixupStack.clear();
2675
2676 TableInfo.Table.push_back(Item: MCD::OPC_Fail);
2677
2678 // Print the table to the output stream.
2679 OpcodeMask |= emitTable(OS, Table&: TableInfo.Table, Indent: indent(0), BitWidth: FC.getBitWidth(),
2680 Namespace: DecoderNamespace, EncodingIDs);
2681 }
2682
 // For variable-length instruction sets, we emit an instruction length table
 // to let the decoder know how long the instructions are.
 // You can see example usage in M68k's disassembler.
2686 if (IsVarLenInst)
2687 emitInstrLenTable(OS, InstrLen);
2688
2689 const bool HasCheckPredicate =
2690 OpcodeMask &
2691 ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
2692
2693 // Emit the predicate function.
2694 if (HasCheckPredicate)
2695 emitPredicateFunction(OS, Predicates&: TableInfo.Predicates, Indent: indent(0));
2696
2697 // Emit the decoder function.
2698 emitDecoderFunction(OS, Decoders&: TableInfo.Decoders, Indent: indent(0));
2699
2700 // Emit the main entry point for the decoder, decodeInstruction().
2701 emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask);
2702
2703 OS << "\n} // namespace\n";
2704}
2705
2706void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS,
2707 StringRef PredicateNamespace) {
2708 DecoderEmitter(RK, PredicateNamespace).run(o&: OS);
2709}
2710