DecoderEmitter.cpp source code [llvm_projects/llvm/utils/TableGen/DecoderEmitter.cpp]

1	//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
// This file contains the TableGen backend that emits the decoder functions
// for targets with fixed- or variable-length instruction sets.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "Common/CodeGenHwModes.h"
15	#include "Common/CodeGenInstruction.h"
16	#include "Common/CodeGenTarget.h"
17	#include "Common/InfoByHwMode.h"
18	#include "Common/VarLenCodeEmitterGen.h"
19	#include "TableGenBackends.h"
20	#include "llvm/ADT/APInt.h"
21	#include "llvm/ADT/ArrayRef.h"
22	#include "llvm/ADT/CachedHashString.h"
23	#include "llvm/ADT/STLExtras.h"
24	#include "llvm/ADT/SetVector.h"
25	#include "llvm/ADT/SmallBitVector.h"
26	#include "llvm/ADT/SmallString.h"
27	#include "llvm/ADT/Statistic.h"
28	#include "llvm/ADT/StringExtras.h"
29	#include "llvm/ADT/StringRef.h"
30	#include "llvm/MC/MCDecoderOps.h"
31	#include "llvm/Support/Casting.h"
32	#include "llvm/Support/CommandLine.h"
33	#include "llvm/Support/Debug.h"
34	#include "llvm/Support/ErrorHandling.h"
35	#include "llvm/Support/FormatVariadic.h"
36	#include "llvm/Support/FormattedStream.h"
37	#include "llvm/Support/LEB128.h"
38	#include "llvm/Support/MathExtras.h"
39	#include "llvm/Support/raw_ostream.h"
40	#include "llvm/TableGen/Error.h"
41	#include "llvm/TableGen/Record.h"
42	#include <algorithm>
43	#include <cassert>
44	#include <cstddef>
45	#include <cstdint>
46	#include <map>
47	#include <memory>
48	#include <set>
49	#include <string>
50	#include <utility>
51	#include <vector>
52
53	using namespace llvm;
54
55	#define DEBUG_TYPE "decoder-emitter"
56
57	extern cl::OptionCategory DisassemblerEmitterCat;
58
/// Levels accepted by the -suppress-per-hwmode-duplicates option; the option
/// spells them "O0", "O1" and "O2" respectively.
enum SuppressLevel {
  SUPPRESSION_DISABLE = 0,
  SUPPRESSION_LEVEL1 = 1,
  SUPPRESSION_LEVEL2 = 2
};
64
65	static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
66	"suppress-per-hwmode-duplicates",
67	cl::desc ("Suppress duplication of instrs into per-HwMode decoder tables"),
68	cl::values(
69	clEnumValN(
70	SUPPRESSION_DISABLE, "O0",
71	"Do not prevent DecoderTable duplications caused by HwModes"),
72	clEnumValN(
73	SUPPRESSION_LEVEL1, "O1",
74	"Remove duplicate DecoderTable entries generated due to HwModes"),
75	clEnumValN(
76	SUPPRESSION_LEVEL2, "O2",
77	"Extract HwModes-specific instructions into new DecoderTables, "
78	"significantly reducing Table Duplications")),
79	cl::init(Val: SUPPRESSION_DISABLE), cl::cat (DisassemblerEmitterCat));
80
81	static cl::opt<bool> LargeTable(
82	"large-decoder-table",
83	cl::desc ("Use large decoder table format. This uses 24 bits for offset\n"
84	"in the table instead of the default 16 bits."),
85	cl::init(Val: false), cl::cat (DisassemblerEmitterCat));
86
87	static cl::opt<bool> UseFnTableInDecodeToMCInst(
88	"use-fn-table-in-decode-to-mcinst",
89	cl::desc (
90	"Use a table of function pointers instead of a switch case in the\n"
91	"generated `decodeToMCInst` function. Helps improve compile time\n"
92	"of the generated code."),
93	cl::init(Val: false), cl::cat (DisassemblerEmitterCat));
94
// Statistic counters for this backend, declared with the STATISTIC macro from
// llvm/ADT/Statistic.h. The string is the description attached to each
// counter.
STATISTIC(NumEncodings, "Number of encodings considered");
STATISTIC(NumEncodingsLackingDisasm,
          "Number of encodings without disassembler info");
STATISTIC(NumInstructions, "Number of instructions considered");
STATISTIC(NumEncodingsSupported, "Number of encodings supported");
STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
101
102	static unsigned getNumToSkipInBytes() { return LargeTable ? `3` : `2`; }
103
104	namespace {
105
/// A (Base, Width, Offset) triple describing one field of an operand's
/// encoding. Instances are collected in OperandInfo::Fields.
struct EncodingField {
  unsigned Base;
  unsigned Width;
  unsigned Offset;

  EncodingField(unsigned B, unsigned W, unsigned O)
      : Base(B), Width(W), Offset(O) {}
};
111
112	struct OperandInfo {
113	std::vector<EncodingField> Fields;
114	std::string Decoder;
115	bool HasCompleteDecoder;
116	uint64_t InitValue = `0`;
117
118	OperandInfo(std::string D, bool HCD) : Decoder (D), HasCompleteDecoder(HCD) {}
119
120	void addField(unsigned Base, unsigned Width, unsigned Offset) {
121	Fields.push_back(x: EncodingField (Base, Width, Offset));
122	}
123
124	unsigned numFields() const { return Fields.size(); }
125
126	typedef std::vector<EncodingField>::const_iterator const_iterator;
127
128	const_iterator begin() const { return Fields.begin(); }
129	const_iterator end() const { return Fields.end(); }
130	};
131
132	typedef std::vector<uint32_t> FixupList;
133	typedef std::vector<FixupList> FixupScopeList;
134	typedef SmallSetVector<CachedHashString, `16`> PredicateSet;
135	typedef SmallSetVector<CachedHashString, `16`> DecoderSet;
136
137	class DecoderTable {
138	public:
139	DecoderTable() { Data.reserve(n: `16384`); }
140
141	void clear() { Data.clear(); }
142	void push_back(uint8_t Item) { Data.push_back(x: Item); }
143	size_t size() const { return Data.size(); }
144	const uint8_t data() const* { return Data.data(); }
145
146	using const_iterator = std::vector<uint8_t>::const_iterator;
147	const_iterator begin() const { return Data.begin(); }
148	const_iterator end() const { return Data.end(); }
149
150	// Insert a ULEB128 encoded value into the table.
151	void insertULEB128(uint64_t Value) {
152	// Encode and emit the value to filter against.
153	uint8_t Buffer[`16`];
154	unsigned Len = encodeULEB128(Value, p: Buffer);
155	Data.insert(position: Data.end(), first: Buffer, last: Buffer + Len);
156	}
157
158	// Insert space for `NumToSkip` and return the position
159	// in the table for patching.
160	size_t insertNumToSkip() {
161	size_t Size = Data.size();
162	Data.insert(position: Data.end(), n: getNumToSkipInBytes(), x: `0`);
163	return Size;
164	}
165
166	void patchNumToSkip(size_t FixupIdx, uint32_t DestIdx) {
167	// Calculate the distance from the byte following the fixup entry byte
168	// to the destination. The Target is calculated from after the
169	// `getNumToSkipInBytes()`-byte NumToSkip entry itself, so subtract
170	// `getNumToSkipInBytes()` from the displacement here to account for that.
171	assert(DestIdx >= FixupIdx + getNumToSkipInBytes() &&
172	"Expecting a forward jump in the decoding table");
173	uint32_t Delta = DestIdx - FixupIdx - getNumToSkipInBytes();
174	if (!isUIntN(N: `8` * getNumToSkipInBytes(), x: Delta))
175	PrintFatalError(
176	Msg: "disassembler decoding table too large, try --large-decoder-table");
177
178	Data [FixupIdx] = static_cast<uint8_t>(Delta);
179	Data [FixupIdx + `1`] = static_cast<uint8_t>(Delta >> `8`);
180	if (getNumToSkipInBytes() == `3`)
181	Data [FixupIdx + `2`] = static_cast<uint8_t>(Delta >> `16`);
182	}
183
184	private:
185	std::vector<uint8_t> Data;
186	};
187
188	struct DecoderTableInfo {
189	DecoderTable Table;
190	FixupScopeList FixupStack;
191	PredicateSet Predicates;
192	DecoderSet Decoders;
193
194	bool isOutermostScope() const { return FixupStack.size() == `1`; }
195	};
196
197	struct EncodingAndInst {
198	const Record *EncodingDef;
199	const CodeGenInstruction *Inst;
200	StringRef HwModeName;
201
202	EncodingAndInst(const Record EncodingDef, const* CodeGenInstruction *Inst,
203	StringRef HwModeName = "")
204	: EncodingDef(EncodingDef), Inst(Inst), HwModeName (HwModeName) {}
205	};
206
/// Pair of an encoding ID and an opcode. Both default to 0.
struct EncodingIDAndOpcode {
  unsigned EncodingID = 0;
  unsigned Opcode = 0;

  EncodingIDAndOpcode() = default;
  EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
      : EncodingID(EncodingID), Opcode(Opcode) {}
};
215
// List of (encoding ID, opcode) pairs.
using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
// Ordered map from a string key to a set of StringRefs; by its name, it maps
// namespaces to HwMode names.
using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
218
219	class DecoderEmitter {
220	const RecordKeeper &RK;
221	std::vector<EncodingAndInst> NumberedEncodings;
222
223	public:
224	DecoderEmitter(const RecordKeeper &R, StringRef PredicateNamespace)
225	: RK(R), Target (R), PredicateNamespace (PredicateNamespace) {}
226
227	// Emit the decoder state machine table. Returns a mask of MCD decoder ops
228	// that were emitted.
229	unsigned emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
230	indent Indent, unsigned BitWidth, StringRef Namespace,
231	const EncodingIDsVec &EncodingIDs) const;
232	void emitInstrLenTable(formatted_raw_ostream &OS,
233	ArrayRef<unsigned> InstrLen) const;
234	void emitPredicateFunction(formatted_raw_ostream &OS,
235	PredicateSet &Predicates, indent Indent) const;
236	void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
237	indent Indent) const;
238
239	// run - Output the code emitter
240	void run(raw_ostream &o);
241
242	private:
243	CodeGenTarget Target;
244
245	public:
246	StringRef PredicateNamespace;
247	};
248
249	// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
250	// for a bit value.
251	//
252	// BIT_UNFILTERED is used as the init value for a filter position. It is used
253	// only for filter processings.
254	struct BitValue {
255	enum bit_value_t : uint8_t {
256	BIT_FALSE, // '0'
257	BIT_TRUE, // '1'
258	BIT_UNSET, // '?', printed as '_'
259	BIT_UNFILTERED // unfiltered, printed as '.'
260	};
261
262	BitValue(bit_value_t V) : V(V) {}
263	explicit BitValue(const Init *Init) {
264	if (const auto *Bit = dyn_cast<BitInit>(Val: Init))
265	V = Bit->getValue() ? BIT_TRUE : BIT_FALSE;
266	else
267	V = BIT_UNSET;
268	}
269	BitValue(const BitsInit &Bits, unsigned Idx) : BitValue (Bits.getBit(Bit: Idx)) {}
270
271	bool isSet() const { return V == BIT_TRUE \|\| V == BIT_FALSE; }
272	bool isUnset() const { return V == BIT_UNSET; }
273	std::optional<uint64_t> getValue() const {
274	if (isSet())
275	return static_cast<uint64_t>(V);
276	return std::nullopt;
277	}
278
279	// For printing a bit value.
280	operator StringRef() const {
281	switch (V) {
282	case BIT_FALSE:
283	return "0";
284	case BIT_TRUE:
285	return "1";
286	case BIT_UNSET:
287	return "_";
288	case BIT_UNFILTERED:
289	return ".";
290	}
291	llvm_unreachable("Unknow bit value");
292	}
293
294	bool operator==(bit_value_t Other) const { return Other == V; }
295	bool operator!=(bit_value_t Other) const { return Other != V; }
296
297	private:
298	bit_value_t V;
299	};
300
301	} // end anonymous namespace
302
303	static raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
304	if (Value.EncodingDef != Value.Inst->TheDef)
305	OS << Value.EncodingDef->getName() << ":";
306	OS << Value.Inst->TheDef->getName();
307	return OS;
308	}
309
310	// Prints the bit value for each position.
311	static void dumpBits(raw_ostream &OS, const BitsInit &Bits) {
312	for (const Init *Bit : reverse(C: Bits.getBits()))
313	OS << BitValue (Bit);
314	}
315
316	static const BitsInit &getBitsField(const Record &Def, StringRef FieldName) {
317	const RecordVal *RV = Def.getValue(Name: FieldName);
318	if (const BitsInit *Bits = dyn_cast<BitsInit>(Val: RV->getValue()))
319	return *Bits;
320
321	// Handle variable length instructions.
322	VarLenInst VLI(cast<DagInit>(Val: RV->getValue()), RV);
323	SmallVector<const Init *, `16`> Bits;
324
325	for (const auto &SI : VLI) {
326	if (const BitsInit *BI = dyn_cast<BitsInit>(Val: SI.Value))
327	llvm::append_range(C&: Bits, R: BI->getBits());
328	else if (const BitInit *BI = dyn_cast<BitInit>(Val: SI.Value))
329	Bits.push_back(Elt: BI);
330	else
331	Bits.append(NumInputs: SI.BitWidth, Elt: UnsetInit::get(RK&: Def.getRecords()));
332	}
333
334	return *BitsInit::get(RK&: Def.getRecords(), Range: Bits);
335	}
336
337	// Representation of the instruction to work on.
338	typedef std::vector<BitValue> insn_t;
339
340	namespace {
341
// Key under which a Filter stores the chooser for instructions whose segment
// value is not constant. It is the largest uint64_t value.
static constexpr uint64_t NO_FIXED_SEGMENTS_SENTINEL = UINT64_MAX;

class FilterChooser;
346
347	/// Filter - Filter works with FilterChooser to produce the decoding tree for
348	/// the ISA.
349	///
350	/// It is useful to think of a Filter as governing the switch stmts of the
351	/// decoding tree in a certain level. Each case stmt delegates to an inferior
352	/// FilterChooser to decide what further decoding logic to employ, or in another
353	/// words, what other remaining bits to look at. The FilterChooser eventually
354	/// chooses a best Filter to do its job.
355	///
356	/// This recursive scheme ends when the number of Opcodes assigned to the
357	/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
358	/// the Filter/FilterChooser combo does not know how to distinguish among the
359	/// Opcodes assigned.
360	///
361	/// An example of a conflict is
362	///
363	/// Conflict:
364	/// 111101000.00........00010000....
365	/// 111101000.00........0001........
366	/// 1111010...00........0001........
367	/// 1111010...00....................
368	/// 1111010.........................
369	/// 1111............................
370	/// ................................
371	/// VST4q8a 111101000_00________00010000____
372	/// VST4q8b 111101000_00________00010000____
373	///
374	/// The Debug output shows the path that the decoding tree follows to reach the
375	/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
376	/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
377	///
378	/// The encoding info in the .td files does not specify this meta information,
379	/// which could have been used by the decoder to resolve the conflict. The
380	/// decoder could try to decode the even/odd register numbering and assign to
381	/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
382	/// version and return the Opcode since the two have the same Asm format string.
383	class Filter {
384	protected:
385	const FilterChooser &Owner; // FilterChooser who owns this filter
386	unsigned StartBit; // the starting bit position
387	unsigned NumBits; // number of bits to filter
388	bool Mixed; // a mixed region contains both set and unset bits
389
390	// Map of well-known segment value to the set of uid's with that value.
391	std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions;
392
393	// Set of uid's with non-constant segment values.
394	std::vector<EncodingIDAndOpcode> VariableInstructions;
395
396	// Map of well-known segment value to its delegate.
397	std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
398
399	// Number of instructions which fall under FilteredInstructions category.
400	unsigned NumFiltered;
401
402	// Keeps track of the last opcode in the filtered bucket.
403	EncodingIDAndOpcode LastOpcFiltered;
404
405	public:
406	Filter(Filter &&f);
407	Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits,
408	bool mixed);
409
410	~Filter() = default;
411
412	unsigned getNumFiltered() const { return NumFiltered; }
413
414	EncodingIDAndOpcode getSingletonOpc() const {
415	assert(NumFiltered == `1`);
416	return LastOpcFiltered;
417	}
418
419	// Return the filter chooser for the group of instructions without constant
420	// segment values.
421	const FilterChooser &getVariableFC() const {
422	assert(NumFiltered == `1` && FilterChooserMap.size() == `1`);
423	return *(FilterChooserMap.find(x: NO_FIXED_SEGMENTS_SENTINEL)->second);
424	}
425
426	// Divides the decoding task into sub tasks and delegates them to the
427	// inferior FilterChooser's.
428	//
429	// A special case arises when there's only one entry in the filtered
430	// instructions. In order to unambiguously decode the singleton, we need to
431	// match the remaining undecoded encoding bits against the singleton.
432	void recurse();
433
434	// Emit table entries to decode instructions given a segment or segments of
435	// bits.
436	void emitTableEntry(DecoderTableInfo &TableInfo) const;
437
438	// Returns the number of fanout produced by the filter. More fanout implies
439	// the filter distinguishes more categories of instructions.
440	unsigned usefulness() const;
441	}; // end class Filter
442
// States of the finite state machine that FilterChooser's filterProcessor()
// runs to produce its filter candidates.
enum bitAttr_t {
  ATTR_NONE = 0,
  ATTR_FILTERED = 1,
  ATTR_ALL_SET = 2,
  ATTR_ALL_UNSET = 3,
  ATTR_MIXED = 4
};
452
453	/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
454	/// in order to perform the decoding of instructions at the current level.
455	///
456	/// Decoding proceeds from the top down. Based on the well-known encoding bits
457	/// of instructions available, FilterChooser builds up the possible Filters that
458	/// can further the task of decoding by distinguishing among the remaining
459	/// candidate instructions.
460	///
461	/// Once a filter has been chosen, it is called upon to divide the decoding task
462	/// into sub-tasks and delegates them to its inferior FilterChoosers for further
463	/// processings.
464	///
465	/// It is useful to think of a Filter as governing the switch stmts of the
466	/// decoding tree. And each case is delegated to an inferior FilterChooser to
467	/// decide what further remaining bits to look at.
468
469	class FilterChooser {
470	protected:
471	friend class Filter;
472
473	// Vector of codegen instructions to choose our filter.
474	ArrayRef<EncodingAndInst> AllInstructions;
475
476	// Vector of uid's for this filter chooser to work on.
477	// The first member of the pair is the opcode id being decoded, the second is
478	// the opcode id that should be emitted.
479	ArrayRef<EncodingIDAndOpcode> Opcodes;
480
481	// Lookup table for the operand decoding of instructions.
482	const std::map<unsigned, std::vector<OperandInfo>> &Operands;
483
484	// Vector of candidate filters.
485	std::vector<Filter> Filters;
486
487	// Array of bit values passed down from our parent.
488	// Set to all BIT_UNFILTERED's for Parent == NULL.
489	std::vector<BitValue> FilterBitValues;
490
491	// Links to the FilterChooser above us in the decoding tree.
492	const FilterChooser *Parent;
493
494	// Index of the best filter from Filters.
495	int BestIndex;
496
497	// Width of instructions
498	unsigned BitWidth;
499
500	// Parent emitter
501	const DecoderEmitter *Emitter;
502
503	struct Island {
504	unsigned StartBit;
505	unsigned NumBits;
506	uint64_t FieldVal;
507	};
508
509	public:
510	FilterChooser(ArrayRef<EncodingAndInst> Insts,
511	ArrayRef<EncodingIDAndOpcode> IDs,
512	const std::map<unsigned, std::vector<OperandInfo>> &Ops,
513	unsigned BW, const DecoderEmitter *E)
514	: AllInstructions (Insts), Opcodes (IDs), Operands(Ops),
515	FilterBitValues (BW, BitValue::BIT_UNFILTERED), Parent(nullptr),
516	BestIndex(-`1`), BitWidth(BW), Emitter(E) {
517	doFilter();
518	}
519
520	FilterChooser(ArrayRef<EncodingAndInst> Insts,
521	ArrayRef<EncodingIDAndOpcode> IDs,
522	const std::map<unsigned, std::vector<OperandInfo>> &Ops,
523	const std::vector<BitValue> &ParentFilterBitValues,
524	const FilterChooser &parent)
525	: AllInstructions (Insts), Opcodes (IDs), Operands(Ops),
526	FilterBitValues (ParentFilterBitValues), Parent(&parent), BestIndex(-`1`),
527	BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
528	doFilter();
529	}
530
531	FilterChooser(const FilterChooser &) = delete;
532	void operator=(const FilterChooser &) = delete;
533
534	unsigned getBitWidth() const { return BitWidth; }
535
536	protected:
537	// Populates the insn given the uid.
538	void insnWithID(insn_t &Insn, unsigned Opcode) const {
539	const Record *EncodingDef = AllInstructions [Opcode].EncodingDef;
540	const BitsInit &Bits = getBitsField(Def: *EncodingDef, FieldName: "Inst");
541	Insn.resize(new_size: std::max(a: BitWidth, b: Bits.getNumBits()), x: BitValue::BIT_UNSET);
542	// We may have a SoftFail bitmask, which specifies a mask where an encoding
543	// may differ from the value in "Inst" and yet still be valid, but the
544	// disassembler should return SoftFail instead of Success.
545	//
546	// This is used for marking UNPREDICTABLE instructions in the ARM world.
547	const RecordVal *RV = EncodingDef->getValue(Name: "SoftFail");
548	const BitsInit SFBits = RV ? dyn_cast<BitsInit>(Val: RV->getValue()) : nullptr*;
549	for (unsigned i = `0`; i < Bits.getNumBits(); ++i) {
550	if (SFBits && BitValue (*SFBits, i) == BitValue::BIT_TRUE)
551	Insn [i] = BitValue::BIT_UNSET;
552	else
553	Insn [i] = BitValue (Bits, i);
554	}
555	}
556
557	// Populates the field of the insn given the start position and the number of
558	// consecutive bits to scan for.
559	//
560	// Returns a pair of values (indicator, field), where the indicator is false
561	// if there exists any uninitialized bit value in the range and true if all
562	// bits are well-known. The second value is the potentially populated field.
563	std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit,
564	unsigned NumBits) const;
565
566	/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
567	/// filter array as a series of chars.
568	void dumpFilterArray(raw_ostream &OS, ArrayRef<BitValue> Filter) const;
569
570	/// dumpStack - dumpStack traverses the filter chooser chain and calls
571	/// dumpFilterArray on each filter chooser up to the top level one.
572	void dumpStack(raw_ostream &OS, const char prefix) const*;
573
574	Filter &bestFilter() {
575	assert(BestIndex != -`1` && "BestIndex not set");
576	return Filters [BestIndex];
577	}
578
579	bool PositionFiltered(unsigned Idx) const {
580	return FilterBitValues [Idx].isSet();
581	}
582
583	// Calculates the island(s) needed to decode the instruction.
584	// This returns a list of undecoded bits of an instructions, for example,
585	// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
586	// decoded bits in order to verify that the instruction matches the Opcode.
587	unsigned getIslands(std::vector<Island> &Islands, const insn_t &Insn) const;
588
589	// Emits code to check the Predicates member of an instruction are true.
590	// Returns true if predicate matches were emitted, false otherwise.
591	bool emitPredicateMatch(raw_ostream &OS, unsigned Opc) const;
592	bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
593	raw_ostream &OS) const;
594
595	bool doesOpcodeNeedPredicate(unsigned Opc) const;
596	unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
597	void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
598
599	void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
600
601	// Emits table entries to decode the singleton.
602	void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
603	EncodingIDAndOpcode Opc) const;
604
605	// Emits code to decode the singleton, and then to decode the rest.
606	void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
607	const Filter &Best) const;
608
609	bool emitBinaryParser(raw_ostream &OS, indent Indent,
610	const OperandInfo &OpInfo) const;
611
612	bool emitDecoder(raw_ostream &OS, indent Indent, unsigned Opc) const;
613	std::pair<unsigned, bool> getDecoderIndex(DecoderSet &Decoders,
614	unsigned Opc) const;
615
616	// Assign a single filter and run with it.
617	void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
618
619	// reportRegion is a helper function for filterProcessor to mark a region as
620	// eligible for use as a filter region.
621	void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
622	bool AllowMixed);
623
624	// FilterProcessor scans the well-known encoding bits of the instructions and
625	// builds up a list of candidate filters. It chooses the best filter and
626	// recursively descends down the decoding tree.
627	bool filterProcessor(bool AllowMixed, bool Greedy = true);
628
629	// Decides on the best configuration of filter(s) to use in order to decode
630	// the instructions. A conflict of instructions may occur, in which case we
631	// dump the conflict set to the standard error.
632	void doFilter();
633
634	public:
635	// emitTableEntries - Emit state machine entries to decode our share of
636	// instructions.
637	void emitTableEntries(DecoderTableInfo &TableInfo) const;
638	};
639
640	} // end anonymous namespace
641
642	///////////////////////////
643	// //
644	// Filter Implementation //
645	// //
646	///////////////////////////
647
648	Filter::Filter(Filter &&f)
649	: Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
650	FilteredInstructions (std::move(f.FilteredInstructions)),
651	VariableInstructions (std::move(f.VariableInstructions)),
652	FilterChooserMap (std::move(f.FilterChooserMap)),
653	NumFiltered(f.NumFiltered), LastOpcFiltered (f.LastOpcFiltered) {}
654
655	Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits,
656	bool mixed)
657	: Owner(owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
658	assert(StartBit + NumBits - `1` < Owner.BitWidth);
659
660	NumFiltered = `0`;
661	LastOpcFiltered = {`0`, `0`};
662
663	for (const auto &OpcPair : Owner.Opcodes) {
664	insn_t Insn;
665
666	// Populates the insn given the uid.
667	Owner.insnWithID(Insn, Opcode: OpcPair.EncodingID);
668
669	// Scans the segment for possibly well-specified encoding bits.
670	auto [Ok, Field] = Owner.fieldFromInsn(Insn, StartBit, NumBits);
671
672	if (Ok) {
673	// The encoding bits are well-known. Lets add the uid of the
674	// instruction into the bucket keyed off the constant field value.
675	LastOpcFiltered = OpcPair;
676	FilteredInstructions [Field].push_back(x: LastOpcFiltered);
677	++NumFiltered;
678	} else {
679	// Some of the encoding bit(s) are unspecified. This contributes to
680	// one additional member of "Variable" instructions.
681	VariableInstructions.push_back(x: OpcPair);
682	}
683	}
684
685	assert((FilteredInstructions.size() + VariableInstructions.size() > `0`) &&
686	"Filter returns no instruction categories");
687	}
688
689	// Divides the decoding task into sub tasks and delegates them to the
690	// inferior FilterChooser's.
691	//
692	// A special case arises when there's only one entry in the filtered
693	// instructions. In order to unambiguously decode the singleton, we need to
694	// match the remaining undecoded encoding bits against the singleton.
695	void Filter::recurse() {
696	// Starts by inheriting our parent filter chooser's filter bit values.
697	std::vector<BitValue> BitValueArray(Owner.FilterBitValues);
698
699	if (!VariableInstructions.empty()) {
700	// Conservatively marks each segment position as BIT_UNSET.
701	for (unsigned bitIndex = `0`; bitIndex < NumBits; ++bitIndex)
702	BitValueArray [StartBit + bitIndex] = BitValue::BIT_UNSET;
703
704	// Delegates to an inferior filter chooser for further processing on this
705	// group of instructions whose segment values are variable.
706	FilterChooserMap.try_emplace(
707	k: NO_FIXED_SEGMENTS_SENTINEL,
708	args: std::make_unique<FilterChooser>(args: Owner.AllInstructions,
709	args&: VariableInstructions, args: Owner.Operands,
710	args&: BitValueArray, args: Owner));
711	}
712
713	// No need to recurse for a singleton filtered instruction.
714	// See also Filter::emit().*
715	if (getNumFiltered() == `1`) {
716	assert(FilterChooserMap.size() == `1`);
717	return;
718	}
719
720	// Otherwise, create sub choosers.
721	for (const auto &Inst : FilteredInstructions) {
722	// Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
723	for (unsigned bitIndex = `0`; bitIndex < NumBits; ++bitIndex)
724	BitValueArray [StartBit + bitIndex] = Inst.first & (`1ULL` << bitIndex)
725	? BitValue::BIT_TRUE
726	: BitValue::BIT_FALSE;
727
728	// Delegates to an inferior filter chooser for further processing on this
729	// category of instructions.
730	FilterChooserMap.try_emplace(
731	k: Inst.first,
732	args: std::make_unique<FilterChooser>(args: Owner.AllInstructions, args: Inst.second,
733	args: Owner.Operands, args&: BitValueArray, args: Owner));
734	}
735	}
736
737	static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
738	uint32_t DestIdx) {
739	// Any NumToSkip fixups in the current scope can resolve to the
740	// current location.
741	for (uint32_t FixupIdx : Fixups)
742	Table.patchNumToSkip(FixupIdx, DestIdx);
743	}
744
745	// Emit table entries to decode instructions given a segment or segments
746	// of bits.
747	void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
748	assert(isUInt<`8`>(NumBits) && "NumBits overflowed uint8 table entry!");
749	TableInfo.Table.push_back(Item: MCD::OPC_ExtractField);
750
751	TableInfo.Table.insertULEB128(Value: StartBit);
752	TableInfo.Table.push_back(Item: NumBits);
753
754	// If the NO_FIXED_SEGMENTS_SENTINEL is present, we need to add a new scope
755	// for this filter. Otherwise, we can skip adding a new scope and any
756	// patching added will automatically be added to the enclosing scope.
757
758	// If NO_FIXED_SEGMENTS_SENTINEL is present, it will be last entry in
759	// FilterChooserMap.
760
761	const uint64_t LastFilter = FilterChooserMap.rbegin()->first;
762	bool HasFallthrough = LastFilter == NO_FIXED_SEGMENTS_SENTINEL;
763	if (HasFallthrough)
764	TableInfo.FixupStack.emplace_back();
765
766	DecoderTable &Table = TableInfo.Table;
767
768	size_t PrevFilter = `0`;
769	for (const auto &[FilterVal, Delegate] : FilterChooserMap) {
770	// Field value NO_FIXED_SEGMENTS_SENTINEL implies a non-empty set of
771	// variable instructions. See also recurse().
772	if (FilterVal == NO_FIXED_SEGMENTS_SENTINEL) {
773	// Each scope should always have at least one filter value to check
774	// for.
775	assert(PrevFilter != `0` && "empty filter set!");
776	FixupList &CurScope = TableInfo.FixupStack.back();
777	// Resolve any NumToSkip fixups in the current scope.
778	resolveTableFixups(Table, Fixups: CurScope, DestIdx: Table.size());
779
780	// Delete the scope we have added here.
781	TableInfo.FixupStack.pop_back();
782
783	PrevFilter = `0`; // Don't re-process the filter's fallthrough.
784	} else {
785	// The last filtervalue emitted can be OPC_FilterValue if we are at
786	// outermost scope.
787	const uint8_t DecoderOp =
788	FilterVal == LastFilter && TableInfo.isOutermostScope()
789	? MCD::OPC_FilterValueOrFail
790	: MCD::OPC_FilterValue;
791	Table.push_back(Item: DecoderOp);
792	Table.insertULEB128(Value: FilterVal);
793	if (DecoderOp == MCD::OPC_FilterValue) {
794	// Reserve space for the NumToSkip entry. We'll backpatch the value
795	// later.
796	PrevFilter = Table.insertNumToSkip();
797	} else {
798	PrevFilter = `0`;
799	}
800	}
801
802	// We arrive at a category of instructions with the same segment value.
803	// Now delegate to the sub filter chooser for further decodings.
804	// The case may fallthrough, which happens if the remaining well-known
805	// encoding bits do not match exactly.
806	Delegate ->emitTableEntries(TableInfo);
807
808	// Now that we've emitted the body of the handler, update the NumToSkip
809	// of the filter itself to be able to skip forward when false.
810	if (PrevFilter)
811	Table.patchNumToSkip(FixupIdx: PrevFilter, DestIdx: Table.size());
812	}
813
814	// If there is no fallthrough and the final filter was not in the outermost
815	// scope, then it must be fixed up according to the enclosing scope rather
816	// than the current position.
817	if (PrevFilter)
818	TableInfo.FixupStack.back().push_back(x: PrevFilter);
819	}
820
821	// Returns the number of fanout produced by the filter. More fanout implies
822	// the filter distinguishes more categories of instructions.
823	unsigned Filter::usefulness() const {
824	return FilteredInstructions.size() + VariableInstructions.empty();
825	}
826
827	//////////////////////////////////
828	// //
829	// Filterchooser Implementation //
830	// //
831	//////////////////////////////////
832
833	// Emit the decoder state machine table. Returns a mask of MCD decoder ops
834	// that were emitted.
835	unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
836	DecoderTable &Table, indent Indent,
837	unsigned BitWidth, StringRef Namespace,
838	const EncodingIDsVec &EncodingIDs) const {
839	// We'll need to be able to map from a decoded opcode into the corresponding
840	// EncodingID for this specific combination of BitWidth and Namespace. This
841	// is used below to index into NumberedEncodings.
842	DenseMap<unsigned, unsigned> OpcodeToEncodingID;
843	OpcodeToEncodingID.reserve(NumEntries: EncodingIDs.size());
844	for (const auto &EI : EncodingIDs)
845	OpcodeToEncodingID [EI.Opcode] = EI.EncodingID;
846
847	OS << Indent << "static const uint8_t DecoderTable" << Namespace << BitWidth
848	<< "[] = {\n";
849
850	Indent += `2`;
851
852	// Emit ULEB128 encoded value to OS, returning the number of bytes emitted.
853	auto emitULEB128 = [](DecoderTable::const_iterator &I,
854	formatted_raw_ostream &OS) {
855	while (*I >= `128`)
856	OS << (unsigned)*I ++ << ", ";
857	OS << (unsigned)*I ++ << ", ";
858	};
859
860	// Emit `getNumToSkipInBytes()`-byte numtoskip value to OS, returning the
861	// NumToSkip value.
862	auto emitNumToSkip = [](DecoderTable::const_iterator &I,
863	formatted_raw_ostream &OS) {
864	uint8_t Byte = *I ++;
865	uint32_t NumToSkip = Byte;
866	OS << (unsigned)Byte << ", ";
867	Byte = *I ++;
868	OS << (unsigned)Byte << ", ";
869	NumToSkip \|= Byte << `8`;
870	if (getNumToSkipInBytes() == `3`) {
871	Byte = *I ++;
872	OS << (unsigned)(Byte) << ", ";
873	NumToSkip \|= Byte << `16`;
874	}
875	return NumToSkip;
876	};
877
878	// FIXME: We may be able to use the NumToSkip values to recover
879	// appropriate indentation levels.
880	DecoderTable::const_iterator I = Table.begin();
881	DecoderTable::const_iterator E = Table.end();
882	const uint8_t *const EndPtr = Table.data() + Table.size();
883
884	auto emitNumToSkipComment = [&](uint32_t NumToSkip, bool InComment = false) {
885	uint32_t Index = ((I - Table.begin()) + NumToSkip);
886	OS << (InComment ? ", " : "// ");
887	OS << "Skip to: " << Index;
888	if (*(I + NumToSkip) == MCD::OPC_Fail)
889	OS << " (Fail)";
890	};
891
892	unsigned OpcodeMask = `0`;
893
894	while (I != E) {
895	assert(I < E && "incomplete decode table entry!");
896
897	uint64_t Pos = I - Table.begin();
898	OS << "/* " << Pos << " */";
899	OS.PadToColumn(NewCol: `12`);
900
901	const uint8_t DecoderOp = *I ++;
902	OpcodeMask \|= (`1` << DecoderOp);
903	switch (DecoderOp) {
904	default:
905	PrintFatalError(Msg: "Invalid decode table opcode: " + Twine ((int)DecoderOp) +
906	" at index " + Twine (Pos));
907	case MCD::OPC_ExtractField: {
908	OS << Indent << "MCD::OPC_ExtractField, ";
909
910	// ULEB128 encoded start value.
911	const char ErrMsg = nullptr*;
912	unsigned Start = decodeULEB128(p: &I, n: nullptr*, end: EndPtr, error: &ErrMsg);
913	assert(ErrMsg == nullptr && "ULEB128 value too large!");
914	emitULEB128 (I, OS);
915
916	unsigned Len = *I ++;
917	OS << Len << ", // Inst{";
918	if (Len > `1`)
919	OS << (Start + Len - `1`) << "-";
920	OS << Start << "} ...\n";
921	break;
922	}
923	case MCD::OPC_FilterValue:
924	case MCD::OPC_FilterValueOrFail: {
925	bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail;
926	OS << Indent << "MCD::OPC_FilterValue" << (IsFail ? "OrFail, " : ", ");
927	// The filter value is ULEB128 encoded.
928	emitULEB128 (I, OS);
929
930	if (!IsFail) {
931	uint32_t NumToSkip = emitNumToSkip (I, OS);
932	emitNumToSkipComment (NumToSkip);
933	}
934	OS << `'\n'`;
935	break;
936	}
937	case MCD::OPC_CheckField:
938	case MCD::OPC_CheckFieldOrFail: {
939	bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail;
940	OS << Indent << "MCD::OPC_CheckField" << (IsFail ? "OrFail, " : ", ");
941	// ULEB128 encoded start value.
942	emitULEB128 (I, OS);
943	// 8-bit length.
944	unsigned Len = *I ++;
945	OS << Len << ", ";
946	// ULEB128 encoded field value.
947	emitULEB128 (I, OS);
948
949	if (!IsFail) {
950	uint32_t NumToSkip = emitNumToSkip (I, OS);
951	emitNumToSkipComment (NumToSkip);
952	}
953	OS << `'\n'`;
954	break;
955	}
956	case MCD::OPC_CheckPredicate:
957	case MCD::OPC_CheckPredicateOrFail: {
958	bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail;
959
960	OS << Indent << "MCD::OPC_CheckPredicate" << (IsFail ? "OrFail, " : ", ");
961	emitULEB128 (I, OS);
962
963	if (!IsFail) {
964	uint32_t NumToSkip = emitNumToSkip (I, OS);
965	emitNumToSkipComment (NumToSkip);
966	}
967	OS << `'\n'`;
968	break;
969	}
970	case MCD::OPC_Decode:
971	case MCD::OPC_TryDecode:
972	case MCD::OPC_TryDecodeOrFail: {
973	bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail;
974	bool IsTry = DecoderOp == MCD::OPC_TryDecode \|\| IsFail;
975	// Decode the Opcode value.
976	const char ErrMsg = nullptr*;
977	unsigned Opc = decodeULEB128(p: &I, n: nullptr*, end: EndPtr, error: &ErrMsg);
978	assert(ErrMsg == nullptr && "ULEB128 value too large!");
979
980	OS << Indent << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode"
981	<< (IsFail ? "OrFail, " : ", ");
982	emitULEB128 (I, OS);
983
984	// Decoder index.
985	unsigned DecodeIdx = decodeULEB128(p: &I, n: nullptr*, end: EndPtr, error: &ErrMsg);
986	assert(ErrMsg == nullptr && "ULEB128 value too large!");
987	emitULEB128 (I, OS);
988
989	auto EncI = OpcodeToEncodingID.find(Val: Opc);
990	assert(EncI != OpcodeToEncodingID.end() && "no encoding entry");
991	auto EncodingID = EncI ->second;
992
993	if (!IsTry) {
994	OS << "// Opcode: " << NumberedEncodings [EncodingID]
995	<< ", DecodeIdx: " << DecodeIdx << `'\n'`;
996	break;
997	}
998
999	// Fallthrough for OPC_TryDecode.
1000	if (!IsFail) {
1001	uint32_t NumToSkip = emitNumToSkip (I, OS);
1002	OS << "// Opcode: " << NumberedEncodings [EncodingID]
1003	<< ", DecodeIdx: " << DecodeIdx;
1004	emitNumToSkipComment (NumToSkip, /InComment=/true);
1005	}
1006	OS << `'\n'`;
1007	break;
1008	}
1009	case MCD::OPC_SoftFail: {
1010	OS << Indent << "MCD::OPC_SoftFail, ";
1011	// Decode the positive mask.
1012	const char ErrMsg = nullptr*;
1013	uint64_t PositiveMask = decodeULEB128(p: &I, n: nullptr*, end: EndPtr, error: &ErrMsg);
1014	assert(ErrMsg == nullptr && "ULEB128 value too large!");
1015	emitULEB128 (I, OS);
1016
1017	// Decode the negative mask.
1018	uint64_t NegativeMask = decodeULEB128(p: &I, n: nullptr*, end: EndPtr, error: &ErrMsg);
1019	assert(ErrMsg == nullptr && "ULEB128 value too large!");
1020	emitULEB128 (I, OS);
1021	OS << "// +ve mask: 0x";
1022	OS.write_hex(N: PositiveMask);
1023	OS << ", -ve mask: 0x";
1024	OS.write_hex(N: NegativeMask);
1025	OS << `'\n'`;
1026	break;
1027	}
1028	case MCD::OPC_Fail:
1029	OS << Indent << "MCD::OPC_Fail,\n";
1030	break;
1031	}
1032	}
1033	OS << Indent << "0\n";
1034
1035	Indent -= `2`;
1036
1037	OS << Indent << "};\n\n";
1038
1039	return OpcodeMask;
1040	}
1041
1042	void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
1043	ArrayRef<unsigned> InstrLen) const {
1044	OS << "static const uint8_t InstrLenTable[] = {\n";
1045	for (unsigned Len : InstrLen)
1046	OS << Len << ",\n";
1047	OS << "};\n\n";
1048	}
1049
1050	void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
1051	PredicateSet &Predicates,
1052	indent Indent) const {
1053	// The predicate function is just a big switch statement based on the
1054	// input predicate index.
1055	OS << Indent << "static bool checkDecoderPredicate(unsigned Idx, "
1056	<< "const FeatureBitset &Bits) {\n";
1057	Indent += `2`;
1058	OS << Indent << "switch (Idx) {\n";
1059	OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
1060	for (const auto &[Index, Predicate] : enumerate(First&: Predicates)) {
1061	OS << Indent << "case " << Index << ":\n";
1062	OS << Indent + `2` << "return (" << Predicate << ");\n";
1063	}
1064	OS << Indent << "}\n";
1065	Indent -= `2`;
1066	OS << Indent << "}\n\n";
1067	}
1068
1069	void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
1070	DecoderSet &Decoders,
1071	indent Indent) const {
1072	// The decoder function is just a big switch statement or a table of function
1073	// pointers based on the input decoder index.
1074
1075	// TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
1076	// It would be better for emitBinaryParser to use a 64-bit tmp whenever
1077	// possible but fall back to an InsnType-sized tmp for truly large fields.
1078	StringRef TmpTypeDecl =
1079	"using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
1080	"InsnType, uint64_t>;\n";
1081	StringRef DecodeParams =
1082	"DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
1083	"MCDisassembler *Decoder, bool &DecodeComplete";
1084
1085	if (UseFnTableInDecodeToMCInst) {
1086	// Emit a function for each case first.
1087	for (const auto &[Index, Decoder] : enumerate(First&: Decoders)) {
1088	OS << Indent << "template <typename InsnType>\n";
1089	OS << Indent << "DecodeStatus decodeFn" << Index << "(" << DecodeParams
1090	<< ") {\n";
1091	Indent += `2`;
1092	OS << Indent << TmpTypeDecl;
1093	OS << Indent << "[[maybe_unused]] TmpType tmp;\n";
1094	OS << Decoder;
1095	OS << Indent << "return S;\n";
1096	Indent -= `2`;
1097	OS << Indent << "}\n\n";
1098	}
1099	}
1100
1101	OS << Indent << "// Handling " << Decoders.size() << " cases.\n";
1102	OS << Indent << "template <typename InsnType>\n";
1103	OS << Indent << "static DecodeStatus decodeToMCInst(unsigned Idx, "
1104	<< DecodeParams << ") {\n";
1105	Indent += `2`;
1106	OS << Indent << "DecodeComplete = true;\n";
1107
1108	if (UseFnTableInDecodeToMCInst) {
1109	// Build a table of function pointers.
1110	OS << Indent << "using DecodeFnTy = DecodeStatus (*)(" << DecodeParams
1111	<< ");\n";
1112	OS << Indent << "static constexpr DecodeFnTy decodeFnTable[] = {\n";
1113	for (size_t Index : llvm::seq(Size: Decoders.size()))
1114	OS << Indent + `2` << "decodeFn" << Index << ",\n";
1115	OS << Indent << "};\n";
1116	OS << Indent << "if (Idx >= " << Decoders.size() << ")\n";
1117	OS << Indent + `2` << "llvm_unreachable(\"Invalid index!\");\n";
1118	OS << Indent
1119	<< "return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
1120	"DecodeComplete);\n";
1121	} else {
1122	OS << Indent << TmpTypeDecl;
1123	OS << Indent << "TmpType tmp;\n";
1124	OS << Indent << "switch (Idx) {\n";
1125	OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
1126	for (const auto &[Index, Decoder] : enumerate(First&: Decoders)) {
1127	OS << Indent << "case " << Index << ":\n";
1128	OS << Decoder;
1129	OS << Indent + `2` << "return S;\n";
1130	}
1131	OS << Indent << "}\n";
1132	}
1133	Indent -= `2`;
1134	OS << Indent << "}\n";
1135	}
1136
1137	// Populates the field of the insn given the start position and the number of
1138	// consecutive bits to scan for.
1139	//
1140	// Returns a pair of values (indicator, field), where the indicator is false
1141	// if there exists any uninitialized bit value in the range and true if all
1142	// bits are well-known. The second value is the potentially populated field.
1143	std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn,
1144	unsigned StartBit,
1145	unsigned NumBits) const {
1146	uint64_t Field = `0`;
1147
1148	for (unsigned i = `0`; i < NumBits; ++i) {
1149	if (Insn [StartBit + i] == BitValue::BIT_UNSET)
1150	return {false, Field};
1151
1152	if (Insn [StartBit + i] == BitValue::BIT_TRUE)
1153	Field = Field \| (`1ULL` << i);
1154	}
1155
1156	return {true, Field};
1157	}
1158
1159	/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
1160	/// filter array as a series of chars.
1161	void FilterChooser::dumpFilterArray(raw_ostream &OS,
1162	ArrayRef<BitValue> Filter) const {
1163	for (unsigned bitIndex = BitWidth; bitIndex > `0`; bitIndex--)
1164	OS << Filter [bitIndex - `1`];
1165	}
1166
1167	/// dumpStack - dumpStack traverses the filter chooser chain and calls
1168	/// dumpFilterArray on each filter chooser up to the top level one.
1169	void FilterChooser::dumpStack(raw_ostream &OS, const char prefix) const* {
1170	const FilterChooser current = this*;
1171
1172	while (current) {
1173	OS << prefix;
1174	dumpFilterArray(OS, Filter: current->FilterBitValues);
1175	OS << `'\n'`;
1176	current = current->Parent;
1177	}
1178	}
1179
1180	// Calculates the island(s) needed to decode the instruction.
1181	// This returns a list of undecoded bits of an instructions, for example,
1182	// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
1183	// decoded bits in order to verify that the instruction matches the Opcode.
1184	unsigned FilterChooser::getIslands(std::vector<Island> &Islands,
1185	const insn_t &Insn) const {
1186	uint64_t FieldVal;
1187	unsigned StartBit;
1188
1189	// 0: Init
1190	// 1: Water (the bit value does not affect decoding)
1191	// 2: Island (well-known bit value needed for decoding)
1192	unsigned State = `0`;
1193
1194	for (unsigned i = `0`; i < BitWidth; ++i) {
1195	std::optional<uint64_t> Val = Insn [i].getValue();
1196	bool Filtered = PositionFiltered(Idx: i);
1197	switch (State) {
1198	default:
1199	llvm_unreachable("Unreachable code!");
1200	case `0`:
1201	case `1`:
1202	if (Filtered \|\| !Val) {
1203	State = `1`; // Still in Water
1204	} else {
1205	State = `2`; // Into the Island
1206	StartBit = i;
1207	FieldVal = *Val;
1208	}
1209	break;
1210	case `2`:
1211	if (Filtered \|\| !Val) {
1212	State = `1`; // Into the Water
1213	Islands.push_back(x: {.StartBit: StartBit, .NumBits: i - StartBit, .FieldVal: FieldVal});
1214	} else {
1215	State = `2`; // Still in Island
1216	FieldVal \|= *Val << (i - StartBit);
1217	}
1218	break;
1219	}
1220	}
1221	// If we are still in Island after the loop, do some housekeeping.
1222	if (State == `2`)
1223	Islands.push_back(x: {.StartBit: StartBit, .NumBits: BitWidth - StartBit, .FieldVal: FieldVal});
1224
1225	return Islands.size();
1226	}
1227
1228	bool FilterChooser::emitBinaryParser(raw_ostream &OS, indent Indent,
1229	const OperandInfo &OpInfo) const {
1230	const std::string &Decoder = OpInfo.Decoder;
1231
1232	bool UseInsertBits = OpInfo.numFields() != `1` \|\| OpInfo.InitValue != `0`;
1233
1234	if (UseInsertBits) {
1235	OS << Indent << "tmp = 0x";
1236	OS.write_hex(N: OpInfo.InitValue);
1237	OS << ";\n";
1238	}
1239
1240	for (const EncodingField &EF : OpInfo) {
1241	OS << Indent;
1242	if (UseInsertBits)
1243	OS << "insertBits(tmp, ";
1244	else
1245	OS << "tmp = ";
1246	OS << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << `')'`;
1247	if (UseInsertBits)
1248	OS << ", " << EF.Offset << ", " << EF.Width << `')'`;
1249	else if (EF.Offset != `0`)
1250	OS << " << " << EF.Offset;
1251	OS << ";\n";
1252	}
1253
1254	bool OpHasCompleteDecoder;
1255	if (!Decoder.empty()) {
1256	OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
1257	OS << Indent << "if (!Check(S, " << Decoder
1258	<< "(MI, tmp, Address, Decoder))) { "
1259	<< (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ")
1260	<< "return MCDisassembler::Fail; }\n";
1261	} else {
1262	OpHasCompleteDecoder = true;
1263	OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
1264	}
1265	return OpHasCompleteDecoder;
1266	}
1267
1268	bool FilterChooser::emitDecoder(raw_ostream &OS, indent Indent,
1269	unsigned Opc) const {
1270	bool HasCompleteDecoder = true;
1271
1272	for (const auto &Op : Operands.find(x: Opc)->second) {
1273	// If a custom instruction decoder was specified, use that.
1274	if (Op.numFields() == `0` && !Op.Decoder.empty()) {
1275	HasCompleteDecoder = Op.HasCompleteDecoder;
1276	OS << Indent << "if (!Check(S, " << Op.Decoder
1277	<< "(MI, insn, Address, Decoder))) { "
1278	<< (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
1279	<< "return MCDisassembler::Fail; }\n";
1280	break;
1281	}
1282
1283	HasCompleteDecoder &= emitBinaryParser(OS, Indent, OpInfo: Op);
1284	}
1285	return HasCompleteDecoder;
1286	}
1287
1288	std::pair<unsigned, bool> FilterChooser::getDecoderIndex(DecoderSet &Decoders,
1289	unsigned Opc) const {
1290	// Build up the predicate string.
1291	SmallString<`256`> Decoder;
1292	// FIXME: emitDecoder() function can take a buffer directly rather than
1293	// a stream.
1294	raw_svector_ostream S(Decoder);
1295	indent Indent(UseFnTableInDecodeToMCInst ? `2` : `4`);
1296	bool HasCompleteDecoder = emitDecoder(OS&: S, Indent, Opc);
1297
1298	// Using the full decoder string as the key value here is a bit
1299	// heavyweight, but is effective. If the string comparisons become a
1300	// performance concern, we can implement a mangling of the predicate
1301	// data easily enough with a map back to the actual string. That's
1302	// overkill for now, though.
1303
1304	// Make sure the predicate is in the table.
1305	Decoders.insert(X: CachedHashString (Decoder));
1306	// Now figure out the index for when we write out the table.
1307	DecoderSet::const_iterator P = find(Range&: Decoders, Val: Decoder.str());
1308	return {(unsigned)(P - Decoders.begin()), HasCompleteDecoder};
1309	}
1310
1311	// If ParenIfBinOp is true, print a surrounding () if Val uses && or \|\|.
1312	bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
1313	raw_ostream &OS) const {
1314	if (const auto *D = dyn_cast<DefInit>(Val: &Val)) {
1315	if (!D->getDef()->isSubClassOf(Name: "SubtargetFeature"))
1316	return true;
1317	OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
1318	<< "]";
1319	return false;
1320	}
1321	if (const auto *D = dyn_cast<DagInit>(Val: &Val)) {
1322	std::string Op = D->getOperator()->getAsString();
1323	if (Op == "not" && D->getNumArgs() == `1`) {
1324	OS << `'!'`;
1325	return emitPredicateMatchAux(Val: D->getArg(Num: `0`), ParenIfBinOp: true*, OS);
1326	}
1327	if ((Op == "any_of" \|\| Op == "all_of") && D->getNumArgs() > `0`) {
1328	bool Paren = D->getNumArgs() > `1` && std::exchange(obj&: ParenIfBinOp, new_val: true);
1329	if (Paren)
1330	OS << `'('`;
1331	ListSeparator LS(Op == "any_of" ? " \|\| " : " && ");
1332	for (auto *Arg : D->getArgs()) {
1333	OS << LS;
1334	if (emitPredicateMatchAux(Val: *Arg, ParenIfBinOp, OS))
1335	return true;
1336	}
1337	if (Paren)
1338	OS << `')'`;
1339	return false;
1340	}
1341	}
1342	return true;
1343	}
1344
1345	bool FilterChooser::emitPredicateMatch(raw_ostream &OS, unsigned Opc) const {
1346	const ListInit *Predicates =
1347	AllInstructions [Opc].EncodingDef->getValueAsListInit(FieldName: "Predicates");
1348	bool IsFirstEmission = true;
1349	for (unsigned i = `0`; i < Predicates->size(); ++i) {
1350	const Record *Pred = Predicates->getElementAsRecord(Idx: i);
1351	if (!Pred->getValue(Name: "AssemblerMatcherPredicate"))
1352	continue;
1353
1354	if (!isa<DagInit>(Val: Pred->getValue(Name: "AssemblerCondDag")->getValue()))
1355	continue;
1356
1357	if (!IsFirstEmission)
1358	OS << " && ";
1359	if (emitPredicateMatchAux(Val: *Pred->getValueAsDag(FieldName: "AssemblerCondDag"),
1360	ParenIfBinOp: Predicates->size() > `1`, OS))
1361	PrintFatalError(ErrorLoc: Pred->getLoc(), Msg: "Invalid AssemblerCondDag!");
1362	IsFirstEmission = false;
1363	}
1364	return !Predicates->empty();
1365	}
1366
1367	bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
1368	const ListInit *Predicates =
1369	AllInstructions [Opc].EncodingDef->getValueAsListInit(FieldName: "Predicates");
1370	for (unsigned i = `0`; i < Predicates->size(); ++i) {
1371	const Record *Pred = Predicates->getElementAsRecord(Idx: i);
1372	if (!Pred->getValue(Name: "AssemblerMatcherPredicate"))
1373	continue;
1374
1375	if (isa<DagInit>(Val: Pred->getValue(Name: "AssemblerCondDag")->getValue()))
1376	return true;
1377	}
1378	return false;
1379	}
1380
1381	unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
1382	StringRef Predicate) const {
1383	// Using the full predicate string as the key value here is a bit
1384	// heavyweight, but is effective. If the string comparisons become a
1385	// performance concern, we can implement a mangling of the predicate
1386	// data easily enough with a map back to the actual string. That's
1387	// overkill for now, though.
1388
1389	// Make sure the predicate is in the table.
1390	TableInfo.Predicates.insert(X: CachedHashString (Predicate));
1391	// Now figure out the index for when we write out the table.
1392	PredicateSet::const_iterator P = find(Range&: TableInfo.Predicates, Val: Predicate);
1393	return (unsigned)(P - TableInfo.Predicates.begin());
1394	}
1395
1396	void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
1397	unsigned Opc) const {
1398	if (!doesOpcodeNeedPredicate(Opc))
1399	return;
1400
1401	// Build up the predicate string.
1402	SmallString<`256`> Predicate;
1403	// FIXME: emitPredicateMatch() functions can take a buffer directly rather
1404	// than a stream.
1405	raw_svector_ostream PS(Predicate);
1406	emitPredicateMatch(OS&: PS, Opc);
1407
1408	// Figure out the index into the predicate table for the predicate just
1409	// computed.
1410	unsigned PIdx = getPredicateIndex(TableInfo, Predicate: PS.str());
1411
1412	const uint8_t DecoderOp = TableInfo.isOutermostScope()
1413	? MCD::OPC_CheckPredicateOrFail
1414	: MCD::OPC_CheckPredicate;
1415	TableInfo.Table.push_back(Item: DecoderOp);
1416	TableInfo.Table.insertULEB128(Value: PIdx);
1417
1418	if (DecoderOp == MCD::OPC_CheckPredicate) {
1419	// Push location for NumToSkip backpatching.
1420	TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip());
1421	}
1422	}
1423
1424	void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
1425	unsigned Opc) const {
1426	const Record *EncodingDef = AllInstructions [Opc].EncodingDef;
1427	const RecordVal *RV = EncodingDef->getValue(Name: "SoftFail");
1428	const BitsInit SFBits = RV ? dyn_cast<BitsInit>(Val: RV->getValue()) : nullptr*;
1429
1430	if (!SFBits)
1431	return;
1432	const BitsInit *InstBits = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
1433
1434	APInt PositiveMask(BitWidth, `0ULL`);
1435	APInt NegativeMask(BitWidth, `0ULL`);
1436	for (unsigned i = `0`; i < BitWidth; ++i) {
1437	BitValue B(*SFBits, i);
1438	BitValue IB(*InstBits, i);
1439
1440	if (B != BitValue::BIT_TRUE)
1441	continue;
1442
1443	if (IB == BitValue::BIT_FALSE) {
1444	// The bit is meant to be false, so emit a check to see if it is true.
1445	PositiveMask.setBit(i);
1446	} else if (IB == BitValue::BIT_TRUE) {
1447	// The bit is meant to be true, so emit a check to see if it is false.
1448	NegativeMask.setBit(i);
1449	} else {
1450	// The bit is not set; this must be an error!
1451	errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
1452	<< AllInstructions [Opc] << " is set but Inst{" << i
1453	<< "} is unset!\n"
1454	<< " - You can only mark a bit as SoftFail if it is fully defined"
1455	<< " (1/0 - not '?') in Inst\n";
1456	return;
1457	}
1458	}
1459
1460	bool NeedPositiveMask = PositiveMask.getBoolValue();
1461	bool NeedNegativeMask = NegativeMask.getBoolValue();
1462
1463	if (!NeedPositiveMask && !NeedNegativeMask)
1464	return;
1465
1466	TableInfo.Table.push_back(Item: MCD::OPC_SoftFail);
1467	TableInfo.Table.insertULEB128(Value: PositiveMask.getZExtValue());
1468	TableInfo.Table.insertULEB128(Value: NegativeMask.getZExtValue());
1469	}
1470
1471	// Emits table entries to decode the singleton.
1472	void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1473	EncodingIDAndOpcode Opc) const {
1474	std::vector<Island> Islands;
1475	insn_t Insn;
1476	insnWithID(Insn, Opcode: Opc.EncodingID);
1477
1478	// Look for islands of undecoded bits of the singleton.
1479	getIslands(Islands, Insn);
1480
1481	// Emit the predicate table entry if one is needed.
1482	emitPredicateTableEntry(TableInfo, Opc: Opc.EncodingID);
1483
1484	// Check any additional encoding fields needed.
1485	for (const Island &Ilnd : reverse(C&: Islands)) {
1486	unsigned NumBits = Ilnd.NumBits;
1487	assert(isUInt<`8`>(NumBits) && "NumBits overflowed uint8 table entry!");
1488	const uint8_t DecoderOp = TableInfo.isOutermostScope()
1489	? MCD::OPC_CheckFieldOrFail
1490	: MCD::OPC_CheckField;
1491	TableInfo.Table.push_back(Item: DecoderOp);
1492
1493	TableInfo.Table.insertULEB128(Value: Ilnd.StartBit);
1494	TableInfo.Table.push_back(Item: NumBits);
1495	TableInfo.Table.insertULEB128(Value: Ilnd.FieldVal);
1496
1497	if (DecoderOp == MCD::OPC_CheckField) {
1498	// Allocate space in the table for fixup so all our relative position
1499	// calculations work OK even before we fully resolve the real value here.
1500
1501	// Push location for NumToSkip backpatching.
1502	TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip());
1503	}
1504	}
1505
1506	// Check for soft failure of the match.
1507	emitSoftFailTableEntry(TableInfo, Opc: Opc.EncodingID);
1508
1509	auto [DIdx, HasCompleteDecoder] =
1510	getDecoderIndex(Decoders&: TableInfo.Decoders, Opc: Opc.EncodingID);
1511
1512	// Produce OPC_Decode or OPC_TryDecode opcode based on the information
1513	// whether the instruction decoder is complete or not. If it is complete
1514	// then it handles all possible values of remaining variable/unfiltered bits
1515	// and for any value can determine if the bitpattern is a valid instruction
1516	// or not. This means OPC_Decode will be the final step in the decoding
1517	// process. If it is not complete, then the Fail return code from the
1518	// decoder method indicates that additional processing should be done to see
1519	// if there is any other instruction that also matches the bitpattern and
1520	// can decode it.
1521	const uint8_t DecoderOp = HasCompleteDecoder ? MCD::OPC_Decode
1522	: (TableInfo.isOutermostScope()
1523	? MCD::OPC_TryDecodeOrFail
1524	: MCD::OPC_TryDecode);
1525	TableInfo.Table.push_back(Item: DecoderOp);
1526	NumEncodingsSupported ++;
1527	TableInfo.Table.insertULEB128(Value: Opc.Opcode);
1528	TableInfo.Table.insertULEB128(Value: DIdx);
1529
1530	if (DecoderOp == MCD::OPC_TryDecode) {
1531	// Push location for NumToSkip backpatching.
1532	TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip());
1533	}
1534	}
1535
1536	// Emits table entries to decode the singleton, and then to decode the rest.
1537	void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1538	const Filter &Best) const {
1539	EncodingIDAndOpcode Opc = Best.getSingletonOpc();
1540
1541	// complex singletons need predicate checks from the first singleton
1542	// to refer forward to the variable filterchooser that follows.
1543	TableInfo.FixupStack.emplace_back();
1544
1545	emitSingletonTableEntry(TableInfo, Opc);
1546
1547	resolveTableFixups(Table&: TableInfo.Table, Fixups: TableInfo.FixupStack.back(),
1548	DestIdx: TableInfo.Table.size());
1549	TableInfo.FixupStack.pop_back();
1550
1551	Best.getVariableFC().emitTableEntries(TableInfo);
1552	}
1553
1554	// Assign a single filter and run with it. Top level API client can initialize
1555	// with a single filter to start the filtering process.
1556	void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
1557	bool mixed) {
1558	Filters.clear();
1559	Filters.emplace_back(args&: *this, args&: startBit, args&: numBit, args: true);
1560	BestIndex = `0`; // Sole Filter instance to choose from.
1561	bestFilter().recurse();
1562	}
1563
1564	// reportRegion is a helper function for filterProcessor to mark a region as
1565	// eligible for use as a filter region.
1566	void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
1567	unsigned BitIndex, bool AllowMixed) {
1568	if (RA == ATTR_MIXED && AllowMixed)
1569	Filters.emplace_back(args&: *this, args&: StartBit, args: BitIndex - StartBit, args: true);
1570	else if (RA == ATTR_ALL_SET && !AllowMixed)
1571	Filters.emplace_back(args&: *this, args&: StartBit, args: BitIndex - StartBit, args: false);
1572	}
1573
1574	// FilterProcessor scans the well-known encoding bits of the instructions and
1575	// builds up a list of candidate filters. It chooses the best filter and
1576	// recursively descends down the decoding tree.
1577	bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
1578	Filters.clear();
1579	BestIndex = -`1`;
1580	unsigned numInstructions = Opcodes.size();
1581
1582	assert(numInstructions && "Filter created with no instructions");
1583
1584	// No further filtering is necessary.
1585	if (numInstructions == `1`)
1586	return true;
1587
1588	// Heuristics. See also doFilter()'s "Heuristics" comment when num of
1589	// instructions is 3.
1590	if (AllowMixed && !Greedy) {
1591	assert(numInstructions == `3`);
1592
1593	for (const auto &Opcode : Opcodes) {
1594	std::vector<Island> Islands;
1595	insn_t Insn;
1596
1597	insnWithID(Insn, Opcode: Opcode.EncodingID);
1598
1599	// Look for islands of undecoded bits of any instruction.
1600	if (getIslands(Islands, Insn) > `0`) {
1601	// Found an instruction with island(s). Now just assign a filter.
1602	runSingleFilter(startBit: Islands [`0`].StartBit, numBit: Islands [`0`].NumBits, mixed: true);
1603	return true;
1604	}
1605	}
1606	}
1607
1608	unsigned BitIndex;
1609
1610	// We maintain BIT_WIDTH copies of the bitAttrs automaton.
1611	// The automaton consumes the corresponding bit from each
1612	// instruction.
1613	//
1614	// Input symbols: 0, 1, and _ (unset).
1615	// States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
1616	// Initial state: NONE.
1617	//
1618	// (NONE) ------- [01] -> (ALL_SET)
1619	// (NONE) ------- _ ----> (ALL_UNSET)
1620	// (ALL_SET) ---- [01] -> (ALL_SET)
1621	// (ALL_SET) ---- _ ----> (MIXED)
1622	// (ALL_UNSET) -- [01] -> (MIXED)
1623	// (ALL_UNSET) -- _ ----> (ALL_UNSET)
1624	// (MIXED) ------ . ----> (MIXED)
1625	// (FILTERED)---- . ----> (FILTERED)
1626
1627	std::vector<bitAttr_t> bitAttrs(BitWidth, ATTR_NONE);
1628
1629	// FILTERED bit positions provide no entropy and are not worthy of pursuing.
1630	// Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
1631	for (BitIndex = `0`; BitIndex < BitWidth; ++BitIndex)
1632	if (FilterBitValues [BitIndex].isSet())
1633	bitAttrs [BitIndex] = ATTR_FILTERED;
1634
1635	for (const auto &OpcPair : Opcodes) {
1636	insn_t insn;
1637
1638	insnWithID(Insn&: insn, Opcode: OpcPair.EncodingID);
1639
1640	for (BitIndex = `0`; BitIndex < BitWidth; ++BitIndex) {
1641	switch (bitAttrs [BitIndex]) {
1642	case ATTR_NONE:
1643	if (insn [BitIndex] == BitValue::BIT_UNSET)
1644	bitAttrs [BitIndex] = ATTR_ALL_UNSET;
1645	else
1646	bitAttrs [BitIndex] = ATTR_ALL_SET;
1647	break;
1648	case ATTR_ALL_SET:
1649	if (insn [BitIndex] == BitValue::BIT_UNSET)
1650	bitAttrs [BitIndex] = ATTR_MIXED;
1651	break;
1652	case ATTR_ALL_UNSET:
1653	if (insn [BitIndex] != BitValue::BIT_UNSET)
1654	bitAttrs [BitIndex] = ATTR_MIXED;
1655	break;
1656	case ATTR_MIXED:
1657	case ATTR_FILTERED:
1658	break;
1659	}
1660	}
1661	}
1662
1663	// The regionAttr automaton consumes the bitAttrs automatons' state,
1664	// lowest-to-highest.
1665	//
1666	// Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
1667	// States: NONE, ALL_SET, MIXED
1668	// Initial state: NONE
1669	//
1670	// (NONE) ----- F --> (NONE)
1671	// (NONE) ----- S --> (ALL_SET) ; and set region start
1672	// (NONE) ----- U --> (NONE)
1673	// (NONE) ----- M --> (MIXED) ; and set region start
1674	// (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
1675	// (ALL_SET) -- S --> (ALL_SET)
1676	// (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
1677	// (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
1678	// (MIXED) ---- F --> (NONE) ; and report a MIXED region
1679	// (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
1680	// (MIXED) ---- U --> (NONE) ; and report a MIXED region
1681	// (MIXED) ---- M --> (MIXED)
1682
1683	bitAttr_t RA = ATTR_NONE;
1684	unsigned StartBit = `0`;
1685
1686	for (BitIndex = `0`; BitIndex < BitWidth; ++BitIndex) {
1687	bitAttr_t bitAttr = bitAttrs [BitIndex];
1688
1689	assert(bitAttr != ATTR_NONE && "Bit without attributes");
1690
1691	switch (RA) {
1692	case ATTR_NONE:
1693	switch (bitAttr) {
1694	case ATTR_FILTERED:
1695	break;
1696	case ATTR_ALL_SET:
1697	StartBit = BitIndex;
1698	RA = ATTR_ALL_SET;
1699	break;
1700	case ATTR_ALL_UNSET:
1701	break;
1702	case ATTR_MIXED:
1703	StartBit = BitIndex;
1704	RA = ATTR_MIXED;
1705	break;
1706	default:
1707	llvm_unreachable("Unexpected bitAttr!");
1708	}
1709	break;
1710	case ATTR_ALL_SET:
1711	switch (bitAttr) {
1712	case ATTR_FILTERED:
1713	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1714	RA = ATTR_NONE;
1715	break;
1716	case ATTR_ALL_SET:
1717	break;
1718	case ATTR_ALL_UNSET:
1719	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1720	RA = ATTR_NONE;
1721	break;
1722	case ATTR_MIXED:
1723	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1724	StartBit = BitIndex;
1725	RA = ATTR_MIXED;
1726	break;
1727	default:
1728	llvm_unreachable("Unexpected bitAttr!");
1729	}
1730	break;
1731	case ATTR_MIXED:
1732	switch (bitAttr) {
1733	case ATTR_FILTERED:
1734	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1735	StartBit = BitIndex;
1736	RA = ATTR_NONE;
1737	break;
1738	case ATTR_ALL_SET:
1739	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1740	StartBit = BitIndex;
1741	RA = ATTR_ALL_SET;
1742	break;
1743	case ATTR_ALL_UNSET:
1744	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1745	RA = ATTR_NONE;
1746	break;
1747	case ATTR_MIXED:
1748	break;
1749	default:
1750	llvm_unreachable("Unexpected bitAttr!");
1751	}
1752	break;
1753	case ATTR_ALL_UNSET:
1754	llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
1755	case ATTR_FILTERED:
1756	llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
1757	}
1758	}
1759
1760	// At the end, if we're still in ALL_SET or MIXED states, report a region
1761	switch (RA) {
1762	case ATTR_NONE:
1763	break;
1764	case ATTR_FILTERED:
1765	break;
1766	case ATTR_ALL_SET:
1767	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1768	break;
1769	case ATTR_ALL_UNSET:
1770	break;
1771	case ATTR_MIXED:
1772	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1773	break;
1774	}
1775
1776	// We have finished with the filter processings. Now it's time to choose
1777	// the best performing filter.
1778	BestIndex = `0`;
1779	bool AllUseless = true;
1780	unsigned BestScore = `0`;
1781
1782	for (const auto &[Idx, Filter] : enumerate(First&: Filters)) {
1783	unsigned Usefulness = Filter.usefulness();
1784
1785	if (Usefulness)
1786	AllUseless = false;
1787
1788	if (Usefulness > BestScore) {
1789	BestIndex = Idx;
1790	BestScore = Usefulness;
1791	}
1792	}
1793
1794	if (!AllUseless)
1795	bestFilter().recurse();
1796
1797	return !AllUseless;
1798	} // end of FilterChooser::filterProcessor(bool)
1799
1800	// Decides on the best configuration of filter(s) to use in order to decode
1801	// the instructions. A conflict of instructions may occur, in which case we
1802	// dump the conflict set to the standard error.
1803	void FilterChooser::doFilter() {
1804	unsigned Num = Opcodes.size();
1805	assert(Num && "FilterChooser created with no instructions");
1806
1807	// Try regions of consecutive known bit values first.
1808	if (filterProcessor(AllowMixed: false))
1809	return;
1810
1811	// Then regions of mixed bits (both known and unitialized bit values allowed).
1812	if (filterProcessor(AllowMixed: true))
1813	return;
1814
1815	// Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1816	// no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1817	// well-known encoding pattern. In such case, we backtrack and scan for the
1818	// the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1819	if (Num == `3` && filterProcessor(AllowMixed: true, Greedy: false))
1820	return;
1821
1822	// If we come to here, the instruction decoding has failed.
1823	// Set the BestIndex to -1 to indicate so.
1824	BestIndex = -`1`;
1825	}
1826
1827	// emitTableEntries - Emit state machine entries to decode our share of
1828	// instructions.
1829	void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
1830	if (Opcodes.size() == `1`) {
1831	// There is only one instruction in the set, which is great!
1832	// Call emitSingletonDecoder() to see whether there are any remaining
1833	// encodings bits.
1834	emitSingletonTableEntry(TableInfo, Opc: Opcodes [`0`]);
1835	return;
1836	}
1837
1838	// Choose the best filter to do the decodings!
1839	if (BestIndex != -`1`) {
1840	const Filter &Best = Filters [BestIndex];
1841	if (Best.getNumFiltered() == `1`)
1842	emitSingletonTableEntry(TableInfo, Best);
1843	else
1844	Best.emitTableEntry(TableInfo);
1845	return;
1846	}
1847
1848	// We don't know how to decode these instructions! Dump the
1849	// conflict set and bail.
1850
1851	// Print out useful conflict information for postmortem analysis.
1852	errs() << "Decoding Conflict:\n";
1853
1854	dumpStack(OS&: errs(), prefix: "\t\t");
1855
1856	for (auto Opcode : Opcodes) {
1857	const EncodingAndInst &Enc = AllInstructions [Opcode.EncodingID];
1858	errs() << `'\t'` << Enc << `' '`;
1859	dumpBits(OS&: errs(), Bits: getBitsField(Def: *Enc.EncodingDef, FieldName: "Inst"));
1860	errs() << `'\n'`;
1861	}
1862	PrintFatalError(Msg: "Decoding conflict encountered");
1863	}
1864
1865	static std::string findOperandDecoderMethod(const Record *Record) {
1866	std::string Decoder;
1867
1868	const RecordVal *DecoderString = Record->getValue(Name: "DecoderMethod");
1869	const StringInit *String =
1870	DecoderString ? dyn_cast<StringInit>(Val: DecoderString->getValue()) : nullptr;
1871	if (String) {
1872	Decoder = String->getValue().str();
1873	if (!Decoder.empty())
1874	return Decoder;
1875	}
1876
1877	if (Record->isSubClassOf(Name: "RegisterOperand"))
1878	// Allows use of a DecoderMethod in referenced RegisterClass if set.
1879	return findOperandDecoderMethod(Record: Record->getValueAsDef(FieldName: "RegClass"));
1880
1881	if (Record->isSubClassOf(Name: "RegisterClass")) {
1882	Decoder = "Decode" + Record->getName().str() + "RegisterClass";
1883	} else if (Record->isSubClassOf(Name: "PointerLikeRegClass")) {
1884	Decoder = "DecodePointerLikeRegClass" +
1885	utostr(X: Record->getValueAsInt(FieldName: "RegClassKind"));
1886	}
1887
1888	return Decoder;
1889	}
1890
1891	OperandInfo getOpInfo(const Record *TypeRecord) {
1892	const RecordVal *HasCompleteDecoderVal =
1893	TypeRecord->getValue(Name: "hasCompleteDecoder");
1894	const BitInit *HasCompleteDecoderBit =
1895	HasCompleteDecoderVal
1896	? dyn_cast<BitInit>(Val: HasCompleteDecoderVal->getValue())
1897	: nullptr;
1898	bool HasCompleteDecoder =
1899	HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
1900
1901	return OperandInfo (findOperandDecoderMethod(Record: TypeRecord), HasCompleteDecoder);
1902	}
1903
1904	static void parseVarLenInstOperand(const Record &Def,
1905	std::vector<OperandInfo> &Operands,
1906	const CodeGenInstruction &CGI) {
1907
1908	const RecordVal *RV = Def.getValue(Name: "Inst");
1909	VarLenInst VLI(cast<DagInit>(Val: RV->getValue()), RV);
1910	SmallVector<int> TiedTo;
1911
1912	for (const auto &[Idx, Op] : enumerate(First: CGI.Operands)) {
1913	if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > `0`)
1914	for (auto *Arg : Op.MIOperandInfo->getArgs())
1915	Operands.push_back(x: getOpInfo(TypeRecord: cast<DefInit>(Val: Arg)->getDef()));
1916	else
1917	Operands.push_back(x: getOpInfo(TypeRecord: Op.Rec));
1918
1919	int TiedReg = Op.getTiedRegister();
1920	TiedTo.push_back(Elt: -`1`);
1921	if (TiedReg != -`1`) {
1922	TiedTo [Idx] = TiedReg;
1923	TiedTo [TiedReg] = Idx;
1924	}
1925	}
1926
1927	unsigned CurrBitPos = `0`;
1928	for (const auto &EncodingSegment : VLI) {
1929	unsigned Offset = `0`;
1930	StringRef OpName;
1931
1932	if (const StringInit *SI = dyn_cast<StringInit>(Val: EncodingSegment.Value)) {
1933	OpName = SI->getValue();
1934	} else if (const DagInit *DI = dyn_cast<DagInit>(Val: EncodingSegment.Value)) {
1935	OpName = cast<StringInit>(Val: DI->getArg(Num: `0`))->getValue();
1936	Offset = cast<IntInit>(Val: DI->getArg(Num: `2`))->getValue();
1937	}
1938
1939	if (!OpName.empty()) {
1940	auto OpSubOpPair =
1941	const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
1942	Op: OpName);
1943	unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(Op: OpSubOpPair);
1944	Operands [OpIdx].addField(Base: CurrBitPos, Width: EncodingSegment.BitWidth, Offset);
1945	if (!EncodingSegment.CustomDecoder.empty())
1946	Operands [OpIdx].Decoder = EncodingSegment.CustomDecoder.str();
1947
1948	int TiedReg = TiedTo [OpSubOpPair.first];
1949	if (TiedReg != -`1`) {
1950	unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
1951	Op: {TiedReg, OpSubOpPair.second});
1952	Operands [OpIdx].addField(Base: CurrBitPos, Width: EncodingSegment.BitWidth, Offset);
1953	}
1954	}
1955
1956	CurrBitPos += EncodingSegment.BitWidth;
1957	}
1958	}
1959
1960	static void debugDumpRecord(const Record &Rec) {
1961	// Dump the record, so we can see what's going on.
1962	PrintNote(PrintMsg: [&Rec](raw_ostream &OS) {
1963	OS << "Dumping record for previous error:\n";
1964	OS << Rec;
1965	});
1966	}
1967
1968	/// For an operand field named OpName: populate OpInfo.InitValue with the
1969	/// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
1970	/// insert from the decoded instruction.
1971	static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
1972	std::map<StringRef, StringRef> &TiedNames,
1973	StringRef OpName, OperandInfo &OpInfo) {
1974	// Some bits of the operand may be required to be 1 depending on the
1975	// instruction's encoding. Collect those bits.
1976	if (const RecordVal *EncodedValue = EncodingDef.getValue(Name: OpName))
1977	if (const BitsInit *OpBits = dyn_cast<BitsInit>(Val: EncodedValue->getValue()))
1978	for (unsigned I = `0`; I < OpBits->getNumBits(); ++I)
1979	if (const BitInit *OpBit = dyn_cast<BitInit>(Val: OpBits->getBit(Bit: I)))
1980	if (OpBit->getValue())
1981	OpInfo.InitValue \|= `1ULL` << I;
1982
1983	for (unsigned I = `0`, J = `0`; I != Bits.getNumBits(); I = J) {
1984	const VarInit *Var;
1985	unsigned Offset = `0`;
1986	for (; J != Bits.getNumBits(); ++J) {
1987	const VarBitInit *BJ = dyn_cast<VarBitInit>(Val: Bits.getBit(Bit: J));
1988	if (BJ) {
1989	Var = dyn_cast<VarInit>(Val: BJ->getBitVar());
1990	if (I == J)
1991	Offset = BJ->getBitNum();
1992	else if (BJ->getBitNum() != Offset + J - I)
1993	break;
1994	} else {
1995	Var = dyn_cast<VarInit>(Val: Bits.getBit(Bit: J));
1996	}
1997	if (!Var \|\|
1998	(Var->getName() != OpName && Var->getName() != TiedNames [OpName]))
1999	break;
2000	}
2001	if (I == J)
2002	++J;
2003	else
2004	OpInfo.addField(Base: I, Width: J - I, Offset);
2005	}
2006	}
2007
2008	static unsigned
2009	populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef,
2010	const CodeGenInstruction &CGI, unsigned Opc,
2011	std::map<unsigned, std::vector<OperandInfo>> &Operands,
2012	bool IsVarLenInst) {
2013	const Record &Def = *CGI.TheDef;
2014	// If all the bit positions are not specified; do not decode this instruction.
2015	// We are bound to fail! For proper disassembly, the well-known encoding bits
2016	// of the instruction must be fully specified.
2017
2018	const BitsInit &Bits = getBitsField(Def: EncodingDef, FieldName: "Inst");
2019	if (Bits.allInComplete())
2020	return `0`;
2021
2022	std::vector<OperandInfo> InsnOperands;
2023
2024	// If the instruction has specified a custom decoding hook, use that instead
2025	// of trying to auto-generate the decoder.
2026	StringRef InstDecoder = EncodingDef.getValueAsString(FieldName: "DecoderMethod");
2027	if (!InstDecoder.empty()) {
2028	bool HasCompleteInstDecoder =
2029	EncodingDef.getValueAsBit(FieldName: "hasCompleteDecoder");
2030	InsnOperands.push_back(
2031	x: OperandInfo (InstDecoder.str(), HasCompleteInstDecoder));
2032	Operands [Opc] = std::move(InsnOperands);
2033	return Bits.getNumBits();
2034	}
2035
2036	// Generate a description of the operand of the instruction that we know
2037	// how to decode automatically.
2038	// FIXME: We'll need to have a way to manually override this as needed.
2039
2040	// Gather the outputs/inputs of the instruction, so we can find their
2041	// positions in the encoding. This assumes for now that they appear in the
2042	// MCInst in the order that they're listed.
2043	std::vector<std::pair<const Init *, StringRef>> InOutOperands;
2044	const DagInit *Out = Def.getValueAsDag(FieldName: "OutOperandList");
2045	const DagInit *In = Def.getValueAsDag(FieldName: "InOperandList");
2046	for (const auto &[Idx, Arg] : enumerate(First: Out->getArgs()))
2047	InOutOperands.emplace_back(args: Arg, args: Out->getArgNameStr(Num: Idx));
2048	for (const auto &[Idx, Arg] : enumerate(First: In->getArgs()))
2049	InOutOperands.emplace_back(args: Arg, args: In->getArgNameStr(Num: Idx));
2050
2051	// Search for tied operands, so that we can correctly instantiate
2052	// operands that are not explicitly represented in the encoding.
2053	std::map<StringRef, StringRef> TiedNames;
2054	for (const auto &Op : CGI.Operands) {
2055	for (const auto &[J, CI] : enumerate(First: Op.Constraints)) {
2056	if (!CI.isTied())
2057	continue;
2058	std::pair<unsigned, unsigned> SO =
2059	CGI.Operands.getSubOperandNumber(Op: CI.getTiedOperand());
2060	StringRef TiedName = CGI.Operands [SO.first].SubOpNames [SO.second];
2061	if (TiedName.empty())
2062	TiedName = CGI.Operands [SO.first].Name;
2063	StringRef MyName = Op.SubOpNames [J];
2064	if (MyName.empty())
2065	MyName = Op.Name;
2066
2067	TiedNames [MyName] = TiedName;
2068	TiedNames [TiedName] = MyName;
2069	}
2070	}
2071
2072	if (IsVarLenInst) {
2073	parseVarLenInstOperand(Def: EncodingDef, Operands&: InsnOperands, CGI);
2074	} else {
2075	// For each operand, see if we can figure out where it is encoded.
2076	for (const auto &Op : InOutOperands) {
2077	const Init *OpInit = Op.first;
2078	StringRef OpName = Op.second;
2079
2080	// We're ready to find the instruction encoding locations for this
2081	// operand.
2082
2083	// First, find the operand type ("OpInit"), and sub-op names
2084	// ("SubArgDag") if present.
2085	const DagInit *SubArgDag = dyn_cast<DagInit>(Val: OpInit);
2086	if (SubArgDag)
2087	OpInit = SubArgDag->getOperator();
2088	const Record *OpTypeRec = cast<DefInit>(Val: OpInit)->getDef();
2089	// Lookup the sub-operands from the operand type record (note that only
2090	// Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp).
2091	const DagInit *SubOps = OpTypeRec->isSubClassOf(Name: "Operand")
2092	? OpTypeRec->getValueAsDag(FieldName: "MIOperandInfo")
2093	: nullptr;
2094
2095	// Lookup the decoder method and construct a new OperandInfo to hold our
2096	// result.
2097	OperandInfo OpInfo = getOpInfo(TypeRecord: OpTypeRec);
2098
2099	// If we have named sub-operands...
2100	if (SubArgDag) {
2101	// Then there should not be a custom decoder specified on the top-level
2102	// type.
2103	if (!OpInfo.Decoder.empty()) {
2104	PrintError(ErrorLoc: EncodingDef.getLoc(),
2105	Msg: "DecoderEmitter: operand \"" + OpName + "\" has type \"" +
2106	OpInit->getAsString() +
2107	"\" with a custom DecoderMethod, but also named "
2108	"sub-operands.");
2109	continue;
2110	}
2111
2112	// Decode each of the sub-ops separately.
2113	assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs());
2114	for (const auto &[I, Arg] : enumerate(First: SubOps->getArgs())) {
2115	StringRef SubOpName = SubArgDag->getArgNameStr(Num: I);
2116	OperandInfo SubOpInfo = getOpInfo(TypeRecord: cast<DefInit>(Val: Arg)->getDef());
2117
2118	addOneOperandFields(EncodingDef, Bits, TiedNames, OpName: SubOpName,
2119	OpInfo&: SubOpInfo);
2120	InsnOperands.push_back(x: std::move(SubOpInfo));
2121	}
2122	continue;
2123	}
2124
2125	// Otherwise, if we have an operand with sub-operands, but they aren't
2126	// named...
2127	if (SubOps && OpInfo.Decoder.empty()) {
2128	// If it's a single sub-operand, and no custom decoder, use the decoder
2129	// from the one sub-operand.
2130	if (SubOps->getNumArgs() == `1`)
2131	OpInfo = getOpInfo(TypeRecord: cast<DefInit>(Val: SubOps->getArg(Num: `0`))->getDef());
2132
2133	// If we have multiple sub-ops, there'd better have a custom
2134	// decoder. (Otherwise we don't know how to populate them properly...)
2135	if (SubOps->getNumArgs() > `1`) {
2136	PrintError(ErrorLoc: EncodingDef.getLoc(),
2137	Msg: "DecoderEmitter: operand \"" + OpName +
2138	"\" uses MIOperandInfo with multiple ops, but doesn't "
2139	"have a custom decoder!");
2140	debugDumpRecord(Rec: EncodingDef);
2141	continue;
2142	}
2143	}
2144
2145	addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo);
2146	// FIXME: it should be an error not to find a definition for a given
2147	// operand, rather than just failing to add it to the resulting
2148	// instruction! (This is a longstanding bug, which will be addressed in an
2149	// upcoming change.)
2150	if (OpInfo.numFields() > `0`)
2151	InsnOperands.push_back(x: std::move(OpInfo));
2152	}
2153	}
2154	Operands [Opc] = std::move(InsnOperands);
2155
2156	#if 0
2157	LLVM_DEBUG({
2158	// Dumps the instruction encoding bits.
2159	dumpBits(errs(), Bits);
2160
2161	errs() << `'\n'`;
2162
2163	// Dumps the list of operand info.
2164	for (unsigned i = `0`, e = CGI.Operands.size(); i != e; ++i) {
2165	const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
2166	const std::string &OperandName = Info.Name;
2167	const Record &OperandDef = *Info.Rec;
2168
2169	errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
2170	}
2171	});
2172	#endif
2173
2174	return Bits.getNumBits();
2175	}
2176
2177	// emitFieldFromInstruction - Emit the templated helper function
2178	// fieldFromInstruction().
2179	// On Windows we make sure that this function is not inlined when
2180	// using the VS compiler. It has a bug which causes the function
2181	// to be optimized out in some circumstances. See llvm.org/pr38292
//
// The text written to OS defines two overloads of fieldFromInstruction():
//  * one enabled when InsnType is integral, which builds a mask for the
//    requested bit range, applies it to insn and shifts the result down by
//    startBit;
//  * one enabled for every other InsnType, which forwards to
//    insn.extractBitsAsZExtValue(numBits, startBit) and returns uint64_t.
2182	static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
// Everything inside the raw string literal is emitted verbatim into the
// generated file, including the comments it contains.
2183	OS << R"(
2184	// Helper functions for extracting fields from encoded instructions.
2185	// InsnType must either be integral or an APInt-like object that must:
2186	// * be default-constructible and copy-constructible
2187	// * be constructible from an APInt (this can be private)
2188	// * Support insertBits(bits, startBit, numBits)
2189	// * Support extractBitsAsZExtValue(numBits, startBit)
2190	// * Support the ~, &, ==, and != operators with other objects of the same type
2191	// * Support the != and bitwise & with uint64_t
2192	// * Support put (<<) to raw_ostream&
2193	template <typename InsnType>
2194	#if defined(_MSC_VER) && !defined(__clang__)
2195	__declspec(noinline)
2196	#endif
2197	static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
2198	fieldFromInstruction(const InsnType &insn, unsigned startBit,
2199	unsigned numBits) {
2200	assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
2201	assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
2202	"Instruction field out of bounds!");
2203	InsnType fieldMask;
2204	if (numBits == sizeof(InsnType) * 8)
2205	fieldMask = (InsnType)(-1LL);
2206	else
2207	fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
2208	return (insn & fieldMask) >> startBit;
2209	}
2210
2211	template <typename InsnType>
2212	static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
2213	fieldFromInstruction(const InsnType &insn, unsigned startBit,
2214	unsigned numBits) {
2215	return insn.extractBitsAsZExtValue(numBits, startBit);
2216	}
2217	)";
2218	}
2219
2220	// emitInsertBits - Emit the templated helper function insertBits().
//
// In the emitted helper, an integral InsnType has the bits OR-ed into field
// at startBit (bits already set in field are left as they are; numBits is
// only used by the assertion). Any other InsnType delegates to
// field.insertBits(bits, startBit, numBits).
2221	static void emitInsertBits(formatted_raw_ostream &OS) {
// Everything inside the raw string literal is emitted verbatim into the
// generated file.
2222	OS << R"(
2223	// Helper function for inserting bits extracted from an encoded instruction into
2224	// a field.
2225	template <typename InsnType>
2226	static void insertBits(InsnType &field, InsnType bits, unsigned startBit,
2227	unsigned numBits) {
2228	if constexpr (std::is_integral<InsnType>::value) {
2229	assert(startBit + numBits <= sizeof field * 8);
2230	(void)numBits;
2231	field \|= (InsnType)bits << startBit;
2232	} else {
2233	field.insertBits(bits, startBit, numBits);
2234	}
2235	}
2236	)";
2237	}
2238
2239	// emitDecodeInstruction - Emit the templated helper function
2240	// decodeInstruction().
//
// Also emits the decodeNumToSkip() helper that decodeInstruction() uses.
//
// OS           - stream the generated code is written to.
// IsVarLenInst - when true, decodeInstruction() gets an extra `makeUp`
//                callback parameter, and calls to it are emitted before
//                fields are extracted and before the final decode.
// OpcodeMask   - tested as a bit set indexed by MCD opcode value; the case
//                blocks for the TryDecode, CheckPredicate and SoftFail
//                opcodes are only emitted when the matching bit is set.
2241	static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
2242	unsigned OpcodeMask) {
// Work out which optional opcode handlers have to be emitted.
2243	const bool HasTryDecode = OpcodeMask & ((`1` << MCD::OPC_TryDecode) \|
2244	(`1` << MCD::OPC_TryDecodeOrFail));
2245	const bool HasCheckPredicate =
2246	OpcodeMask &
2247	((`1` << MCD::OPC_CheckPredicate) \| (`1` << MCD::OPC_CheckPredicateOrFail));
2248	const bool HasSoftFail = OpcodeMask & (`1` << MCD::OPC_SoftFail);
2249
// decodeNumToSkip(): reads NumToSkip from the table, low byte first.
2250	OS << R"(
2251	static unsigned decodeNumToSkip(const uint8_t *&Ptr) {
2252	unsigned NumToSkip = *Ptr++;
2253	NumToSkip \|= (*Ptr++) << 8;
2254	)";
// A third byte is only read when getNumToSkipInBytes() returns 3.
2255	if (getNumToSkipInBytes() == `3`)
2256	OS << " NumToSkip \|= (*Ptr++) << 16;\n";
2257	OS << R"( return NumToSkip;
2258	}
2259
2260	template <typename InsnType>
2261	static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
2262	InsnType insn, uint64_t Address,
2263	const MCDisassembler *DisAsm,
2264	const MCSubtargetInfo &STI)";
// Variable-length decoders take the makeUp callback as a trailing parameter.
2265	if (IsVarLenInst) {
2266	OS << ",\n "
2267	"llvm::function_ref<void(APInt &, uint64_t)> makeUp";
2268	}
2269	OS << ") {\n";
// The feature bits are only declared when predicate checks are emitted.
2270	if (HasCheckPredicate)
2271	OS << " const FeatureBitset &Bits = STI.getFeatureBits();\n";
2272
// Start of the interpreter loop and the OPC_ExtractField handler.
2273	OS << R"(
2274	const uint8_t *Ptr = DecodeTable;
2275	uint64_t CurFieldValue = 0;
2276	DecodeStatus S = MCDisassembler::Success;
2277	while (true) {
2278	ptrdiff_t Loc = Ptr - DecodeTable;
2279	const uint8_t DecoderOp = *Ptr++;
2280	switch (DecoderOp) {
2281	default:
2282	errs() << Loc << ": Unexpected decode table opcode: "
2283	<< (int)DecoderOp << '\n';
2284	return MCDisassembler::Fail;
2285	case MCD::OPC_ExtractField: {
2286	// Decode the start value.
2287	unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
2288	unsigned Len = *Ptr++;)";
// For variable-length instructions makeUp(insn, Start + Len) is called before
// the field is read (presumably to make those bits available in insn -- the
// callback is defined by the caller; confirm there).
2289	if (IsVarLenInst)
2290	OS << "\n makeUp(insn, Start + Len);";
// Rest of OPC_ExtractField, the OPC_FilterValue[OrFail] handler and the start
// of the OPC_CheckField[OrFail] handler.
2291	OS << R"(
2292	CurFieldValue = fieldFromInstruction(insn, Start, Len);
2293	LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
2294	<< Len << "): " << CurFieldValue << "\n");
2295	break;
2296	}
2297	case MCD::OPC_FilterValue:
2298	case MCD::OPC_FilterValueOrFail: {
2299	bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail;
2300	// Decode the field value.
2301	uint64_t Val = decodeULEB128AndIncUnsafe(Ptr);
2302	bool Failed = Val != CurFieldValue;
2303	unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2304
2305	// Note: Print NumToSkip even for OPC_FilterValueOrFail to simplify debug
2306	// prints.
2307	LLVM_DEBUG({
2308	StringRef OpName = IsFail ? "OPC_FilterValueOrFail" : "OPC_FilterValue";
2309	dbgs() << Loc << ": " << OpName << '(' << Val << ", " << NumToSkip
2310	<< ") " << (Failed ? "FAIL:" : "PASS:")
2311	<< " continuing at " << (Ptr - DecodeTable) << '\n';
2312	});
2313
2314	// Perform the filter operation.
2315	if (Failed) {
2316	if (IsFail)
2317	return MCDisassembler::Fail;
2318	Ptr += NumToSkip;
2319	}
2320	break;
2321	}
2322	case MCD::OPC_CheckField:
2323	case MCD::OPC_CheckFieldOrFail: {
2324	bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail;
2325	// Decode the start value.
2326	unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
2327	unsigned Len = *Ptr;)";
// Same makeUp call as for OPC_ExtractField, emitted before the field is read.
2328	if (IsVarLenInst)
2329	OS << "\n makeUp(insn, Start + Len);";
// Rest of the OPC_CheckField[OrFail] handler.
2330	OS << R"(
2331	uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
2332	// Decode the field value.
2333	unsigned PtrLen = 0;
2334	uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
2335	Ptr += PtrLen;
2336	bool Failed = ExpectedValue != FieldValue;
2337	unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2338
2339	LLVM_DEBUG({
2340	StringRef OpName = IsFail ? "OPC_CheckFieldOrFail" : "OPC_CheckField";
2341	dbgs() << Loc << ": " << OpName << '(' << Start << ", " << Len << ", "
2342	<< ExpectedValue << ", " << NumToSkip << "): FieldValue = "
2343	<< FieldValue << ", ExpectedValue = " << ExpectedValue << ": "
2344	<< (Failed ? "FAIL\n" : "PASS\n");
2345	});
2346
2347	// If the actual and expected values don't match, skip or fail.
2348	if (Failed) {
2349	if (IsFail)
2350	return MCDisassembler::Fail;
2351	Ptr += NumToSkip;
2352	}
2353	break;
2354	})";
// OPC_CheckPredicate[OrFail] handler; emitted only when OpcodeMask has one of
// those opcodes set.
2355	if (HasCheckPredicate) {
2356	OS << R"(
2357	case MCD::OPC_CheckPredicate:
2358	case MCD::OPC_CheckPredicateOrFail: {
2359	bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail;
2360	// Decode the Predicate Index value.
2361	unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr);
2362	unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2363	// Check the predicate.
2364	bool Failed = !checkDecoderPredicate(PIdx, Bits);
2365
2366	LLVM_DEBUG({
2367	StringRef OpName = IsFail ? "OPC_CheckPredicateOrFail" : "OPC_CheckPredicate";
2368	dbgs() << Loc << ": " << OpName << '(' << PIdx << ", " << NumToSkip
2369	<< "): " << (Failed ? "FAIL\n" : "PASS\n");
2370	});
2371
2372	if (Failed) {
2373	if (IsFail)
2374	return MCDisassembler::Fail;
2375	Ptr += NumToSkip;
2376	}
2377	break;
2378	})";
2379	}
// OPC_Decode handler (always emitted).
2380	OS << R"(
2381	case MCD::OPC_Decode: {
2382	// Decode the Opcode value.
2383	unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
2384	unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2385
2386	MI.clear();
2387	MI.setOpcode(Opc);
2388	bool DecodeComplete;)";
// For variable-length instructions the emitted code looks the length up in
// InstrLenTable[Opc] and passes it to makeUp before decoding.
2389	if (IsVarLenInst) {
2390	OS << "\n unsigned Len = InstrLenTable[Opc];\n"
2391	<< " makeUp(insn, Len);";
2392	}
2393	OS << R"(
2394	S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm, DecodeComplete);
2395	assert(DecodeComplete);
2396
2397	LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
2398	<< ", using decoder " << DecodeIdx << ": "
2399	<< (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
2400	return S;
2401	})";
// OPC_TryDecode[OrFail] handler; emitted only when OpcodeMask has one of
// those opcodes set.
2402	if (HasTryDecode) {
2403	OS << R"(
2404	case MCD::OPC_TryDecode:
2405	case MCD::OPC_TryDecodeOrFail: {
2406	bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail;
2407	// Decode the Opcode value.
2408	unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
2409	unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2410	unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2411
2412	// Perform the decode operation.
2413	MCInst TmpMI;
2414	TmpMI.setOpcode(Opc);
2415	bool DecodeComplete;
2416	S = decodeToMCInst(DecodeIdx, S, insn, TmpMI, Address, DisAsm, DecodeComplete);
2417	LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
2418	<< ", using decoder " << DecodeIdx << ": ");
2419
2420	if (DecodeComplete) {
2421	// Decoding complete.
2422	LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
2423	MI = TmpMI;
2424	return S;
2425	}
2426	assert(S == MCDisassembler::Fail);
2427	if (IsFail) {
2428	LLVM_DEBUG(dbgs() << "FAIL: returning FAIL\n");
2429	return MCDisassembler::Fail;
2430	}
2431	// If the decoding was incomplete, skip.
2432	Ptr += NumToSkip;
2433	LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
2434	// Reset decode status. This also drops a SoftFail status that could be
2435	// set before the decode attempt.
2436	S = MCDisassembler::Success;
2437	break;
2438	})";
2439	}
// OPC_SoftFail handler; emitted only when OpcodeMask has that opcode set.
2440	if (HasSoftFail) {
2441	OS << R"(
2442	case MCD::OPC_SoftFail: {
2443	// Decode the mask values.
2444	uint64_t PositiveMask = decodeULEB128AndIncUnsafe(Ptr);
2445	uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
2446	bool Failed = (insn & PositiveMask) != 0 \|\| (~insn & NegativeMask) != 0;
2447	if (Failed)
2448	S = MCDisassembler::SoftFail;
2449	LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Failed ? "FAIL\n" : "PASS\n"));
2450	break;
2451	})";
2452	}
// OPC_Fail handler and the end of the switch, loop and function.
2453	OS << R"(
2454	case MCD::OPC_Fail: {
2455	LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n");
2456	return MCDisassembler::Fail;
2457	}
2458	}
2459	}
2460	llvm_unreachable("bogosity detected in disassembler state machine!");
2461	}
2462
2463	)";
2464	}
2465
// emitCheck - Emit the Check() helper function. The description below is of
// the emitted Check(), which combines In into Out with '&' and reports
// whether the combined status differs from MCDisassembler::Fail.
//
2466	// Helper to propagate SoftFail status. Returns false if the status is Fail;
2467	// callers are expected to early-exit in that condition. (Note, the '&' operator
2468	// is correct to propagate the values of this enum; see comment on 'enum
2469	// DecodeStatus'.)
2470	static void emitCheck(formatted_raw_ostream &OS) {
// Everything inside the raw string literal is emitted verbatim into the
// generated file.
2471	OS << R"(
2472	static bool Check(DecodeStatus &Out, DecodeStatus In) {
2473	Out = static_cast<DecodeStatus>(Out & In);
2474	return Out != MCDisassembler::Fail;
2475	}
2476
2477	)";
2478	}
2479
2480	// Collect all HwModes referenced by the target for encoding purposes,
2481	// returning a vector of corresponding names.
2482	static void collectHwModesReferencedForEncodings(
2483	const CodeGenHwModes &HWM, std::vector<StringRef> &Names,
2484	NamespacesHwModesMap &NamespacesWithHwModes) {
2485	SmallBitVector BV(HWM.getNumModeIds());
2486	for (const auto &MS : HWM.getHwModeSelects()) {
2487	for (const HwModeSelect::PairType &P : MS.second.Items) {
2488	if (P.second->isSubClassOf(Name: "InstructionEncoding")) {
2489	std::string DecoderNamespace =
2490	P.second->getValueAsString(FieldName: "DecoderNamespace").str();
2491	if (P.first == DefaultMode) {
2492	NamespacesWithHwModes [DecoderNamespace].insert(x: "");
2493	} else {
2494	NamespacesWithHwModes [DecoderNamespace].insert(
2495	x: HWM.getMode(Id: P.first).Name);
2496	}
2497	BV.set(P.first);
2498	}
2499	}
2500	}
2501	transform(Range: BV.set_bits(), d_first: std::back_inserter(x&: Names), F: [&HWM](const int &M) {
2502	if (M == DefaultMode)
2503	return StringRef ("");
2504	return HWM.getModeName(Id: M, /IncludeDefault=/true);
2505	});
2506	}
2507
2508	static void
2509	handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
2510	ArrayRef<StringRef> HwModeNames,
2511	NamespacesHwModesMap &NamespacesWithHwModes,
2512	std::vector<EncodingAndInst> &GlobalEncodings) {
2513	const Record *InstDef = Instr->TheDef;
2514
2515	switch (DecoderEmitterSuppressDuplicates) {
2516	case SUPPRESSION_DISABLE: {
2517	for (StringRef HwModeName : HwModeNames)
2518	GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args&: HwModeName);
2519	break;
2520	}
2521	case SUPPRESSION_LEVEL1: {
2522	std::string DecoderNamespace =
2523	InstDef->getValueAsString(FieldName: "DecoderNamespace").str();
2524	auto It = NamespacesWithHwModes.find(x: DecoderNamespace);
2525	if (It != NamespacesWithHwModes.end()) {
2526	for (StringRef HwModeName : It ->second)
2527	GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args&: HwModeName);
2528	} else {
2529	// Only emit the encoding once, as it's DecoderNamespace doesn't
2530	// contain any HwModes.
2531	GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args: "");
2532	}
2533	break;
2534	}
2535	case SUPPRESSION_LEVEL2:
2536	GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args: "");
2537	break;
2538	}
2539	}
2540
2541	// Emits disassembler code for instruction decoding.
2542	void DecoderEmitter::run(raw_ostream &o) {
2543	formatted_raw_ostream OS(o);
2544	OS << R"(
2545	#include "llvm/MC/MCInst.h"
2546	#include "llvm/MC/MCSubtargetInfo.h"
2547	#include "llvm/Support/DataTypes.h"
2548	#include "llvm/Support/Debug.h"
2549	#include "llvm/Support/LEB128.h"
2550	#include "llvm/Support/raw_ostream.h"
2551	#include "llvm/TargetParser/SubtargetFeature.h"
2552	#include <assert.h>
2553
2554	namespace {
2555	)";
2556
2557	emitFieldFromInstruction(OS);
2558	emitInsertBits(OS);
2559	emitCheck(OS);
2560
2561	Target.reverseBitsForLittleEndianEncoding();
2562
2563	// Parameterize the decoders based on namespace and instruction width.
2564
2565	// First, collect all encoding-related HwModes referenced by the target.
2566	// And establish a mapping table between DecoderNamespace and HwMode.
2567	// If HwModeNames is empty, add the empty string so we always have one HwMode.
2568	const CodeGenHwModes &HWM = Target.getHwModes();
2569	std::vector<StringRef> HwModeNames;
2570	NamespacesHwModesMap NamespacesWithHwModes;
2571	collectHwModesReferencedForEncodings(HWM, Names&: HwModeNames, NamespacesWithHwModes);
2572	if (HwModeNames.empty())
2573	HwModeNames.push_back(x: "");
2574
2575	const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
2576	NumberedEncodings.reserve(n: NumberedInstructions.size());
2577	for (const auto &NumberedInstruction : NumberedInstructions) {
2578	const Record *InstDef = NumberedInstruction->TheDef;
2579	if (const RecordVal *RV = InstDef->getValue(Name: "EncodingInfos")) {
2580	if (const DefInit *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
2581	EncodingInfoByHwMode EBM(DI->getDef(), HWM);
2582	for (auto &[ModeId, Encoding] : EBM) {
2583	// DecoderTables with DefaultMode should not have any suffix.
2584	if (ModeId == DefaultMode) {
2585	NumberedEncodings.emplace_back(args&: Encoding, args: NumberedInstruction, args: "");
2586	} else {
2587	NumberedEncodings.emplace_back(args&: Encoding, args: NumberedInstruction,
2588	args: HWM.getMode(Id: ModeId).Name);
2589	}
2590	}
2591	continue;
2592	}
2593	}
2594	// This instruction is encoded the same on all HwModes.
2595	// According to user needs, provide varying degrees of suppression.
2596	handleHwModesUnrelatedEncodings(Instr: NumberedInstruction, HwModeNames,
2597	NamespacesWithHwModes, GlobalEncodings&: NumberedEncodings);
2598	}
2599	for (const Record *NumberedAlias :
2600	RK.getAllDerivedDefinitions(ClassName: "AdditionalEncoding"))
2601	NumberedEncodings.emplace_back(
2602	args&: NumberedAlias,
2603	args: &Target.getInstruction(InstRec: NumberedAlias->getValueAsDef(FieldName: "AliasOf")));
2604
2605	std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
2606	OpcMap;
2607	std::map<unsigned, std::vector<OperandInfo>> Operands;
2608	std::vector<unsigned> InstrLen;
2609	bool IsVarLenInst = Target.hasVariableLengthEncodings();
2610	unsigned MaxInstLen = `0`;
2611
2612	for (const auto &[NEI, NumberedEncoding] : enumerate(First&: NumberedEncodings)) {
2613	const Record *EncodingDef = NumberedEncoding.EncodingDef;
2614	const CodeGenInstruction *Inst = NumberedEncoding.Inst;
2615	const Record *Def = Inst->TheDef;
2616	unsigned Size = EncodingDef->getValueAsInt(FieldName: "Size");
2617	if (Def->getValueAsString(FieldName: "Namespace") == "TargetOpcode" \|\|
2618	Def->getValueAsBit(FieldName: "isPseudo") \|\|
2619	Def->getValueAsBit(FieldName: "isAsmParserOnly") \|\|
2620	Def->getValueAsBit(FieldName: "isCodeGenOnly")) {
2621	NumEncodingsLackingDisasm ++;
2622	continue;
2623	}
2624
2625	if (NEI < NumberedInstructions.size())
2626	NumInstructions ++;
2627	NumEncodings ++;
2628
2629	if (!Size && !IsVarLenInst)
2630	continue;
2631
2632	if (IsVarLenInst)
2633	InstrLen.resize(new_size: NumberedInstructions.size(), x: `0`);
2634
2635	if (unsigned Len = populateInstruction(Target, EncodingDef: EncodingDef, CGI: Inst, Opc: NEI,
2636	Operands, IsVarLenInst)) {
2637	if (IsVarLenInst) {
2638	MaxInstLen = std::max(a: MaxInstLen, b: Len);
2639	InstrLen [NEI] = Len;
2640	}
2641	std::string DecoderNamespace =
2642	EncodingDef->getValueAsString(FieldName: "DecoderNamespace").str();
2643	if (!NumberedEncoding.HwModeName.empty())
2644	DecoderNamespace += "_" + NumberedEncoding.HwModeName.str();
2645	OpcMap [{DecoderNamespace, Size}].emplace_back(
2646	args&: NEI, args: Target.getInstrIntValue(R: Def));
2647	} else {
2648	NumEncodingsOmitted ++;
2649	}
2650	}
2651
2652	DecoderTableInfo TableInfo;
2653	unsigned OpcodeMask = `0`;
2654	for (const auto &[NSAndByteSize, EncodingIDs] : OpcMap) {
2655	const std::string &DecoderNamespace = NSAndByteSize.first;
2656	const unsigned BitWidth = `8` * NSAndByteSize.second;
2657	// Emit the decoder for this namespace+width combination.
2658	FilterChooser FC(NumberedEncodings, EncodingIDs, Operands,
2659	IsVarLenInst ? MaxInstLen : BitWidth, this);
2660
2661	// The decode table is cleared for each top level decoder function. The
2662	// predicates and decoders themselves, however, are shared across all
2663	// decoders to give more opportunities for uniqueing.
2664	TableInfo.Table.clear();
2665	TableInfo.FixupStack.clear();
2666	TableInfo.FixupStack.emplace_back();
2667	FC.emitTableEntries(TableInfo);
2668	// Any NumToSkip fixups in the top level scope can resolve to the
2669	// OPC_Fail at the end of the table.
2670	assert(TableInfo.FixupStack.size() == `1` && "fixup stack phasing error!");
2671	// Resolve any NumToSkip fixups in the current scope.
2672	resolveTableFixups(Table&: TableInfo.Table, Fixups: TableInfo.FixupStack.back(),
2673	DestIdx: TableInfo.Table.size());
2674	TableInfo.FixupStack.clear();
2675
2676	TableInfo.Table.push_back(Item: MCD::OPC_Fail);
2677
2678	// Print the table to the output stream.
2679	OpcodeMask \|= emitTable(OS, Table&: TableInfo.Table, Indent: indent (`0`), BitWidth: FC.getBitWidth(),
2680	Namespace: DecoderNamespace, EncodingIDs);
2681	}
2682
2683	// For variable instruction, we emit a instruction length table
2684	// to let the decoder know how long the instructions are.
2685	// You can see example usage in M68k's disassembler.
2686	if (IsVarLenInst)
2687	emitInstrLenTable(OS, InstrLen);
2688
2689	const bool HasCheckPredicate =
2690	OpcodeMask &
2691	((`1` << MCD::OPC_CheckPredicate) \| (`1` << MCD::OPC_CheckPredicateOrFail));
2692
2693	// Emit the predicate function.
2694	if (HasCheckPredicate)
2695	emitPredicateFunction(OS, Predicates&: TableInfo.Predicates, Indent: indent (`0`));
2696
2697	// Emit the decoder function.
2698	emitDecoderFunction(OS, Decoders&: TableInfo.Decoders, Indent: indent (`0`));
2699
2700	// Emit the main entry point for the decoder, decodeInstruction().
2701	emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask);
2702
2703	OS << "\n} // namespace\n";
2704	}
2705
2706	void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS,
2707	StringRef PredicateNamespace) {
2708	DecoderEmitter (RK, PredicateNamespace).run(o&: OS);
2709	}
2710

Browse the source code of llvm_projects/llvm/utils/TableGen/DecoderEmitter.cpp