DecoderEmitter.cpp source code [llvm_projects/llvm/utils/TableGen/DecoderEmitter.cpp]

1	//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// It contains the tablegen backend that emits the decoder functions for
10	// targets with fixed/variable length instruction set.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "Common/CodeGenHwModes.h"
15	#include "Common/CodeGenInstruction.h"
16	#include "Common/CodeGenRegisters.h"
17	#include "Common/CodeGenTarget.h"
18	#include "Common/InfoByHwMode.h"
19	#include "Common/InstructionEncoding.h"
20	#include "Common/SubtargetFeatureInfo.h"
21	#include "Common/VarLenCodeEmitterGen.h"
22	#include "DecoderTableEmitter.h"
23	#include "DecoderTree.h"
24	#include "TableGenBackends.h"
25	#include "llvm/ADT/APInt.h"
26	#include "llvm/ADT/ArrayRef.h"
27	#include "llvm/ADT/STLExtras.h"
28	#include "llvm/ADT/SetVector.h"
29	#include "llvm/ADT/SmallBitVector.h"
30	#include "llvm/ADT/SmallString.h"
31	#include "llvm/ADT/Statistic.h"
32	#include "llvm/ADT/StringExtras.h"
33	#include "llvm/ADT/StringRef.h"
34	#include "llvm/Support/Casting.h"
35	#include "llvm/Support/CommandLine.h"
36	#include "llvm/Support/Debug.h"
37	#include "llvm/Support/ErrorHandling.h"
38	#include "llvm/Support/Format.h"
39	#include "llvm/Support/FormatVariadic.h"
40	#include "llvm/Support/FormattedStream.h"
41	#include "llvm/Support/KnownBits.h"
42	#include "llvm/Support/MathExtras.h"
43	#include "llvm/Support/raw_ostream.h"
44	#include "llvm/TableGen/Error.h"
45	#include "llvm/TableGen/Record.h"
46	#include <algorithm>
47	#include <cassert>
48	#include <cstddef>
49	#include <cstdint>
50	#include <map>
51	#include <memory>
52	#include <set>
53	#include <string>
54	#include <utility>
55	#include <vector>
56
57	using namespace llvm;
58
59	#define DEBUG_TYPE "decoder-emitter"
60
61	extern cl::OptionCategory DisassemblerEmitterCat;
62
63	enum SuppressLevel {
64	SUPPRESSION_DISABLE,
65	SUPPRESSION_LEVEL1,
66	SUPPRESSION_LEVEL2
67	};
68
69	static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
70	"suppress-per-hwmode-duplicates",
71	cl::desc ("Suppress duplication of instrs into per-HwMode decoder tables"),
72	cl::values(
73	clEnumValN(
74	SUPPRESSION_DISABLE, "O0",
75	"Do not prevent DecoderTable duplications caused by HwModes"),
76	clEnumValN(
77	SUPPRESSION_LEVEL1, "O1",
78	"Remove duplicate DecoderTable entries generated due to HwModes"),
79	clEnumValN(
80	SUPPRESSION_LEVEL2, "O2",
81	"Extract HwModes-specific instructions into new DecoderTables, "
82	"significantly reducing Table Duplications")),
83	cl::init(Val: SUPPRESSION_DISABLE), cl::cat (DisassemblerEmitterCat));
84
85	static cl::opt<bool> UseFnTableInDecodeToMCInst(
86	"use-fn-table-in-decode-to-mcinst",
87	cl::desc (
88	"Use a table of function pointers instead of a switch case in the\n"
89	"generated `decodeToMCInst` function. Helps improve compile time\n"
90	"of the generated code."),
91	cl::init(Val: false), cl::cat (DisassemblerEmitterCat));
92
93	// Enabling this option requires use of different `InsnType` for different
94	// bitwidths and defining `InsnBitWidth` template specialization for the
95	// `InsnType` types used. Some common specializations are already defined in
96	// MCDecoder.h.
97	static cl::opt<bool> SpecializeDecodersPerBitwidth(
98	"specialize-decoders-per-bitwidth",
99	cl::desc ("Specialize the generated `decodeToMCInst` function per bitwidth. "
100	"Helps reduce the code size."),
101	cl::init(Val: false), cl::cat (DisassemblerEmitterCat));
102
103	static cl::opt<bool> IgnoreNonDecodableOperands(
104	"ignore-non-decodable-operands",
105	cl::desc (
106	"Do not issue an error if an operand cannot be decoded automatically."),
107	cl::init(Val: false), cl::cat (DisassemblerEmitterCat));
108
109	static cl::opt<bool> IgnoreFullyDefinedOperands(
110	"ignore-fully-defined-operands",
111	cl::desc (
112	"Do not automatically decode operands with no '?' in their encoding."),
113	cl::init(Val: false), cl::cat (DisassemblerEmitterCat));
114
115	STATISTIC(NumEncodings, "Number of encodings considered");
116	STATISTIC(NumEncodingsLackingDisasm,
117	"Number of encodings without disassembler info");
118	STATISTIC(NumInstructions, "Number of instructions considered");
119	STATISTIC(NumEncodingsSupported, "Number of encodings supported");
120	STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
121
122	/// Similar to KnownBits::print(), but allows you to specify a character to use
123	/// to print unknown bits.
124	static void printKnownBits(raw_ostream &OS, const KnownBits &Bits,
125	char Unknown) {
126	for (unsigned I = Bits.getBitWidth(); I--;) {
127	if (Bits.Zero [I] && Bits.One [I])
128	OS << `'!'`;
129	else if (Bits.Zero [I])
130	OS << `'0'`;
131	else if (Bits.One [I])
132	OS << `'1'`;
133	else
134	OS << Unknown;
135	}
136	}
137
138	namespace {
139
140	/// Sorting predicate to sort encoding IDs by encoding width.
141	class LessEncodingIDByWidth {
142	ArrayRef<InstructionEncoding> Encodings;
143
144	public:
145	explicit LessEncodingIDByWidth(ArrayRef<InstructionEncoding> Encodings)
146	: Encodings (Encodings) {}
147
148	bool operator()(unsigned ID1, unsigned ID2) const {
149	return Encodings [ID1].getBitWidth() < Encodings [ID2].getBitWidth();
150	}
151	};
152
153	using NamespacesHwModesMap = std::map<StringRef, std::set<unsigned>>;
154
155	class DecoderEmitter {
156	const RecordKeeper &RK;
157	CodeGenTarget Target;
158	const CodeGenHwModes &CGH;
159
160	/// All parsed encodings.
161	std::vector<InstructionEncoding> Encodings;
162
163	/// Encodings IDs for each HwMode. An ID is an index into Encodings.
164	SmallDenseMap<unsigned, std::vector<unsigned>> EncodingIDsByHwMode;
165
166	public:
167	explicit DecoderEmitter(const RecordKeeper &RK);
168
169	const CodeGenTarget &getTarget() const { return Target; }
170
171	void emitInstrLenTable(formatted_raw_ostream &OS,
172	ArrayRef<unsigned> InstrLen) const;
173	void emitPredicateFunction(formatted_raw_ostream &OS,
174	const PredicateSet &Predicates) const;
175
176	void emitRegClassByHwModeDecoders(formatted_raw_ostream &OS) const;
177	void emitDecoderFunction(formatted_raw_ostream &OS,
178	const DecoderSet &Decoders,
179	unsigned BucketBitWidth) const;
180
181	// run - Output the code emitter
182	void run(raw_ostream &o) const;
183
184	private:
185	void collectHwModesReferencedForEncodings(
186	std::vector<unsigned> &HwModeIDs,
187	NamespacesHwModesMap &NamespacesWithHwModes) const;
188
189	void
190	handleHwModesUnrelatedEncodings(unsigned EncodingID,
191	ArrayRef<unsigned> HwModeIDs,
192	NamespacesHwModesMap &NamespacesWithHwModes);
193
194	void parseInstructionEncodings();
195	};
196
197	struct EncodingIsland {
198	unsigned StartBit;
199	unsigned NumBits;
200	uint64_t FieldVal;
201	};
202
203	/// Filter - Filter works with FilterChooser to produce the decoding tree for
204	/// the ISA.
205	///
206	/// It is useful to think of a Filter as governing the switch stmts of the
207	/// decoding tree in a certain level. Each case stmt delegates to an inferior
208	/// FilterChooser to decide what further decoding logic to employ, or in another
209	/// words, what other remaining bits to look at. The FilterChooser eventually
210	/// chooses a best Filter to do its job.
211	///
212	/// This recursive scheme ends when the number of Opcodes assigned to the
213	/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
214	/// the Filter/FilterChooser combo does not know how to distinguish among the
215	/// Opcodes assigned.
216	///
217	/// An example of a conflict is
218	///
219	/// Decoding Conflict:
220	/// ................................
221	/// 1111............................
222	/// 1111010.........................
223	/// 1111010...00....................
224	/// 1111010...00........0001........
225	/// 111101000.00........0001........
226	/// 111101000.00........00010000....
227	/// 111101000_00________00010000____ VST4q8a
228	/// 111101000_00________00010000____ VST4q8b
229	///
230	/// The Debug output shows the path that the decoding tree follows to reach the
231	/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
232	/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
233	///
234	/// The encoding info in the .td files does not specify this meta information,
235	/// which could have been used by the decoder to resolve the conflict. The
236	/// decoder could try to decode the even/odd register numbering and assign to
237	/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
238	/// version and return the Opcode since the two have the same Asm format string.
239	struct Filter {
240	unsigned StartBit; // the starting bit position
241	unsigned NumBits; // number of bits to filter
242
243	// Map of well-known segment value to the set of uid's with that value.
244	std::map<uint64_t, std::vector<unsigned>> FilteredIDs;
245
246	// Set of uid's with non-constant segment values.
247	std::vector<unsigned> VariableIDs;
248
249	Filter(ArrayRef<InstructionEncoding> Encodings,
250	ArrayRef<unsigned> EncodingIDs, unsigned StartBit, unsigned NumBits);
251
252	// Returns the number of fanout produced by the filter. More fanout implies
253	// the filter distinguishes more categories of instructions.
254	unsigned usefulness() const;
255	}; // end class Filter
256
257	// These are states of our finite state machines used in FilterChooser's
258	// filterProcessor() which produces the filter candidates to use.
259	enum bitAttr_t {
260	ATTR_NONE,
261	ATTR_FILTERED,
262	ATTR_ALL_SET,
263	ATTR_ALL_UNSET,
264	ATTR_MIXED
265	};
266
267	/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
268	/// in order to perform the decoding of instructions at the current level.
269	///
270	/// Decoding proceeds from the top down. Based on the well-known encoding bits
271	/// of instructions available, FilterChooser builds up the possible Filters that
272	/// can further the task of decoding by distinguishing among the remaining
273	/// candidate instructions.
274	///
275	/// Once a filter has been chosen, it is called upon to divide the decoding task
276	/// into sub-tasks and delegates them to its inferior FilterChoosers for further
277	/// processings.
278	///
279	/// It is useful to think of a Filter as governing the switch stmts of the
280	/// decoding tree. And each case is delegated to an inferior FilterChooser to
281	/// decide what further remaining bits to look at.
282
283	class FilterChooser {
284	// TODO: Unfriend by providing the necessary accessors.
285	friend class DecoderTreeBuilder;
286
287	// Vector of encodings to choose our filter.
288	ArrayRef<InstructionEncoding> Encodings;
289
290	/// Encoding IDs for this filter chooser to work on.
291	/// Sorted by non-decreasing encoding width.
292	SmallVector<unsigned, `0`> EncodingIDs;
293
294	// Array of bit values passed down from our parent.
295	// Set to all unknown for Parent == nullptr.
296	KnownBits FilterBits;
297
298	// Links to the FilterChooser above us in the decoding tree.
299	const FilterChooser *Parent;
300
301	/// If the selected filter matches multiple encodings, then this is the
302	/// starting position and the width of the filtered range.
303	unsigned StartBit;
304	unsigned NumBits;
305
306	/// If the selected filter matches multiple encodings, and there is
307	/// exactly one* encoding in which all bits are known in the filtered range,*
308	/// then this is the ID of that encoding.
309	/// Also used when there is only one encoding.
310	std::optional<unsigned> SingletonEncodingID;
311
312	/// If the selected filter matches multiple encodings, and there is
313	/// at least one* encoding in which all bits are known in the filtered range,*
314	/// then this is the FilterChooser created for the subset of encodings that
315	/// contain some unknown bits in the filtered range.
316	std::unique_ptr<const FilterChooser> VariableFC;
317
318	/// If the selected filter matches multiple encodings, and there is
319	/// more than one* encoding in which all bits are known in the filtered*
320	/// range, then this is a map of field values to FilterChoosers created for
321	/// the subset of encodings sharing that field value.
322	/// The "field value" here refers to the encoding bits in the filtered range.
323	std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
324
325	/// Set to true if decoding conflict was encountered.
326	bool HasConflict = false;
327
328	public:
329	/// Constructs a top-level filter chooser.
330	FilterChooser(ArrayRef<InstructionEncoding> Encodings,
331	ArrayRef<unsigned> EncodingIDs)
332	: Encodings (Encodings), EncodingIDs (EncodingIDs), Parent(nullptr) {
333	// Sort encoding IDs once.
334	stable_sort(Range&: this->EncodingIDs, C: LessEncodingIDByWidth (Encodings));
335	// Filter width is the width of the smallest encoding.
336	unsigned FilterWidth = Encodings [this->EncodingIDs.front()].getBitWidth();
337	FilterBits = KnownBits (FilterWidth);
338	doFilter();
339	}
340
341	/// Constructs an inferior filter chooser.
342	FilterChooser(ArrayRef<InstructionEncoding> Encodings,
343	ArrayRef<unsigned> EncodingIDs, const KnownBits &FilterBits,
344	const FilterChooser &Parent)
345	: Encodings (Encodings), EncodingIDs (EncodingIDs), Parent(&Parent) {
346	// Inferior filter choosers are created from sorted array of encoding IDs.
347	assert(is_sorted(EncodingIDs, LessEncodingIDByWidth(Encodings)));
348	assert(!FilterBits.hasConflict() && "Broken filter");
349	// Filter width is the width of the smallest encoding.
350	unsigned FilterWidth = Encodings [EncodingIDs.front()].getBitWidth();
351	this->FilterBits = FilterBits.anyext(BitWidth: FilterWidth);
352	doFilter();
353	}
354
355	FilterChooser(const FilterChooser &) = delete;
356	void operator=(const FilterChooser &) = delete;
357
358	/// Returns the width of the largest encoding.
359	unsigned getMaxEncodingWidth() const {
360	// The last encoding ID is the ID of an encoding with the largest width.
361	return Encodings [EncodingIDs.back()].getBitWidth();
362	}
363
364	/// Returns true if any decoding conflicts were encountered.
365	bool hasConflict() const { return HasConflict; }
366
367	private:
368	/// Applies the given filter to the set of encodings this FilterChooser
369	/// works with, creating inferior FilterChoosers as necessary.
370	void applyFilter(const Filter &F);
371
372	/// dumpStack - dumpStack traverses the filter chooser chain and calls
373	/// dumpFilterArray on each filter chooser up to the top level one.
374	void dumpStack(raw_ostream &OS, indent Indent, unsigned PadToWidth) const;
375
376	bool isPositionFiltered(unsigned Idx) const {
377	return FilterBits.Zero [Idx] \|\| FilterBits.One [Idx];
378	}
379
380	/// Scans the well-known encoding bits of the encodings and, builds up a list
381	/// of candidate filters, and then returns the best one, if any.
382	std::unique_ptr<Filter> findBestFilter(ArrayRef<bitAttr_t> BitAttrs,
383	bool AllowMixed,
384	bool Greedy = true) const;
385
386	std::unique_ptr<Filter> findBestFilter() const;
387
388	// Decides on the best configuration of filter(s) to use in order to decode
389	// the instructions. A conflict of instructions may occur, in which case we
390	// dump the conflict set to the standard error.
391	void doFilter();
392
393	public:
394	void dump() const;
395	};
396
397	} // end anonymous namespace
398
399	///////////////////////////
400	// //
401	// Filter Implementation //
402	// //
403	///////////////////////////
404
405	Filter::Filter(ArrayRef<InstructionEncoding> Encodings,
406	ArrayRef<unsigned> EncodingIDs, unsigned StartBit,
407	unsigned NumBits)
408	: StartBit(StartBit), NumBits(NumBits) {
409	for (unsigned EncodingID : EncodingIDs) {
410	const InstructionEncoding &Encoding = Encodings [EncodingID];
411	KnownBits EncodingBits = Encoding.getMandatoryBits();
412
413	// Scans the segment for possibly well-specified encoding bits.
414	KnownBits FieldBits = EncodingBits.extractBits(NumBits, BitPosition: StartBit);
415
416	if (FieldBits.isConstant()) {
417	// The encoding bits are well-known. Lets add the uid of the
418	// instruction into the bucket keyed off the constant field value.
419	FilteredIDs [FieldBits.getConstant().getZExtValue()].push_back(x: EncodingID);
420	} else {
421	// Some of the encoding bit(s) are unspecified. This contributes to
422	// one additional member of "Variable" instructions.
423	VariableIDs.push_back(x: EncodingID);
424	}
425	}
426
427	assert((FilteredIDs.size() + VariableIDs.size() > `0`) &&
428	"Filter returns no instruction categories");
429	}
430
431	void FilterChooser::applyFilter(const Filter &F) {
432	StartBit = F.StartBit;
433	NumBits = F.NumBits;
434	assert(FilterBits.extractBits(NumBits, StartBit).isUnknown());
435
436	if (!F.VariableIDs.empty()) {
437	// Delegates to an inferior filter chooser for further processing on this
438	// group of instructions whose segment values are variable.
439	VariableFC = std::make_unique<FilterChooser>(args&: Encodings, args: F.VariableIDs,
440	args&: FilterBits, args&: *this);
441	HasConflict \|= VariableFC ->HasConflict;
442	}
443
444	// Otherwise, create sub choosers.
445	for (const auto &[FilterVal, InferiorEncodingIDs] : F.FilteredIDs) {
446	// Create a new filter by inserting the field bits into the parent filter.
447	APInt FieldBits(NumBits, FilterVal);
448	KnownBits InferiorFilterBits = FilterBits;
449	InferiorFilterBits.insertBits(SubBits: KnownBits::makeConstant(C: FieldBits), BitPosition: StartBit);
450
451	// Delegates to an inferior filter chooser for further processing on this
452	// category of instructions.
453	auto [It, _] = FilterChooserMap.try_emplace(
454	k: FilterVal,
455	args: std::make_unique<FilterChooser>(args&: Encodings, args: InferiorEncodingIDs,
456	args&: InferiorFilterBits, args&: *this));
457	HasConflict \|= It ->second ->HasConflict;
458	}
459	}
460
461	// Returns the number of fanout produced by the filter. More fanout implies
462	// the filter distinguishes more categories of instructions.
463	unsigned Filter::usefulness() const {
464	return FilteredIDs.size() + VariableIDs.empty();
465	}
466
467	//////////////////////////////////
468	// //
469	// Filterchooser Implementation //
470	// //
471	//////////////////////////////////
472
473	void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
474	ArrayRef<unsigned> InstrLen) const {
475	OS << "static const uint8_t InstrLenTable[] = {\n";
476	for (unsigned Len : InstrLen)
477	OS << Len << ",\n";
478	OS << "};\n\n";
479	}
480
481	void DecoderEmitter::emitPredicateFunction(
482	formatted_raw_ostream &OS, const PredicateSet &Predicates) const {
483	// The predicate function is just a big switch statement based on the
484	// input predicate index.
485	OS << "static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset "
486	"&FB) {\n";
487	OS << " switch (Idx) {\n";
488	OS << " default: llvm_unreachable(\"Invalid index!\");\n";
489	for (const auto &[Index, Predicate] : enumerate(First: Predicates)) {
490	OS << " case " << Index << ":\n";
491	OS << " return " << Predicate << ";\n";
492	}
493	OS << " }\n";
494	OS << "}\n\n";
495	}
496
497	/// Emit a default implementation of a decoder for all RegClassByHwModes which
498	/// do not have an explicit DecoderMethodSet, which dispatches over the decoder
499	/// methods for the member classes.
500	void DecoderEmitter::emitRegClassByHwModeDecoders(
501	formatted_raw_ostream &OS) const {
502	const CodeGenHwModes &CGH = Target.getHwModes();
503	if (CGH.getNumModeIds() == `1`)
504	return;
505
506	ArrayRef<const Record *> RegClassByHwMode = Target.getAllRegClassByHwMode();
507	if (RegClassByHwMode.empty())
508	return;
509
510	for (const Record *ClassByHwMode : RegClassByHwMode) {
511	// Ignore cases that had an explicit DecoderMethod set.
512	if (!InstructionEncoding::findOperandDecoderMethod(Record: ClassByHwMode).second)
513	continue;
514
515	const HwModeSelect &ModeSelect = CGH.getHwModeSelect(R: ClassByHwMode);
516
517	// Mips has a system where this is only used by compound operands with
518	// custom decoders, and we don't try to detect if this decoder is really
519	// needed.
520	OS << "[[maybe_unused]]\n";
521
522	OS << "static DecodeStatus Decode" << ClassByHwMode->getName()
523	<< "RegClassByHwMode";
524	OS << R"((MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) {
525	switch (Decoder->getSubtargetInfo().getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
526	)";
527	for (auto [ModeID, RegClassRec] : ModeSelect.Items) {
528	OS << indent (`2`) << "case " << ModeID << ": // "
529	<< CGH.getModeName(Id: ModeID, /IncludeDefault=/true) << `'\n'`
530	<< indent (`4`) << "return "
531	<< InstructionEncoding::findOperandDecoderMethod(Record: RegClassRec).first
532	<< "(Inst, Imm, Addr, Decoder);\n";
533	}
534	OS << indent (`2`) << R"(default:
535	llvm_unreachable("no decoder for hwmode");
536	}
537	}
538
539	)";
540	}
541
542	OS << `'\n'`;
543	}
544
545	void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
546	const DecoderSet &Decoders,
547	unsigned BucketBitWidth) const {
548	// The decoder function is just a big switch statement or a table of function
549	// pointers based on the input decoder index.
550
551	// TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
552	// It would be better for emitBinaryParser to use a 64-bit tmp whenever
553	// possible but fall back to an InsnType-sized tmp for truly large fields.
554	StringRef TmpTypeDecl =
555	"using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
556	"InsnType, uint64_t>;\n";
557	StringRef DecodeParams =
558	"DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
559	"MCDisassembler *Decoder, bool &DecodeComplete";
560
561	// Print the name of the decode function to OS.
562	auto PrintDecodeFnName = [&OS, BucketBitWidth](unsigned DecodeIdx) {
563	OS << "decodeFn";
564	if (BucketBitWidth != `0`) {
565	OS << `'_'` << BucketBitWidth << "bit";
566	}
567	OS << `'_'` << DecodeIdx;
568	};
569
570	// Print the template statement.
571	auto PrintTemplate = [&OS, BucketBitWidth]() {
572	OS << "template <typename InsnType>\n";
573	OS << "static ";
574	if (BucketBitWidth != `0`)
575	OS << "std::enable_if_t<InsnBitWidth<InsnType> == " << BucketBitWidth
576	<< ", DecodeStatus>\n";
577	else
578	OS << "DecodeStatus ";
579	};
580
581	if (UseFnTableInDecodeToMCInst) {
582	// Emit a function for each case first.
583	for (const auto &[Index, Decoder] : enumerate(First: Decoders)) {
584	PrintTemplate ();
585	PrintDecodeFnName (Index);
586	OS << "(" << DecodeParams << ") {\n";
587	OS << " " << TmpTypeDecl;
588	OS << " [[maybe_unused]] TmpType tmp;\n";
589	OS << Decoder;
590	OS << " return S;\n";
591	OS << "}\n\n";
592	}
593	}
594
595	OS << "// Handling " << Decoders.size() << " cases.\n";
596	PrintTemplate ();
597	OS << "decodeToMCInst(unsigned Idx, " << DecodeParams << ") {\n";
598	OS << " DecodeComplete = true;\n";
599
600	if (UseFnTableInDecodeToMCInst) {
601	// Build a table of function pointers
602	OS << " using DecodeFnTy = DecodeStatus (*)(" << DecodeParams << ");\n";
603	OS << " static constexpr DecodeFnTy decodeFnTable[] = {\n";
604	for (size_t Index : llvm::seq(Size: Decoders.size())) {
605	OS << " ";
606	PrintDecodeFnName (Index);
607	OS << ",\n";
608	}
609	OS << " };\n";
610	OS << " if (Idx >= " << Decoders.size() << ")\n";
611	OS << " llvm_unreachable(\"Invalid decoder index!\");\n";
612	OS << " return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
613	"DecodeComplete);\n";
614	} else {
615	OS << " " << TmpTypeDecl;
616	OS << " TmpType tmp;\n";
617	OS << " switch (Idx) {\n";
618	OS << " default: llvm_unreachable(\"Invalid decoder index!\");\n";
619	for (const auto &[Index, Decoder] : enumerate(First: Decoders)) {
620	OS << " case " << Index << ":\n";
621	OS << Decoder;
622	OS << " return S;\n";
623	}
624	OS << " }\n";
625	}
626	OS << "}\n";
627	}
628
629	/// dumpStack - dumpStack traverses the filter chooser chain and calls
630	/// dumpFilterArray on each filter chooser up to the top level one.
631	void FilterChooser::dumpStack(raw_ostream &OS, indent Indent,
632	unsigned PadToWidth) const {
633	if (Parent)
634	Parent->dumpStack(OS, Indent, PadToWidth);
635	assert(PadToWidth >= FilterBits.getBitWidth());
636	OS << Indent << indent (PadToWidth - FilterBits.getBitWidth());
637	printKnownBits(OS, Bits: FilterBits, Unknown: `'.'`);
638	OS << `'\n'`;
639	}
640
641	// Calculates the island(s) needed to decode the instruction.
642	// This returns a list of undecoded bits of an instructions, for example,
643	// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
644	// decoded bits in order to verify that the instruction matches the Opcode.
645	static std::vector<EncodingIsland> getIslands(const KnownBits &EncodingBits,
646	const KnownBits &FilterBits) {
647	std::vector<EncodingIsland> Islands;
648	uint64_t FieldVal;
649	unsigned StartBit;
650
651	bool OnIsland = false;
652	unsigned FilterWidth = FilterBits.getBitWidth();
653	for (unsigned I = `0`; I != FilterWidth; ++I) {
654	bool IsKnown = EncodingBits.Zero [I] \|\| EncodingBits.One [I];
655	bool IsFiltered = FilterBits.Zero [I] \|\| FilterBits.One [I];
656	if (!IsFiltered && IsKnown) {
657	if (OnIsland) {
658	// Accumulate island bits.
659	const unsigned BitNo = I - StartBit;
660	FieldVal \|= static_cast<uint64_t>(EncodingBits.One [I]) << (BitNo);
661	// If island becomes larger than 64-bits complete the island and start a
662	// new one
663	if (BitNo >= `63`) {
664	Islands.push_back(x: {.StartBit: StartBit, .NumBits: `64`, .FieldVal: FieldVal});
665	OnIsland = false;
666	}
667	} else {
668	// Onto an island.
669	StartBit = I;
670	FieldVal = static_cast<uint64_t>(EncodingBits.One [I]);
671	OnIsland = true;
672	}
673	} else if (OnIsland) {
674	// Into the water.
675	Islands.push_back(x: {.StartBit: StartBit, .NumBits: I - StartBit, .FieldVal: FieldVal});
676	OnIsland = false;
677	}
678	}
679
680	if (OnIsland)
681	Islands.push_back(x: {.StartBit: StartBit, .NumBits: FilterWidth - StartBit, .FieldVal: FieldVal});
682
683	return Islands;
684	}
685
686	static void emitBinaryParser(raw_ostream &OS, indent Indent,
687	const InstructionEncoding &Encoding,
688	const OperandInfo &OpInfo) {
689	if (OpInfo.HasNoEncoding) {
690	// If an operand has no encoding, the old behavior is to not decode it
691	// automatically and let the target do it. This is error-prone, so the
692	// new behavior is to report an error.
693	if (!IgnoreNonDecodableOperands)
694	PrintError(ErrorLoc: Encoding.getRecord()->getLoc(),
695	Msg: "could not find field for operand '" + OpInfo.Name + "'");
696	return;
697	}
698
699	// Special case for 'bits<0>'.
700	if (OpInfo.Fields.empty() && !OpInfo.InitValue) {
701	assert(!OpInfo.Decoder.empty());
702	// The operand has no encoding, so the corresponding argument is omitted.
703	// This avoids confusion and allows the function to be overloaded if the
704	// operand does have an encoding in other instructions.
705	OS << Indent << "if (!Check(S, " << OpInfo.Decoder << "(MI, Decoder)))\n"
706	<< Indent << " return MCDisassembler::Fail;\n";
707	return;
708	}
709
710	if (OpInfo.fields().empty()) {
711	// Only a constant part. The old behavior is to not decode this operand.
712	if (IgnoreFullyDefinedOperands)
713	return;
714	// Initialize `tmp` with the constant part.
715	OS << Indent << "tmp = " << format_hex(N: *OpInfo.InitValue, Width: `0`) << ";\n";
716	} else if (OpInfo.fields().size() == `1` && !OpInfo.InitValue.value_or(u: `0`)) {
717	// One variable part and no/zero constant part. Initialize `tmp` with the
718	// variable part.
719	auto [Base, Width, Offset] = OpInfo.fields().front();
720	OS << Indent << "tmp = fieldFromInstruction(insn, " << Base << ", " << Width
721	<< `')'`;
722	if (Offset)
723	OS << " << " << Offset;
724	OS << ";\n";
725	} else {
726	// General case. Initialize `tmp` with the constant part, if any, and
727	// insert the variable parts into it.
728	OS << Indent << "tmp = " << format_hex(N: OpInfo.InitValue.value_or(u: `0`), Width: `0`)
729	<< ";\n";
730	for (auto [Base, Width, Offset] : OpInfo.fields()) {
731	OS << Indent << "tmp \|= fieldFromInstruction(insn, " << Base << ", "
732	<< Width << `')'`;
733	if (Offset)
734	OS << " << " << Offset;
735	OS << ";\n";
736	}
737	}
738
739	StringRef Decoder = OpInfo.Decoder;
740	if (!Decoder.empty()) {
741	OS << Indent << "if (!Check(S, " << Decoder
742	<< "(MI, tmp, Address, Decoder))) { "
743	<< (OpInfo.HasCompleteDecoder ? "" : "DecodeComplete = false; ")
744	<< "return MCDisassembler::Fail; }\n";
745	} else {
746	OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
747	}
748	}
749
750	static std::string getDecoderString(const InstructionEncoding &Encoding) {
751	std::string Decoder;
752	raw_string_ostream OS(Decoder);
753	indent Indent(UseFnTableInDecodeToMCInst ? `2` : `4`);
754
755	// If a custom instruction decoder was specified, use that.
756	StringRef DecoderMethod = Encoding.getDecoderMethod();
757	if (!DecoderMethod.empty()) {
758	OS << Indent << "if (!Check(S, " << DecoderMethod
759	<< "(MI, insn, Address, Decoder))) { "
760	<< (Encoding.hasCompleteDecoder() ? "" : "DecodeComplete = false; ")
761	<< "return MCDisassembler::Fail; }\n";
762	} else {
763	for (const OperandInfo &Op : Encoding.getOperands())
764	emitBinaryParser(OS, Indent, Encoding, OpInfo: Op);
765	}
766	return Decoder;
767	}
768
769	static std::string getPredicateString(const InstructionEncoding &Encoding,
770	StringRef TargetName) {
771	std::vector<const Record *> Predicates =
772	Encoding.getRecord()->getValueAsListOfDefs(FieldName: "Predicates");
773	auto It = llvm::find_if(Range&: Predicates, P: [](const Record *R) {
774	return R->getValueAsBit(FieldName: "AssemblerMatcherPredicate");
775	});
776	if (It == Predicates.end())
777	return std::string ();
778
779	std::string Predicate;
780	raw_string_ostream OS(Predicate);
781	SubtargetFeatureInfo::emitMCPredicateCheck(OS, TargetName, Predicates);
782	return Predicate;
783	}
784
785	std::unique_ptr<Filter>
786	FilterChooser::findBestFilter(ArrayRef<bitAttr_t> BitAttrs, bool AllowMixed,
787	bool Greedy) const {
788	assert(EncodingIDs.size() >= `2` && "Nothing to filter");
789
790	// Heuristics. See also doFilter()'s "Heuristics" comment when num of
791	// instructions is 3.
792	if (AllowMixed && !Greedy) {
793	assert(EncodingIDs.size() == `3`);
794
795	for (unsigned EncodingID : EncodingIDs) {
796	const InstructionEncoding &Encoding = Encodings [EncodingID];
797	KnownBits EncodingBits = Encoding.getMandatoryBits();
798
799	// Look for islands of undecoded bits of any instruction.
800	std::vector<EncodingIsland> Islands =
801	getIslands(EncodingBits, FilterBits);
802	if (!Islands.empty()) {
803	// Found an instruction with island(s). Now just assign a filter.
804	return std::make_unique<Filter>(
805	args: Encodings, args: EncodingIDs, args&: Islands [`0`].StartBit, args&: Islands [`0`].NumBits);
806	}
807	}
808	}
809
810	// The regionAttr automaton consumes the bitAttrs automatons' state,
811	// lowest-to-highest.
812	//
813	// Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
814	// States: NONE, ALL_SET, MIXED
815	// Initial state: NONE
816	//
817	// (NONE) ----- F --> (NONE)
818	// (NONE) ----- S --> (ALL_SET) ; and set region start
819	// (NONE) ----- U --> (NONE)
820	// (NONE) ----- M --> (MIXED) ; and set region start
821	// (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
822	// (ALL_SET) -- S --> (ALL_SET)
823	// (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
824	// (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
825	// (MIXED) ---- F --> (NONE) ; and report a MIXED region
826	// (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
827	// (MIXED) ---- U --> (NONE) ; and report a MIXED region
828	// (MIXED) ---- M --> (MIXED)
829
830	bitAttr_t RA = ATTR_NONE;
831	unsigned StartBit = `0`;
832
833	std::vector<std::unique_ptr<Filter>> Filters;
834
835	auto addCandidateFilter = [&](unsigned StartBit, unsigned EndBit) {
836	Filters.push_back(x: std::make_unique<Filter>(args: Encodings, args: EncodingIDs, args&: StartBit,
837	args: EndBit - StartBit));
838	};
839
840	unsigned FilterWidth = FilterBits.getBitWidth();
841	for (unsigned BitIndex = `0`; BitIndex != FilterWidth; ++BitIndex) {
842	bitAttr_t bitAttr = BitAttrs [BitIndex];
843
844	assert(bitAttr != ATTR_NONE && "Bit without attributes");
845
846	switch (RA) {
847	case ATTR_NONE:
848	switch (bitAttr) {
849	case ATTR_FILTERED:
850	break;
851	case ATTR_ALL_SET:
852	StartBit = BitIndex;
853	RA = ATTR_ALL_SET;
854	break;
855	case ATTR_ALL_UNSET:
856	break;
857	case ATTR_MIXED:
858	StartBit = BitIndex;
859	RA = ATTR_MIXED;
860	break;
861	default:
862	llvm_unreachable("Unexpected bitAttr!");
863	}
864	break;
865	case ATTR_ALL_SET:
866	if (!AllowMixed && bitAttr != ATTR_ALL_SET)
867	addCandidateFilter (StartBit, BitIndex);
868	switch (bitAttr) {
869	case ATTR_FILTERED:
870	RA = ATTR_NONE;
871	break;
872	case ATTR_ALL_SET:
873	break;
874	case ATTR_ALL_UNSET:
875	RA = ATTR_NONE;
876	break;
877	case ATTR_MIXED:
878	StartBit = BitIndex;
879	RA = ATTR_MIXED;
880	break;
881	default:
882	llvm_unreachable("Unexpected bitAttr!");
883	}
884	break;
885	case ATTR_MIXED:
886	if (AllowMixed && bitAttr != ATTR_MIXED)
887	addCandidateFilter (StartBit, BitIndex);
888	switch (bitAttr) {
889	case ATTR_FILTERED:
890	StartBit = BitIndex;
891	RA = ATTR_NONE;
892	break;
893	case ATTR_ALL_SET:
894	StartBit = BitIndex;
895	RA = ATTR_ALL_SET;
896	break;
897	case ATTR_ALL_UNSET:
898	RA = ATTR_NONE;
899	break;
900	case ATTR_MIXED:
901	break;
902	default:
903	llvm_unreachable("Unexpected bitAttr!");
904	}
905	break;
906	case ATTR_ALL_UNSET:
907	llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
908	case ATTR_FILTERED:
909	llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
910	}
911	}
912
913	// At the end, if we're still in ALL_SET or MIXED states, report a region
914	switch (RA) {
915	case ATTR_NONE:
916	break;
917	case ATTR_FILTERED:
918	break;
919	case ATTR_ALL_SET:
920	if (!AllowMixed)
921	addCandidateFilter (StartBit, FilterWidth);
922	break;
923	case ATTR_ALL_UNSET:
924	break;
925	case ATTR_MIXED:
926	if (AllowMixed)
927	addCandidateFilter (StartBit, FilterWidth);
928	break;
929	}
930
931	// We have finished with the filter processings. Now it's time to choose
932	// the best performing filter.
933	auto MaxIt = llvm::max_element(Range&: Filters, C: [](const std::unique_ptr<Filter> &A,
934	const std::unique_ptr<Filter> &B) {
935	return A ->usefulness() < B ->usefulness();
936	});
937	if (MaxIt == Filters.end() \|\| (*MaxIt)->usefulness() == `0`)
938	return nullptr;
939	return std::move(*MaxIt);
940	}
941
942	std::unique_ptr<Filter> FilterChooser::findBestFilter() const {
943	// We maintain BIT_WIDTH copies of the bitAttrs automaton.
944	// The automaton consumes the corresponding bit from each
945	// instruction.
946	//
947	// Input symbols: 0, 1, _ (unset), and . (any of the above).
948	// States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
949	// Initial state: NONE.
950	//
951	// (NONE) ------- [01] -> (ALL_SET)
952	// (NONE) ------- _ ----> (ALL_UNSET)
953	// (ALL_SET) ---- [01] -> (ALL_SET)
954	// (ALL_SET) ---- _ ----> (MIXED)
955	// (ALL_UNSET) -- [01] -> (MIXED)
956	// (ALL_UNSET) -- _ ----> (ALL_UNSET)
957	// (MIXED) ------ . ----> (MIXED)
958	// (FILTERED)---- . ----> (FILTERED)
959
960	unsigned FilterWidth = FilterBits.getBitWidth();
961	SmallVector<bitAttr_t, `128`> BitAttrs(FilterWidth, ATTR_NONE);
962
963	// FILTERED bit positions provide no entropy and are not worthy of pursuing.
964	// Filter::recurse() set either 1 or 0 for each position.
965	for (unsigned BitIndex = `0`; BitIndex != FilterWidth; ++BitIndex)
966	if (isPositionFiltered(Idx: BitIndex))
967	BitAttrs [BitIndex] = ATTR_FILTERED;
968
969	for (unsigned EncodingID : EncodingIDs) {
970	const InstructionEncoding &Encoding = Encodings [EncodingID];
971	KnownBits EncodingBits = Encoding.getMandatoryBits();
972
973	for (unsigned BitIndex = `0`; BitIndex != FilterWidth; ++BitIndex) {
974	bool IsKnown = EncodingBits.Zero [BitIndex] \|\| EncodingBits.One [BitIndex];
975	switch (BitAttrs [BitIndex]) {
976	case ATTR_NONE:
977	if (IsKnown)
978	BitAttrs [BitIndex] = ATTR_ALL_SET;
979	else
980	BitAttrs [BitIndex] = ATTR_ALL_UNSET;
981	break;
982	case ATTR_ALL_SET:
983	if (!IsKnown)
984	BitAttrs [BitIndex] = ATTR_MIXED;
985	break;
986	case ATTR_ALL_UNSET:
987	if (IsKnown)
988	BitAttrs [BitIndex] = ATTR_MIXED;
989	break;
990	case ATTR_MIXED:
991	case ATTR_FILTERED:
992	break;
993	}
994	}
995	}
996
997	// Try regions of consecutive known bit values first.
998	if (std::unique_ptr<Filter> F =
999	findBestFilter(BitAttrs, /AllowMixed=/false))
1000	return F;
1001
1002	// Then regions of mixed bits (both known and unitialized bit values allowed).
1003	if (std::unique_ptr<Filter> F = findBestFilter(BitAttrs, /AllowMixed=/true))
1004	return F;
1005
1006	// Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1007	// no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1008	// well-known encoding pattern. In such case, we backtrack and scan for the
1009	// the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1010	if (EncodingIDs.size() == `3`) {
1011	if (std::unique_ptr<Filter> F =
1012	findBestFilter(BitAttrs, /AllowMixed=/true, /Greedy=/false))
1013	return F;
1014	}
1015
1016	// There is a conflict we could not resolve.
1017	return nullptr;
1018	}
1019
1020	// Decides on the best configuration of filter(s) to use in order to decode
1021	// the instructions. A conflict of instructions may occur, in which case we
1022	// dump the conflict set to the standard error.
1023	void FilterChooser::doFilter() {
1024	assert(!EncodingIDs.empty() && "FilterChooser created with no instructions");
1025
1026	// No filter needed.
1027	if (EncodingIDs.size() == `1`) {
1028	SingletonEncodingID = EncodingIDs.front();
1029	return;
1030	}
1031
1032	std::unique_ptr<Filter> BestFilter = findBestFilter();
1033	if (BestFilter) {
1034	applyFilter(F: *BestFilter);
1035	return;
1036	}
1037
1038	// Print out useful conflict information for postmortem analysis.
1039	errs() << "Decoding Conflict:\n";
1040	dump();
1041	HasConflict = true;
1042	}
1043
1044	void FilterChooser::dump() const {
1045	indent Indent(`4`);
1046	// Helps to keep the output right-justified.
1047	unsigned PadToWidth = getMaxEncodingWidth();
1048
1049	// Dump filter stack.
1050	dumpStack(OS&: errs(), Indent, PadToWidth);
1051
1052	// Dump encodings.
1053	for (unsigned EncodingID : EncodingIDs) {
1054	const InstructionEncoding &Encoding = Encodings [EncodingID];
1055	errs() << Indent << indent (PadToWidth - Encoding.getBitWidth());
1056	printKnownBits(OS&: errs(), Bits: Encoding.getMandatoryBits(), Unknown: `'_'`);
1057	errs() << " " << Encoding.getName() << `'\n'`;
1058	}
1059	}
1060
1061	// emitDecodeInstruction - Emit the templated helper function
1062	// decodeInstruction().
1063	static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
1064	const DecoderTableInfo &TableInfo) {
1065	OS << R"(
1066	template <typename InsnType>
1067	static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
1068	InsnType insn, uint64_t Address,
1069	const MCDisassembler *DisAsm,
1070	const MCSubtargetInfo &STI)";
1071	if (IsVarLenInst) {
1072	OS << ",\n "
1073	"llvm::function_ref<void(APInt &, uint64_t)> makeUp";
1074	}
1075	OS << ") {\n";
1076	if (TableInfo.HasCheckPredicate)
1077	OS << " const FeatureBitset &Bits = STI.getFeatureBits();\n";
1078	OS << " const uint8_t *Ptr = DecodeTable;\n";
1079
1080	if (SpecializeDecodersPerBitwidth) {
1081	// Fail with a fatal error if decoder table's bitwidth does not match
1082	// `InsnType` bitwidth.
1083	OS << R"(
1084	[[maybe_unused]] uint32_t BitWidth = decodeULEB128AndIncUnsafe(Ptr);
1085	assert(InsnBitWidth<InsnType> == BitWidth &&
1086	"Table and instruction bitwidth mismatch");
1087	)";
1088	}
1089
1090	OS << R"(
1091	SmallVector<const uint8_t *, 8> ScopeStack;
1092	DecodeStatus S = MCDisassembler::Success;
1093	while (true) {
1094	ptrdiff_t Loc = Ptr - DecodeTable;
1095	const uint8_t DecoderOp = *Ptr++;
1096	switch (DecoderOp) {
1097	default:
1098	errs() << Loc << ": Unexpected decode table opcode: "
1099	<< (int)DecoderOp << '\n';
1100	return MCDisassembler::Fail;
1101	case OPC_Scope: {
1102	unsigned NumToSkip = decodeULEB128AndIncUnsafe(Ptr);
1103	const uint8_t *SkipTo = Ptr + NumToSkip;
1104	ScopeStack.push_back(SkipTo);
1105	LLVM_DEBUG(dbgs() << Loc << ": OPC_Scope(" << SkipTo - DecodeTable
1106	<< ")\n");
1107	continue;
1108	}
1109	case OPC_SwitchField: {
1110	// Decode the start value.
1111	unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
1112	unsigned Len = *Ptr++;)";
1113	if (IsVarLenInst)
1114	OS << "\n makeUp(insn, Start + Len);";
1115	OS << R"(
1116	uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
1117	uint64_t CaseValue;
1118	unsigned CaseSize;
1119	while (true) {
1120	CaseValue = decodeULEB128AndIncUnsafe(Ptr);
1121	CaseSize = decodeULEB128AndIncUnsafe(Ptr);
1122	if (FieldValue == CaseValue \|\| !CaseSize)
1123	break;
1124	Ptr += CaseSize;
1125	}
1126	if (FieldValue == CaseValue) {
1127	LLVM_DEBUG(dbgs() << Loc << ": OPC_SwitchField(" << Start << ", " << Len
1128	<< "): " << FieldValue << '\n');
1129	continue;
1130	}
1131	break;
1132	}
1133	case OPC_CheckField: {
1134	// Decode the start value.
1135	unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
1136	unsigned Len = *Ptr;)";
1137	if (IsVarLenInst)
1138	OS << "\n makeUp(insn, Start + Len);";
1139	OS << R"(
1140	uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
1141	// Decode the field value.
1142	unsigned PtrLen = 0;
1143	uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
1144	Ptr += PtrLen;
1145	bool Failed = ExpectedValue != FieldValue;
1146
1147	LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", " << Len
1148	<< ", " << ExpectedValue << "): FieldValue = "
1149	<< FieldValue << ", ExpectedValue = " << ExpectedValue
1150	<< ": " << (Failed ? "FAIL, " : "PASS\n"););
1151	if (!Failed)
1152	continue;
1153	break;
1154	})";
1155	if (TableInfo.HasCheckPredicate) {
1156	OS << R"(
1157	case OPC_CheckPredicate: {
1158	// Decode the Predicate Index value.
1159	unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr);
1160	// Check the predicate.
1161	bool Failed = !checkDecoderPredicate(PIdx, Bits);
1162
1163	LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): "
1164	<< (Failed ? "FAIL, " : "PASS\n"););
1165	if (!Failed)
1166	continue;
1167	break;
1168	})";
1169	}
1170	OS << R"(
1171	case OPC_Decode: {
1172	// Decode the Opcode value.
1173	unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
1174	unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
1175
1176	MI.clear();
1177	MI.setOpcode(Opc);
1178	bool DecodeComplete;)";
1179	if (IsVarLenInst) {
1180	OS << "\n unsigned Len = InstrLenTable[Opc];\n"
1181	<< " makeUp(insn, Len);";
1182	}
1183	OS << R"(
1184	S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm,
1185	DecodeComplete);
1186	LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
1187	<< ", using decoder " << DecodeIdx << ": "
1188	<< (S ? "PASS, " : "FAIL, "));
1189
1190	if (DecodeComplete) {
1191	LLVM_DEBUG(dbgs() << "decoding complete\n");
1192	return S;
1193	}
1194	assert(S == MCDisassembler::Fail);
1195	// Reset decode status. This also drops a SoftFail status that could be
1196	// set before the decode attempt.
1197	S = MCDisassembler::Success;
1198	break;
1199	})";
1200	if (TableInfo.HasSoftFail) {
1201	OS << R"(
1202	case OPC_SoftFail: {
1203	// Decode the mask values.
1204	uint64_t PositiveMask = decodeULEB128AndIncUnsafe(Ptr);
1205	uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
1206	bool Failed = (insn & PositiveMask) != 0 \|\| (~insn & NegativeMask) != 0;
1207	if (Failed)
1208	S = MCDisassembler::SoftFail;
1209	LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Failed ? "FAIL\n" : "PASS\n"));
1210	continue;
1211	})";
1212	}
1213	OS << R"(
1214	}
1215	if (ScopeStack.empty()) {
1216	LLVM_DEBUG(dbgs() << "returning Fail\n");
1217	return MCDisassembler::Fail;
1218	}
1219	Ptr = ScopeStack.pop_back_val();
1220	LLVM_DEBUG(dbgs() << "continuing at " << Ptr - DecodeTable << '\n');
1221	}
1222	llvm_unreachable("bogosity detected in disassembler state machine!");
1223	}
1224
1225	)";
1226	}
1227
1228	namespace {
1229
1230	class DecoderTreeBuilder {
1231	DecoderContext &Ctx;
1232	const CodeGenTarget &Target;
1233	ArrayRef<InstructionEncoding> Encodings;
1234
1235	public:
1236	DecoderTreeBuilder(DecoderContext &Ctx, const CodeGenTarget &Target,
1237	ArrayRef<InstructionEncoding> Encodings)
1238	: Ctx(Ctx), Target(Target), Encodings (Encodings) {}
1239
1240	std::unique_ptr<DecoderTreeNode> buildTree(ArrayRef<unsigned> EncodingIDs);
1241
1242	private:
1243	std::unique_ptr<DecoderTreeNode>
1244	convertSingleton(unsigned EncodingID, const KnownBits &FilterBits);
1245
1246	std::unique_ptr<DecoderTreeNode> convertFilterChooserMap(
1247	unsigned StartBit, unsigned NumBits,
1248	const std::map<uint64_t, std::unique_ptr<const FilterChooser>> &FCMap);
1249
1250	std::unique_ptr<DecoderTreeNode>
1251	convertFilterChooser(const FilterChooser *FC);
1252	};
1253
1254	} // namespace
1255
1256	std::unique_ptr<DecoderTreeNode>
1257	DecoderTreeBuilder::convertSingleton(unsigned EncodingID,
1258	const KnownBits &FilterBits) {
1259	const InstructionEncoding &Encoding = Encodings [EncodingID];
1260	auto N = std::make_unique<CheckAllNode>();
1261
1262	std::string Predicate = getPredicateString(Encoding, TargetName: Target.getName());
1263	if (!Predicate.empty()) {
1264	unsigned PredicateIndex = Ctx.getPredicateIndex(Predicate);
1265	N ->addChild(Child: std::make_unique<CheckPredicateNode>(args&: PredicateIndex));
1266	}
1267
1268	std::vector<EncodingIsland> Islands =
1269	getIslands(EncodingBits: Encoding.getMandatoryBits(), FilterBits);
1270	for (const EncodingIsland &Island : reverse(C&: Islands)) {
1271	N ->addChild(Child: std::make_unique<CheckFieldNode>(
1272	args: Island.StartBit, args: Island.NumBits, args: Island.FieldVal));
1273	}
1274
1275	const KnownBits &InstBits = Encoding.getInstBits();
1276	const APInt &SoftFailMask = Encoding.getSoftFailMask();
1277	if (!SoftFailMask.isZero()) {
1278	APInt PositiveMask = InstBits.Zero & SoftFailMask;
1279	APInt NegativeMask = InstBits.One & SoftFailMask;
1280	N ->addChild(Child: std::make_unique<SoftFailNode>(args: PositiveMask.getZExtValue(),
1281	args: NegativeMask.getZExtValue()));
1282	}
1283
1284	unsigned DecoderIndex = Ctx.getDecoderIndex(Decoder: getDecoderString(Encoding));
1285	N ->addChild(Child: std::make_unique<DecodeNode>(
1286	args: Encoding.getName(), args&: Encoding.getInstruction()->EnumVal, args&: DecoderIndex));
1287
1288	return N;
1289	}
1290
1291	std::unique_ptr<DecoderTreeNode> DecoderTreeBuilder::convertFilterChooserMap(
1292	unsigned StartBit, unsigned NumBits,
1293	const std::map<uint64_t, std::unique_ptr<const FilterChooser>> &FCMap) {
1294	if (FCMap.size() == `1`) {
1295	const auto &[FieldVal, ChildFC] = *FCMap.begin();
1296	auto N = std::make_unique<CheckAllNode>();
1297	N ->addChild(Child: std::make_unique<CheckFieldNode>(args&: StartBit, args&: NumBits, args: FieldVal));
1298	N ->addChild(Child: convertFilterChooser(FC: ChildFC.get()));
1299	return N;
1300	}
1301	auto N = std::make_unique<SwitchFieldNode>(args&: StartBit, args&: NumBits);
1302	for (const auto &[FieldVal, ChildFC] : FCMap)
1303	N ->addCase(Value: FieldVal, N: convertFilterChooser(FC: ChildFC.get()));
1304	return N;
1305	}
1306
1307	std::unique_ptr<DecoderTreeNode>
1308	DecoderTreeBuilder::convertFilterChooser(const FilterChooser *FC) {
1309	auto N = std::make_unique<CheckAnyNode>();
1310
1311	do {
1312	if (FC->SingletonEncodingID)
1313	N ->addChild(Child: convertSingleton(EncodingID: *FC->SingletonEncodingID, FilterBits: FC->FilterBits));
1314	else
1315	N ->addChild(Child: convertFilterChooserMap(StartBit: FC->StartBit, NumBits: FC->NumBits,
1316	FCMap: FC->FilterChooserMap));
1317	FC = FC->VariableFC.get();
1318	} while (FC);
1319
1320	return N;
1321	}
1322
1323	std::unique_ptr<DecoderTreeNode>
1324	DecoderTreeBuilder::buildTree(ArrayRef<unsigned> EncodingIDs) {
1325	FilterChooser FC(Encodings, EncodingIDs);
1326	if (FC.hasConflict())
1327	return nullptr;
1328	return convertFilterChooser(FC: &FC);
1329	}
1330
1331	/// Collects all HwModes referenced by the target for encoding purposes.
1332	void DecoderEmitter::collectHwModesReferencedForEncodings(
1333	std::vector<unsigned> &HwModeIDs,
1334	NamespacesHwModesMap &NamespacesWithHwModes) const {
1335	SmallBitVector BV(CGH.getNumModeIds());
1336	for (const auto &MS : CGH.getHwModeSelects()) {
1337	for (auto [HwModeID, EncodingDef] : MS.second.Items) {
1338	if (EncodingDef->isSubClassOf(Name: "InstructionEncoding")) {
1339	StringRef DecoderNamespace =
1340	EncodingDef->getValueAsString(FieldName: "DecoderNamespace");
1341	NamespacesWithHwModes [DecoderNamespace].insert(x: HwModeID);
1342	BV.set(HwModeID);
1343	}
1344	}
1345	}
1346	// FIXME: Can't do `HwModeIDs.assign(BV.set_bits_begin(), BV.set_bits_end())`
1347	// because const_set_bits_iterator_impl is not copy-assignable.
1348	// This breaks some MacOS builds.
1349	llvm::copy(Range: BV.set_bits(), Out: std::back_inserter(x&: HwModeIDs));
1350	}
1351
1352	void DecoderEmitter::handleHwModesUnrelatedEncodings(
1353	unsigned EncodingID, ArrayRef<unsigned> HwModeIDs,
1354	NamespacesHwModesMap &NamespacesWithHwModes) {
1355	switch (DecoderEmitterSuppressDuplicates) {
1356	case SUPPRESSION_DISABLE: {
1357	for (unsigned HwModeID : HwModeIDs)
1358	EncodingIDsByHwMode [HwModeID].push_back(x: EncodingID);
1359	break;
1360	}
1361	case SUPPRESSION_LEVEL1: {
1362	StringRef DecoderNamespace = Encodings [EncodingID].getDecoderNamespace();
1363	auto It = NamespacesWithHwModes.find(x: DecoderNamespace);
1364	if (It != NamespacesWithHwModes.end()) {
1365	for (unsigned HwModeID : It ->second)
1366	EncodingIDsByHwMode [HwModeID].push_back(x: EncodingID);
1367	} else {
1368	// Only emit the encoding once, as it's DecoderNamespace doesn't
1369	// contain any HwModes.
1370	EncodingIDsByHwMode [DefaultMode].push_back(x: EncodingID);
1371	}
1372	break;
1373	}
1374	case SUPPRESSION_LEVEL2:
1375	EncodingIDsByHwMode [DefaultMode].push_back(x: EncodingID);
1376	break;
1377	}
1378	}
1379
1380	/// Checks if the given target-specific non-pseudo instruction
1381	/// is a candidate for decoding.
1382	static bool isDecodableInstruction(const Record *InstDef) {
1383	return !InstDef->getValueAsBit(FieldName: "isAsmParserOnly") &&
1384	!InstDef->getValueAsBit(FieldName: "isCodeGenOnly");
1385	}
1386
1387	/// Checks if the given encoding is valid.
1388	static bool isValidEncoding(const Record *EncodingDef) {
1389	const RecordVal *InstField = EncodingDef->getValue(Name: "Inst");
1390	if (!InstField)
1391	return false;
1392
1393	if (const auto *InstInit = dyn_cast<BitsInit>(Val: InstField->getValue())) {
1394	// Fixed-length encoding. Size must be non-zero.
1395	if (!EncodingDef->getValueAsInt(FieldName: "Size"))
1396	return false;
1397
1398	// At least one of the encoding bits must be complete (not '?').
1399	// FIXME: This should take SoftFail field into account.
1400	return !InstInit->allInComplete();
1401	}
1402
1403	if (const auto *InstInit = dyn_cast<DagInit>(Val: InstField->getValue())) {
1404	// Variable-length encoding.
1405	// At least one of the encoding bits must be complete (not '?').
1406	VarLenInst VLI(InstInit, InstField);
1407	return !all_of(Range&: VLI, P: [](const EncodingSegment &Segment) {
1408	return isa<UnsetInit>(Val: Segment.Value);
1409	});
1410	}
1411
1412	// Inst field is neither BitsInit nor DagInit. This is something unsupported.
1413	return false;
1414	}
1415
1416	/// Parses all InstructionEncoding instances and fills internal data structures.
1417	void DecoderEmitter::parseInstructionEncodings() {
1418	// First, collect all encoding-related HwModes referenced by the target.
1419	// And establish a mapping table between DecoderNamespace and HwMode.
1420	// If HwModeNames is empty, add the default mode so we always have one HwMode.
1421	std::vector<unsigned> HwModeIDs;
1422	NamespacesHwModesMap NamespacesWithHwModes;
1423	collectHwModesReferencedForEncodings(HwModeIDs, NamespacesWithHwModes);
1424	if (HwModeIDs.empty())
1425	HwModeIDs.push_back(x: DefaultMode);
1426
1427	ArrayRef<const CodeGenInstruction *> Instructions =
1428	Target.getTargetNonPseudoInstructions();
1429	Encodings.reserve(n: Instructions.size());
1430
1431	for (const CodeGenInstruction *Inst : Instructions) {
1432	const Record *InstDef = Inst->TheDef;
1433	if (!isDecodableInstruction(InstDef)) {
1434	++NumEncodingsLackingDisasm;
1435	continue;
1436	}
1437
1438	if (const Record *RV = InstDef->getValueAsOptionalDef(FieldName: "EncodingInfos")) {
1439	EncodingInfoByHwMode EBM(RV, CGH);
1440	for (auto [HwModeID, EncodingDef] : EBM) {
1441	if (!isValidEncoding(EncodingDef)) {
1442	// TODO: Should probably give a warning.
1443	++NumEncodingsOmitted;
1444	continue;
1445	}
1446	unsigned EncodingID = Encodings.size();
1447	Encodings.emplace_back(args&: EncodingDef, args&: Inst);
1448	EncodingIDsByHwMode [HwModeID].push_back(x: EncodingID);
1449	}
1450	continue; // Ignore encoding specified by Instruction itself.
1451	}
1452
1453	if (!isValidEncoding(EncodingDef: InstDef)) {
1454	++NumEncodingsOmitted;
1455	continue;
1456	}
1457
1458	unsigned EncodingID = Encodings.size();
1459	Encodings.emplace_back(args&: InstDef, args&: Inst);
1460
1461	// This instruction is encoded the same on all HwModes.
1462	// According to user needs, add it to all, some, or only the default HwMode.
1463	handleHwModesUnrelatedEncodings(EncodingID, HwModeIDs,
1464	NamespacesWithHwModes);
1465	}
1466
1467	for (const Record *EncodingDef :
1468	RK.getAllDerivedDefinitions(ClassName: "AdditionalEncoding")) {
1469	const Record *InstDef = EncodingDef->getValueAsDef(FieldName: "AliasOf");
1470	// TODO: Should probably give a warning in these cases.
1471	// What's the point of specifying an additional encoding
1472	// if it is invalid or if the instruction is not decodable?
1473	if (!isDecodableInstruction(InstDef)) {
1474	++NumEncodingsLackingDisasm;
1475	continue;
1476	}
1477	if (!isValidEncoding(EncodingDef)) {
1478	++NumEncodingsOmitted;
1479	continue;
1480	}
1481	unsigned EncodingID = Encodings.size();
1482	Encodings.emplace_back(args&: EncodingDef, args: &Target.getInstruction(InstRec: InstDef));
1483	EncodingIDsByHwMode [DefaultMode].push_back(x: EncodingID);
1484	}
1485
1486	// Do some statistics.
1487	NumInstructions = Instructions.size();
1488	NumEncodingsSupported = Encodings.size();
1489	NumEncodings = NumEncodingsSupported + NumEncodingsOmitted;
1490	}
1491
1492	DecoderEmitter::DecoderEmitter(const RecordKeeper &RK)
1493	: RK(RK), Target (RK), CGH(Target.getHwModes()) {
1494	Target.reverseBitsForLittleEndianEncoding();
1495	parseInstructionEncodings();
1496	}
1497
1498	// Emits disassembler code for instruction decoding.
1499	void DecoderEmitter::run(raw_ostream &o) const {
1500	formatted_raw_ostream OS(o);
1501	OS << R"(
1502	#include "llvm/MC/MCInst.h"
1503	#include "llvm/MC/MCSubtargetInfo.h"
1504	#include "llvm/Support/DataTypes.h"
1505	#include "llvm/Support/Debug.h"
1506	#include "llvm/Support/LEB128.h"
1507	#include "llvm/Support/raw_ostream.h"
1508	#include "llvm/TargetParser/SubtargetFeature.h"
1509	#include <assert.h>
1510
1511	namespace {
1512
1513	// InsnBitWidth is essentially a type trait used by the decoder emitter to query
1514	// the supported bitwidth for a given type. But default, the value is 0, making
1515	// it an invalid type for use as `InsnType` when instantiating the decoder.
1516	// Individual targets are expected to provide specializations for these based
1517	// on their usage.
1518	template <typename T> constexpr uint32_t InsnBitWidth = 0;
1519
1520	)";
1521
1522	// Do extra bookkeeping for variable-length encodings.
1523	bool IsVarLenInst = Target.hasVariableLengthEncodings();
1524	unsigned MaxInstLen = `0`;
1525	if (IsVarLenInst) {
1526	std::vector<unsigned> InstrLen(Target.getInstructions().size(), `0`);
1527	for (const InstructionEncoding &Encoding : Encodings) {
1528	MaxInstLen = std::max(a: MaxInstLen, b: Encoding.getBitWidth());
1529	InstrLen [Target.getInstrIntValue(R: Encoding.getInstruction()->TheDef)] =
1530	Encoding.getBitWidth();
1531	}
1532
1533	// For variable instruction, we emit an instruction length table to let the
1534	// decoder know how long the instructions are. You can see example usage in
1535	// M68k's disassembler.
1536	emitInstrLenTable(OS, InstrLen);
1537	}
1538
1539	emitRegClassByHwModeDecoders(OS);
1540
1541	// Map of (bitwidth, namespace, hwmode) tuple to encoding IDs.
1542	// Its organized as a nested map, with the (namespace, hwmode) as the key for
1543	// the inner map and bitwidth as the key for the outer map. We use std::map
1544	// for deterministic iteration order so that the code emitted is also
1545	// deterministic.
1546	using InnerKeyTy = std::pair<StringRef, unsigned>;
1547	using InnerMapTy = std::map<InnerKeyTy, std::vector<unsigned>>;
1548	std::map<unsigned, InnerMapTy> EncMap;
1549
1550	for (const auto &[HwModeID, EncodingIDs] : EncodingIDsByHwMode) {
1551	for (unsigned EncodingID : EncodingIDs) {
1552	const InstructionEncoding &Encoding = Encodings [EncodingID];
1553	const unsigned BitWidth =
1554	IsVarLenInst ? MaxInstLen : Encoding.getBitWidth();
1555	StringRef DecoderNamespace = Encoding.getDecoderNamespace();
1556	EncMap [BitWidth][{DecoderNamespace, HwModeID}].push_back(x: EncodingID);
1557	}
1558	}
1559
1560	// Variable length instructions use the same `APInt` type for all instructions
1561	// so we cannot specialize decoders based on instruction bitwidths (which
1562	// requires using different `InstType` for differet bitwidths for the correct
1563	// template specialization to kick in).
1564	if (IsVarLenInst && SpecializeDecodersPerBitwidth)
1565	PrintFatalError(
1566	Msg: "Cannot specialize decoders for variable length instuctions");
1567
1568	DecoderContext Ctx;
1569	DecoderTreeBuilder TreeBuilder(Ctx, Target, Encodings);
1570
1571	DecoderTableInfo TableInfo;
1572	DecoderTableEmitter TableEmitter(TableInfo, OS);
1573
1574	// Emit a table for each (namespace, hwmode, bitwidth) combination.
1575	// Entries in `EncMap` are already sorted by bitwidth. So bucketing per
1576	// bitwidth can be done on-the-fly as we iterate over the map.
1577	bool HasConflict = false;
1578	for (const auto &[BitWidth, BWMap] : EncMap) {
1579	for (const auto &[Key, EncodingIDs] : BWMap) {
1580	auto [DecoderNamespace, HwModeID] = Key;
1581
1582	std::unique_ptr<DecoderTreeNode> Tree =
1583	TreeBuilder.buildTree(EncodingIDs);
1584
1585	// Skip emitting the table if a conflict has been detected.
1586	if (!Tree) {
1587	HasConflict = true;
1588	continue;
1589	}
1590
1591	// Form the table name.
1592	SmallString<`32`> TableName({"DecoderTable", DecoderNamespace});
1593	if (HwModeID != DefaultMode)
1594	TableName.append(Refs: {"_", Target.getHwModes().getModeName(Id: HwModeID)});
1595	TableName.append(RHS: utostr(X: BitWidth));
1596
1597	TableEmitter.emitTable(
1598	TableName, BitWidth: SpecializeDecodersPerBitwidth ? BitWidth : `0`, Root: Tree.get());
1599	}
1600
1601	// Each BitWidth get's its own decoders and decoder function if
1602	// SpecializeDecodersPerBitwidth is enabled.
1603	if (SpecializeDecodersPerBitwidth) {
1604	emitDecoderFunction(OS, Decoders: Ctx.Decoders, BucketBitWidth: BitWidth);
1605	Ctx.Decoders.clear();
1606	}
1607	}
1608
1609	if (HasConflict)
1610	PrintFatalError(Msg: "Decoding conflict encountered");
1611
1612	// Emit the decoder function for the last bucket. This will also emit the
1613	// single decoder function if SpecializeDecodersPerBitwidth = false.
1614	if (!SpecializeDecodersPerBitwidth)
1615	emitDecoderFunction(OS, Decoders: Ctx.Decoders, BucketBitWidth: `0`);
1616
1617	// Emit the predicate function.
1618	if (TableInfo.HasCheckPredicate)
1619	emitPredicateFunction(OS, Predicates: Ctx.Predicates);
1620
1621	// Emit the main entry point for the decoder, decodeInstruction().
1622	emitDecodeInstruction(OS, IsVarLenInst, TableInfo);
1623
1624	OS << "\n} // namespace\n";
1625	}
1626
1627	void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS) {
1628	DecoderEmitter (RK).run(o&: OS);
1629	}
1630

Browse the source code of llvm_projects/llvm/utils/TableGen/DecoderEmitter.cpp