1 | //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file contains the TableGen backend that emits the decoder functions
// for targets with fixed- or variable-length instruction sets.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "Common/CodeGenHwModes.h" |
15 | #include "Common/CodeGenInstruction.h" |
16 | #include "Common/CodeGenTarget.h" |
17 | #include "Common/InfoByHwMode.h" |
18 | #include "Common/VarLenCodeEmitterGen.h" |
19 | #include "TableGenBackends.h" |
20 | #include "llvm/ADT/APInt.h" |
21 | #include "llvm/ADT/ArrayRef.h" |
22 | #include "llvm/ADT/CachedHashString.h" |
23 | #include "llvm/ADT/STLExtras.h" |
24 | #include "llvm/ADT/SetVector.h" |
25 | #include "llvm/ADT/SmallBitVector.h" |
26 | #include "llvm/ADT/SmallString.h" |
27 | #include "llvm/ADT/Statistic.h" |
28 | #include "llvm/ADT/StringExtras.h" |
29 | #include "llvm/ADT/StringRef.h" |
30 | #include "llvm/MC/MCDecoderOps.h" |
31 | #include "llvm/Support/Casting.h" |
32 | #include "llvm/Support/CommandLine.h" |
33 | #include "llvm/Support/Debug.h" |
34 | #include "llvm/Support/ErrorHandling.h" |
35 | #include "llvm/Support/FormatVariadic.h" |
36 | #include "llvm/Support/FormattedStream.h" |
37 | #include "llvm/Support/LEB128.h" |
38 | #include "llvm/Support/MathExtras.h" |
39 | #include "llvm/Support/raw_ostream.h" |
40 | #include "llvm/TableGen/Error.h" |
41 | #include "llvm/TableGen/Record.h" |
42 | #include <algorithm> |
43 | #include <cassert> |
44 | #include <cstddef> |
45 | #include <cstdint> |
46 | #include <map> |
47 | #include <memory> |
48 | #include <set> |
49 | #include <string> |
50 | #include <utility> |
51 | #include <vector> |
52 | |
53 | using namespace llvm; |
54 | |
55 | #define DEBUG_TYPE "decoder-emitter" |
56 | |
57 | extern cl::OptionCategory DisassemblerEmitterCat; |
58 | |
/// Levels accepted by the -suppress-per-hwmode-duplicates option, ordered from
/// no suppression to the most aggressive one.
enum SuppressLevel {
  SUPPRESSION_DISABLE = 0, ///< "O0": leave HwMode-caused duplication alone.
  SUPPRESSION_LEVEL1 = 1,  ///< "O1": remove duplicate DecoderTable entries.
  SUPPRESSION_LEVEL2 = 2   ///< "O2": split HwMode-specific instrs into new tables.
};
64 | |
65 | static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates( |
66 | "suppress-per-hwmode-duplicates" , |
67 | cl::desc("Suppress duplication of instrs into per-HwMode decoder tables" ), |
68 | cl::values( |
69 | clEnumValN( |
70 | SUPPRESSION_DISABLE, "O0" , |
71 | "Do not prevent DecoderTable duplications caused by HwModes" ), |
72 | clEnumValN( |
73 | SUPPRESSION_LEVEL1, "O1" , |
74 | "Remove duplicate DecoderTable entries generated due to HwModes" ), |
75 | clEnumValN( |
76 | SUPPRESSION_LEVEL2, "O2" , |
77 | "Extract HwModes-specific instructions into new DecoderTables, " |
78 | "significantly reducing Table Duplications" )), |
79 | cl::init(Val: SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat)); |
80 | |
81 | static cl::opt<bool> LargeTable( |
82 | "large-decoder-table" , |
83 | cl::desc("Use large decoder table format. This uses 24 bits for offset\n" |
84 | "in the table instead of the default 16 bits." ), |
85 | cl::init(Val: false), cl::cat(DisassemblerEmitterCat)); |
86 | |
87 | static cl::opt<bool> UseFnTableInDecodeToMCInst( |
88 | "use-fn-table-in-decode-to-mcinst" , |
89 | cl::desc( |
90 | "Use a table of function pointers instead of a switch case in the\n" |
91 | "generated `decodeToMCInst` function. Helps improve compile time\n" |
92 | "of the generated code." ), |
93 | cl::init(Val: false), cl::cat(DisassemblerEmitterCat)); |
94 | |
// Statistics counters kept by this backend; each STATISTIC pairs a counter
// variable with the description string given next to it.
STATISTIC(NumEncodings, "Number of encodings considered");
STATISTIC(NumEncodingsLackingDisasm,
          "Number of encodings without disassembler info");
STATISTIC(NumInstructions, "Number of instructions considered");
STATISTIC(NumEncodingsSupported, "Number of encodings supported");
STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
101 | |
102 | static unsigned getNumToSkipInBytes() { return LargeTable ? 3 : 2; } |
103 | |
104 | namespace { |
105 | |
/// One field of an operand encoding, described by three unsigned values that
/// are stored exactly as passed to the constructor.
struct EncodingField {
  unsigned Base;
  unsigned Width;
  unsigned Offset;

  EncodingField(unsigned B, unsigned W, unsigned O)
      : Base(B), Width(W), Offset(O) {}
};
111 | |
112 | struct OperandInfo { |
113 | std::vector<EncodingField> Fields; |
114 | std::string Decoder; |
115 | bool HasCompleteDecoder; |
116 | uint64_t InitValue = 0; |
117 | |
118 | OperandInfo(std::string D, bool HCD) : Decoder(D), HasCompleteDecoder(HCD) {} |
119 | |
120 | void addField(unsigned Base, unsigned Width, unsigned Offset) { |
121 | Fields.push_back(x: EncodingField(Base, Width, Offset)); |
122 | } |
123 | |
124 | unsigned numFields() const { return Fields.size(); } |
125 | |
126 | typedef std::vector<EncodingField>::const_iterator const_iterator; |
127 | |
128 | const_iterator begin() const { return Fields.begin(); } |
129 | const_iterator end() const { return Fields.end(); } |
130 | }; |
131 | |
132 | typedef std::vector<uint32_t> FixupList; |
133 | typedef std::vector<FixupList> FixupScopeList; |
134 | typedef SmallSetVector<CachedHashString, 16> PredicateSet; |
135 | typedef SmallSetVector<CachedHashString, 16> DecoderSet; |
136 | |
137 | class DecoderTable { |
138 | public: |
139 | DecoderTable() { Data.reserve(n: 16384); } |
140 | |
141 | void clear() { Data.clear(); } |
142 | void push_back(uint8_t Item) { Data.push_back(x: Item); } |
143 | size_t size() const { return Data.size(); } |
144 | const uint8_t *data() const { return Data.data(); } |
145 | |
146 | using const_iterator = std::vector<uint8_t>::const_iterator; |
147 | const_iterator begin() const { return Data.begin(); } |
148 | const_iterator end() const { return Data.end(); } |
149 | |
150 | // Insert a ULEB128 encoded value into the table. |
151 | void insertULEB128(uint64_t Value) { |
152 | // Encode and emit the value to filter against. |
153 | uint8_t Buffer[16]; |
154 | unsigned Len = encodeULEB128(Value, p: Buffer); |
155 | Data.insert(position: Data.end(), first: Buffer, last: Buffer + Len); |
156 | } |
157 | |
158 | // Insert space for `NumToSkip` and return the position |
159 | // in the table for patching. |
160 | size_t insertNumToSkip() { |
161 | size_t Size = Data.size(); |
162 | Data.insert(position: Data.end(), n: getNumToSkipInBytes(), x: 0); |
163 | return Size; |
164 | } |
165 | |
166 | void patchNumToSkip(size_t FixupIdx, uint32_t DestIdx) { |
167 | // Calculate the distance from the byte following the fixup entry byte |
168 | // to the destination. The Target is calculated from after the |
169 | // `getNumToSkipInBytes()`-byte NumToSkip entry itself, so subtract |
170 | // `getNumToSkipInBytes()` from the displacement here to account for that. |
171 | assert(DestIdx >= FixupIdx + getNumToSkipInBytes() && |
172 | "Expecting a forward jump in the decoding table" ); |
173 | uint32_t Delta = DestIdx - FixupIdx - getNumToSkipInBytes(); |
174 | if (!isUIntN(N: 8 * getNumToSkipInBytes(), x: Delta)) |
175 | PrintFatalError( |
176 | Msg: "disassembler decoding table too large, try --large-decoder-table" ); |
177 | |
178 | Data[FixupIdx] = static_cast<uint8_t>(Delta); |
179 | Data[FixupIdx + 1] = static_cast<uint8_t>(Delta >> 8); |
180 | if (getNumToSkipInBytes() == 3) |
181 | Data[FixupIdx + 2] = static_cast<uint8_t>(Delta >> 16); |
182 | } |
183 | |
184 | private: |
185 | std::vector<uint8_t> Data; |
186 | }; |
187 | |
188 | struct DecoderTableInfo { |
189 | DecoderTable Table; |
190 | FixupScopeList FixupStack; |
191 | PredicateSet Predicates; |
192 | DecoderSet Decoders; |
193 | |
194 | bool isOutermostScope() const { return FixupStack.size() == 1; } |
195 | }; |
196 | |
197 | struct EncodingAndInst { |
198 | const Record *EncodingDef; |
199 | const CodeGenInstruction *Inst; |
200 | StringRef HwModeName; |
201 | |
202 | EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, |
203 | StringRef HwModeName = "" ) |
204 | : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} |
205 | }; |
206 | |
/// An (encoding id, opcode) pair. A default-constructed value holds zero in
/// both members.
struct EncodingIDAndOpcode {
  unsigned EncodingID = 0;
  unsigned Opcode = 0;

  EncodingIDAndOpcode() = default;
  EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
      : EncodingID(EncodingID), Opcode(Opcode) {}
};
215 | |
// A list of (encoding id, opcode) entries.
using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
// Maps a string key to a set of StringRefs. Going by the alias name this is
// presumably decoder namespace -> HwMode names; TODO confirm against users.
using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
218 | |
219 | class DecoderEmitter { |
220 | const RecordKeeper &RK; |
221 | std::vector<EncodingAndInst> NumberedEncodings; |
222 | |
223 | public: |
224 | DecoderEmitter(const RecordKeeper &R, StringRef PredicateNamespace) |
225 | : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {} |
226 | |
227 | // Emit the decoder state machine table. Returns a mask of MCD decoder ops |
228 | // that were emitted. |
229 | unsigned emitTable(formatted_raw_ostream &OS, DecoderTable &Table, |
230 | indent Indent, unsigned BitWidth, StringRef Namespace, |
231 | const EncodingIDsVec &EncodingIDs) const; |
232 | void emitInstrLenTable(formatted_raw_ostream &OS, |
233 | ArrayRef<unsigned> InstrLen) const; |
234 | void emitPredicateFunction(formatted_raw_ostream &OS, |
235 | PredicateSet &Predicates, indent Indent) const; |
236 | void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, |
237 | indent Indent) const; |
238 | |
239 | // run - Output the code emitter |
240 | void run(raw_ostream &o); |
241 | |
242 | private: |
243 | CodeGenTarget Target; |
244 | |
245 | public: |
246 | StringRef PredicateNamespace; |
247 | }; |
248 | |
249 | // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system |
250 | // for a bit value. |
251 | // |
252 | // BIT_UNFILTERED is used as the init value for a filter position. It is used |
253 | // only for filter processings. |
254 | struct BitValue { |
255 | enum bit_value_t : uint8_t { |
256 | BIT_FALSE, // '0' |
257 | BIT_TRUE, // '1' |
258 | BIT_UNSET, // '?', printed as '_' |
259 | BIT_UNFILTERED // unfiltered, printed as '.' |
260 | }; |
261 | |
262 | BitValue(bit_value_t V) : V(V) {} |
263 | explicit BitValue(const Init *Init) { |
264 | if (const auto *Bit = dyn_cast<BitInit>(Val: Init)) |
265 | V = Bit->getValue() ? BIT_TRUE : BIT_FALSE; |
266 | else |
267 | V = BIT_UNSET; |
268 | } |
269 | BitValue(const BitsInit &Bits, unsigned Idx) : BitValue(Bits.getBit(Bit: Idx)) {} |
270 | |
271 | bool isSet() const { return V == BIT_TRUE || V == BIT_FALSE; } |
272 | bool isUnset() const { return V == BIT_UNSET; } |
273 | std::optional<uint64_t> getValue() const { |
274 | if (isSet()) |
275 | return static_cast<uint64_t>(V); |
276 | return std::nullopt; |
277 | } |
278 | |
279 | // For printing a bit value. |
280 | operator StringRef() const { |
281 | switch (V) { |
282 | case BIT_FALSE: |
283 | return "0" ; |
284 | case BIT_TRUE: |
285 | return "1" ; |
286 | case BIT_UNSET: |
287 | return "_" ; |
288 | case BIT_UNFILTERED: |
289 | return "." ; |
290 | } |
291 | llvm_unreachable("Unknow bit value" ); |
292 | } |
293 | |
294 | bool operator==(bit_value_t Other) const { return Other == V; } |
295 | bool operator!=(bit_value_t Other) const { return Other != V; } |
296 | |
297 | private: |
298 | bit_value_t V; |
299 | }; |
300 | |
301 | } // end anonymous namespace |
302 | |
303 | static raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { |
304 | if (Value.EncodingDef != Value.Inst->TheDef) |
305 | OS << Value.EncodingDef->getName() << ":" ; |
306 | OS << Value.Inst->TheDef->getName(); |
307 | return OS; |
308 | } |
309 | |
310 | // Prints the bit value for each position. |
311 | static void dumpBits(raw_ostream &OS, const BitsInit &Bits) { |
312 | for (const Init *Bit : reverse(C: Bits.getBits())) |
313 | OS << BitValue(Bit); |
314 | } |
315 | |
316 | static const BitsInit &getBitsField(const Record &Def, StringRef FieldName) { |
317 | const RecordVal *RV = Def.getValue(Name: FieldName); |
318 | if (const BitsInit *Bits = dyn_cast<BitsInit>(Val: RV->getValue())) |
319 | return *Bits; |
320 | |
321 | // Handle variable length instructions. |
322 | VarLenInst VLI(cast<DagInit>(Val: RV->getValue()), RV); |
323 | SmallVector<const Init *, 16> Bits; |
324 | |
325 | for (const auto &SI : VLI) { |
326 | if (const BitsInit *BI = dyn_cast<BitsInit>(Val: SI.Value)) |
327 | llvm::append_range(C&: Bits, R: BI->getBits()); |
328 | else if (const BitInit *BI = dyn_cast<BitInit>(Val: SI.Value)) |
329 | Bits.push_back(Elt: BI); |
330 | else |
331 | Bits.append(NumInputs: SI.BitWidth, Elt: UnsetInit::get(RK&: Def.getRecords())); |
332 | } |
333 | |
334 | return *BitsInit::get(RK&: Def.getRecords(), Range: Bits); |
335 | } |
336 | |
337 | // Representation of the instruction to work on. |
338 | typedef std::vector<BitValue> insn_t; |
339 | |
340 | namespace { |
341 | |
// Key under which Filter::FilterChooserMap stores the chooser for the
// instructions without a constant segment value. It is the largest uint64_t
// (all bits set).
static constexpr uint64_t NO_FIXED_SEGMENTS_SENTINEL = ~uint64_t{0};
344 | |
345 | class FilterChooser; |
346 | |
347 | /// Filter - Filter works with FilterChooser to produce the decoding tree for |
348 | /// the ISA. |
349 | /// |
/// It is useful to think of a Filter as governing the switch stmts of the
/// decoding tree at a certain level. Each case stmt delegates to an inferior
/// FilterChooser to decide what further decoding logic to employ, or in other
/// words, what other remaining bits to look at. The FilterChooser eventually
/// chooses the best Filter to do its job.
355 | /// |
356 | /// This recursive scheme ends when the number of Opcodes assigned to the |
357 | /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when |
358 | /// the Filter/FilterChooser combo does not know how to distinguish among the |
359 | /// Opcodes assigned. |
360 | /// |
361 | /// An example of a conflict is |
362 | /// |
363 | /// Conflict: |
364 | /// 111101000.00........00010000.... |
365 | /// 111101000.00........0001........ |
366 | /// 1111010...00........0001........ |
367 | /// 1111010...00.................... |
368 | /// 1111010......................... |
369 | /// 1111............................ |
370 | /// ................................ |
371 | /// VST4q8a 111101000_00________00010000____ |
372 | /// VST4q8b 111101000_00________00010000____ |
373 | /// |
/// The Debug output shows the path that the decoding tree follows to reach
/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
377 | /// |
378 | /// The encoding info in the .td files does not specify this meta information, |
379 | /// which could have been used by the decoder to resolve the conflict. The |
380 | /// decoder could try to decode the even/odd register numbering and assign to |
381 | /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" |
382 | /// version and return the Opcode since the two have the same Asm format string. |
383 | class Filter { |
384 | protected: |
385 | const FilterChooser &Owner; // FilterChooser who owns this filter |
386 | unsigned StartBit; // the starting bit position |
387 | unsigned NumBits; // number of bits to filter |
388 | bool Mixed; // a mixed region contains both set and unset bits |
389 | |
390 | // Map of well-known segment value to the set of uid's with that value. |
391 | std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions; |
392 | |
393 | // Set of uid's with non-constant segment values. |
394 | std::vector<EncodingIDAndOpcode> VariableInstructions; |
395 | |
396 | // Map of well-known segment value to its delegate. |
397 | std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap; |
398 | |
399 | // Number of instructions which fall under FilteredInstructions category. |
400 | unsigned NumFiltered; |
401 | |
402 | // Keeps track of the last opcode in the filtered bucket. |
403 | EncodingIDAndOpcode LastOpcFiltered; |
404 | |
405 | public: |
406 | Filter(Filter &&f); |
407 | Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits, |
408 | bool mixed); |
409 | |
410 | ~Filter() = default; |
411 | |
412 | unsigned getNumFiltered() const { return NumFiltered; } |
413 | |
414 | EncodingIDAndOpcode getSingletonOpc() const { |
415 | assert(NumFiltered == 1); |
416 | return LastOpcFiltered; |
417 | } |
418 | |
419 | // Return the filter chooser for the group of instructions without constant |
420 | // segment values. |
421 | const FilterChooser &getVariableFC() const { |
422 | assert(NumFiltered == 1 && FilterChooserMap.size() == 1); |
423 | return *(FilterChooserMap.find(x: NO_FIXED_SEGMENTS_SENTINEL)->second); |
424 | } |
425 | |
426 | // Divides the decoding task into sub tasks and delegates them to the |
427 | // inferior FilterChooser's. |
428 | // |
429 | // A special case arises when there's only one entry in the filtered |
430 | // instructions. In order to unambiguously decode the singleton, we need to |
431 | // match the remaining undecoded encoding bits against the singleton. |
432 | void recurse(); |
433 | |
434 | // Emit table entries to decode instructions given a segment or segments of |
435 | // bits. |
436 | void emitTableEntry(DecoderTableInfo &TableInfo) const; |
437 | |
438 | // Returns the number of fanout produced by the filter. More fanout implies |
439 | // the filter distinguishes more categories of instructions. |
440 | unsigned usefulness() const; |
441 | }; // end class Filter |
442 | |
// States of the finite state machine that FilterChooser's filterProcessor()
// runs to produce the candidate filters.
enum bitAttr_t {
  ATTR_NONE = 0,
  ATTR_FILTERED = 1,
  ATTR_ALL_SET = 2,
  ATTR_ALL_UNSET = 3,
  ATTR_MIXED = 4
};
452 | |
453 | /// FilterChooser - FilterChooser chooses the best filter among a set of Filters |
454 | /// in order to perform the decoding of instructions at the current level. |
455 | /// |
456 | /// Decoding proceeds from the top down. Based on the well-known encoding bits |
457 | /// of instructions available, FilterChooser builds up the possible Filters that |
458 | /// can further the task of decoding by distinguishing among the remaining |
459 | /// candidate instructions. |
460 | /// |
/// Once a filter has been chosen, it is called upon to divide the decoding task
/// into sub-tasks and delegates them to its inferior FilterChoosers for further
/// processing.
464 | /// |
465 | /// It is useful to think of a Filter as governing the switch stmts of the |
466 | /// decoding tree. And each case is delegated to an inferior FilterChooser to |
467 | /// decide what further remaining bits to look at. |
468 | |
469 | class FilterChooser { |
470 | protected: |
471 | friend class Filter; |
472 | |
473 | // Vector of codegen instructions to choose our filter. |
474 | ArrayRef<EncodingAndInst> AllInstructions; |
475 | |
476 | // Vector of uid's for this filter chooser to work on. |
477 | // The first member of the pair is the opcode id being decoded, the second is |
478 | // the opcode id that should be emitted. |
479 | ArrayRef<EncodingIDAndOpcode> Opcodes; |
480 | |
481 | // Lookup table for the operand decoding of instructions. |
482 | const std::map<unsigned, std::vector<OperandInfo>> &Operands; |
483 | |
484 | // Vector of candidate filters. |
485 | std::vector<Filter> Filters; |
486 | |
487 | // Array of bit values passed down from our parent. |
488 | // Set to all BIT_UNFILTERED's for Parent == NULL. |
489 | std::vector<BitValue> FilterBitValues; |
490 | |
491 | // Links to the FilterChooser above us in the decoding tree. |
492 | const FilterChooser *Parent; |
493 | |
494 | // Index of the best filter from Filters. |
495 | int BestIndex; |
496 | |
497 | // Width of instructions |
498 | unsigned BitWidth; |
499 | |
500 | // Parent emitter |
501 | const DecoderEmitter *Emitter; |
502 | |
503 | struct Island { |
504 | unsigned StartBit; |
505 | unsigned NumBits; |
506 | uint64_t FieldVal; |
507 | }; |
508 | |
509 | public: |
510 | FilterChooser(ArrayRef<EncodingAndInst> Insts, |
511 | ArrayRef<EncodingIDAndOpcode> IDs, |
512 | const std::map<unsigned, std::vector<OperandInfo>> &Ops, |
513 | unsigned BW, const DecoderEmitter *E) |
514 | : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), |
515 | FilterBitValues(BW, BitValue::BIT_UNFILTERED), Parent(nullptr), |
516 | BestIndex(-1), BitWidth(BW), Emitter(E) { |
517 | doFilter(); |
518 | } |
519 | |
520 | FilterChooser(ArrayRef<EncodingAndInst> Insts, |
521 | ArrayRef<EncodingIDAndOpcode> IDs, |
522 | const std::map<unsigned, std::vector<OperandInfo>> &Ops, |
523 | const std::vector<BitValue> &ParentFilterBitValues, |
524 | const FilterChooser &parent) |
525 | : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), |
526 | FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), |
527 | BitWidth(parent.BitWidth), Emitter(parent.Emitter) { |
528 | doFilter(); |
529 | } |
530 | |
531 | FilterChooser(const FilterChooser &) = delete; |
532 | void operator=(const FilterChooser &) = delete; |
533 | |
534 | unsigned getBitWidth() const { return BitWidth; } |
535 | |
536 | protected: |
537 | // Populates the insn given the uid. |
538 | void insnWithID(insn_t &Insn, unsigned Opcode) const { |
539 | const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; |
540 | const BitsInit &Bits = getBitsField(Def: *EncodingDef, FieldName: "Inst" ); |
541 | Insn.resize(new_size: std::max(a: BitWidth, b: Bits.getNumBits()), x: BitValue::BIT_UNSET); |
542 | // We may have a SoftFail bitmask, which specifies a mask where an encoding |
543 | // may differ from the value in "Inst" and yet still be valid, but the |
544 | // disassembler should return SoftFail instead of Success. |
545 | // |
546 | // This is used for marking UNPREDICTABLE instructions in the ARM world. |
547 | const RecordVal *RV = EncodingDef->getValue(Name: "SoftFail" ); |
548 | const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(Val: RV->getValue()) : nullptr; |
549 | for (unsigned i = 0; i < Bits.getNumBits(); ++i) { |
550 | if (SFBits && BitValue(*SFBits, i) == BitValue::BIT_TRUE) |
551 | Insn[i] = BitValue::BIT_UNSET; |
552 | else |
553 | Insn[i] = BitValue(Bits, i); |
554 | } |
555 | } |
556 | |
557 | // Populates the field of the insn given the start position and the number of |
558 | // consecutive bits to scan for. |
559 | // |
560 | // Returns a pair of values (indicator, field), where the indicator is false |
561 | // if there exists any uninitialized bit value in the range and true if all |
562 | // bits are well-known. The second value is the potentially populated field. |
563 | std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit, |
564 | unsigned NumBits) const; |
565 | |
566 | /// dumpFilterArray - dumpFilterArray prints out debugging info for the given |
567 | /// filter array as a series of chars. |
568 | void dumpFilterArray(raw_ostream &OS, ArrayRef<BitValue> Filter) const; |
569 | |
570 | /// dumpStack - dumpStack traverses the filter chooser chain and calls |
571 | /// dumpFilterArray on each filter chooser up to the top level one. |
572 | void dumpStack(raw_ostream &OS, const char *prefix) const; |
573 | |
574 | Filter &bestFilter() { |
575 | assert(BestIndex != -1 && "BestIndex not set" ); |
576 | return Filters[BestIndex]; |
577 | } |
578 | |
579 | bool PositionFiltered(unsigned Idx) const { |
580 | return FilterBitValues[Idx].isSet(); |
581 | } |
582 | |
583 | // Calculates the island(s) needed to decode the instruction. |
584 | // This returns a list of undecoded bits of an instructions, for example, |
585 | // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be |
586 | // decoded bits in order to verify that the instruction matches the Opcode. |
587 | unsigned getIslands(std::vector<Island> &Islands, const insn_t &Insn) const; |
588 | |
589 | // Emits code to check the Predicates member of an instruction are true. |
590 | // Returns true if predicate matches were emitted, false otherwise. |
591 | bool emitPredicateMatch(raw_ostream &OS, unsigned Opc) const; |
592 | bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, |
593 | raw_ostream &OS) const; |
594 | |
595 | bool doesOpcodeNeedPredicate(unsigned Opc) const; |
596 | unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; |
597 | void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; |
598 | |
599 | void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; |
600 | |
601 | // Emits table entries to decode the singleton. |
602 | void emitSingletonTableEntry(DecoderTableInfo &TableInfo, |
603 | EncodingIDAndOpcode Opc) const; |
604 | |
605 | // Emits code to decode the singleton, and then to decode the rest. |
606 | void emitSingletonTableEntry(DecoderTableInfo &TableInfo, |
607 | const Filter &Best) const; |
608 | |
609 | bool emitBinaryParser(raw_ostream &OS, indent Indent, |
610 | const OperandInfo &OpInfo) const; |
611 | |
612 | bool emitDecoder(raw_ostream &OS, indent Indent, unsigned Opc) const; |
613 | std::pair<unsigned, bool> getDecoderIndex(DecoderSet &Decoders, |
614 | unsigned Opc) const; |
615 | |
616 | // Assign a single filter and run with it. |
617 | void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); |
618 | |
619 | // reportRegion is a helper function for filterProcessor to mark a region as |
620 | // eligible for use as a filter region. |
621 | void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, |
622 | bool AllowMixed); |
623 | |
624 | // FilterProcessor scans the well-known encoding bits of the instructions and |
625 | // builds up a list of candidate filters. It chooses the best filter and |
626 | // recursively descends down the decoding tree. |
627 | bool filterProcessor(bool AllowMixed, bool Greedy = true); |
628 | |
629 | // Decides on the best configuration of filter(s) to use in order to decode |
630 | // the instructions. A conflict of instructions may occur, in which case we |
631 | // dump the conflict set to the standard error. |
632 | void doFilter(); |
633 | |
634 | public: |
635 | // emitTableEntries - Emit state machine entries to decode our share of |
636 | // instructions. |
637 | void emitTableEntries(DecoderTableInfo &TableInfo) const; |
638 | }; |
639 | |
640 | } // end anonymous namespace |
641 | |
642 | /////////////////////////// |
643 | // // |
644 | // Filter Implementation // |
645 | // // |
646 | /////////////////////////// |
647 | |
648 | Filter::Filter(Filter &&f) |
649 | : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), |
650 | FilteredInstructions(std::move(f.FilteredInstructions)), |
651 | VariableInstructions(std::move(f.VariableInstructions)), |
652 | FilterChooserMap(std::move(f.FilterChooserMap)), |
653 | NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {} |
654 | |
655 | Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits, |
656 | bool mixed) |
657 | : Owner(owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { |
658 | assert(StartBit + NumBits - 1 < Owner.BitWidth); |
659 | |
660 | NumFiltered = 0; |
661 | LastOpcFiltered = {0, 0}; |
662 | |
663 | for (const auto &OpcPair : Owner.Opcodes) { |
664 | insn_t Insn; |
665 | |
666 | // Populates the insn given the uid. |
667 | Owner.insnWithID(Insn, Opcode: OpcPair.EncodingID); |
668 | |
669 | // Scans the segment for possibly well-specified encoding bits. |
670 | auto [Ok, Field] = Owner.fieldFromInsn(Insn, StartBit, NumBits); |
671 | |
672 | if (Ok) { |
673 | // The encoding bits are well-known. Lets add the uid of the |
674 | // instruction into the bucket keyed off the constant field value. |
675 | LastOpcFiltered = OpcPair; |
676 | FilteredInstructions[Field].push_back(x: LastOpcFiltered); |
677 | ++NumFiltered; |
678 | } else { |
679 | // Some of the encoding bit(s) are unspecified. This contributes to |
680 | // one additional member of "Variable" instructions. |
681 | VariableInstructions.push_back(x: OpcPair); |
682 | } |
683 | } |
684 | |
685 | assert((FilteredInstructions.size() + VariableInstructions.size() > 0) && |
686 | "Filter returns no instruction categories" ); |
687 | } |
688 | |
689 | // Divides the decoding task into sub tasks and delegates them to the |
690 | // inferior FilterChooser's. |
691 | // |
692 | // A special case arises when there's only one entry in the filtered |
693 | // instructions. In order to unambiguously decode the singleton, we need to |
694 | // match the remaining undecoded encoding bits against the singleton. |
695 | void Filter::recurse() { |
696 | // Starts by inheriting our parent filter chooser's filter bit values. |
697 | std::vector<BitValue> BitValueArray(Owner.FilterBitValues); |
698 | |
699 | if (!VariableInstructions.empty()) { |
700 | // Conservatively marks each segment position as BIT_UNSET. |
701 | for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) |
702 | BitValueArray[StartBit + bitIndex] = BitValue::BIT_UNSET; |
703 | |
704 | // Delegates to an inferior filter chooser for further processing on this |
705 | // group of instructions whose segment values are variable. |
706 | FilterChooserMap.try_emplace( |
707 | k: NO_FIXED_SEGMENTS_SENTINEL, |
708 | args: std::make_unique<FilterChooser>(args: Owner.AllInstructions, |
709 | args&: VariableInstructions, args: Owner.Operands, |
710 | args&: BitValueArray, args: Owner)); |
711 | } |
712 | |
713 | // No need to recurse for a singleton filtered instruction. |
714 | // See also Filter::emit*(). |
715 | if (getNumFiltered() == 1) { |
716 | assert(FilterChooserMap.size() == 1); |
717 | return; |
718 | } |
719 | |
720 | // Otherwise, create sub choosers. |
721 | for (const auto &Inst : FilteredInstructions) { |
722 | // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. |
723 | for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) |
724 | BitValueArray[StartBit + bitIndex] = Inst.first & (1ULL << bitIndex) |
725 | ? BitValue::BIT_TRUE |
726 | : BitValue::BIT_FALSE; |
727 | |
728 | // Delegates to an inferior filter chooser for further processing on this |
729 | // category of instructions. |
730 | FilterChooserMap.try_emplace( |
731 | k: Inst.first, |
732 | args: std::make_unique<FilterChooser>(args: Owner.AllInstructions, args: Inst.second, |
733 | args: Owner.Operands, args&: BitValueArray, args: Owner)); |
734 | } |
735 | } |
736 | |
737 | static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, |
738 | uint32_t DestIdx) { |
739 | // Any NumToSkip fixups in the current scope can resolve to the |
740 | // current location. |
741 | for (uint32_t FixupIdx : Fixups) |
742 | Table.patchNumToSkip(FixupIdx, DestIdx); |
743 | } |
744 | |
745 | // Emit table entries to decode instructions given a segment or segments |
746 | // of bits. |
747 | void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { |
748 | assert(isUInt<8>(NumBits) && "NumBits overflowed uint8 table entry!" ); |
749 | TableInfo.Table.push_back(Item: MCD::OPC_ExtractField); |
750 | |
751 | TableInfo.Table.insertULEB128(Value: StartBit); |
752 | TableInfo.Table.push_back(Item: NumBits); |
753 | |
754 | // If the NO_FIXED_SEGMENTS_SENTINEL is present, we need to add a new scope |
755 | // for this filter. Otherwise, we can skip adding a new scope and any |
756 | // patching added will automatically be added to the enclosing scope. |
757 | |
758 | // If NO_FIXED_SEGMENTS_SENTINEL is present, it will be last entry in |
759 | // FilterChooserMap. |
760 | |
761 | const uint64_t LastFilter = FilterChooserMap.rbegin()->first; |
762 | bool HasFallthrough = LastFilter == NO_FIXED_SEGMENTS_SENTINEL; |
763 | if (HasFallthrough) |
764 | TableInfo.FixupStack.emplace_back(); |
765 | |
766 | DecoderTable &Table = TableInfo.Table; |
767 | |
768 | size_t PrevFilter = 0; |
769 | for (const auto &[FilterVal, Delegate] : FilterChooserMap) { |
770 | // Field value NO_FIXED_SEGMENTS_SENTINEL implies a non-empty set of |
771 | // variable instructions. See also recurse(). |
772 | if (FilterVal == NO_FIXED_SEGMENTS_SENTINEL) { |
773 | // Each scope should always have at least one filter value to check |
774 | // for. |
775 | assert(PrevFilter != 0 && "empty filter set!" ); |
776 | FixupList &CurScope = TableInfo.FixupStack.back(); |
777 | // Resolve any NumToSkip fixups in the current scope. |
778 | resolveTableFixups(Table, Fixups: CurScope, DestIdx: Table.size()); |
779 | |
780 | // Delete the scope we have added here. |
781 | TableInfo.FixupStack.pop_back(); |
782 | |
783 | PrevFilter = 0; // Don't re-process the filter's fallthrough. |
784 | } else { |
785 | // The last filtervalue emitted can be OPC_FilterValue if we are at |
786 | // outermost scope. |
787 | const uint8_t DecoderOp = |
788 | FilterVal == LastFilter && TableInfo.isOutermostScope() |
789 | ? MCD::OPC_FilterValueOrFail |
790 | : MCD::OPC_FilterValue; |
791 | Table.push_back(Item: DecoderOp); |
792 | Table.insertULEB128(Value: FilterVal); |
793 | if (DecoderOp == MCD::OPC_FilterValue) { |
794 | // Reserve space for the NumToSkip entry. We'll backpatch the value |
795 | // later. |
796 | PrevFilter = Table.insertNumToSkip(); |
797 | } else { |
798 | PrevFilter = 0; |
799 | } |
800 | } |
801 | |
802 | // We arrive at a category of instructions with the same segment value. |
803 | // Now delegate to the sub filter chooser for further decodings. |
804 | // The case may fallthrough, which happens if the remaining well-known |
805 | // encoding bits do not match exactly. |
806 | Delegate->emitTableEntries(TableInfo); |
807 | |
808 | // Now that we've emitted the body of the handler, update the NumToSkip |
809 | // of the filter itself to be able to skip forward when false. |
810 | if (PrevFilter) |
811 | Table.patchNumToSkip(FixupIdx: PrevFilter, DestIdx: Table.size()); |
812 | } |
813 | |
814 | // If there is no fallthrough and the final filter was not in the outermost |
815 | // scope, then it must be fixed up according to the enclosing scope rather |
816 | // than the current position. |
817 | if (PrevFilter) |
818 | TableInfo.FixupStack.back().push_back(x: PrevFilter); |
819 | } |
820 | |
821 | // Returns the number of fanout produced by the filter. More fanout implies |
822 | // the filter distinguishes more categories of instructions. |
823 | unsigned Filter::usefulness() const { |
824 | return FilteredInstructions.size() + VariableInstructions.empty(); |
825 | } |
826 | |
827 | ////////////////////////////////// |
828 | // // |
829 | // Filterchooser Implementation // |
830 | // // |
831 | ////////////////////////////////// |
832 | |
833 | // Emit the decoder state machine table. Returns a mask of MCD decoder ops |
834 | // that were emitted. |
835 | unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS, |
836 | DecoderTable &Table, indent Indent, |
837 | unsigned BitWidth, StringRef Namespace, |
838 | const EncodingIDsVec &EncodingIDs) const { |
839 | // We'll need to be able to map from a decoded opcode into the corresponding |
840 | // EncodingID for this specific combination of BitWidth and Namespace. This |
841 | // is used below to index into NumberedEncodings. |
842 | DenseMap<unsigned, unsigned> OpcodeToEncodingID; |
843 | OpcodeToEncodingID.reserve(NumEntries: EncodingIDs.size()); |
844 | for (const auto &EI : EncodingIDs) |
845 | OpcodeToEncodingID[EI.Opcode] = EI.EncodingID; |
846 | |
847 | OS << Indent << "static const uint8_t DecoderTable" << Namespace << BitWidth |
848 | << "[] = {\n" ; |
849 | |
850 | Indent += 2; |
851 | |
852 | // Emit ULEB128 encoded value to OS, returning the number of bytes emitted. |
853 | auto emitULEB128 = [](DecoderTable::const_iterator &I, |
854 | formatted_raw_ostream &OS) { |
855 | while (*I >= 128) |
856 | OS << (unsigned)*I++ << ", " ; |
857 | OS << (unsigned)*I++ << ", " ; |
858 | }; |
859 | |
860 | // Emit `getNumToSkipInBytes()`-byte numtoskip value to OS, returning the |
861 | // NumToSkip value. |
862 | auto emitNumToSkip = [](DecoderTable::const_iterator &I, |
863 | formatted_raw_ostream &OS) { |
864 | uint8_t Byte = *I++; |
865 | uint32_t NumToSkip = Byte; |
866 | OS << (unsigned)Byte << ", " ; |
867 | Byte = *I++; |
868 | OS << (unsigned)Byte << ", " ; |
869 | NumToSkip |= Byte << 8; |
870 | if (getNumToSkipInBytes() == 3) { |
871 | Byte = *I++; |
872 | OS << (unsigned)(Byte) << ", " ; |
873 | NumToSkip |= Byte << 16; |
874 | } |
875 | return NumToSkip; |
876 | }; |
877 | |
878 | // FIXME: We may be able to use the NumToSkip values to recover |
879 | // appropriate indentation levels. |
880 | DecoderTable::const_iterator I = Table.begin(); |
881 | DecoderTable::const_iterator E = Table.end(); |
882 | const uint8_t *const EndPtr = Table.data() + Table.size(); |
883 | |
884 | auto = [&](uint32_t NumToSkip, bool = false) { |
885 | uint32_t Index = ((I - Table.begin()) + NumToSkip); |
886 | OS << (InComment ? ", " : "// " ); |
887 | OS << "Skip to: " << Index; |
888 | if (*(I + NumToSkip) == MCD::OPC_Fail) |
889 | OS << " (Fail)" ; |
890 | }; |
891 | |
892 | unsigned OpcodeMask = 0; |
893 | |
894 | while (I != E) { |
895 | assert(I < E && "incomplete decode table entry!" ); |
896 | |
897 | uint64_t Pos = I - Table.begin(); |
898 | OS << "/* " << Pos << " */" ; |
899 | OS.PadToColumn(NewCol: 12); |
900 | |
901 | const uint8_t DecoderOp = *I++; |
902 | OpcodeMask |= (1 << DecoderOp); |
903 | switch (DecoderOp) { |
904 | default: |
905 | PrintFatalError(Msg: "Invalid decode table opcode: " + Twine((int)DecoderOp) + |
906 | " at index " + Twine(Pos)); |
907 | case MCD::OPC_ExtractField: { |
908 | OS << Indent << "MCD::OPC_ExtractField, " ; |
909 | |
910 | // ULEB128 encoded start value. |
911 | const char *ErrMsg = nullptr; |
912 | unsigned Start = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg); |
913 | assert(ErrMsg == nullptr && "ULEB128 value too large!" ); |
914 | emitULEB128(I, OS); |
915 | |
916 | unsigned Len = *I++; |
917 | OS << Len << ", // Inst{" ; |
918 | if (Len > 1) |
919 | OS << (Start + Len - 1) << "-" ; |
920 | OS << Start << "} ...\n" ; |
921 | break; |
922 | } |
923 | case MCD::OPC_FilterValue: |
924 | case MCD::OPC_FilterValueOrFail: { |
925 | bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail; |
926 | OS << Indent << "MCD::OPC_FilterValue" << (IsFail ? "OrFail, " : ", " ); |
927 | // The filter value is ULEB128 encoded. |
928 | emitULEB128(I, OS); |
929 | |
930 | if (!IsFail) { |
931 | uint32_t NumToSkip = emitNumToSkip(I, OS); |
932 | emitNumToSkipComment(NumToSkip); |
933 | } |
934 | OS << '\n'; |
935 | break; |
936 | } |
937 | case MCD::OPC_CheckField: |
938 | case MCD::OPC_CheckFieldOrFail: { |
939 | bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail; |
940 | OS << Indent << "MCD::OPC_CheckField" << (IsFail ? "OrFail, " : ", " ); |
941 | // ULEB128 encoded start value. |
942 | emitULEB128(I, OS); |
943 | // 8-bit length. |
944 | unsigned Len = *I++; |
945 | OS << Len << ", " ; |
946 | // ULEB128 encoded field value. |
947 | emitULEB128(I, OS); |
948 | |
949 | if (!IsFail) { |
950 | uint32_t NumToSkip = emitNumToSkip(I, OS); |
951 | emitNumToSkipComment(NumToSkip); |
952 | } |
953 | OS << '\n'; |
954 | break; |
955 | } |
956 | case MCD::OPC_CheckPredicate: |
957 | case MCD::OPC_CheckPredicateOrFail: { |
958 | bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail; |
959 | |
960 | OS << Indent << "MCD::OPC_CheckPredicate" << (IsFail ? "OrFail, " : ", " ); |
961 | emitULEB128(I, OS); |
962 | |
963 | if (!IsFail) { |
964 | uint32_t NumToSkip = emitNumToSkip(I, OS); |
965 | emitNumToSkipComment(NumToSkip); |
966 | } |
967 | OS << '\n'; |
968 | break; |
969 | } |
970 | case MCD::OPC_Decode: |
971 | case MCD::OPC_TryDecode: |
972 | case MCD::OPC_TryDecodeOrFail: { |
973 | bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail; |
974 | bool IsTry = DecoderOp == MCD::OPC_TryDecode || IsFail; |
975 | // Decode the Opcode value. |
976 | const char *ErrMsg = nullptr; |
977 | unsigned Opc = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg); |
978 | assert(ErrMsg == nullptr && "ULEB128 value too large!" ); |
979 | |
980 | OS << Indent << "MCD::OPC_" << (IsTry ? "Try" : "" ) << "Decode" |
981 | << (IsFail ? "OrFail, " : ", " ); |
982 | emitULEB128(I, OS); |
983 | |
984 | // Decoder index. |
985 | unsigned DecodeIdx = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg); |
986 | assert(ErrMsg == nullptr && "ULEB128 value too large!" ); |
987 | emitULEB128(I, OS); |
988 | |
989 | auto EncI = OpcodeToEncodingID.find(Val: Opc); |
990 | assert(EncI != OpcodeToEncodingID.end() && "no encoding entry" ); |
991 | auto EncodingID = EncI->second; |
992 | |
993 | if (!IsTry) { |
994 | OS << "// Opcode: " << NumberedEncodings[EncodingID] |
995 | << ", DecodeIdx: " << DecodeIdx << '\n'; |
996 | break; |
997 | } |
998 | |
999 | // Fallthrough for OPC_TryDecode. |
1000 | if (!IsFail) { |
1001 | uint32_t NumToSkip = emitNumToSkip(I, OS); |
1002 | OS << "// Opcode: " << NumberedEncodings[EncodingID] |
1003 | << ", DecodeIdx: " << DecodeIdx; |
1004 | emitNumToSkipComment(NumToSkip, /*InComment=*/true); |
1005 | } |
1006 | OS << '\n'; |
1007 | break; |
1008 | } |
1009 | case MCD::OPC_SoftFail: { |
1010 | OS << Indent << "MCD::OPC_SoftFail, " ; |
1011 | // Decode the positive mask. |
1012 | const char *ErrMsg = nullptr; |
1013 | uint64_t PositiveMask = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg); |
1014 | assert(ErrMsg == nullptr && "ULEB128 value too large!" ); |
1015 | emitULEB128(I, OS); |
1016 | |
1017 | // Decode the negative mask. |
1018 | uint64_t NegativeMask = decodeULEB128(p: &*I, n: nullptr, end: EndPtr, error: &ErrMsg); |
1019 | assert(ErrMsg == nullptr && "ULEB128 value too large!" ); |
1020 | emitULEB128(I, OS); |
1021 | OS << "// +ve mask: 0x" ; |
1022 | OS.write_hex(N: PositiveMask); |
1023 | OS << ", -ve mask: 0x" ; |
1024 | OS.write_hex(N: NegativeMask); |
1025 | OS << '\n'; |
1026 | break; |
1027 | } |
1028 | case MCD::OPC_Fail: |
1029 | OS << Indent << "MCD::OPC_Fail,\n" ; |
1030 | break; |
1031 | } |
1032 | } |
1033 | OS << Indent << "0\n" ; |
1034 | |
1035 | Indent -= 2; |
1036 | |
1037 | OS << Indent << "};\n\n" ; |
1038 | |
1039 | return OpcodeMask; |
1040 | } |
1041 | |
1042 | void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, |
1043 | ArrayRef<unsigned> InstrLen) const { |
1044 | OS << "static const uint8_t InstrLenTable[] = {\n" ; |
1045 | for (unsigned Len : InstrLen) |
1046 | OS << Len << ",\n" ; |
1047 | OS << "};\n\n" ; |
1048 | } |
1049 | |
1050 | void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, |
1051 | PredicateSet &Predicates, |
1052 | indent Indent) const { |
1053 | // The predicate function is just a big switch statement based on the |
1054 | // input predicate index. |
1055 | OS << Indent << "static bool checkDecoderPredicate(unsigned Idx, " |
1056 | << "const FeatureBitset &Bits) {\n" ; |
1057 | Indent += 2; |
1058 | OS << Indent << "switch (Idx) {\n" ; |
1059 | OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n" ; |
1060 | for (const auto &[Index, Predicate] : enumerate(First&: Predicates)) { |
1061 | OS << Indent << "case " << Index << ":\n" ; |
1062 | OS << Indent + 2 << "return (" << Predicate << ");\n" ; |
1063 | } |
1064 | OS << Indent << "}\n" ; |
1065 | Indent -= 2; |
1066 | OS << Indent << "}\n\n" ; |
1067 | } |
1068 | |
1069 | void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, |
1070 | DecoderSet &Decoders, |
1071 | indent Indent) const { |
1072 | // The decoder function is just a big switch statement or a table of function |
1073 | // pointers based on the input decoder index. |
1074 | |
1075 | // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits |
1076 | // It would be better for emitBinaryParser to use a 64-bit tmp whenever |
1077 | // possible but fall back to an InsnType-sized tmp for truly large fields. |
1078 | StringRef TmpTypeDecl = |
1079 | "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, " |
1080 | "InsnType, uint64_t>;\n" ; |
1081 | StringRef DecodeParams = |
1082 | "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const " |
1083 | "MCDisassembler *Decoder, bool &DecodeComplete" ; |
1084 | |
1085 | if (UseFnTableInDecodeToMCInst) { |
1086 | // Emit a function for each case first. |
1087 | for (const auto &[Index, Decoder] : enumerate(First&: Decoders)) { |
1088 | OS << Indent << "template <typename InsnType>\n" ; |
1089 | OS << Indent << "DecodeStatus decodeFn" << Index << "(" << DecodeParams |
1090 | << ") {\n" ; |
1091 | Indent += 2; |
1092 | OS << Indent << TmpTypeDecl; |
1093 | OS << Indent << "[[maybe_unused]] TmpType tmp;\n" ; |
1094 | OS << Decoder; |
1095 | OS << Indent << "return S;\n" ; |
1096 | Indent -= 2; |
1097 | OS << Indent << "}\n\n" ; |
1098 | } |
1099 | } |
1100 | |
1101 | OS << Indent << "// Handling " << Decoders.size() << " cases.\n" ; |
1102 | OS << Indent << "template <typename InsnType>\n" ; |
1103 | OS << Indent << "static DecodeStatus decodeToMCInst(unsigned Idx, " |
1104 | << DecodeParams << ") {\n" ; |
1105 | Indent += 2; |
1106 | OS << Indent << "DecodeComplete = true;\n" ; |
1107 | |
1108 | if (UseFnTableInDecodeToMCInst) { |
1109 | // Build a table of function pointers. |
1110 | OS << Indent << "using DecodeFnTy = DecodeStatus (*)(" << DecodeParams |
1111 | << ");\n" ; |
1112 | OS << Indent << "static constexpr DecodeFnTy decodeFnTable[] = {\n" ; |
1113 | for (size_t Index : llvm::seq(Size: Decoders.size())) |
1114 | OS << Indent + 2 << "decodeFn" << Index << ",\n" ; |
1115 | OS << Indent << "};\n" ; |
1116 | OS << Indent << "if (Idx >= " << Decoders.size() << ")\n" ; |
1117 | OS << Indent + 2 << "llvm_unreachable(\"Invalid index!\");\n" ; |
1118 | OS << Indent |
1119 | << "return decodeFnTable[Idx](S, insn, MI, Address, Decoder, " |
1120 | "DecodeComplete);\n" ; |
1121 | } else { |
1122 | OS << Indent << TmpTypeDecl; |
1123 | OS << Indent << "TmpType tmp;\n" ; |
1124 | OS << Indent << "switch (Idx) {\n" ; |
1125 | OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n" ; |
1126 | for (const auto &[Index, Decoder] : enumerate(First&: Decoders)) { |
1127 | OS << Indent << "case " << Index << ":\n" ; |
1128 | OS << Decoder; |
1129 | OS << Indent + 2 << "return S;\n" ; |
1130 | } |
1131 | OS << Indent << "}\n" ; |
1132 | } |
1133 | Indent -= 2; |
1134 | OS << Indent << "}\n" ; |
1135 | } |
1136 | |
1137 | // Populates the field of the insn given the start position and the number of |
1138 | // consecutive bits to scan for. |
1139 | // |
1140 | // Returns a pair of values (indicator, field), where the indicator is false |
1141 | // if there exists any uninitialized bit value in the range and true if all |
1142 | // bits are well-known. The second value is the potentially populated field. |
1143 | std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn, |
1144 | unsigned StartBit, |
1145 | unsigned NumBits) const { |
1146 | uint64_t Field = 0; |
1147 | |
1148 | for (unsigned i = 0; i < NumBits; ++i) { |
1149 | if (Insn[StartBit + i] == BitValue::BIT_UNSET) |
1150 | return {false, Field}; |
1151 | |
1152 | if (Insn[StartBit + i] == BitValue::BIT_TRUE) |
1153 | Field = Field | (1ULL << i); |
1154 | } |
1155 | |
1156 | return {true, Field}; |
1157 | } |
1158 | |
1159 | /// dumpFilterArray - dumpFilterArray prints out debugging info for the given |
1160 | /// filter array as a series of chars. |
1161 | void FilterChooser::dumpFilterArray(raw_ostream &OS, |
1162 | ArrayRef<BitValue> Filter) const { |
1163 | for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) |
1164 | OS << Filter[bitIndex - 1]; |
1165 | } |
1166 | |
1167 | /// dumpStack - dumpStack traverses the filter chooser chain and calls |
1168 | /// dumpFilterArray on each filter chooser up to the top level one. |
1169 | void FilterChooser::dumpStack(raw_ostream &OS, const char *prefix) const { |
1170 | const FilterChooser *current = this; |
1171 | |
1172 | while (current) { |
1173 | OS << prefix; |
1174 | dumpFilterArray(OS, Filter: current->FilterBitValues); |
1175 | OS << '\n'; |
1176 | current = current->Parent; |
1177 | } |
1178 | } |
1179 | |
1180 | // Calculates the island(s) needed to decode the instruction. |
1181 | // This returns a list of undecoded bits of an instructions, for example, |
1182 | // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be |
1183 | // decoded bits in order to verify that the instruction matches the Opcode. |
1184 | unsigned FilterChooser::getIslands(std::vector<Island> &Islands, |
1185 | const insn_t &Insn) const { |
1186 | uint64_t FieldVal; |
1187 | unsigned StartBit; |
1188 | |
1189 | // 0: Init |
1190 | // 1: Water (the bit value does not affect decoding) |
1191 | // 2: Island (well-known bit value needed for decoding) |
1192 | unsigned State = 0; |
1193 | |
1194 | for (unsigned i = 0; i < BitWidth; ++i) { |
1195 | std::optional<uint64_t> Val = Insn[i].getValue(); |
1196 | bool Filtered = PositionFiltered(Idx: i); |
1197 | switch (State) { |
1198 | default: |
1199 | llvm_unreachable("Unreachable code!" ); |
1200 | case 0: |
1201 | case 1: |
1202 | if (Filtered || !Val) { |
1203 | State = 1; // Still in Water |
1204 | } else { |
1205 | State = 2; // Into the Island |
1206 | StartBit = i; |
1207 | FieldVal = *Val; |
1208 | } |
1209 | break; |
1210 | case 2: |
1211 | if (Filtered || !Val) { |
1212 | State = 1; // Into the Water |
1213 | Islands.push_back(x: {.StartBit: StartBit, .NumBits: i - StartBit, .FieldVal: FieldVal}); |
1214 | } else { |
1215 | State = 2; // Still in Island |
1216 | FieldVal |= *Val << (i - StartBit); |
1217 | } |
1218 | break; |
1219 | } |
1220 | } |
1221 | // If we are still in Island after the loop, do some housekeeping. |
1222 | if (State == 2) |
1223 | Islands.push_back(x: {.StartBit: StartBit, .NumBits: BitWidth - StartBit, .FieldVal: FieldVal}); |
1224 | |
1225 | return Islands.size(); |
1226 | } |
1227 | |
1228 | bool FilterChooser::emitBinaryParser(raw_ostream &OS, indent Indent, |
1229 | const OperandInfo &OpInfo) const { |
1230 | const std::string &Decoder = OpInfo.Decoder; |
1231 | |
1232 | bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; |
1233 | |
1234 | if (UseInsertBits) { |
1235 | OS << Indent << "tmp = 0x" ; |
1236 | OS.write_hex(N: OpInfo.InitValue); |
1237 | OS << ";\n" ; |
1238 | } |
1239 | |
1240 | for (const EncodingField &EF : OpInfo) { |
1241 | OS << Indent; |
1242 | if (UseInsertBits) |
1243 | OS << "insertBits(tmp, " ; |
1244 | else |
1245 | OS << "tmp = " ; |
1246 | OS << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; |
1247 | if (UseInsertBits) |
1248 | OS << ", " << EF.Offset << ", " << EF.Width << ')'; |
1249 | else if (EF.Offset != 0) |
1250 | OS << " << " << EF.Offset; |
1251 | OS << ";\n" ; |
1252 | } |
1253 | |
1254 | bool OpHasCompleteDecoder; |
1255 | if (!Decoder.empty()) { |
1256 | OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; |
1257 | OS << Indent << "if (!Check(S, " << Decoder |
1258 | << "(MI, tmp, Address, Decoder))) { " |
1259 | << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; " ) |
1260 | << "return MCDisassembler::Fail; }\n" ; |
1261 | } else { |
1262 | OpHasCompleteDecoder = true; |
1263 | OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n" ; |
1264 | } |
1265 | return OpHasCompleteDecoder; |
1266 | } |
1267 | |
1268 | bool FilterChooser::emitDecoder(raw_ostream &OS, indent Indent, |
1269 | unsigned Opc) const { |
1270 | bool HasCompleteDecoder = true; |
1271 | |
1272 | for (const auto &Op : Operands.find(x: Opc)->second) { |
1273 | // If a custom instruction decoder was specified, use that. |
1274 | if (Op.numFields() == 0 && !Op.Decoder.empty()) { |
1275 | HasCompleteDecoder = Op.HasCompleteDecoder; |
1276 | OS << Indent << "if (!Check(S, " << Op.Decoder |
1277 | << "(MI, insn, Address, Decoder))) { " |
1278 | << (HasCompleteDecoder ? "" : "DecodeComplete = false; " ) |
1279 | << "return MCDisassembler::Fail; }\n" ; |
1280 | break; |
1281 | } |
1282 | |
1283 | HasCompleteDecoder &= emitBinaryParser(OS, Indent, OpInfo: Op); |
1284 | } |
1285 | return HasCompleteDecoder; |
1286 | } |
1287 | |
1288 | std::pair<unsigned, bool> FilterChooser::getDecoderIndex(DecoderSet &Decoders, |
1289 | unsigned Opc) const { |
1290 | // Build up the predicate string. |
1291 | SmallString<256> Decoder; |
1292 | // FIXME: emitDecoder() function can take a buffer directly rather than |
1293 | // a stream. |
1294 | raw_svector_ostream S(Decoder); |
1295 | indent Indent(UseFnTableInDecodeToMCInst ? 2 : 4); |
1296 | bool HasCompleteDecoder = emitDecoder(OS&: S, Indent, Opc); |
1297 | |
1298 | // Using the full decoder string as the key value here is a bit |
1299 | // heavyweight, but is effective. If the string comparisons become a |
1300 | // performance concern, we can implement a mangling of the predicate |
1301 | // data easily enough with a map back to the actual string. That's |
1302 | // overkill for now, though. |
1303 | |
1304 | // Make sure the predicate is in the table. |
1305 | Decoders.insert(X: CachedHashString(Decoder)); |
1306 | // Now figure out the index for when we write out the table. |
1307 | DecoderSet::const_iterator P = find(Range&: Decoders, Val: Decoder.str()); |
1308 | return {(unsigned)(P - Decoders.begin()), HasCompleteDecoder}; |
1309 | } |
1310 | |
1311 | // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. |
1312 | bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, |
1313 | raw_ostream &OS) const { |
1314 | if (const auto *D = dyn_cast<DefInit>(Val: &Val)) { |
1315 | if (!D->getDef()->isSubClassOf(Name: "SubtargetFeature" )) |
1316 | return true; |
1317 | OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString() |
1318 | << "]" ; |
1319 | return false; |
1320 | } |
1321 | if (const auto *D = dyn_cast<DagInit>(Val: &Val)) { |
1322 | std::string Op = D->getOperator()->getAsString(); |
1323 | if (Op == "not" && D->getNumArgs() == 1) { |
1324 | OS << '!'; |
1325 | return emitPredicateMatchAux(Val: *D->getArg(Num: 0), ParenIfBinOp: true, OS); |
1326 | } |
1327 | if ((Op == "any_of" || Op == "all_of" ) && D->getNumArgs() > 0) { |
1328 | bool Paren = D->getNumArgs() > 1 && std::exchange(obj&: ParenIfBinOp, new_val: true); |
1329 | if (Paren) |
1330 | OS << '('; |
1331 | ListSeparator LS(Op == "any_of" ? " || " : " && " ); |
1332 | for (auto *Arg : D->getArgs()) { |
1333 | OS << LS; |
1334 | if (emitPredicateMatchAux(Val: *Arg, ParenIfBinOp, OS)) |
1335 | return true; |
1336 | } |
1337 | if (Paren) |
1338 | OS << ')'; |
1339 | return false; |
1340 | } |
1341 | } |
1342 | return true; |
1343 | } |
1344 | |
1345 | bool FilterChooser::emitPredicateMatch(raw_ostream &OS, unsigned Opc) const { |
1346 | const ListInit *Predicates = |
1347 | AllInstructions[Opc].EncodingDef->getValueAsListInit(FieldName: "Predicates" ); |
1348 | bool IsFirstEmission = true; |
1349 | for (unsigned i = 0; i < Predicates->size(); ++i) { |
1350 | const Record *Pred = Predicates->getElementAsRecord(Idx: i); |
1351 | if (!Pred->getValue(Name: "AssemblerMatcherPredicate" )) |
1352 | continue; |
1353 | |
1354 | if (!isa<DagInit>(Val: Pred->getValue(Name: "AssemblerCondDag" )->getValue())) |
1355 | continue; |
1356 | |
1357 | if (!IsFirstEmission) |
1358 | OS << " && " ; |
1359 | if (emitPredicateMatchAux(Val: *Pred->getValueAsDag(FieldName: "AssemblerCondDag" ), |
1360 | ParenIfBinOp: Predicates->size() > 1, OS)) |
1361 | PrintFatalError(ErrorLoc: Pred->getLoc(), Msg: "Invalid AssemblerCondDag!" ); |
1362 | IsFirstEmission = false; |
1363 | } |
1364 | return !Predicates->empty(); |
1365 | } |
1366 | |
1367 | bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { |
1368 | const ListInit *Predicates = |
1369 | AllInstructions[Opc].EncodingDef->getValueAsListInit(FieldName: "Predicates" ); |
1370 | for (unsigned i = 0; i < Predicates->size(); ++i) { |
1371 | const Record *Pred = Predicates->getElementAsRecord(Idx: i); |
1372 | if (!Pred->getValue(Name: "AssemblerMatcherPredicate" )) |
1373 | continue; |
1374 | |
1375 | if (isa<DagInit>(Val: Pred->getValue(Name: "AssemblerCondDag" )->getValue())) |
1376 | return true; |
1377 | } |
1378 | return false; |
1379 | } |
1380 | |
1381 | unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, |
1382 | StringRef Predicate) const { |
1383 | // Using the full predicate string as the key value here is a bit |
1384 | // heavyweight, but is effective. If the string comparisons become a |
1385 | // performance concern, we can implement a mangling of the predicate |
1386 | // data easily enough with a map back to the actual string. That's |
1387 | // overkill for now, though. |
1388 | |
1389 | // Make sure the predicate is in the table. |
1390 | TableInfo.Predicates.insert(X: CachedHashString(Predicate)); |
1391 | // Now figure out the index for when we write out the table. |
1392 | PredicateSet::const_iterator P = find(Range&: TableInfo.Predicates, Val: Predicate); |
1393 | return (unsigned)(P - TableInfo.Predicates.begin()); |
1394 | } |
1395 | |
1396 | void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, |
1397 | unsigned Opc) const { |
1398 | if (!doesOpcodeNeedPredicate(Opc)) |
1399 | return; |
1400 | |
1401 | // Build up the predicate string. |
1402 | SmallString<256> Predicate; |
1403 | // FIXME: emitPredicateMatch() functions can take a buffer directly rather |
1404 | // than a stream. |
1405 | raw_svector_ostream PS(Predicate); |
1406 | emitPredicateMatch(OS&: PS, Opc); |
1407 | |
1408 | // Figure out the index into the predicate table for the predicate just |
1409 | // computed. |
1410 | unsigned PIdx = getPredicateIndex(TableInfo, Predicate: PS.str()); |
1411 | |
1412 | const uint8_t DecoderOp = TableInfo.isOutermostScope() |
1413 | ? MCD::OPC_CheckPredicateOrFail |
1414 | : MCD::OPC_CheckPredicate; |
1415 | TableInfo.Table.push_back(Item: DecoderOp); |
1416 | TableInfo.Table.insertULEB128(Value: PIdx); |
1417 | |
1418 | if (DecoderOp == MCD::OPC_CheckPredicate) { |
1419 | // Push location for NumToSkip backpatching. |
1420 | TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip()); |
1421 | } |
1422 | } |
1423 | |
1424 | void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, |
1425 | unsigned Opc) const { |
1426 | const Record *EncodingDef = AllInstructions[Opc].EncodingDef; |
1427 | const RecordVal *RV = EncodingDef->getValue(Name: "SoftFail" ); |
1428 | const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(Val: RV->getValue()) : nullptr; |
1429 | |
1430 | if (!SFBits) |
1431 | return; |
1432 | const BitsInit *InstBits = EncodingDef->getValueAsBitsInit(FieldName: "Inst" ); |
1433 | |
1434 | APInt PositiveMask(BitWidth, 0ULL); |
1435 | APInt NegativeMask(BitWidth, 0ULL); |
1436 | for (unsigned i = 0; i < BitWidth; ++i) { |
1437 | BitValue B(*SFBits, i); |
1438 | BitValue IB(*InstBits, i); |
1439 | |
1440 | if (B != BitValue::BIT_TRUE) |
1441 | continue; |
1442 | |
1443 | if (IB == BitValue::BIT_FALSE) { |
1444 | // The bit is meant to be false, so emit a check to see if it is true. |
1445 | PositiveMask.setBit(i); |
1446 | } else if (IB == BitValue::BIT_TRUE) { |
1447 | // The bit is meant to be true, so emit a check to see if it is false. |
1448 | NegativeMask.setBit(i); |
1449 | } else { |
1450 | // The bit is not set; this must be an error! |
1451 | errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " |
1452 | << AllInstructions[Opc] << " is set but Inst{" << i |
1453 | << "} is unset!\n" |
1454 | << " - You can only mark a bit as SoftFail if it is fully defined" |
1455 | << " (1/0 - not '?') in Inst\n" ; |
1456 | return; |
1457 | } |
1458 | } |
1459 | |
1460 | bool NeedPositiveMask = PositiveMask.getBoolValue(); |
1461 | bool NeedNegativeMask = NegativeMask.getBoolValue(); |
1462 | |
1463 | if (!NeedPositiveMask && !NeedNegativeMask) |
1464 | return; |
1465 | |
1466 | TableInfo.Table.push_back(Item: MCD::OPC_SoftFail); |
1467 | TableInfo.Table.insertULEB128(Value: PositiveMask.getZExtValue()); |
1468 | TableInfo.Table.insertULEB128(Value: NegativeMask.getZExtValue()); |
1469 | } |
1470 | |
1471 | // Emits table entries to decode the singleton. |
1472 | void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, |
1473 | EncodingIDAndOpcode Opc) const { |
1474 | std::vector<Island> Islands; |
1475 | insn_t Insn; |
1476 | insnWithID(Insn, Opcode: Opc.EncodingID); |
1477 | |
1478 | // Look for islands of undecoded bits of the singleton. |
1479 | getIslands(Islands, Insn); |
1480 | |
1481 | // Emit the predicate table entry if one is needed. |
1482 | emitPredicateTableEntry(TableInfo, Opc: Opc.EncodingID); |
1483 | |
1484 | // Check any additional encoding fields needed. |
1485 | for (const Island &Ilnd : reverse(C&: Islands)) { |
1486 | unsigned NumBits = Ilnd.NumBits; |
1487 | assert(isUInt<8>(NumBits) && "NumBits overflowed uint8 table entry!" ); |
1488 | const uint8_t DecoderOp = TableInfo.isOutermostScope() |
1489 | ? MCD::OPC_CheckFieldOrFail |
1490 | : MCD::OPC_CheckField; |
1491 | TableInfo.Table.push_back(Item: DecoderOp); |
1492 | |
1493 | TableInfo.Table.insertULEB128(Value: Ilnd.StartBit); |
1494 | TableInfo.Table.push_back(Item: NumBits); |
1495 | TableInfo.Table.insertULEB128(Value: Ilnd.FieldVal); |
1496 | |
1497 | if (DecoderOp == MCD::OPC_CheckField) { |
1498 | // Allocate space in the table for fixup so all our relative position |
1499 | // calculations work OK even before we fully resolve the real value here. |
1500 | |
1501 | // Push location for NumToSkip backpatching. |
1502 | TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip()); |
1503 | } |
1504 | } |
1505 | |
1506 | // Check for soft failure of the match. |
1507 | emitSoftFailTableEntry(TableInfo, Opc: Opc.EncodingID); |
1508 | |
1509 | auto [DIdx, HasCompleteDecoder] = |
1510 | getDecoderIndex(Decoders&: TableInfo.Decoders, Opc: Opc.EncodingID); |
1511 | |
1512 | // Produce OPC_Decode or OPC_TryDecode opcode based on the information |
1513 | // whether the instruction decoder is complete or not. If it is complete |
1514 | // then it handles all possible values of remaining variable/unfiltered bits |
1515 | // and for any value can determine if the bitpattern is a valid instruction |
1516 | // or not. This means OPC_Decode will be the final step in the decoding |
1517 | // process. If it is not complete, then the Fail return code from the |
1518 | // decoder method indicates that additional processing should be done to see |
1519 | // if there is any other instruction that also matches the bitpattern and |
1520 | // can decode it. |
1521 | const uint8_t DecoderOp = HasCompleteDecoder ? MCD::OPC_Decode |
1522 | : (TableInfo.isOutermostScope() |
1523 | ? MCD::OPC_TryDecodeOrFail |
1524 | : MCD::OPC_TryDecode); |
1525 | TableInfo.Table.push_back(Item: DecoderOp); |
1526 | NumEncodingsSupported++; |
1527 | TableInfo.Table.insertULEB128(Value: Opc.Opcode); |
1528 | TableInfo.Table.insertULEB128(Value: DIdx); |
1529 | |
1530 | if (DecoderOp == MCD::OPC_TryDecode) { |
1531 | // Push location for NumToSkip backpatching. |
1532 | TableInfo.FixupStack.back().push_back(x: TableInfo.Table.insertNumToSkip()); |
1533 | } |
1534 | } |
1535 | |
1536 | // Emits table entries to decode the singleton, and then to decode the rest. |
1537 | void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, |
1538 | const Filter &Best) const { |
1539 | EncodingIDAndOpcode Opc = Best.getSingletonOpc(); |
1540 | |
1541 | // complex singletons need predicate checks from the first singleton |
1542 | // to refer forward to the variable filterchooser that follows. |
1543 | TableInfo.FixupStack.emplace_back(); |
1544 | |
1545 | emitSingletonTableEntry(TableInfo, Opc); |
1546 | |
1547 | resolveTableFixups(Table&: TableInfo.Table, Fixups: TableInfo.FixupStack.back(), |
1548 | DestIdx: TableInfo.Table.size()); |
1549 | TableInfo.FixupStack.pop_back(); |
1550 | |
1551 | Best.getVariableFC().emitTableEntries(TableInfo); |
1552 | } |
1553 | |
1554 | // Assign a single filter and run with it. Top level API client can initialize |
1555 | // with a single filter to start the filtering process. |
1556 | void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, |
1557 | bool mixed) { |
1558 | Filters.clear(); |
1559 | Filters.emplace_back(args&: *this, args&: startBit, args&: numBit, args: true); |
1560 | BestIndex = 0; // Sole Filter instance to choose from. |
1561 | bestFilter().recurse(); |
1562 | } |
1563 | |
1564 | // reportRegion is a helper function for filterProcessor to mark a region as |
1565 | // eligible for use as a filter region. |
1566 | void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, |
1567 | unsigned BitIndex, bool AllowMixed) { |
1568 | if (RA == ATTR_MIXED && AllowMixed) |
1569 | Filters.emplace_back(args&: *this, args&: StartBit, args: BitIndex - StartBit, args: true); |
1570 | else if (RA == ATTR_ALL_SET && !AllowMixed) |
1571 | Filters.emplace_back(args&: *this, args&: StartBit, args: BitIndex - StartBit, args: false); |
1572 | } |
1573 | |
1574 | // FilterProcessor scans the well-known encoding bits of the instructions and |
1575 | // builds up a list of candidate filters. It chooses the best filter and |
1576 | // recursively descends down the decoding tree. |
1577 | bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { |
1578 | Filters.clear(); |
1579 | BestIndex = -1; |
1580 | unsigned numInstructions = Opcodes.size(); |
1581 | |
1582 | assert(numInstructions && "Filter created with no instructions" ); |
1583 | |
1584 | // No further filtering is necessary. |
1585 | if (numInstructions == 1) |
1586 | return true; |
1587 | |
1588 | // Heuristics. See also doFilter()'s "Heuristics" comment when num of |
1589 | // instructions is 3. |
1590 | if (AllowMixed && !Greedy) { |
1591 | assert(numInstructions == 3); |
1592 | |
1593 | for (const auto &Opcode : Opcodes) { |
1594 | std::vector<Island> Islands; |
1595 | insn_t Insn; |
1596 | |
1597 | insnWithID(Insn, Opcode: Opcode.EncodingID); |
1598 | |
1599 | // Look for islands of undecoded bits of any instruction. |
1600 | if (getIslands(Islands, Insn) > 0) { |
1601 | // Found an instruction with island(s). Now just assign a filter. |
1602 | runSingleFilter(startBit: Islands[0].StartBit, numBit: Islands[0].NumBits, mixed: true); |
1603 | return true; |
1604 | } |
1605 | } |
1606 | } |
1607 | |
1608 | unsigned BitIndex; |
1609 | |
1610 | // We maintain BIT_WIDTH copies of the bitAttrs automaton. |
1611 | // The automaton consumes the corresponding bit from each |
1612 | // instruction. |
1613 | // |
1614 | // Input symbols: 0, 1, and _ (unset). |
1615 | // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. |
1616 | // Initial state: NONE. |
1617 | // |
1618 | // (NONE) ------- [01] -> (ALL_SET) |
1619 | // (NONE) ------- _ ----> (ALL_UNSET) |
1620 | // (ALL_SET) ---- [01] -> (ALL_SET) |
1621 | // (ALL_SET) ---- _ ----> (MIXED) |
1622 | // (ALL_UNSET) -- [01] -> (MIXED) |
1623 | // (ALL_UNSET) -- _ ----> (ALL_UNSET) |
1624 | // (MIXED) ------ . ----> (MIXED) |
1625 | // (FILTERED)---- . ----> (FILTERED) |
1626 | |
1627 | std::vector<bitAttr_t> bitAttrs(BitWidth, ATTR_NONE); |
1628 | |
1629 | // FILTERED bit positions provide no entropy and are not worthy of pursuing. |
1630 | // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. |
1631 | for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) |
1632 | if (FilterBitValues[BitIndex].isSet()) |
1633 | bitAttrs[BitIndex] = ATTR_FILTERED; |
1634 | |
1635 | for (const auto &OpcPair : Opcodes) { |
1636 | insn_t insn; |
1637 | |
1638 | insnWithID(Insn&: insn, Opcode: OpcPair.EncodingID); |
1639 | |
1640 | for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { |
1641 | switch (bitAttrs[BitIndex]) { |
1642 | case ATTR_NONE: |
1643 | if (insn[BitIndex] == BitValue::BIT_UNSET) |
1644 | bitAttrs[BitIndex] = ATTR_ALL_UNSET; |
1645 | else |
1646 | bitAttrs[BitIndex] = ATTR_ALL_SET; |
1647 | break; |
1648 | case ATTR_ALL_SET: |
1649 | if (insn[BitIndex] == BitValue::BIT_UNSET) |
1650 | bitAttrs[BitIndex] = ATTR_MIXED; |
1651 | break; |
1652 | case ATTR_ALL_UNSET: |
1653 | if (insn[BitIndex] != BitValue::BIT_UNSET) |
1654 | bitAttrs[BitIndex] = ATTR_MIXED; |
1655 | break; |
1656 | case ATTR_MIXED: |
1657 | case ATTR_FILTERED: |
1658 | break; |
1659 | } |
1660 | } |
1661 | } |
1662 | |
1663 | // The regionAttr automaton consumes the bitAttrs automatons' state, |
1664 | // lowest-to-highest. |
1665 | // |
1666 | // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) |
1667 | // States: NONE, ALL_SET, MIXED |
1668 | // Initial state: NONE |
1669 | // |
1670 | // (NONE) ----- F --> (NONE) |
1671 | // (NONE) ----- S --> (ALL_SET) ; and set region start |
1672 | // (NONE) ----- U --> (NONE) |
1673 | // (NONE) ----- M --> (MIXED) ; and set region start |
1674 | // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region |
1675 | // (ALL_SET) -- S --> (ALL_SET) |
1676 | // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region |
1677 | // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region |
1678 | // (MIXED) ---- F --> (NONE) ; and report a MIXED region |
1679 | // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region |
1680 | // (MIXED) ---- U --> (NONE) ; and report a MIXED region |
1681 | // (MIXED) ---- M --> (MIXED) |
1682 | |
1683 | bitAttr_t RA = ATTR_NONE; |
1684 | unsigned StartBit = 0; |
1685 | |
1686 | for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { |
1687 | bitAttr_t bitAttr = bitAttrs[BitIndex]; |
1688 | |
1689 | assert(bitAttr != ATTR_NONE && "Bit without attributes" ); |
1690 | |
1691 | switch (RA) { |
1692 | case ATTR_NONE: |
1693 | switch (bitAttr) { |
1694 | case ATTR_FILTERED: |
1695 | break; |
1696 | case ATTR_ALL_SET: |
1697 | StartBit = BitIndex; |
1698 | RA = ATTR_ALL_SET; |
1699 | break; |
1700 | case ATTR_ALL_UNSET: |
1701 | break; |
1702 | case ATTR_MIXED: |
1703 | StartBit = BitIndex; |
1704 | RA = ATTR_MIXED; |
1705 | break; |
1706 | default: |
1707 | llvm_unreachable("Unexpected bitAttr!" ); |
1708 | } |
1709 | break; |
1710 | case ATTR_ALL_SET: |
1711 | switch (bitAttr) { |
1712 | case ATTR_FILTERED: |
1713 | reportRegion(RA, StartBit, BitIndex, AllowMixed); |
1714 | RA = ATTR_NONE; |
1715 | break; |
1716 | case ATTR_ALL_SET: |
1717 | break; |
1718 | case ATTR_ALL_UNSET: |
1719 | reportRegion(RA, StartBit, BitIndex, AllowMixed); |
1720 | RA = ATTR_NONE; |
1721 | break; |
1722 | case ATTR_MIXED: |
1723 | reportRegion(RA, StartBit, BitIndex, AllowMixed); |
1724 | StartBit = BitIndex; |
1725 | RA = ATTR_MIXED; |
1726 | break; |
1727 | default: |
1728 | llvm_unreachable("Unexpected bitAttr!" ); |
1729 | } |
1730 | break; |
1731 | case ATTR_MIXED: |
1732 | switch (bitAttr) { |
1733 | case ATTR_FILTERED: |
1734 | reportRegion(RA, StartBit, BitIndex, AllowMixed); |
1735 | StartBit = BitIndex; |
1736 | RA = ATTR_NONE; |
1737 | break; |
1738 | case ATTR_ALL_SET: |
1739 | reportRegion(RA, StartBit, BitIndex, AllowMixed); |
1740 | StartBit = BitIndex; |
1741 | RA = ATTR_ALL_SET; |
1742 | break; |
1743 | case ATTR_ALL_UNSET: |
1744 | reportRegion(RA, StartBit, BitIndex, AllowMixed); |
1745 | RA = ATTR_NONE; |
1746 | break; |
1747 | case ATTR_MIXED: |
1748 | break; |
1749 | default: |
1750 | llvm_unreachable("Unexpected bitAttr!" ); |
1751 | } |
1752 | break; |
1753 | case ATTR_ALL_UNSET: |
1754 | llvm_unreachable("regionAttr state machine has no ATTR_UNSET state" ); |
1755 | case ATTR_FILTERED: |
1756 | llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state" ); |
1757 | } |
1758 | } |
1759 | |
1760 | // At the end, if we're still in ALL_SET or MIXED states, report a region |
1761 | switch (RA) { |
1762 | case ATTR_NONE: |
1763 | break; |
1764 | case ATTR_FILTERED: |
1765 | break; |
1766 | case ATTR_ALL_SET: |
1767 | reportRegion(RA, StartBit, BitIndex, AllowMixed); |
1768 | break; |
1769 | case ATTR_ALL_UNSET: |
1770 | break; |
1771 | case ATTR_MIXED: |
1772 | reportRegion(RA, StartBit, BitIndex, AllowMixed); |
1773 | break; |
1774 | } |
1775 | |
1776 | // We have finished with the filter processings. Now it's time to choose |
1777 | // the best performing filter. |
1778 | BestIndex = 0; |
1779 | bool AllUseless = true; |
1780 | unsigned BestScore = 0; |
1781 | |
1782 | for (const auto &[Idx, Filter] : enumerate(First&: Filters)) { |
1783 | unsigned Usefulness = Filter.usefulness(); |
1784 | |
1785 | if (Usefulness) |
1786 | AllUseless = false; |
1787 | |
1788 | if (Usefulness > BestScore) { |
1789 | BestIndex = Idx; |
1790 | BestScore = Usefulness; |
1791 | } |
1792 | } |
1793 | |
1794 | if (!AllUseless) |
1795 | bestFilter().recurse(); |
1796 | |
1797 | return !AllUseless; |
1798 | } // end of FilterChooser::filterProcessor(bool) |
1799 | |
1800 | // Decides on the best configuration of filter(s) to use in order to decode |
1801 | // the instructions. A conflict of instructions may occur, in which case we |
1802 | // dump the conflict set to the standard error. |
1803 | void FilterChooser::doFilter() { |
1804 | unsigned Num = Opcodes.size(); |
1805 | assert(Num && "FilterChooser created with no instructions" ); |
1806 | |
1807 | // Try regions of consecutive known bit values first. |
1808 | if (filterProcessor(AllowMixed: false)) |
1809 | return; |
1810 | |
1811 | // Then regions of mixed bits (both known and unitialized bit values allowed). |
1812 | if (filterProcessor(AllowMixed: true)) |
1813 | return; |
1814 | |
1815 | // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where |
1816 | // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a |
1817 | // well-known encoding pattern. In such case, we backtrack and scan for the |
1818 | // the very first consecutive ATTR_ALL_SET region and assign a filter to it. |
1819 | if (Num == 3 && filterProcessor(AllowMixed: true, Greedy: false)) |
1820 | return; |
1821 | |
1822 | // If we come to here, the instruction decoding has failed. |
1823 | // Set the BestIndex to -1 to indicate so. |
1824 | BestIndex = -1; |
1825 | } |
1826 | |
1827 | // emitTableEntries - Emit state machine entries to decode our share of |
1828 | // instructions. |
1829 | void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { |
1830 | if (Opcodes.size() == 1) { |
1831 | // There is only one instruction in the set, which is great! |
1832 | // Call emitSingletonDecoder() to see whether there are any remaining |
1833 | // encodings bits. |
1834 | emitSingletonTableEntry(TableInfo, Opc: Opcodes[0]); |
1835 | return; |
1836 | } |
1837 | |
1838 | // Choose the best filter to do the decodings! |
1839 | if (BestIndex != -1) { |
1840 | const Filter &Best = Filters[BestIndex]; |
1841 | if (Best.getNumFiltered() == 1) |
1842 | emitSingletonTableEntry(TableInfo, Best); |
1843 | else |
1844 | Best.emitTableEntry(TableInfo); |
1845 | return; |
1846 | } |
1847 | |
1848 | // We don't know how to decode these instructions! Dump the |
1849 | // conflict set and bail. |
1850 | |
1851 | // Print out useful conflict information for postmortem analysis. |
1852 | errs() << "Decoding Conflict:\n" ; |
1853 | |
1854 | dumpStack(OS&: errs(), prefix: "\t\t" ); |
1855 | |
1856 | for (auto Opcode : Opcodes) { |
1857 | const EncodingAndInst &Enc = AllInstructions[Opcode.EncodingID]; |
1858 | errs() << '\t' << Enc << ' '; |
1859 | dumpBits(OS&: errs(), Bits: getBitsField(Def: *Enc.EncodingDef, FieldName: "Inst" )); |
1860 | errs() << '\n'; |
1861 | } |
1862 | PrintFatalError(Msg: "Decoding conflict encountered" ); |
1863 | } |
1864 | |
1865 | static std::string findOperandDecoderMethod(const Record *Record) { |
1866 | std::string Decoder; |
1867 | |
1868 | const RecordVal *DecoderString = Record->getValue(Name: "DecoderMethod" ); |
1869 | const StringInit *String = |
1870 | DecoderString ? dyn_cast<StringInit>(Val: DecoderString->getValue()) : nullptr; |
1871 | if (String) { |
1872 | Decoder = String->getValue().str(); |
1873 | if (!Decoder.empty()) |
1874 | return Decoder; |
1875 | } |
1876 | |
1877 | if (Record->isSubClassOf(Name: "RegisterOperand" )) |
1878 | // Allows use of a DecoderMethod in referenced RegisterClass if set. |
1879 | return findOperandDecoderMethod(Record: Record->getValueAsDef(FieldName: "RegClass" )); |
1880 | |
1881 | if (Record->isSubClassOf(Name: "RegisterClass" )) { |
1882 | Decoder = "Decode" + Record->getName().str() + "RegisterClass" ; |
1883 | } else if (Record->isSubClassOf(Name: "PointerLikeRegClass" )) { |
1884 | Decoder = "DecodePointerLikeRegClass" + |
1885 | utostr(X: Record->getValueAsInt(FieldName: "RegClassKind" )); |
1886 | } |
1887 | |
1888 | return Decoder; |
1889 | } |
1890 | |
1891 | OperandInfo getOpInfo(const Record *TypeRecord) { |
1892 | const RecordVal *HasCompleteDecoderVal = |
1893 | TypeRecord->getValue(Name: "hasCompleteDecoder" ); |
1894 | const BitInit *HasCompleteDecoderBit = |
1895 | HasCompleteDecoderVal |
1896 | ? dyn_cast<BitInit>(Val: HasCompleteDecoderVal->getValue()) |
1897 | : nullptr; |
1898 | bool HasCompleteDecoder = |
1899 | HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true; |
1900 | |
1901 | return OperandInfo(findOperandDecoderMethod(Record: TypeRecord), HasCompleteDecoder); |
1902 | } |
1903 | |
1904 | static void parseVarLenInstOperand(const Record &Def, |
1905 | std::vector<OperandInfo> &Operands, |
1906 | const CodeGenInstruction &CGI) { |
1907 | |
1908 | const RecordVal *RV = Def.getValue(Name: "Inst" ); |
1909 | VarLenInst VLI(cast<DagInit>(Val: RV->getValue()), RV); |
1910 | SmallVector<int> TiedTo; |
1911 | |
1912 | for (const auto &[Idx, Op] : enumerate(First: CGI.Operands)) { |
1913 | if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0) |
1914 | for (auto *Arg : Op.MIOperandInfo->getArgs()) |
1915 | Operands.push_back(x: getOpInfo(TypeRecord: cast<DefInit>(Val: Arg)->getDef())); |
1916 | else |
1917 | Operands.push_back(x: getOpInfo(TypeRecord: Op.Rec)); |
1918 | |
1919 | int TiedReg = Op.getTiedRegister(); |
1920 | TiedTo.push_back(Elt: -1); |
1921 | if (TiedReg != -1) { |
1922 | TiedTo[Idx] = TiedReg; |
1923 | TiedTo[TiedReg] = Idx; |
1924 | } |
1925 | } |
1926 | |
1927 | unsigned CurrBitPos = 0; |
1928 | for (const auto &EncodingSegment : VLI) { |
1929 | unsigned Offset = 0; |
1930 | StringRef OpName; |
1931 | |
1932 | if (const StringInit *SI = dyn_cast<StringInit>(Val: EncodingSegment.Value)) { |
1933 | OpName = SI->getValue(); |
1934 | } else if (const DagInit *DI = dyn_cast<DagInit>(Val: EncodingSegment.Value)) { |
1935 | OpName = cast<StringInit>(Val: DI->getArg(Num: 0))->getValue(); |
1936 | Offset = cast<IntInit>(Val: DI->getArg(Num: 2))->getValue(); |
1937 | } |
1938 | |
1939 | if (!OpName.empty()) { |
1940 | auto OpSubOpPair = |
1941 | const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName( |
1942 | Op: OpName); |
1943 | unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(Op: OpSubOpPair); |
1944 | Operands[OpIdx].addField(Base: CurrBitPos, Width: EncodingSegment.BitWidth, Offset); |
1945 | if (!EncodingSegment.CustomDecoder.empty()) |
1946 | Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str(); |
1947 | |
1948 | int TiedReg = TiedTo[OpSubOpPair.first]; |
1949 | if (TiedReg != -1) { |
1950 | unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber( |
1951 | Op: {TiedReg, OpSubOpPair.second}); |
1952 | Operands[OpIdx].addField(Base: CurrBitPos, Width: EncodingSegment.BitWidth, Offset); |
1953 | } |
1954 | } |
1955 | |
1956 | CurrBitPos += EncodingSegment.BitWidth; |
1957 | } |
1958 | } |
1959 | |
1960 | static void debugDumpRecord(const Record &Rec) { |
1961 | // Dump the record, so we can see what's going on. |
1962 | PrintNote(PrintMsg: [&Rec](raw_ostream &OS) { |
1963 | OS << "Dumping record for previous error:\n" ; |
1964 | OS << Rec; |
1965 | }); |
1966 | } |
1967 | |
1968 | /// For an operand field named OpName: populate OpInfo.InitValue with the |
1969 | /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to |
1970 | /// insert from the decoded instruction. |
1971 | static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, |
1972 | std::map<StringRef, StringRef> &TiedNames, |
1973 | StringRef OpName, OperandInfo &OpInfo) { |
1974 | // Some bits of the operand may be required to be 1 depending on the |
1975 | // instruction's encoding. Collect those bits. |
1976 | if (const RecordVal *EncodedValue = EncodingDef.getValue(Name: OpName)) |
1977 | if (const BitsInit *OpBits = dyn_cast<BitsInit>(Val: EncodedValue->getValue())) |
1978 | for (unsigned I = 0; I < OpBits->getNumBits(); ++I) |
1979 | if (const BitInit *OpBit = dyn_cast<BitInit>(Val: OpBits->getBit(Bit: I))) |
1980 | if (OpBit->getValue()) |
1981 | OpInfo.InitValue |= 1ULL << I; |
1982 | |
1983 | for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) { |
1984 | const VarInit *Var; |
1985 | unsigned Offset = 0; |
1986 | for (; J != Bits.getNumBits(); ++J) { |
1987 | const VarBitInit *BJ = dyn_cast<VarBitInit>(Val: Bits.getBit(Bit: J)); |
1988 | if (BJ) { |
1989 | Var = dyn_cast<VarInit>(Val: BJ->getBitVar()); |
1990 | if (I == J) |
1991 | Offset = BJ->getBitNum(); |
1992 | else if (BJ->getBitNum() != Offset + J - I) |
1993 | break; |
1994 | } else { |
1995 | Var = dyn_cast<VarInit>(Val: Bits.getBit(Bit: J)); |
1996 | } |
1997 | if (!Var || |
1998 | (Var->getName() != OpName && Var->getName() != TiedNames[OpName])) |
1999 | break; |
2000 | } |
2001 | if (I == J) |
2002 | ++J; |
2003 | else |
2004 | OpInfo.addField(Base: I, Width: J - I, Offset); |
2005 | } |
2006 | } |
2007 | |
2008 | static unsigned |
2009 | populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef, |
2010 | const CodeGenInstruction &CGI, unsigned Opc, |
2011 | std::map<unsigned, std::vector<OperandInfo>> &Operands, |
2012 | bool IsVarLenInst) { |
2013 | const Record &Def = *CGI.TheDef; |
2014 | // If all the bit positions are not specified; do not decode this instruction. |
2015 | // We are bound to fail! For proper disassembly, the well-known encoding bits |
2016 | // of the instruction must be fully specified. |
2017 | |
2018 | const BitsInit &Bits = getBitsField(Def: EncodingDef, FieldName: "Inst" ); |
2019 | if (Bits.allInComplete()) |
2020 | return 0; |
2021 | |
2022 | std::vector<OperandInfo> InsnOperands; |
2023 | |
2024 | // If the instruction has specified a custom decoding hook, use that instead |
2025 | // of trying to auto-generate the decoder. |
2026 | StringRef InstDecoder = EncodingDef.getValueAsString(FieldName: "DecoderMethod" ); |
2027 | if (!InstDecoder.empty()) { |
2028 | bool HasCompleteInstDecoder = |
2029 | EncodingDef.getValueAsBit(FieldName: "hasCompleteDecoder" ); |
2030 | InsnOperands.push_back( |
2031 | x: OperandInfo(InstDecoder.str(), HasCompleteInstDecoder)); |
2032 | Operands[Opc] = std::move(InsnOperands); |
2033 | return Bits.getNumBits(); |
2034 | } |
2035 | |
2036 | // Generate a description of the operand of the instruction that we know |
2037 | // how to decode automatically. |
2038 | // FIXME: We'll need to have a way to manually override this as needed. |
2039 | |
2040 | // Gather the outputs/inputs of the instruction, so we can find their |
2041 | // positions in the encoding. This assumes for now that they appear in the |
2042 | // MCInst in the order that they're listed. |
2043 | std::vector<std::pair<const Init *, StringRef>> InOutOperands; |
2044 | const DagInit *Out = Def.getValueAsDag(FieldName: "OutOperandList" ); |
2045 | const DagInit *In = Def.getValueAsDag(FieldName: "InOperandList" ); |
2046 | for (const auto &[Idx, Arg] : enumerate(First: Out->getArgs())) |
2047 | InOutOperands.emplace_back(args: Arg, args: Out->getArgNameStr(Num: Idx)); |
2048 | for (const auto &[Idx, Arg] : enumerate(First: In->getArgs())) |
2049 | InOutOperands.emplace_back(args: Arg, args: In->getArgNameStr(Num: Idx)); |
2050 | |
2051 | // Search for tied operands, so that we can correctly instantiate |
2052 | // operands that are not explicitly represented in the encoding. |
2053 | std::map<StringRef, StringRef> TiedNames; |
2054 | for (const auto &Op : CGI.Operands) { |
2055 | for (const auto &[J, CI] : enumerate(First: Op.Constraints)) { |
2056 | if (!CI.isTied()) |
2057 | continue; |
2058 | std::pair<unsigned, unsigned> SO = |
2059 | CGI.Operands.getSubOperandNumber(Op: CI.getTiedOperand()); |
2060 | StringRef TiedName = CGI.Operands[SO.first].SubOpNames[SO.second]; |
2061 | if (TiedName.empty()) |
2062 | TiedName = CGI.Operands[SO.first].Name; |
2063 | StringRef MyName = Op.SubOpNames[J]; |
2064 | if (MyName.empty()) |
2065 | MyName = Op.Name; |
2066 | |
2067 | TiedNames[MyName] = TiedName; |
2068 | TiedNames[TiedName] = MyName; |
2069 | } |
2070 | } |
2071 | |
2072 | if (IsVarLenInst) { |
2073 | parseVarLenInstOperand(Def: EncodingDef, Operands&: InsnOperands, CGI); |
2074 | } else { |
2075 | // For each operand, see if we can figure out where it is encoded. |
2076 | for (const auto &Op : InOutOperands) { |
2077 | const Init *OpInit = Op.first; |
2078 | StringRef OpName = Op.second; |
2079 | |
2080 | // We're ready to find the instruction encoding locations for this |
2081 | // operand. |
2082 | |
2083 | // First, find the operand type ("OpInit"), and sub-op names |
2084 | // ("SubArgDag") if present. |
2085 | const DagInit *SubArgDag = dyn_cast<DagInit>(Val: OpInit); |
2086 | if (SubArgDag) |
2087 | OpInit = SubArgDag->getOperator(); |
2088 | const Record *OpTypeRec = cast<DefInit>(Val: OpInit)->getDef(); |
2089 | // Lookup the sub-operands from the operand type record (note that only |
2090 | // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). |
2091 | const DagInit *SubOps = OpTypeRec->isSubClassOf(Name: "Operand" ) |
2092 | ? OpTypeRec->getValueAsDag(FieldName: "MIOperandInfo" ) |
2093 | : nullptr; |
2094 | |
2095 | // Lookup the decoder method and construct a new OperandInfo to hold our |
2096 | // result. |
2097 | OperandInfo OpInfo = getOpInfo(TypeRecord: OpTypeRec); |
2098 | |
2099 | // If we have named sub-operands... |
2100 | if (SubArgDag) { |
2101 | // Then there should not be a custom decoder specified on the top-level |
2102 | // type. |
2103 | if (!OpInfo.Decoder.empty()) { |
2104 | PrintError(ErrorLoc: EncodingDef.getLoc(), |
2105 | Msg: "DecoderEmitter: operand \"" + OpName + "\" has type \"" + |
2106 | OpInit->getAsString() + |
2107 | "\" with a custom DecoderMethod, but also named " |
2108 | "sub-operands." ); |
2109 | continue; |
2110 | } |
2111 | |
2112 | // Decode each of the sub-ops separately. |
2113 | assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs()); |
2114 | for (const auto &[I, Arg] : enumerate(First: SubOps->getArgs())) { |
2115 | StringRef SubOpName = SubArgDag->getArgNameStr(Num: I); |
2116 | OperandInfo SubOpInfo = getOpInfo(TypeRecord: cast<DefInit>(Val: Arg)->getDef()); |
2117 | |
2118 | addOneOperandFields(EncodingDef, Bits, TiedNames, OpName: SubOpName, |
2119 | OpInfo&: SubOpInfo); |
2120 | InsnOperands.push_back(x: std::move(SubOpInfo)); |
2121 | } |
2122 | continue; |
2123 | } |
2124 | |
2125 | // Otherwise, if we have an operand with sub-operands, but they aren't |
2126 | // named... |
2127 | if (SubOps && OpInfo.Decoder.empty()) { |
2128 | // If it's a single sub-operand, and no custom decoder, use the decoder |
2129 | // from the one sub-operand. |
2130 | if (SubOps->getNumArgs() == 1) |
2131 | OpInfo = getOpInfo(TypeRecord: cast<DefInit>(Val: SubOps->getArg(Num: 0))->getDef()); |
2132 | |
2133 | // If we have multiple sub-ops, there'd better have a custom |
2134 | // decoder. (Otherwise we don't know how to populate them properly...) |
2135 | if (SubOps->getNumArgs() > 1) { |
2136 | PrintError(ErrorLoc: EncodingDef.getLoc(), |
2137 | Msg: "DecoderEmitter: operand \"" + OpName + |
2138 | "\" uses MIOperandInfo with multiple ops, but doesn't " |
2139 | "have a custom decoder!" ); |
2140 | debugDumpRecord(Rec: EncodingDef); |
2141 | continue; |
2142 | } |
2143 | } |
2144 | |
2145 | addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); |
2146 | // FIXME: it should be an error not to find a definition for a given |
2147 | // operand, rather than just failing to add it to the resulting |
2148 | // instruction! (This is a longstanding bug, which will be addressed in an |
2149 | // upcoming change.) |
2150 | if (OpInfo.numFields() > 0) |
2151 | InsnOperands.push_back(x: std::move(OpInfo)); |
2152 | } |
2153 | } |
2154 | Operands[Opc] = std::move(InsnOperands); |
2155 | |
2156 | #if 0 |
2157 | LLVM_DEBUG({ |
2158 | // Dumps the instruction encoding bits. |
2159 | dumpBits(errs(), Bits); |
2160 | |
2161 | errs() << '\n'; |
2162 | |
2163 | // Dumps the list of operand info. |
2164 | for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { |
2165 | const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; |
2166 | const std::string &OperandName = Info.Name; |
2167 | const Record &OperandDef = *Info.Rec; |
2168 | |
2169 | errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n" ; |
2170 | } |
2171 | }); |
2172 | #endif |
2173 | |
2174 | return Bits.getNumBits(); |
2175 | } |
2176 | |
2177 | // emitFieldFromInstruction - Emit the templated helper function |
2178 | // fieldFromInstruction(). |
2179 | // On Windows we make sure that this function is not inlined when |
2180 | // using the VS compiler. It has a bug which causes the function |
2181 | // to be optimized out in some circumstances. See llvm.org/pr38292 |
2182 | static void emitFieldFromInstruction(formatted_raw_ostream &OS) { |
2183 | OS << R"( |
2184 | // Helper functions for extracting fields from encoded instructions. |
2185 | // InsnType must either be integral or an APInt-like object that must: |
2186 | // * be default-constructible and copy-constructible |
2187 | // * be constructible from an APInt (this can be private) |
2188 | // * Support insertBits(bits, startBit, numBits) |
2189 | // * Support extractBitsAsZExtValue(numBits, startBit) |
2190 | // * Support the ~, &, ==, and != operators with other objects of the same type |
2191 | // * Support the != and bitwise & with uint64_t |
2192 | // * Support put (<<) to raw_ostream& |
2193 | template <typename InsnType> |
2194 | #if defined(_MSC_VER) && !defined(__clang__) |
2195 | __declspec(noinline) |
2196 | #endif |
2197 | static std::enable_if_t<std::is_integral<InsnType>::value, InsnType> |
2198 | fieldFromInstruction(const InsnType &insn, unsigned startBit, |
2199 | unsigned numBits) { |
2200 | assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!"); |
2201 | assert(startBit + numBits <= (sizeof(InsnType) * 8) && |
2202 | "Instruction field out of bounds!"); |
2203 | InsnType fieldMask; |
2204 | if (numBits == sizeof(InsnType) * 8) |
2205 | fieldMask = (InsnType)(-1LL); |
2206 | else |
2207 | fieldMask = (((InsnType)1 << numBits) - 1) << startBit; |
2208 | return (insn & fieldMask) >> startBit; |
2209 | } |
2210 | |
2211 | template <typename InsnType> |
2212 | static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t> |
2213 | fieldFromInstruction(const InsnType &insn, unsigned startBit, |
2214 | unsigned numBits) { |
2215 | return insn.extractBitsAsZExtValue(numBits, startBit); |
2216 | } |
2217 | )" ; |
2218 | } |
2219 | |
2220 | // emitInsertBits - Emit the templated helper function insertBits(). |
2221 | static void emitInsertBits(formatted_raw_ostream &OS) { |
2222 | OS << R"( |
2223 | // Helper function for inserting bits extracted from an encoded instruction into |
2224 | // a field. |
2225 | template <typename InsnType> |
2226 | static void insertBits(InsnType &field, InsnType bits, unsigned startBit, |
2227 | unsigned numBits) { |
2228 | if constexpr (std::is_integral<InsnType>::value) { |
2229 | assert(startBit + numBits <= sizeof field * 8); |
2230 | (void)numBits; |
2231 | field |= (InsnType)bits << startBit; |
2232 | } else { |
2233 | field.insertBits(bits, startBit, numBits); |
2234 | } |
2235 | } |
2236 | )" ; |
2237 | } |
2238 | |
2239 | // emitDecodeInstruction - Emit the templated helper function |
2240 | // decodeInstruction(). |
2241 | static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst, |
2242 | unsigned OpcodeMask) { |
2243 | const bool HasTryDecode = OpcodeMask & ((1 << MCD::OPC_TryDecode) | |
2244 | (1 << MCD::OPC_TryDecodeOrFail)); |
2245 | const bool HasCheckPredicate = |
2246 | OpcodeMask & |
2247 | ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail)); |
2248 | const bool HasSoftFail = OpcodeMask & (1 << MCD::OPC_SoftFail); |
2249 | |
2250 | OS << R"( |
2251 | static unsigned decodeNumToSkip(const uint8_t *&Ptr) { |
2252 | unsigned NumToSkip = *Ptr++; |
2253 | NumToSkip |= (*Ptr++) << 8; |
2254 | )" ; |
2255 | if (getNumToSkipInBytes() == 3) |
2256 | OS << " NumToSkip |= (*Ptr++) << 16;\n" ; |
2257 | OS << R"( return NumToSkip; |
2258 | } |
2259 | |
2260 | template <typename InsnType> |
2261 | static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI, |
2262 | InsnType insn, uint64_t Address, |
2263 | const MCDisassembler *DisAsm, |
2264 | const MCSubtargetInfo &STI)" ; |
2265 | if (IsVarLenInst) { |
2266 | OS << ",\n " |
2267 | "llvm::function_ref<void(APInt &, uint64_t)> makeUp" ; |
2268 | } |
2269 | OS << ") {\n" ; |
2270 | if (HasCheckPredicate) |
2271 | OS << " const FeatureBitset &Bits = STI.getFeatureBits();\n" ; |
2272 | |
2273 | OS << R"( |
2274 | const uint8_t *Ptr = DecodeTable; |
2275 | uint64_t CurFieldValue = 0; |
2276 | DecodeStatus S = MCDisassembler::Success; |
2277 | while (true) { |
2278 | ptrdiff_t Loc = Ptr - DecodeTable; |
2279 | const uint8_t DecoderOp = *Ptr++; |
2280 | switch (DecoderOp) { |
2281 | default: |
2282 | errs() << Loc << ": Unexpected decode table opcode: " |
2283 | << (int)DecoderOp << '\n'; |
2284 | return MCDisassembler::Fail; |
2285 | case MCD::OPC_ExtractField: { |
2286 | // Decode the start value. |
2287 | unsigned Start = decodeULEB128AndIncUnsafe(Ptr); |
2288 | unsigned Len = *Ptr++;)" ; |
2289 | if (IsVarLenInst) |
2290 | OS << "\n makeUp(insn, Start + Len);" ; |
2291 | OS << R"( |
2292 | CurFieldValue = fieldFromInstruction(insn, Start, Len); |
2293 | LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", " |
2294 | << Len << "): " << CurFieldValue << "\n"); |
2295 | break; |
2296 | } |
2297 | case MCD::OPC_FilterValue: |
2298 | case MCD::OPC_FilterValueOrFail: { |
2299 | bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail; |
2300 | // Decode the field value. |
2301 | uint64_t Val = decodeULEB128AndIncUnsafe(Ptr); |
2302 | bool Failed = Val != CurFieldValue; |
2303 | unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr); |
2304 | |
2305 | // Note: Print NumToSkip even for OPC_FilterValueOrFail to simplify debug |
2306 | // prints. |
2307 | LLVM_DEBUG({ |
2308 | StringRef OpName = IsFail ? "OPC_FilterValueOrFail" : "OPC_FilterValue"; |
2309 | dbgs() << Loc << ": " << OpName << '(' << Val << ", " << NumToSkip |
2310 | << ") " << (Failed ? "FAIL:" : "PASS:") |
2311 | << " continuing at " << (Ptr - DecodeTable) << '\n'; |
2312 | }); |
2313 | |
2314 | // Perform the filter operation. |
2315 | if (Failed) { |
2316 | if (IsFail) |
2317 | return MCDisassembler::Fail; |
2318 | Ptr += NumToSkip; |
2319 | } |
2320 | break; |
2321 | } |
2322 | case MCD::OPC_CheckField: |
2323 | case MCD::OPC_CheckFieldOrFail: { |
2324 | bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail; |
2325 | // Decode the start value. |
2326 | unsigned Start = decodeULEB128AndIncUnsafe(Ptr); |
2327 | unsigned Len = *Ptr;)" ; |
2328 | if (IsVarLenInst) |
2329 | OS << "\n makeUp(insn, Start + Len);" ; |
2330 | OS << R"( |
2331 | uint64_t FieldValue = fieldFromInstruction(insn, Start, Len); |
2332 | // Decode the field value. |
2333 | unsigned PtrLen = 0; |
2334 | uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen); |
2335 | Ptr += PtrLen; |
2336 | bool Failed = ExpectedValue != FieldValue; |
2337 | unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr); |
2338 | |
2339 | LLVM_DEBUG({ |
2340 | StringRef OpName = IsFail ? "OPC_CheckFieldOrFail" : "OPC_CheckField"; |
2341 | dbgs() << Loc << ": " << OpName << '(' << Start << ", " << Len << ", " |
2342 | << ExpectedValue << ", " << NumToSkip << "): FieldValue = " |
2343 | << FieldValue << ", ExpectedValue = " << ExpectedValue << ": " |
2344 | << (Failed ? "FAIL\n" : "PASS\n"); |
2345 | }); |
2346 | |
2347 | // If the actual and expected values don't match, skip or fail. |
2348 | if (Failed) { |
2349 | if (IsFail) |
2350 | return MCDisassembler::Fail; |
2351 | Ptr += NumToSkip; |
2352 | } |
2353 | break; |
2354 | })" ; |
2355 | if (HasCheckPredicate) { |
2356 | OS << R"( |
2357 | case MCD::OPC_CheckPredicate: |
2358 | case MCD::OPC_CheckPredicateOrFail: { |
2359 | bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail; |
2360 | // Decode the Predicate Index value. |
2361 | unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr); |
2362 | unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr); |
2363 | // Check the predicate. |
2364 | bool Failed = !checkDecoderPredicate(PIdx, Bits); |
2365 | |
2366 | LLVM_DEBUG({ |
2367 | StringRef OpName = IsFail ? "OPC_CheckPredicateOrFail" : "OPC_CheckPredicate"; |
2368 | dbgs() << Loc << ": " << OpName << '(' << PIdx << ", " << NumToSkip |
2369 | << "): " << (Failed ? "FAIL\n" : "PASS\n"); |
2370 | }); |
2371 | |
2372 | if (Failed) { |
2373 | if (IsFail) |
2374 | return MCDisassembler::Fail; |
2375 | Ptr += NumToSkip; |
2376 | } |
2377 | break; |
2378 | })" ; |
2379 | } |
2380 | OS << R"( |
2381 | case MCD::OPC_Decode: { |
2382 | // Decode the Opcode value. |
2383 | unsigned Opc = decodeULEB128AndIncUnsafe(Ptr); |
2384 | unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr); |
2385 | |
2386 | MI.clear(); |
2387 | MI.setOpcode(Opc); |
2388 | bool DecodeComplete;)" ; |
2389 | if (IsVarLenInst) { |
2390 | OS << "\n unsigned Len = InstrLenTable[Opc];\n" |
2391 | << " makeUp(insn, Len);" ; |
2392 | } |
2393 | OS << R"( |
2394 | S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm, DecodeComplete); |
2395 | assert(DecodeComplete); |
2396 | |
2397 | LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc |
2398 | << ", using decoder " << DecodeIdx << ": " |
2399 | << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n")); |
2400 | return S; |
2401 | })" ; |
2402 | if (HasTryDecode) { |
2403 | OS << R"( |
2404 | case MCD::OPC_TryDecode: |
2405 | case MCD::OPC_TryDecodeOrFail: { |
2406 | bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail; |
2407 | // Decode the Opcode value. |
2408 | unsigned Opc = decodeULEB128AndIncUnsafe(Ptr); |
2409 | unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr); |
2410 | unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr); |
2411 | |
2412 | // Perform the decode operation. |
2413 | MCInst TmpMI; |
2414 | TmpMI.setOpcode(Opc); |
2415 | bool DecodeComplete; |
2416 | S = decodeToMCInst(DecodeIdx, S, insn, TmpMI, Address, DisAsm, DecodeComplete); |
2417 | LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc |
2418 | << ", using decoder " << DecodeIdx << ": "); |
2419 | |
2420 | if (DecodeComplete) { |
2421 | // Decoding complete. |
2422 | LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n")); |
2423 | MI = TmpMI; |
2424 | return S; |
2425 | } |
2426 | assert(S == MCDisassembler::Fail); |
2427 | if (IsFail) { |
2428 | LLVM_DEBUG(dbgs() << "FAIL: returning FAIL\n"); |
2429 | return MCDisassembler::Fail; |
2430 | } |
2431 | // If the decoding was incomplete, skip. |
2432 | Ptr += NumToSkip; |
2433 | LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n"); |
2434 | // Reset decode status. This also drops a SoftFail status that could be |
2435 | // set before the decode attempt. |
2436 | S = MCDisassembler::Success; |
2437 | break; |
2438 | })" ; |
2439 | } |
2440 | if (HasSoftFail) { |
2441 | OS << R"( |
2442 | case MCD::OPC_SoftFail: { |
2443 | // Decode the mask values. |
2444 | uint64_t PositiveMask = decodeULEB128AndIncUnsafe(Ptr); |
2445 | uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr); |
2446 | bool Failed = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0; |
2447 | if (Failed) |
2448 | S = MCDisassembler::SoftFail; |
2449 | LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Failed ? "FAIL\n" : "PASS\n")); |
2450 | break; |
2451 | })" ; |
2452 | } |
2453 | OS << R"( |
2454 | case MCD::OPC_Fail: { |
2455 | LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n"); |
2456 | return MCDisassembler::Fail; |
2457 | } |
2458 | } |
2459 | } |
2460 | llvm_unreachable("bogosity detected in disassembler state machine!"); |
2461 | } |
2462 | |
2463 | )" ; |
2464 | } |
2465 | |
2466 | // Helper to propagate SoftFail status. Returns false if the status is Fail; |
2467 | // callers are expected to early-exit in that condition. (Note, the '&' operator |
2468 | // is correct to propagate the values of this enum; see comment on 'enum |
2469 | // DecodeStatus'.) |
2470 | static void emitCheck(formatted_raw_ostream &OS) { |
2471 | OS << R"( |
2472 | static bool Check(DecodeStatus &Out, DecodeStatus In) { |
2473 | Out = static_cast<DecodeStatus>(Out & In); |
2474 | return Out != MCDisassembler::Fail; |
2475 | } |
2476 | |
2477 | )" ; |
2478 | } |
2479 | |
2480 | // Collect all HwModes referenced by the target for encoding purposes, |
2481 | // returning a vector of corresponding names. |
2482 | static void collectHwModesReferencedForEncodings( |
2483 | const CodeGenHwModes &HWM, std::vector<StringRef> &Names, |
2484 | NamespacesHwModesMap &NamespacesWithHwModes) { |
2485 | SmallBitVector BV(HWM.getNumModeIds()); |
2486 | for (const auto &MS : HWM.getHwModeSelects()) { |
2487 | for (const HwModeSelect::PairType &P : MS.second.Items) { |
2488 | if (P.second->isSubClassOf(Name: "InstructionEncoding" )) { |
2489 | std::string DecoderNamespace = |
2490 | P.second->getValueAsString(FieldName: "DecoderNamespace" ).str(); |
2491 | if (P.first == DefaultMode) { |
2492 | NamespacesWithHwModes[DecoderNamespace].insert(x: "" ); |
2493 | } else { |
2494 | NamespacesWithHwModes[DecoderNamespace].insert( |
2495 | x: HWM.getMode(Id: P.first).Name); |
2496 | } |
2497 | BV.set(P.first); |
2498 | } |
2499 | } |
2500 | } |
2501 | transform(Range: BV.set_bits(), d_first: std::back_inserter(x&: Names), F: [&HWM](const int &M) { |
2502 | if (M == DefaultMode) |
2503 | return StringRef("" ); |
2504 | return HWM.getModeName(Id: M, /*IncludeDefault=*/true); |
2505 | }); |
2506 | } |
2507 | |
2508 | static void |
2509 | handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr, |
2510 | ArrayRef<StringRef> HwModeNames, |
2511 | NamespacesHwModesMap &NamespacesWithHwModes, |
2512 | std::vector<EncodingAndInst> &GlobalEncodings) { |
2513 | const Record *InstDef = Instr->TheDef; |
2514 | |
2515 | switch (DecoderEmitterSuppressDuplicates) { |
2516 | case SUPPRESSION_DISABLE: { |
2517 | for (StringRef HwModeName : HwModeNames) |
2518 | GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args&: HwModeName); |
2519 | break; |
2520 | } |
2521 | case SUPPRESSION_LEVEL1: { |
2522 | std::string DecoderNamespace = |
2523 | InstDef->getValueAsString(FieldName: "DecoderNamespace" ).str(); |
2524 | auto It = NamespacesWithHwModes.find(x: DecoderNamespace); |
2525 | if (It != NamespacesWithHwModes.end()) { |
2526 | for (StringRef HwModeName : It->second) |
2527 | GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args&: HwModeName); |
2528 | } else { |
2529 | // Only emit the encoding once, as it's DecoderNamespace doesn't |
2530 | // contain any HwModes. |
2531 | GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args: "" ); |
2532 | } |
2533 | break; |
2534 | } |
2535 | case SUPPRESSION_LEVEL2: |
2536 | GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args: "" ); |
2537 | break; |
2538 | } |
2539 | } |
2540 | |
2541 | // Emits disassembler code for instruction decoding. |
2542 | void DecoderEmitter::run(raw_ostream &o) { |
2543 | formatted_raw_ostream OS(o); |
2544 | OS << R"( |
2545 | #include "llvm/MC/MCInst.h" |
2546 | #include "llvm/MC/MCSubtargetInfo.h" |
2547 | #include "llvm/Support/DataTypes.h" |
2548 | #include "llvm/Support/Debug.h" |
2549 | #include "llvm/Support/LEB128.h" |
2550 | #include "llvm/Support/raw_ostream.h" |
2551 | #include "llvm/TargetParser/SubtargetFeature.h" |
2552 | #include <assert.h> |
2553 | |
2554 | namespace { |
2555 | )" ; |
2556 | |
2557 | emitFieldFromInstruction(OS); |
2558 | emitInsertBits(OS); |
2559 | emitCheck(OS); |
2560 | |
2561 | Target.reverseBitsForLittleEndianEncoding(); |
2562 | |
2563 | // Parameterize the decoders based on namespace and instruction width. |
2564 | |
2565 | // First, collect all encoding-related HwModes referenced by the target. |
2566 | // And establish a mapping table between DecoderNamespace and HwMode. |
2567 | // If HwModeNames is empty, add the empty string so we always have one HwMode. |
2568 | const CodeGenHwModes &HWM = Target.getHwModes(); |
2569 | std::vector<StringRef> HwModeNames; |
2570 | NamespacesHwModesMap NamespacesWithHwModes; |
2571 | collectHwModesReferencedForEncodings(HWM, Names&: HwModeNames, NamespacesWithHwModes); |
2572 | if (HwModeNames.empty()) |
2573 | HwModeNames.push_back(x: "" ); |
2574 | |
2575 | const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); |
2576 | NumberedEncodings.reserve(n: NumberedInstructions.size()); |
2577 | for (const auto &NumberedInstruction : NumberedInstructions) { |
2578 | const Record *InstDef = NumberedInstruction->TheDef; |
2579 | if (const RecordVal *RV = InstDef->getValue(Name: "EncodingInfos" )) { |
2580 | if (const DefInit *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) { |
2581 | EncodingInfoByHwMode EBM(DI->getDef(), HWM); |
2582 | for (auto &[ModeId, Encoding] : EBM) { |
2583 | // DecoderTables with DefaultMode should not have any suffix. |
2584 | if (ModeId == DefaultMode) { |
2585 | NumberedEncodings.emplace_back(args&: Encoding, args: NumberedInstruction, args: "" ); |
2586 | } else { |
2587 | NumberedEncodings.emplace_back(args&: Encoding, args: NumberedInstruction, |
2588 | args: HWM.getMode(Id: ModeId).Name); |
2589 | } |
2590 | } |
2591 | continue; |
2592 | } |
2593 | } |
2594 | // This instruction is encoded the same on all HwModes. |
2595 | // According to user needs, provide varying degrees of suppression. |
2596 | handleHwModesUnrelatedEncodings(Instr: NumberedInstruction, HwModeNames, |
2597 | NamespacesWithHwModes, GlobalEncodings&: NumberedEncodings); |
2598 | } |
2599 | for (const Record *NumberedAlias : |
2600 | RK.getAllDerivedDefinitions(ClassName: "AdditionalEncoding" )) |
2601 | NumberedEncodings.emplace_back( |
2602 | args&: NumberedAlias, |
2603 | args: &Target.getInstruction(InstRec: NumberedAlias->getValueAsDef(FieldName: "AliasOf" ))); |
2604 | |
2605 | std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> |
2606 | OpcMap; |
2607 | std::map<unsigned, std::vector<OperandInfo>> Operands; |
2608 | std::vector<unsigned> InstrLen; |
2609 | bool IsVarLenInst = Target.hasVariableLengthEncodings(); |
2610 | unsigned MaxInstLen = 0; |
2611 | |
2612 | for (const auto &[NEI, NumberedEncoding] : enumerate(First&: NumberedEncodings)) { |
2613 | const Record *EncodingDef = NumberedEncoding.EncodingDef; |
2614 | const CodeGenInstruction *Inst = NumberedEncoding.Inst; |
2615 | const Record *Def = Inst->TheDef; |
2616 | unsigned Size = EncodingDef->getValueAsInt(FieldName: "Size" ); |
2617 | if (Def->getValueAsString(FieldName: "Namespace" ) == "TargetOpcode" || |
2618 | Def->getValueAsBit(FieldName: "isPseudo" ) || |
2619 | Def->getValueAsBit(FieldName: "isAsmParserOnly" ) || |
2620 | Def->getValueAsBit(FieldName: "isCodeGenOnly" )) { |
2621 | NumEncodingsLackingDisasm++; |
2622 | continue; |
2623 | } |
2624 | |
2625 | if (NEI < NumberedInstructions.size()) |
2626 | NumInstructions++; |
2627 | NumEncodings++; |
2628 | |
2629 | if (!Size && !IsVarLenInst) |
2630 | continue; |
2631 | |
2632 | if (IsVarLenInst) |
2633 | InstrLen.resize(new_size: NumberedInstructions.size(), x: 0); |
2634 | |
2635 | if (unsigned Len = populateInstruction(Target, EncodingDef: *EncodingDef, CGI: *Inst, Opc: NEI, |
2636 | Operands, IsVarLenInst)) { |
2637 | if (IsVarLenInst) { |
2638 | MaxInstLen = std::max(a: MaxInstLen, b: Len); |
2639 | InstrLen[NEI] = Len; |
2640 | } |
2641 | std::string DecoderNamespace = |
2642 | EncodingDef->getValueAsString(FieldName: "DecoderNamespace" ).str(); |
2643 | if (!NumberedEncoding.HwModeName.empty()) |
2644 | DecoderNamespace += "_" + NumberedEncoding.HwModeName.str(); |
2645 | OpcMap[{DecoderNamespace, Size}].emplace_back( |
2646 | args&: NEI, args: Target.getInstrIntValue(R: Def)); |
2647 | } else { |
2648 | NumEncodingsOmitted++; |
2649 | } |
2650 | } |
2651 | |
2652 | DecoderTableInfo TableInfo; |
2653 | unsigned OpcodeMask = 0; |
2654 | for (const auto &[NSAndByteSize, EncodingIDs] : OpcMap) { |
2655 | const std::string &DecoderNamespace = NSAndByteSize.first; |
2656 | const unsigned BitWidth = 8 * NSAndByteSize.second; |
2657 | // Emit the decoder for this namespace+width combination. |
2658 | FilterChooser FC(NumberedEncodings, EncodingIDs, Operands, |
2659 | IsVarLenInst ? MaxInstLen : BitWidth, this); |
2660 | |
2661 | // The decode table is cleared for each top level decoder function. The |
2662 | // predicates and decoders themselves, however, are shared across all |
2663 | // decoders to give more opportunities for uniqueing. |
2664 | TableInfo.Table.clear(); |
2665 | TableInfo.FixupStack.clear(); |
2666 | TableInfo.FixupStack.emplace_back(); |
2667 | FC.emitTableEntries(TableInfo); |
2668 | // Any NumToSkip fixups in the top level scope can resolve to the |
2669 | // OPC_Fail at the end of the table. |
2670 | assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!" ); |
2671 | // Resolve any NumToSkip fixups in the current scope. |
2672 | resolveTableFixups(Table&: TableInfo.Table, Fixups: TableInfo.FixupStack.back(), |
2673 | DestIdx: TableInfo.Table.size()); |
2674 | TableInfo.FixupStack.clear(); |
2675 | |
2676 | TableInfo.Table.push_back(Item: MCD::OPC_Fail); |
2677 | |
2678 | // Print the table to the output stream. |
2679 | OpcodeMask |= emitTable(OS, Table&: TableInfo.Table, Indent: indent(0), BitWidth: FC.getBitWidth(), |
2680 | Namespace: DecoderNamespace, EncodingIDs); |
2681 | } |
2682 | |
2683 | // For variable instruction, we emit a instruction length table |
2684 | // to let the decoder know how long the instructions are. |
2685 | // You can see example usage in M68k's disassembler. |
2686 | if (IsVarLenInst) |
2687 | emitInstrLenTable(OS, InstrLen); |
2688 | |
2689 | const bool HasCheckPredicate = |
2690 | OpcodeMask & |
2691 | ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail)); |
2692 | |
2693 | // Emit the predicate function. |
2694 | if (HasCheckPredicate) |
2695 | emitPredicateFunction(OS, Predicates&: TableInfo.Predicates, Indent: indent(0)); |
2696 | |
2697 | // Emit the decoder function. |
2698 | emitDecoderFunction(OS, Decoders&: TableInfo.Decoders, Indent: indent(0)); |
2699 | |
2700 | // Emit the main entry point for the decoder, decodeInstruction(). |
2701 | emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask); |
2702 | |
2703 | OS << "\n} // namespace\n" ; |
2704 | } |
2705 | |
2706 | void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS, |
2707 | StringRef PredicateNamespace) { |
2708 | DecoderEmitter(RK, PredicateNamespace).run(o&: OS); |
2709 | } |
2710 | |