| 1 | //===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file is part of the X86 Disassembler Emitter. |
| 10 | // It contains the interface of the disassembler tables. |
| 11 | // Documentation for the disassembler emitter in general can be found in |
| 12 | // X86DisassemblerEmitter.h. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #ifndef LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H |
| 17 | #define LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H |
| 18 | |
| 19 | #include "X86DisassemblerShared.h" |
| 20 | #include "llvm/Support/X86DisassemblerDecoderCommon.h" |
| 21 | #include <map> |
| 22 | #include <memory> |
| 23 | #include <vector> |
| 24 | |
| 25 | namespace llvm { |
| 26 | class raw_ostream; |
| 27 | |
| 28 | namespace X86Disassembler { |
| 29 | |
| 30 | class ModRMFilter; |
| 31 | |
| 32 | /// DisassemblerTables - Encapsulates all the decode tables being generated by |
| 33 | /// the table emitter. Contains functions to populate the tables as well as |
| 34 | /// to emit them as hierarchical C structures suitable for consumption by the |
| 35 | /// runtime. |
| 36 | class DisassemblerTables { |
| 37 | private: |
| 38 | /// The decoder tables. There is one for each opcode type: |
| 39 | /// [0] one-byte opcodes |
| 40 | /// [1] two-byte opcodes of the form 0f __ |
| 41 | /// [2] three-byte opcodes of the form 0f 38 __ |
| 42 | /// [3] three-byte opcodes of the form 0f 3a __ |
| 43 | /// [4] XOP8 map opcode |
| 44 | /// [5] XOP9 map opcode |
| 45 | /// [6] XOPA map opcode |
| 46 | /// [7] 3dnow map opcode |
| 47 | /// [8] fixed length MAP4 opcode |
| 48 | /// [9] fixed length MAP5 opcode |
| 49 | /// [10] fixed length MAP6 opcode |
| 50 | /// [11] fixed length MAP7 opcode |
| 51 | std::unique_ptr<ContextDecision> Tables[12]; |
| 52 | |
| 53 | // Table of ModRM encodings. |
| 54 | typedef std::map<std::vector<unsigned>, unsigned> ModRMMapTy; |
| 55 | mutable ModRMMapTy ModRMTable; |
| 56 | |
| 57 | /// The instruction information table |
| 58 | std::vector<InstructionSpecifier> InstructionSpecifiers; |
| 59 | |
| 60 | /// True if there are primary decode conflicts in the instruction set |
| 61 | bool HasConflicts; |
| 62 | |
| 63 | /// emitModRMDecision - Emits a table of entries corresponding to a single |
| 64 | /// ModR/M decision. Compacts the ModR/M decision if possible. ModR/M |
| 65 | /// decisions are printed as: |
| 66 | /// |
| 67 | /// { /* struct ModRMDecision */ |
| 68 | /// TYPE, |
| 69 | /// modRMTablennnn |
| 70 | /// } |
| 71 | /// |
| 72 | /// where nnnn is a unique ID for the corresponding table of IDs. |
| 73 | /// TYPE indicates whether the table has one entry that is the same |
| 74 | /// regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one |
| 75 | /// for bytes 0xc0-0xff -, or 256 entries, one for each possible byte. |
| 76 | /// nnnn is the number of a table for looking up these values. The tables |
| 77 | /// are written separately so that tables consisting entirely of zeros will |
| 78 | /// not be duplicated. (These all have the name modRMEmptyTable.) A table |
| 79 | /// is printed as: |
| 80 | /// |
| 81 | /// InstrUID modRMTablennnn[k] = { |
| 82 | /// nnnn, /* MNEMONIC */ |
| 83 | /// ... |
| 84 | /// nnnn /* MNEMONIC */ |
| 85 | /// }; |
| 86 | /// |
| 87 | /// @param o1 - The output stream to print the ID table to. |
| 88 | /// @param o2 - The output stream to print the decision structure to. |
| 89 | /// @param i1 - The indentation level to use with stream o1. |
| 90 | /// @param i2 - The indentation level to use with stream o2. |
| 91 | /// @param ModRMTableNum - next table number for adding to ModRMTable. |
| 92 | /// @param decision - The ModR/M decision to emit. This decision has 256 |
| 93 | /// entries - emitModRMDecision decides how to compact it. |
| 94 | void emitModRMDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1, |
| 95 | unsigned &i2, unsigned &ModRMTableNum, |
| 96 | ModRMDecision &decision) const; |
| 97 | |
| 98 | /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M |
| 99 | /// decisions. An OpcodeDecision is printed as: |
| 100 | /// |
| 101 | /// { /* struct OpcodeDecision */ |
| 102 | /// /* 0x00 */ |
| 103 | /// { /* struct ModRMDecision */ |
| 104 | /// ... |
| 105 | /// } |
| 106 | /// ... |
| 107 | /// } |
| 108 | /// |
| 109 | /// where the ModRMDecision structure is printed as described in the |
| 110 | /// documentation for emitModRMDecision(). emitOpcodeDecision() passes on a |
| 111 | /// stream and indent level for the UID tables generated by |
| 112 | /// emitModRMDecision(), but does not use them itself. |
| 113 | /// |
| 114 | /// @param o1 - The output stream to print the ID tables generated by |
| 115 | /// emitModRMDecision() to. |
| 116 | /// @param o2 - The output stream for the decision structure itself. |
| 117 | /// @param i1 - The indent level to use with stream o1. |
| 118 | /// @param i2 - The indent level to use with stream o2. |
| 119 | /// @param ModRMTableNum - next table number for adding to ModRMTable. |
| 120 | /// @param decision - The OpcodeDecision to emit along with its subsidiary |
| 121 | /// structures. |
| 122 | void emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1, |
| 123 | unsigned &i2, unsigned &ModRMTableNum, |
| 124 | OpcodeDecision &decision) const; |
| 125 | |
| 126 | /// emitContextDecision - Emits a ContextDecision and all its subsidiary |
| 127 | /// Opcode and ModRMDecisions. A ContextDecision is printed as: |
| 128 | /// |
| 129 | /// struct ContextDecision NAME = { |
| 130 | /// { /* OpcodeDecisions */ |
| 131 | /// /* IC */ |
| 132 | /// { /* struct OpcodeDecision */ |
| 133 | /// ... |
| 134 | /// }, |
| 135 | /// ... |
| 136 | /// } |
| 137 | /// } |
| 138 | /// |
| 139 | /// NAME is the name of the ContextDecision (typically one of the four names |
| 140 | /// ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM from |
| 141 | /// X86DisassemblerDecoderCommon.h). |
| 142 | /// IC is one of the contexts in InstructionContext. There is an opcode |
| 143 | /// decision for each possible context. |
| 144 | /// The OpcodeDecision structures are printed as described in the |
| 145 | /// documentation for emitOpcodeDecision. |
| 146 | /// |
| 147 | /// @param o1 - The output stream to print the ID tables generated by |
| 148 | /// emitModRMDecision() to. |
| 149 | /// @param o2 - The output stream to print the decision structure to. |
| 150 | /// @param i1 - The indent level to use with stream o1. |
| 151 | /// @param i2 - The indent level to use with stream o2. |
| 152 | /// @param ModRMTableNum - next table number for adding to ModRMTable. |
| 153 | /// @param decision - The ContextDecision to emit along with its subsidiary |
| 154 | /// structures. |
| 155 | /// @param name - The name for the ContextDecision. |
| 156 | void emitContextDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1, |
| 157 | unsigned &i2, unsigned &ModRMTableNum, |
| 158 | ContextDecision &decision, const char *name) const; |
| 159 | |
| 160 | /// emitInstructionInfo - Prints the instruction specifier table, which has |
| 161 | /// one entry for each instruction, and contains name and operand |
| 162 | /// information. This table is printed as: |
| 163 | /// |
| 164 | /// struct InstructionSpecifier CONTEXTS_SYM[k] = { |
| 165 | /// { |
| 166 | /// /* nnnn */ |
| 167 | /// "MNEMONIC", |
| 168 | /// 0xnn, |
| 169 | /// { |
| 170 | /// { |
| 171 | /// ENCODING, |
| 172 | /// TYPE |
| 173 | /// }, |
| 174 | /// ... |
| 175 | /// } |
| 176 | /// }, |
| 177 | /// }; |
| 178 | /// |
| 179 | /// k is the total number of instructions. |
| 180 | /// nnnn is the ID of the current instruction (0-based). This table |
| 181 | /// includes entries for non-instructions like PHINODE. |
| 182 | /// 0xnn is the lowest possible opcode for the current instruction, used for |
| 183 | /// AddRegFrm instructions to compute the operand's value. |
| 184 | /// ENCODING and TYPE describe the encoding and type for a single operand. |
| 185 | /// |
| 186 | /// @param o - The output stream to which the instruction table should be |
| 187 | /// written. |
| 188 | /// @param i - The indent level for use with the stream. |
| 189 | void emitInstructionInfo(raw_ostream &o, unsigned &i) const; |
| 190 | |
| 191 | /// emitContextTable - Prints the table that is used to translate from an |
| 192 | /// instruction attribute mask to an instruction context. This table is |
| 193 | /// printed as: |
| 194 | /// |
| 195 | /// InstructionContext CONTEXTS_STR[256] = { |
| 196 | /// IC, /* 0x00 */ |
| 197 | /// ... |
| 198 | /// }; |
| 199 | /// |
| 200 | /// IC is the context corresponding to the mask 0x00, and there are 256 |
| 201 | /// possible masks. |
| 202 | /// |
| 203 | /// @param o - The output stream to which the context table should be |
| 204 | /// written. |
| 205 | /// @param i - The indent level for use with the stream. |
| 206 | void emitContextTable(raw_ostream &o, uint32_t &i) const; |
| 207 | |
| 208 | /// emitContextDecisions - Prints all four ContextDecision structures using |
| 209 | /// emitContextDecision(). |
| 210 | /// |
| 211 | /// @param o1 - The output stream to print the ID tables generated by |
| 212 | /// emitModRMDecision() to. |
| 213 | /// @param o2 - The output stream to print the decision structures to. |
| 214 | /// @param i1 - The indent level to use with stream o1. |
| 215 | /// @param i2 - The indent level to use with stream o2. |
| 216 | /// @param ModRMTableNum - next table number for adding to ModRMTable. |
| 217 | void emitContextDecisions(raw_ostream &o1, raw_ostream &o2, unsigned &i1, |
| 218 | unsigned &i2, unsigned &ModRMTableNum) const; |
| 219 | |
| 220 | /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a |
| 221 | /// ModRMDecision to refer to a particular instruction ID. |
| 222 | /// |
| 223 | /// @param decision - The ModRMDecision to populate. |
| 224 | /// @param filter - The filter to use in deciding which entries to populate. |
| 225 | /// @param uid - The unique ID to set matching entries to. |
| 226 | /// @param opcode - The opcode of the instruction, for error reporting. |
| 227 | void setTableFields(ModRMDecision &decision, const ModRMFilter &filter, |
| 228 | InstrUID uid, uint8_t opcode); |
| 229 | |
| 230 | public: |
| 231 | /// Constructor - Allocates space for the class decisions and clears them. |
| 232 | DisassemblerTables(); |
| 233 | |
| 234 | ~DisassemblerTables(); |
| 235 | |
| 236 | /// emit - Emits the instruction table, context table, and class decisions. |
| 237 | /// |
| 238 | /// @param o - The output stream to print the tables to. |
| 239 | void emit(raw_ostream &o) const; |
| 240 | |
| 241 | /// setTableFields - Uses the opcode type, instruction context, opcode, and a |
| 242 | /// ModRMFilter as criteria to set a particular set of entries in the |
| 243 | /// decode tables to point to a specific uid. |
| 244 | /// |
| 245 | /// @param type - The opcode type (ONEBYTE, TWOBYTE, etc.) |
| 246 | /// @param insnContext - The context to use (IC, IC_64BIT, etc.) |
| 247 | /// @param opcode - The last byte of the opcode (not counting any escape |
| 248 | /// or extended opcodes). |
| 249 | /// @param filter - The ModRMFilter that decides which ModR/M byte |
| 250 | /// values |
| 251 | /// correspond to the desired instruction. |
| 252 | /// @param uid - The unique ID of the instruction. |
| 253 | /// @param is32bit - Instructon is only 32-bit |
| 254 | /// @param noPrefix - Instruction record has no prefix. |
| 255 | /// @param ignoresVEX_L - Instruction ignores VEX.L |
| 256 | /// @param ignoresVEX_W - Instruction ignores VEX.W |
| 257 | /// @param AddrSize - Instructions address size 16/32/64. 0 is unspecified |
| 258 | void setTableFields(OpcodeType type, InstructionContext insnContext, |
| 259 | uint8_t opcode, const ModRMFilter &filter, InstrUID uid, |
| 260 | bool is32bit, bool noPrefix, bool ignoresVEX_L, |
| 261 | bool ignoresVEX_W, unsigned AddrSize); |
| 262 | |
| 263 | /// specForUID - Returns the instruction specifier for a given unique |
| 264 | /// instruction ID. Used when resolving collisions. |
| 265 | /// |
| 266 | /// @param uid - The unique ID of the instruction. |
| 267 | /// @return - A reference to the instruction specifier. |
| 268 | InstructionSpecifier &specForUID(InstrUID uid) { |
| 269 | if (uid >= InstructionSpecifiers.size()) |
| 270 | InstructionSpecifiers.resize(new_size: uid + 1); |
| 271 | |
| 272 | return InstructionSpecifiers[uid]; |
| 273 | } |
| 274 | |
| 275 | // hasConflicts - Reports whether there were primary decode conflicts |
| 276 | // from any instructions added to the tables. |
| 277 | // @return - true if there were; false otherwise. |
| 278 | |
| 279 | bool hasConflicts() { return HasConflicts; } |
| 280 | }; |
| 281 | |
| 282 | } // namespace X86Disassembler |
| 283 | |
| 284 | } // namespace llvm |
| 285 | |
| 286 | #endif |
| 287 | |