//===- X86RecognizableInstr.h - Disassembler instruction spec ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler Emitter.
// It contains the interface of a single recognizable instruction.
// Documentation for the disassembler emitter in general can be found in
// X86DisassemblerEmitter.h.
//
//===----------------------------------------------------------------------===//
| 15 | |
| 16 | #ifndef LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H |
| 17 | #define LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H |
| 18 | |
| 19 | #include "Common/CodeGenInstruction.h" |
| 20 | #include "llvm/Support/X86DisassemblerDecoderCommon.h" |
| 21 | #include <cstdint> |
| 22 | #include <string> |
| 23 | #include <vector> |
| 24 | |
| 25 | struct InstructionSpecifier; |
| 26 | |
| 27 | namespace llvm { |
| 28 | class Record; |
/// X-macro table of the MRM_C0 .. MRM_FF entries. Each MAP(from, to) line
/// pairs a suffix `from` (C0 through FF) with the numeric value `to` (64
/// through 127); reading `from` as hexadecimal, `to` is always
/// `0x<from> - 0x80`.
///
/// Usage: define MAP(from, to), expand X86_INSTR_MRM_MAPPING, then #undef MAP.
/// Every line except the last must end in a backslash immediately followed by
/// the newline, otherwise the list is silently truncated.
#define X86_INSTR_MRM_MAPPING \
  MAP(C0, 64) \
  MAP(C1, 65) \
  MAP(C2, 66) \
  MAP(C3, 67) \
  MAP(C4, 68) \
  MAP(C5, 69) \
  MAP(C6, 70) \
  MAP(C7, 71) \
  MAP(C8, 72) \
  MAP(C9, 73) \
  MAP(CA, 74) \
  MAP(CB, 75) \
  MAP(CC, 76) \
  MAP(CD, 77) \
  MAP(CE, 78) \
  MAP(CF, 79) \
  MAP(D0, 80) \
  MAP(D1, 81) \
  MAP(D2, 82) \
  MAP(D3, 83) \
  MAP(D4, 84) \
  MAP(D5, 85) \
  MAP(D6, 86) \
  MAP(D7, 87) \
  MAP(D8, 88) \
  MAP(D9, 89) \
  MAP(DA, 90) \
  MAP(DB, 91) \
  MAP(DC, 92) \
  MAP(DD, 93) \
  MAP(DE, 94) \
  MAP(DF, 95) \
  MAP(E0, 96) \
  MAP(E1, 97) \
  MAP(E2, 98) \
  MAP(E3, 99) \
  MAP(E4, 100) \
  MAP(E5, 101) \
  MAP(E6, 102) \
  MAP(E7, 103) \
  MAP(E8, 104) \
  MAP(E9, 105) \
  MAP(EA, 106) \
  MAP(EB, 107) \
  MAP(EC, 108) \
  MAP(ED, 109) \
  MAP(EE, 110) \
  MAP(EF, 111) \
  MAP(F0, 112) \
  MAP(F1, 113) \
  MAP(F2, 114) \
  MAP(F3, 115) \
  MAP(F4, 116) \
  MAP(F5, 117) \
  MAP(F6, 118) \
  MAP(F7, 119) \
  MAP(F8, 120) \
  MAP(F9, 121) \
  MAP(FA, 122) \
  MAP(FB, 123) \
  MAP(FC, 124) \
  MAP(FD, 125) \
  MAP(FE, 126) \
  MAP(FF, 127)
| 94 | |
// A clone of X86 since we can't depend on something that is generated.
//
// All enums below are anonymous and unscoped, so their enumerators are plain
// integer constants addressed as X86Local::<Name>.
// NOTE(review): which RecognizableInstrBase field each enum is compared
// against is inferred from the names only; confirm against the .cpp.
namespace X86Local {
/// Instruction form values (presumably the values of the record's Form field).
/// The numbering is deliberately sparse: 11-17, 29 and 45 are unused here.
/// Values 64-127 (MRM_C0 .. MRM_FF) are generated from X86_INSTR_MRM_MAPPING.
enum {
  Pseudo = 0,
  RawFrm = 1,
  AddRegFrm = 2,
  RawFrmMemOffs = 3,
  RawFrmSrc = 4,
  RawFrmDst = 5,
  RawFrmDstSrc = 6,
  RawFrmImm8 = 7,
  RawFrmImm16 = 8,
  AddCCFrm = 9,
  PrefixByte = 10,
  MRMDestRegCC = 18,
  MRMDestMemCC = 19,
  MRMDestMem4VOp3CC = 20,
  MRMr0 = 21,
  MRMSrcMemFSIB = 22,
  MRMDestMemFSIB = 23,
  MRMDestMem = 24,
  MRMSrcMem = 25,
  MRMSrcMem4VOp3 = 26,
  MRMSrcMemOp4 = 27,
  MRMSrcMemCC = 28,
  MRMXmCC = 30,
  MRMXm = 31,
  MRM0m = 32,
  MRM1m = 33,
  MRM2m = 34,
  MRM3m = 35,
  MRM4m = 36,
  MRM5m = 37,
  MRM6m = 38,
  MRM7m = 39,
  MRMDestReg = 40,
  MRMSrcReg = 41,
  MRMSrcReg4VOp3 = 42,
  MRMSrcRegOp4 = 43,
  MRMSrcRegCC = 44,
  MRMXrCC = 46,
  MRMXr = 47,
  MRM0r = 48,
  MRM1r = 49,
  MRM2r = 50,
  MRM3r = 51,
  MRM4r = 52,
  MRM5r = 53,
  MRM6r = 54,
  MRM7r = 55,
  MRM0X = 56,
  MRM1X = 57,
  MRM2X = 58,
  MRM3X = 59,
  MRM4X = 60,
  MRM5X = 61,
  MRM6X = 62,
  MRM7X = 63,
// Expands each MAP(from, to) entry into the enumerator MRM_<from> = <to>.
#define MAP(from, to) MRM_##from = to,
  X86_INSTR_MRM_MAPPING
#undef MAP
};

/// Opcode map values (presumably the values of the record's OpMap field).
enum {
  OB = 0,
  TB = 1,
  T8 = 2,
  TA = 3,
  XOP8 = 4,
  XOP9 = 5,
  XOPA = 6,
  ThreeDNow = 7,
  T_MAP4 = 8,
  T_MAP5 = 9,
  T_MAP6 = 10,
  T_MAP7 = 11
};

/// Opcode prefix values (presumably OpPrefix).
enum { PD = 1, XS = 2, XD = 3, PS = 4 };
/// Encoding scheme values (presumably Encoding).
enum { VEX = 1, XOP = 2, EVEX = 3 };
/// Operand-size values (presumably OpSize).
enum { OpSize16 = 1, OpSize32 = 2 };
/// Address-size values (presumably AdSize).
enum { AdSize16 = 1, AdSize32 = 2, AdSize64 = 3 };
/// Explicit opcode-prefix values; note that 2 is not assigned here.
enum { ExplicitREX2 = 1, ExplicitEVEX = 3 };
} // namespace X86Local
| 179 | |
| 180 | namespace X86Disassembler { |
| 181 | class DisassemblerTables; |
| 182 | /// Extract common fields of a single X86 instruction from a CodeGenInstruction |
| 183 | struct RecognizableInstrBase { |
| 184 | /// The OpPrefix field from the record |
| 185 | uint8_t OpPrefix; |
| 186 | /// The OpMap field from the record |
| 187 | uint8_t OpMap; |
| 188 | /// The opcode field from the record; this is the opcode used in the Intel |
| 189 | /// encoding and therefore distinct from the UID |
| 190 | uint8_t Opcode; |
| 191 | /// The form field from the record |
| 192 | uint8_t Form; |
| 193 | // The encoding field from the record |
| 194 | uint8_t Encoding; |
| 195 | /// The OpSize field from the record |
| 196 | uint8_t OpSize; |
| 197 | /// The AdSize field from the record |
| 198 | uint8_t AdSize; |
| 199 | /// The hasREX_W field from the record |
| 200 | bool HasREX_W; |
| 201 | /// The hasVEX_4V field from the record |
| 202 | bool HasVEX_4V; |
| 203 | /// The IgnoresW field from the record |
| 204 | bool IgnoresW; |
| 205 | /// The hasVEX_L field from the record |
| 206 | bool HasVEX_L; |
| 207 | /// The ignoreVEX_L field from the record |
| 208 | bool IgnoresVEX_L; |
| 209 | /// The hasEVEX_L2Prefix field from the record |
| 210 | bool HasEVEX_L2; |
| 211 | /// The hasEVEX_K field from the record |
| 212 | bool HasEVEX_K; |
| 213 | /// The hasEVEX_KZ field from the record |
| 214 | bool HasEVEX_KZ; |
| 215 | /// The hasEVEX_B field from the record |
| 216 | bool HasEVEX_B; |
| 217 | /// The hasEVEX_U field from the record |
| 218 | bool HasEVEX_U; |
| 219 | /// The hasEVEX_NF field from the record |
| 220 | bool HasEVEX_NF; |
| 221 | /// The hasTwoConditionalOps field from the record |
| 222 | bool HasTwoConditionalOps; |
| 223 | /// Indicates that the instruction uses the L and L' fields for RC. |
| 224 | bool EncodeRC; |
| 225 | /// The isCodeGenOnly field from the record |
| 226 | bool IsCodeGenOnly; |
| 227 | /// The isAsmParserOnly field from the record |
| 228 | bool IsAsmParserOnly; |
| 229 | /// The ForceDisassemble field from the record |
| 230 | bool ForceDisassemble; |
| 231 | // The CD8_Scale field from the record |
| 232 | uint8_t CD8_Scale; |
| 233 | /// If explicitOpPrefix field from the record equals ExplicitREX2 |
| 234 | bool ExplicitREX2Prefix; |
| 235 | /// \param insn The CodeGenInstruction to extract information from. |
| 236 | RecognizableInstrBase(const CodeGenInstruction &insn); |
| 237 | /// \returns true if this instruction should be emitted |
| 238 | bool shouldBeEmitted() const; |
| 239 | }; |
| 240 | |
| 241 | /// RecognizableInstr - Encapsulates all information required to decode a single |
| 242 | /// instruction, as extracted from the LLVM instruction tables. Has methods |
| 243 | /// to interpret the information available in the LLVM tables, and to emit the |
| 244 | /// instruction into DisassemblerTables. |
| 245 | class RecognizableInstr : public RecognizableInstrBase { |
| 246 | private: |
| 247 | /// The record from the .td files corresponding to this instruction |
| 248 | const Record *Rec; |
| 249 | /// The instruction name as listed in the tables |
| 250 | std::string Name; |
| 251 | // Whether the instruction has the predicate "In32BitMode" |
| 252 | bool Is32Bit; |
| 253 | // Whether the instruction has the predicate "In64BitMode" |
| 254 | bool Is64Bit; |
| 255 | /// The operands of the instruction, as listed in the CodeGenInstruction. |
| 256 | /// They are not one-to-one with operands listed in the MCInst; for example, |
| 257 | /// memory operands expand to 5 operands in the MCInst |
| 258 | const std::vector<CGIOperandList::OperandInfo> *Operands; |
| 259 | |
| 260 | /// The opcode of the instruction, as used in an MCInst |
| 261 | InstrUID UID; |
| 262 | /// The description of the instruction that is emitted into the instruction |
| 263 | /// info table |
| 264 | InstructionSpecifier *Spec; |
| 265 | |
| 266 | /// insnContext - Returns the primary context in which the instruction is |
| 267 | /// valid. |
| 268 | /// |
| 269 | /// @return - The context in which the instruction is valid. |
| 270 | InstructionContext insnContext() const; |
| 271 | |
| 272 | /// typeFromString - Translates an operand type from the string provided in |
| 273 | /// the LLVM tables to an OperandType for use in the operand specifier. |
| 274 | /// |
| 275 | /// @param s - The string, as extracted by calling Rec->getName() |
| 276 | /// on a CodeGenInstruction::OperandInfo. |
| 277 | /// @param hasREX_W - Indicates whether the instruction has a REX.W |
| 278 | /// prefix. If it does, 32-bit register operands stay |
| 279 | /// 32-bit regardless of the operand size. |
| 280 | /// @param OpSize Indicates the operand size of the instruction. |
| 281 | /// If register size does not match OpSize, then |
| 282 | /// register sizes keep their size. |
| 283 | /// @return - The operand's type. |
| 284 | static OperandType typeFromString(StringRef Str, bool hasREX_W, |
| 285 | uint8_t OpSize); |
| 286 | |
| 287 | /// immediateEncodingFromString - Translates an immediate encoding from the |
| 288 | /// string provided in the LLVM tables to an OperandEncoding for use in |
| 289 | /// the operand specifier. |
| 290 | /// |
| 291 | /// @param s - See typeFromString(). |
| 292 | /// @param OpSize - Indicates whether this is an OpSize16 instruction. |
| 293 | /// If it is not, then 16-bit immediate operands stay 16-bit. |
| 294 | /// @return - The operand's encoding. |
| 295 | static OperandEncoding immediateEncodingFromString(StringRef Str, |
| 296 | uint8_t OpSize); |
| 297 | |
| 298 | /// rmRegisterEncodingFromString - Like immediateEncodingFromString, but |
| 299 | /// handles operands that are in the REG field of the ModR/M byte. |
| 300 | static OperandEncoding rmRegisterEncodingFromString(StringRef Str, |
| 301 | uint8_t OpSize); |
| 302 | |
| 303 | /// rmRegisterEncodingFromString - Like immediateEncodingFromString, but |
| 304 | /// handles operands that are in the REG field of the ModR/M byte. |
| 305 | static OperandEncoding roRegisterEncodingFromString(StringRef Str, |
| 306 | uint8_t OpSize); |
| 307 | static OperandEncoding memoryEncodingFromString(StringRef Str, |
| 308 | uint8_t OpSize); |
| 309 | static OperandEncoding relocationEncodingFromString(StringRef Str, |
| 310 | uint8_t OpSize); |
| 311 | static OperandEncoding opcodeModifierEncodingFromString(StringRef Str, |
| 312 | uint8_t OpSize); |
| 313 | static OperandEncoding vvvvRegisterEncodingFromString(StringRef Str, |
| 314 | uint8_t OpSize); |
| 315 | static OperandEncoding writemaskRegisterEncodingFromString(StringRef Str, |
| 316 | uint8_t OpSize); |
| 317 | |
| 318 | /// Adjust the encoding type for an operand based on the instruction. |
| 319 | void adjustOperandEncoding(OperandEncoding &encoding); |
| 320 | |
| 321 | /// handleOperand - Converts a single operand from the LLVM table format to |
| 322 | /// the emitted table format, handling any duplicate operands it encounters |
| 323 | /// and then one non-duplicate. |
| 324 | /// |
| 325 | /// @param optional - Determines whether to assert that the |
| 326 | /// operand exists. |
| 327 | /// @param operandIndex - The index into the generated operand table. |
| 328 | /// Incremented by this function one or more |
| 329 | /// times to reflect possible duplicate |
| 330 | /// operands). |
| 331 | /// @param physicalOperandIndex - The index of the current operand into the |
| 332 | /// set of non-duplicate ('physical') operands. |
| 333 | /// Incremented by this function once. |
| 334 | /// @param numPhysicalOperands - The number of non-duplicate operands in the |
| 335 | /// instructions. |
| 336 | /// @param operandMapping - The operand mapping, which has an entry for |
| 337 | /// each operand that indicates whether it is a |
| 338 | /// duplicate, and of what. |
| 339 | using EncodingFn = |
| 340 | llvm::function_ref<OperandEncoding(StringRef s, uint8_t OpSize)>; |
| 341 | void handleOperand(bool optional, unsigned &operandIndex, |
| 342 | unsigned &physicalOperandIndex, |
| 343 | unsigned numPhysicalOperands, |
| 344 | const unsigned *operandMapping, |
| 345 | EncodingFn encodingFromString); |
| 346 | |
| 347 | /// emitInstructionSpecifier - Loads the instruction specifier for the current |
| 348 | /// instruction into a DisassemblerTables. |
| 349 | /// |
| 350 | void emitInstructionSpecifier(); |
| 351 | |
| 352 | /// emitDecodePath - Populates the proper fields in the decode tables |
| 353 | /// corresponding to the decode paths for this instruction. |
| 354 | /// |
| 355 | /// \param tables The DisassemblerTables to populate with the decode |
| 356 | /// decode information for the current instruction. |
| 357 | void emitDecodePath(DisassemblerTables &tables) const; |
| 358 | |
| 359 | public: |
| 360 | /// Constructor - Initializes a RecognizableInstr with the appropriate fields |
| 361 | /// from a CodeGenInstruction. |
| 362 | /// |
| 363 | /// \param tables The DisassemblerTables that the specifier will be added to. |
| 364 | /// \param insn The CodeGenInstruction to extract information from. |
| 365 | /// \param uid The unique ID of the current instruction. |
| 366 | RecognizableInstr(DisassemblerTables &tables, const CodeGenInstruction &insn, |
| 367 | InstrUID uid); |
| 368 | /// processInstr - Accepts a CodeGenInstruction and loads decode information |
| 369 | /// for it into a DisassemblerTables if appropriate. |
| 370 | /// |
| 371 | /// \param tables The DiassemblerTables to be populated with decode |
| 372 | /// information. |
| 373 | /// \param insn The CodeGenInstruction to be used as a source for this |
| 374 | /// information. |
| 375 | /// \param uid The unique ID of the instruction. |
| 376 | static void processInstr(DisassemblerTables &tables, |
| 377 | const CodeGenInstruction &insn, InstrUID uid); |
| 378 | }; |
| 379 | |
| 380 | std::string getMnemonic(const CodeGenInstruction *I, unsigned Variant); |
| 381 | bool isRegisterOperand(const Record *Rec); |
| 382 | bool isMemoryOperand(const Record *Rec); |
| 383 | bool isImmediateOperand(const Record *Rec); |
| 384 | unsigned getRegOperandSize(const Record *RegRec); |
| 385 | unsigned getMemOperandSize(const Record *MemRec); |
| 386 | } // namespace X86Disassembler |
| 387 | } // namespace llvm |
| 388 | #endif |
| 389 | |