1 | //===- X86RecognizableInstr.h - Disassembler instruction spec ---*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file is part of the X86 Disassembler Emitter. |
10 | // It contains the interface of a single recognizable instruction. |
11 | // Documentation for the disassembler emitter in general can be found in |
12 | // X86DisassemblerEmitter.h. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H |
17 | #define LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H |
18 | |
19 | #include "Common/CodeGenInstruction.h" |
20 | #include "llvm/Support/X86DisassemblerDecoderCommon.h" |
21 | #include <cstdint> |
22 | #include <string> |
23 | #include <vector> |
24 | |
25 | struct InstructionSpecifier; |
26 | |
27 | namespace llvm { |
28 | class Record; |
// X-macro table of the MRM_XX instruction forms.
//
// Each row MAP(XX, N) pairs a two-hex-digit suffix XX (C0 through FF) with a
// decimal value N (64 through 127); in every row N equals 0xXX - 128.
//
// To use the table, define MAP(from, to), expand X86_INSTR_MRM_MAPPING, and
// #undef MAP again. The form enum in X86Local does exactly that to declare
// the enumerators MRM_C0 = 64 ... MRM_FF = 127.
#define X86_INSTR_MRM_MAPPING \
  MAP(C0, 64) \
  MAP(C1, 65) \
  MAP(C2, 66) \
  MAP(C3, 67) \
  MAP(C4, 68) \
  MAP(C5, 69) \
  MAP(C6, 70) \
  MAP(C7, 71) \
  MAP(C8, 72) \
  MAP(C9, 73) \
  MAP(CA, 74) \
  MAP(CB, 75) \
  MAP(CC, 76) \
  MAP(CD, 77) \
  MAP(CE, 78) \
  MAP(CF, 79) \
  MAP(D0, 80) \
  MAP(D1, 81) \
  MAP(D2, 82) \
  MAP(D3, 83) \
  MAP(D4, 84) \
  MAP(D5, 85) \
  MAP(D6, 86) \
  MAP(D7, 87) \
  MAP(D8, 88) \
  MAP(D9, 89) \
  MAP(DA, 90) \
  MAP(DB, 91) \
  MAP(DC, 92) \
  MAP(DD, 93) \
  MAP(DE, 94) \
  MAP(DF, 95) \
  MAP(E0, 96) \
  MAP(E1, 97) \
  MAP(E2, 98) \
  MAP(E3, 99) \
  MAP(E4, 100) \
  MAP(E5, 101) \
  MAP(E6, 102) \
  MAP(E7, 103) \
  MAP(E8, 104) \
  MAP(E9, 105) \
  MAP(EA, 106) \
  MAP(EB, 107) \
  MAP(EC, 108) \
  MAP(ED, 109) \
  MAP(EE, 110) \
  MAP(EF, 111) \
  MAP(F0, 112) \
  MAP(F1, 113) \
  MAP(F2, 114) \
  MAP(F3, 115) \
  MAP(F4, 116) \
  MAP(F5, 117) \
  MAP(F6, 118) \
  MAP(F7, 119) \
  MAP(F8, 120) \
  MAP(F9, 121) \
  MAP(FA, 122) \
  MAP(FB, 123) \
  MAP(FC, 124) \
  MAP(FD, 125) \
  MAP(FE, 126) \
  MAP(FF, 127)
94 | |
// A clone of X86 since we can't depend on something that is generated.
// NOTE(review): because this is a hand-maintained clone, the numeric values
// presumably have to stay in sync with the definitions they mirror -- confirm
// before renumbering or reordering anything in this namespace.
namespace X86Local {
// Instruction forms.
// NOTE(review): presumably the values held by RecognizableInstrBase::Form --
// confirm against the implementation file.
// The numbering is deliberately explicit and has holes: nothing is declared
// for 11-17, 29 or 45.
enum {
  Pseudo = 0,
  RawFrm = 1,
  AddRegFrm = 2,
  RawFrmMemOffs = 3,
  RawFrmSrc = 4,
  RawFrmDst = 5,
  RawFrmDstSrc = 6,
  RawFrmImm8 = 7,
  RawFrmImm16 = 8,
  AddCCFrm = 9,
  PrefixByte = 10,
  MRMDestRegCC = 18,
  MRMDestMemCC = 19,
  MRMDestMem4VOp3CC = 20,
  MRMr0 = 21,
  MRMSrcMemFSIB = 22,
  MRMDestMemFSIB = 23,
  MRMDestMem = 24,
  MRMSrcMem = 25,
  MRMSrcMem4VOp3 = 26,
  MRMSrcMemOp4 = 27,
  MRMSrcMemCC = 28,
  MRMXmCC = 30,
  MRMXm = 31,
  MRM0m = 32,
  MRM1m = 33,
  MRM2m = 34,
  MRM3m = 35,
  MRM4m = 36,
  MRM5m = 37,
  MRM6m = 38,
  MRM7m = 39,
  MRMDestReg = 40,
  MRMSrcReg = 41,
  MRMSrcReg4VOp3 = 42,
  MRMSrcRegOp4 = 43,
  MRMSrcRegCC = 44,
  MRMXrCC = 46,
  MRMXr = 47,
  MRM0r = 48,
  MRM1r = 49,
  MRM2r = 50,
  MRM3r = 51,
  MRM4r = 52,
  MRM5r = 53,
  MRM6r = 54,
  MRM7r = 55,
  MRM0X = 56,
  MRM1X = 57,
  MRM2X = 58,
  MRM3X = 59,
  MRM4X = 60,
  MRM5X = 61,
  MRM6X = 62,
  MRM7X = 63,
// Expand the X-macro table into MRM_C0 = 64 ... MRM_FF = 127, continuing
// directly after MRM7X = 63.
#define MAP(from, to) MRM_##from = to,
  X86_INSTR_MRM_MAPPING
#undef MAP
};

// Opcode maps.
// NOTE(review): presumably the values held by RecognizableInstrBase::OpMap --
// confirm against the implementation file.
enum {
  OB = 0,
  TB = 1,
  T8 = 2,
  TA = 3,
  XOP8 = 4,
  XOP9 = 5,
  XOPA = 6,
  ThreeDNow = 7,
  T_MAP4 = 8,
  T_MAP5 = 9,
  T_MAP6 = 10,
  T_MAP7 = 11
};

// NOTE(review): judging by the enumerator names and by the members of
// RecognizableInstrBase, the five enums below presumably hold the values of
// the OpPrefix, Encoding, OpSize, AdSize and explicitOpPrefix record fields
// respectively -- confirm against the implementation file. None of them
// declares a name for the value 0, and the last one skips the value 2.
enum { PD = 1, XS = 2, XD = 3, PS = 4 };
enum { VEX = 1, XOP = 2, EVEX = 3 };
enum { OpSize16 = 1, OpSize32 = 2 };
enum { AdSize16 = 1, AdSize32 = 2, AdSize64 = 3 };
enum { ExplicitREX2 = 1, ExplicitEVEX = 3 };
} // namespace X86Local
179 | |
180 | namespace X86Disassembler { |
181 | class DisassemblerTables; |
/// Extract common fields of a single X86 instruction from a CodeGenInstruction.
///
/// All members are public and are populated by the constructor; the struct
/// also serves as the base class of RecognizableInstr.
struct RecognizableInstrBase {
  /// The OpPrefix field from the record
  uint8_t OpPrefix;
  /// The OpMap field from the record
  uint8_t OpMap;
  /// The opcode field from the record; this is the opcode used in the Intel
  /// encoding and therefore distinct from the UID
  uint8_t Opcode;
  /// The form field from the record
  uint8_t Form;
  /// The encoding field from the record
  uint8_t Encoding;
  /// The OpSize field from the record
  uint8_t OpSize;
  /// The AdSize field from the record
  uint8_t AdSize;
  /// The hasREX_W field from the record
  bool HasREX_W;
  /// The hasVEX_4V field from the record
  bool HasVEX_4V;
  /// The IgnoresW field from the record
  bool IgnoresW;
  /// The hasVEX_L field from the record
  bool HasVEX_L;
  /// The ignoreVEX_L field from the record
  bool IgnoresVEX_L;
  /// The hasEVEX_L2Prefix field from the record
  bool HasEVEX_L2;
  /// The hasEVEX_K field from the record
  bool HasEVEX_K;
  /// The hasEVEX_KZ field from the record
  bool HasEVEX_KZ;
  /// The hasEVEX_B field from the record
  bool HasEVEX_B;
  /// The hasEVEX_NF field from the record
  bool HasEVEX_NF;
  /// The hasTwoConditionalOps field from the record
  bool HasTwoConditionalOps;
  /// Indicates that the instruction uses the L and L' fields for RC.
  bool EncodeRC;
  /// The isCodeGenOnly field from the record
  bool IsCodeGenOnly;
  /// The isAsmParserOnly field from the record
  bool IsAsmParserOnly;
  /// The ForceDisassemble field from the record
  bool ForceDisassemble;
  /// The CD8_Scale field from the record
  uint8_t CD8_Scale;
  /// Whether the explicitOpPrefix field from the record equals ExplicitREX2
  bool ExplicitREX2Prefix;

  /// Populates every field above from \p insn.
  ///
  /// NOTE(review): this single-argument constructor is not marked `explicit`,
  /// so a CodeGenInstruction converts implicitly to a RecognizableInstrBase.
  /// Consider adding `explicit` once it is confirmed that no caller relies on
  /// the implicit conversion.
  ///
  /// \param insn The CodeGenInstruction to extract information from.
  RecognizableInstrBase(const CodeGenInstruction &insn);

  /// \returns true if this instruction should be emitted
  bool shouldBeEmitted() const;
};
238 | |
/// RecognizableInstr - Encapsulates all information required to decode a single
///   instruction, as extracted from the LLVM instruction tables.  Has methods
///   to interpret the information available in the LLVM tables, and to emit the
///   instruction into DisassemblerTables.
///
/// Everything except the constructor and the static processInstr() entry point
/// is private.
class RecognizableInstr : public RecognizableInstrBase {
private:
  /// The record from the .td files corresponding to this instruction
  const Record *Rec;
  /// The instruction name as listed in the tables
  std::string Name;
  /// Whether the instruction has the predicate "In32BitMode"
  bool Is32Bit;
  /// Whether the instruction has the predicate "In64BitMode"
  bool Is64Bit;
  /// The operands of the instruction, as listed in the CodeGenInstruction.
  /// They are not one-to-one with operands listed in the MCInst; for example,
  /// memory operands expand to 5 operands in the MCInst
  const std::vector<CGIOperandList::OperandInfo> *Operands;

  /// The opcode of the instruction, as used in an MCInst
  InstrUID UID;
  /// The description of the instruction that is emitted into the instruction
  /// info table
  InstructionSpecifier *Spec;

  /// insnContext - Returns the primary context in which the instruction is
  ///   valid.
  ///
  /// \returns The context in which the instruction is valid.
  InstructionContext insnContext() const;

  /// typeFromString - Translates an operand type from the string provided in
  ///   the LLVM tables to an OperandType for use in the operand specifier.
  ///
  /// \param s        The string, as extracted by calling Rec->getName()
  ///                 on a CodeGenInstruction::OperandInfo.
  /// \param hasREX_W Indicates whether the instruction has a REX.W
  ///                 prefix.  If it does, 32-bit register operands stay
  ///                 32-bit regardless of the operand size.
  /// \param OpSize   Indicates the operand size of the instruction.
  ///                 If register size does not match OpSize, then
  ///                 register sizes keep their size.
  /// \returns The operand's type.
  static OperandType typeFromString(const std::string &s, bool hasREX_W,
                                    uint8_t OpSize);

  /// immediateEncodingFromString - Translates an immediate encoding from the
  ///   string provided in the LLVM tables to an OperandEncoding for use in
  ///   the operand specifier.
  ///
  /// \param s      See typeFromString().
  /// \param OpSize Indicates whether this is an OpSize16 instruction.
  ///               If it is not, then 16-bit immediate operands stay 16-bit.
  /// \returns The operand's encoding.
  static OperandEncoding immediateEncodingFromString(const std::string &s,
                                                     uint8_t OpSize);

  /// rmRegisterEncodingFromString - Like immediateEncodingFromString, but
  ///   handles operands that are in the REG field of the ModR/M byte.
  ///
  /// NOTE(review): this description is word-for-word the one given for
  /// roRegisterEncodingFromString; the "rm" in the name suggests the R/M
  /// field rather than the REG field -- confirm against the definition.
  static OperandEncoding rmRegisterEncodingFromString(const std::string &s,
                                                      uint8_t OpSize);

  /// roRegisterEncodingFromString - Like immediateEncodingFromString, but
  ///   handles operands that are in the REG field of the ModR/M byte.
  static OperandEncoding roRegisterEncodingFromString(const std::string &s,
                                                      uint8_t OpSize);

  /// The remaining *EncodingFromString translators have the same signature as
  /// immediateEncodingFromString, which is also the signature of
  /// handleOperand()'s encodingFromString callback parameter.
  /// NOTE(review): judging by their names, each one covers one class of
  /// operand (memory, relocation, opcode modifier, vvvv register, writemask
  /// register) -- see the definitions for the exact rules.
  static OperandEncoding memoryEncodingFromString(const std::string &s,
                                                  uint8_t OpSize);
  static OperandEncoding relocationEncodingFromString(const std::string &s,
                                                      uint8_t OpSize);
  static OperandEncoding opcodeModifierEncodingFromString(const std::string &s,
                                                          uint8_t OpSize);
  static OperandEncoding vvvvRegisterEncodingFromString(const std::string &s,
                                                        uint8_t OpSize);
  static OperandEncoding
  writemaskRegisterEncodingFromString(const std::string &s, uint8_t OpSize);

  /// Adjust the encoding type for an operand based on the instruction.
  ///
  /// \param encoding The encoding to adjust; it is taken by non-const
  ///                 reference, so the adjustment is made in place.
  void adjustOperandEncoding(OperandEncoding &encoding);

  /// handleOperand - Converts a single operand from the LLVM table format to
  ///   the emitted table format, handling any duplicate operands it encounters
  ///   and then one non-duplicate.
  ///
  /// \param optional             Determines whether to assert that the
  ///                             operand exists.
  /// \param operandIndex         The index into the generated operand table.
  ///                             Incremented by this function one or more
  ///                             times (to reflect possible duplicate
  ///                             operands).
  /// \param physicalOperandIndex The index of the current operand into the
  ///                             set of non-duplicate ('physical') operands.
  ///                             Incremented by this function once.
  /// \param numPhysicalOperands  The number of non-duplicate operands in the
  ///                             instructions.
  /// \param operandMapping       The operand mapping, which has an entry for
  ///                             each operand that indicates whether it is a
  ///                             duplicate, and of what.
  /// \param encodingFromString   Callback taking an operand string and an
  ///                             OpSize value and returning an
  ///                             OperandEncoding.  NOTE(review): presumably
  ///                             one of the static *EncodingFromString
  ///                             functions of this class -- confirm at the
  ///                             call sites.
  void handleOperand(bool optional, unsigned &operandIndex,
                     unsigned &physicalOperandIndex,
                     unsigned numPhysicalOperands,
                     const unsigned *operandMapping,
                     OperandEncoding (*encodingFromString)(const std::string &,
                                                           uint8_t OpSize));

  /// emitInstructionSpecifier - Loads the instruction specifier for the current
  ///   instruction into a DisassemblerTables.
  ///
  void emitInstructionSpecifier();

  /// emitDecodePath - Populates the proper fields in the decode tables
  ///   corresponding to the decode paths for this instruction.
  ///
  /// \param tables The DisassemblerTables to populate with the decode
  ///               information for the current instruction.
  void emitDecodePath(DisassemblerTables &tables) const;

public:
  /// Constructor - Initializes a RecognizableInstr with the appropriate fields
  ///   from a CodeGenInstruction.
  ///
  /// \param tables The DisassemblerTables that the specifier will be added to.
  /// \param insn   The CodeGenInstruction to extract information from.
  /// \param uid    The unique ID of the current instruction.
  RecognizableInstr(DisassemblerTables &tables, const CodeGenInstruction &insn,
                    InstrUID uid);

  /// processInstr - Accepts a CodeGenInstruction and loads decode information
  ///   for it into a DisassemblerTables if appropriate.
  ///
  /// \param tables The DisassemblerTables to be populated with decode
  ///               information.
  /// \param insn   The CodeGenInstruction to be used as a source for this
  ///               information.
  /// \param uid    The unique ID of the instruction.
  static void processInstr(DisassemblerTables &tables,
                           const CodeGenInstruction &insn, InstrUID uid);
};
376 | |
// Free helper functions of the X86Disassembler namespace. Only the
// prototypes are visible in this header.
// NOTE(review): the descriptions below are inferred from the names and
// signatures alone -- confirm against the definitions.

/// \returns presumably the mnemonic of instruction \p I for the assembler
/// variant numbered \p Variant.
std::string getMnemonic(const CodeGenInstruction *I, unsigned Variant);
/// \returns presumably true if the operand record \p Rec is a register
/// operand.
bool isRegisterOperand(const Record *Rec);
/// \returns presumably true if the operand record \p Rec is a memory operand.
bool isMemoryOperand(const Record *Rec);
/// \returns presumably true if the operand record \p Rec is an immediate
/// operand.
bool isImmediateOperand(const Record *Rec);
/// \returns presumably the size of the register operand described by
/// \p RegRec (unit not visible from this header).
unsigned getRegOperandSize(const Record *RegRec);
/// \returns presumably the size of the memory operand described by
/// \p MemRec (unit not visible from this header).
unsigned getMemOperandSize(const Record *MemRec);
383 | } // namespace X86Disassembler |
384 | } // namespace llvm |
385 | #endif |
386 | |