1//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is part of the X86 Disassembler.
10// It contains code to translate the data produced by the decoder into
11// MCInsts.
12//
13//
14// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15// 64-bit X86 instruction sets. The main decode sequence for an assembly
16// instruction in this disassembler is:
17//
18// 1. Read the prefix bytes and determine the attributes of the instruction.
19// These attributes, recorded in enum attributeBits
20// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21// provides a mapping from bitmasks to contexts, which are represented by
22// enum InstructionContext (ibid.).
23//
24// 2. Read the opcode, and determine what kind of opcode it is. The
25// disassembler distinguishes four kinds of opcodes, which are enumerated in
26// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29//
30// 3. Depending on the opcode type, look in one of four ClassDecision structures
31// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32// OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get
33// a ModRMDecision (ibid.).
34//
35// 4. Some instructions, such as escape opcodes or extended opcodes, or even
36// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39// ModR/M byte is required and how to interpret it.
40//
41// 5. After resolving the ModRMDecision, the disassembler has a unique ID
42// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44// meanings of its operands.
45//
46// 6. For each operand, its encoding is an entry from OperandEncoding
47// (X86DisassemblerDecoderCommon.h) and its type is an entry from
48// OperandType (ibid.). The encoding indicates how to read it from the
49// instruction; the type indicates how to interpret the value once it has
50// been read. For example, a register operand could be stored in the R/M
51// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53// register, for instance). Given this information, the operands can be
54// extracted and interpreted.
55//
56// 7. As the last step, the disassembler translates the instruction information
57// and operands into a format understandable by the client - in this case, an
58// MCInst for use by the MC infrastructure.
59//
60// The disassembler is broken broadly into two parts: the table emitter that
61// emits the instruction decode tables discussed above during compilation, and
62// the disassembler itself. The table emitter is documented in more detail in
63// utils/TableGen/X86DisassemblerEmitter.h.
64//
65// X86Disassembler.cpp contains the code responsible for step 7, and for
66// invoking the decoder to execute steps 1-6.
67// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68// table emitter and the disassembler.
69// X86DisassemblerDecoder.h contains the public interface of the decoder,
70// factored out into C for possible use by other projects.
71// X86DisassemblerDecoder.c contains the source code of the decoder, which is
72// responsible for steps 1-6.
73//
74//===----------------------------------------------------------------------===//
75
76#include "MCTargetDesc/X86BaseInfo.h"
77#include "MCTargetDesc/X86MCTargetDesc.h"
78#include "TargetInfo/X86TargetInfo.h"
79#include "X86DisassemblerDecoder.h"
80#include "llvm-c/Visibility.h"
81#include "llvm/MC/MCContext.h"
82#include "llvm/MC/MCDisassembler/MCDisassembler.h"
83#include "llvm/MC/MCExpr.h"
84#include "llvm/MC/MCInst.h"
85#include "llvm/MC/MCInstrInfo.h"
86#include "llvm/MC/MCSubtargetInfo.h"
87#include "llvm/MC/TargetRegistry.h"
88#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
90#include "llvm/Support/raw_ostream.h"
91
92using namespace llvm;
93using namespace llvm::X86Disassembler;
94
95#define DEBUG_TYPE "x86-disassembler"
96
// Emits a debug-only message prefixed with the source line it was issued
// from. The definition deliberately has no trailing semicolon: the call site
// supplies it, so `debug(...);` is exactly one statement and stays correct
// inside an unbraced if/else.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s)
98
// Describes how the ModR/M byte (if one is needed) selects the instruction
// for a single opcode.
//   modrm_type     - says whether a ModR/M byte is needed and how its value is
//                    to be interpreted.
//   instructionIDs - index of the first modRMTable entry for this opcode; the
//                    ModR/M byte selects an entry relative to it.
// Once both are known, decoding has narrowed down to a single instruction.
struct ModRMDecision {
  uint8_t modrm_type;
  uint16_t instructionIDs;
};
106
107// Specifies which set of ModR/M->instruction tables to look at
108// given a particular opcode.
109struct OpcodeDecision {
110 ModRMDecision modRMDecisions[256];
111};
112
113// Specifies which opcode->instruction tables to look at given
114// a particular context (set of attributes). Since there are many possible
115// contexts, the decoder first uses CONTEXTS_SYM to determine which context
116// applies given a specific set of attributes. Hence there are only IC_max
117// entries in this table, rather than 2^(ATTR_max).
118struct ContextDecision {
119 OpcodeDecision opcodeDecisions[IC_max];
120};
121
122#include "X86GenDisassemblerTables.inc"
123
124static InstrUID decode(OpcodeType type, InstructionContext insnContext,
125 uint8_t opcode, uint8_t modRM) {
126 const struct ModRMDecision *dec;
127
128 switch (type) {
129 case ONEBYTE:
130 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
131 break;
132 case TWOBYTE:
133 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
134 break;
135 case THREEBYTE_38:
136 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
137 break;
138 case THREEBYTE_3A:
139 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
140 break;
141 case XOP8_MAP:
142 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
143 break;
144 case XOP9_MAP:
145 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
146 break;
147 case XOPA_MAP:
148 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
149 break;
150 case THREEDNOW_MAP:
151 dec =
152 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
153 break;
154 case MAP4:
155 dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
156 break;
157 case MAP5:
158 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
159 break;
160 case MAP6:
161 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
162 break;
163 case MAP7:
164 dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
165 break;
166 }
167
168 switch (dec->modrm_type) {
169 default:
170 llvm_unreachable("Corrupt table! Unknown modrm_type");
171 return 0;
172 case MODRM_ONEENTRY:
173 return modRMTable[dec->instructionIDs];
174 case MODRM_SPLITRM:
175 if (modFromModRM(modRM) == 0x3)
176 return modRMTable[dec->instructionIDs + 1];
177 return modRMTable[dec->instructionIDs];
178 case MODRM_SPLITREG:
179 if (modFromModRM(modRM) == 0x3)
180 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
181 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
182 case MODRM_SPLITMISC:
183 if (modFromModRM(modRM) == 0x3)
184 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
185 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
186 case MODRM_FULL:
187 return modRMTable[dec->instructionIDs + modRM];
188 }
189}
190
191static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
192 uint64_t offset = insn->readerCursor - insn->startLocation;
193 if (offset >= insn->bytes.size())
194 return true;
195 byte = insn->bytes[offset];
196 return false;
197}
198
199template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
200 auto r = insn->bytes;
201 uint64_t offset = insn->readerCursor - insn->startLocation;
202 if (offset + sizeof(T) > r.size())
203 return true;
204 ptr = support::endian::read<T>(&r[offset], llvm::endianness::little);
205 insn->readerCursor += sizeof(T);
206 return false;
207}
208
209static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
210 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
211}
212
213static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
214 return insn->mode == MODE_64BIT && prefix == 0xd5;
215}
216
217// Consumes all of an instruction's prefix bytes, and marks the
218// instruction as having them. Also sets the instruction's default operand,
219// address, and other relevant data sizes to report operands correctly.
220//
221// insn must not be empty.
222static int readPrefixes(struct InternalInstruction *insn) {
223 bool isPrefix = true;
224 uint8_t byte = 0;
225 uint8_t nextByte;
226
227 LLVM_DEBUG(dbgs() << "readPrefixes()");
228
229 while (isPrefix) {
230 // If we fail reading prefixes, just stop here and let the opcode reader
231 // deal with it.
232 if (consume(insn, ptr&: byte))
233 break;
234
235 // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
236 // break and let it be disassembled as a normal "instruction".
237 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
238 break;
239
240 if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, byte&: nextByte)) {
241 // If the byte is 0xf2 or 0xf3, and any of the following conditions are
242 // met:
243 // - it is followed by a LOCK (0xf0) prefix
244 // - it is followed by an xchg instruction
245 // then it should be disassembled as a xacquire/xrelease not repne/rep.
246 if (((nextByte == 0xf0) ||
247 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
248 insn->xAcquireRelease = true;
249 if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
250 break;
251 }
252 // Also if the byte is 0xf3, and the following condition is met:
253 // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
254 // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
255 // then it should be disassembled as an xrelease not rep.
256 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
257 nextByte == 0xc6 || nextByte == 0xc7)) {
258 insn->xAcquireRelease = true;
259 break;
260 }
261 if (isREX(insn, prefix: nextByte)) {
262 uint8_t nnextByte;
263 // Go to REX prefix after the current one
264 if (consume(insn, ptr&: nnextByte))
265 return -1;
266 // We should be able to read next byte after REX prefix
267 if (peek(insn, byte&: nnextByte))
268 return -1;
269 --insn->readerCursor;
270 }
271 }
272
273 switch (byte) {
274 case 0xf0: // LOCK
275 insn->hasLockPrefix = true;
276 break;
277 case 0xf2: // REPNE/REPNZ
278 case 0xf3: { // REP or REPE/REPZ
279 uint8_t nextByte;
280 if (peek(insn, byte&: nextByte))
281 break;
282 // TODO:
283 // 1. There could be several 0x66
284 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
285 // it's not mandatory prefix
286 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
287 // 0x0f exactly after it to be mandatory prefix
288 // 4. if (nextByte == 0xd5) it's REX2 and we need
289 // 0x0f exactly after it to be mandatory prefix
290 if (isREX(insn, prefix: nextByte) || isREX2(insn, prefix: nextByte) || nextByte == 0x0f ||
291 nextByte == 0x66)
292 // The last of 0xf2 /0xf3 is mandatory prefix
293 insn->mandatoryPrefix = byte;
294 insn->repeatPrefix = byte;
295 break;
296 }
297 case 0x2e: // CS segment override -OR- Branch not taken
298 insn->segmentOverride = SEG_OVERRIDE_CS;
299 break;
300 case 0x36: // SS segment override -OR- Branch taken
301 insn->segmentOverride = SEG_OVERRIDE_SS;
302 break;
303 case 0x3e: // DS segment override
304 insn->segmentOverride = SEG_OVERRIDE_DS;
305 break;
306 case 0x26: // ES segment override
307 insn->segmentOverride = SEG_OVERRIDE_ES;
308 break;
309 case 0x64: // FS segment override
310 insn->segmentOverride = SEG_OVERRIDE_FS;
311 break;
312 case 0x65: // GS segment override
313 insn->segmentOverride = SEG_OVERRIDE_GS;
314 break;
315 case 0x66: { // Operand-size override {
316 uint8_t nextByte;
317 insn->hasOpSize = true;
318 if (peek(insn, byte&: nextByte))
319 break;
320 // 0x66 can't overwrite existing mandatory prefix and should be ignored
321 if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, prefix: nextByte)))
322 insn->mandatoryPrefix = byte;
323 break;
324 }
325 case 0x67: // Address-size override
326 insn->hasAdSize = true;
327 break;
328 default: // Not a prefix byte
329 isPrefix = false;
330 break;
331 }
332
333 if (isREX(insn, prefix: byte)) {
334 insn->rexPrefix = byte;
335 isPrefix = true;
336 LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
337 } else if (isPrefix) {
338 insn->rexPrefix = 0;
339 }
340
341 if (isPrefix)
342 LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
343 }
344
345 insn->vectorExtensionType = TYPE_NO_VEX_XOP;
346
347 if (byte == 0x62) {
348 uint8_t byte1, byte2;
349 if (consume(insn, ptr&: byte1)) {
350 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
351 return -1;
352 }
353
354 if (peek(insn, byte&: byte2)) {
355 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
356 return -1;
357 }
358
359 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
360 insn->vectorExtensionType = TYPE_EVEX;
361 } else {
362 --insn->readerCursor; // unconsume byte1
363 --insn->readerCursor; // unconsume byte
364 }
365
366 if (insn->vectorExtensionType == TYPE_EVEX) {
367 insn->vectorExtensionPrefix[0] = byte;
368 insn->vectorExtensionPrefix[1] = byte1;
369 if (consume(insn, ptr&: insn->vectorExtensionPrefix[2])) {
370 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
371 return -1;
372 }
373 if (consume(insn, ptr&: insn->vectorExtensionPrefix[3])) {
374 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
375 return -1;
376 }
377
378 if (insn->mode == MODE_64BIT) {
379 // We simulate the REX prefix for simplicity's sake
380 insn->rexPrefix = 0x40 |
381 (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
382 (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
383 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
384 (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
385
386 // We simulate the REX2 prefix for simplicity's sake
387 insn->rex2ExtensionPrefix[1] =
388 (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |
389 (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |
390 (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);
391 }
392
393 LLVM_DEBUG(
394 dbgs() << format(
395 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
396 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
397 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
398 }
399 } else if (byte == 0xc4) {
400 uint8_t byte1;
401 if (peek(insn, byte&: byte1)) {
402 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
403 return -1;
404 }
405
406 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
407 insn->vectorExtensionType = TYPE_VEX_3B;
408 else
409 --insn->readerCursor;
410
411 if (insn->vectorExtensionType == TYPE_VEX_3B) {
412 insn->vectorExtensionPrefix[0] = byte;
413 consume(insn, ptr&: insn->vectorExtensionPrefix[1]);
414 consume(insn, ptr&: insn->vectorExtensionPrefix[2]);
415
416 // We simulate the REX prefix for simplicity's sake
417
418 if (insn->mode == MODE_64BIT)
419 insn->rexPrefix = 0x40 |
420 (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
421 (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
422 (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
423 (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
424
425 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
426 insn->vectorExtensionPrefix[0],
427 insn->vectorExtensionPrefix[1],
428 insn->vectorExtensionPrefix[2]));
429 }
430 } else if (byte == 0xc5) {
431 uint8_t byte1;
432 if (peek(insn, byte&: byte1)) {
433 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
434 return -1;
435 }
436
437 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
438 insn->vectorExtensionType = TYPE_VEX_2B;
439 else
440 --insn->readerCursor;
441
442 if (insn->vectorExtensionType == TYPE_VEX_2B) {
443 insn->vectorExtensionPrefix[0] = byte;
444 consume(insn, ptr&: insn->vectorExtensionPrefix[1]);
445
446 if (insn->mode == MODE_64BIT)
447 insn->rexPrefix =
448 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
449
450 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
451 default:
452 break;
453 case VEX_PREFIX_66:
454 insn->hasOpSize = true;
455 break;
456 }
457
458 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
459 insn->vectorExtensionPrefix[0],
460 insn->vectorExtensionPrefix[1]));
461 }
462 } else if (byte == 0x8f) {
463 uint8_t byte1;
464 if (peek(insn, byte&: byte1)) {
465 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
466 return -1;
467 }
468
469 if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
470 insn->vectorExtensionType = TYPE_XOP;
471 else
472 --insn->readerCursor;
473
474 if (insn->vectorExtensionType == TYPE_XOP) {
475 insn->vectorExtensionPrefix[0] = byte;
476 consume(insn, ptr&: insn->vectorExtensionPrefix[1]);
477 consume(insn, ptr&: insn->vectorExtensionPrefix[2]);
478
479 // We simulate the REX prefix for simplicity's sake
480
481 if (insn->mode == MODE_64BIT)
482 insn->rexPrefix = 0x40 |
483 (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
484 (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
485 (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
486 (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
487
488 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
489 default:
490 break;
491 case VEX_PREFIX_66:
492 insn->hasOpSize = true;
493 break;
494 }
495
496 LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
497 insn->vectorExtensionPrefix[0],
498 insn->vectorExtensionPrefix[1],
499 insn->vectorExtensionPrefix[2]));
500 }
501 } else if (isREX2(insn, prefix: byte)) {
502 uint8_t byte1;
503 if (peek(insn, byte&: byte1)) {
504 LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
505 return -1;
506 }
507 insn->rex2ExtensionPrefix[0] = byte;
508 consume(insn, ptr&: insn->rex2ExtensionPrefix[1]);
509
510 // We simulate the REX prefix for simplicity's sake
511 insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |
512 (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |
513 (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |
514 (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);
515 LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
516 insn->rex2ExtensionPrefix[0],
517 insn->rex2ExtensionPrefix[1]));
518 } else
519 --insn->readerCursor;
520
521 if (insn->mode == MODE_16BIT) {
522 insn->registerSize = (insn->hasOpSize ? 4 : 2);
523 insn->addressSize = (insn->hasAdSize ? 4 : 2);
524 insn->displacementSize = (insn->hasAdSize ? 4 : 2);
525 insn->immediateSize = (insn->hasOpSize ? 4 : 2);
526 } else if (insn->mode == MODE_32BIT) {
527 insn->registerSize = (insn->hasOpSize ? 2 : 4);
528 insn->addressSize = (insn->hasAdSize ? 2 : 4);
529 insn->displacementSize = (insn->hasAdSize ? 2 : 4);
530 insn->immediateSize = (insn->hasOpSize ? 2 : 4);
531 } else if (insn->mode == MODE_64BIT) {
532 insn->displacementSize = 4;
533 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
534 insn->registerSize = 8;
535 insn->addressSize = (insn->hasAdSize ? 4 : 8);
536 insn->immediateSize = 4;
537 insn->hasOpSize = false;
538 } else {
539 insn->registerSize = (insn->hasOpSize ? 2 : 4);
540 insn->addressSize = (insn->hasAdSize ? 4 : 8);
541 insn->immediateSize = (insn->hasOpSize ? 2 : 4);
542 }
543 }
544
545 return 0;
546}
547
548// Consumes the SIB byte to determine addressing information.
549static int readSIB(struct InternalInstruction *insn) {
550 SIBBase sibBaseBase = SIB_BASE_NONE;
551 uint8_t index, base;
552
553 LLVM_DEBUG(dbgs() << "readSIB()");
554 switch (insn->addressSize) {
555 case 2:
556 default:
557 llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
558 case 4:
559 insn->sibIndexBase = SIB_INDEX_EAX;
560 sibBaseBase = SIB_BASE_EAX;
561 break;
562 case 8:
563 insn->sibIndexBase = SIB_INDEX_RAX;
564 sibBaseBase = SIB_BASE_RAX;
565 break;
566 }
567
568 if (consume(insn, ptr&: insn->sib))
569 return -1;
570
571 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
572 (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
573
574 if (index == 0x4) {
575 insn->sibIndex = SIB_INDEX_NONE;
576 } else {
577 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
578 }
579
580 insn->sibScale = 1 << scaleFromSIB(insn->sib);
581
582 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
583 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
584
585 switch (base) {
586 case 0x5:
587 case 0xd:
588 switch (modFromModRM(insn->modRM)) {
589 case 0x0:
590 insn->eaDisplacement = EA_DISP_32;
591 insn->sibBase = SIB_BASE_NONE;
592 break;
593 case 0x1:
594 insn->eaDisplacement = EA_DISP_8;
595 insn->sibBase = (SIBBase)(sibBaseBase + base);
596 break;
597 case 0x2:
598 insn->eaDisplacement = EA_DISP_32;
599 insn->sibBase = (SIBBase)(sibBaseBase + base);
600 break;
601 default:
602 llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
603 }
604 break;
605 default:
606 insn->sibBase = (SIBBase)(sibBaseBase + base);
607 break;
608 }
609
610 return 0;
611}
612
613static int readDisplacement(struct InternalInstruction *insn) {
614 int8_t d8;
615 int16_t d16;
616 int32_t d32;
617 LLVM_DEBUG(dbgs() << "readDisplacement()");
618
619 insn->displacementOffset = insn->readerCursor - insn->startLocation;
620 switch (insn->eaDisplacement) {
621 case EA_DISP_NONE:
622 break;
623 case EA_DISP_8:
624 if (consume(insn, ptr&: d8))
625 return -1;
626 insn->displacement = d8;
627 break;
628 case EA_DISP_16:
629 if (consume(insn, ptr&: d16))
630 return -1;
631 insn->displacement = d16;
632 break;
633 case EA_DISP_32:
634 if (consume(insn, ptr&: d32))
635 return -1;
636 insn->displacement = d32;
637 break;
638 }
639
640 return 0;
641}
642
643// Consumes all addressing information (ModR/M byte, SIB byte, and displacement.
644static int readModRM(struct InternalInstruction *insn) {
645 uint8_t mod, rm, reg;
646 LLVM_DEBUG(dbgs() << "readModRM()");
647
648 if (insn->consumedModRM)
649 return 0;
650
651 if (consume(insn, ptr&: insn->modRM))
652 return -1;
653 insn->consumedModRM = true;
654
655 mod = modFromModRM(insn->modRM);
656 rm = rmFromModRM(insn->modRM);
657 reg = regFromModRM(insn->modRM);
658
659 // This goes by insn->registerSize to pick the correct register, which messes
660 // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
661 // fixupReg().
662 switch (insn->registerSize) {
663 case 2:
664 insn->regBase = MODRM_REG_AX;
665 insn->eaRegBase = EA_REG_AX;
666 break;
667 case 4:
668 insn->regBase = MODRM_REG_EAX;
669 insn->eaRegBase = EA_REG_EAX;
670 break;
671 case 8:
672 insn->regBase = MODRM_REG_RAX;
673 insn->eaRegBase = EA_REG_RAX;
674 break;
675 }
676
677 reg |= (rFromREX(insn->rexPrefix) << 3) |
678 (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
679 rm |= (bFromREX(insn->rexPrefix) << 3) |
680 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
681
682 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
683 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
684
685 insn->reg = (Reg)(insn->regBase + reg);
686
687 switch (insn->addressSize) {
688 case 2: {
689 EABase eaBaseBase = EA_BASE_BX_SI;
690
691 switch (mod) {
692 case 0x0:
693 if (rm == 0x6) {
694 insn->eaBase = EA_BASE_NONE;
695 insn->eaDisplacement = EA_DISP_16;
696 if (readDisplacement(insn))
697 return -1;
698 } else {
699 insn->eaBase = (EABase)(eaBaseBase + rm);
700 insn->eaDisplacement = EA_DISP_NONE;
701 }
702 break;
703 case 0x1:
704 insn->eaBase = (EABase)(eaBaseBase + rm);
705 insn->eaDisplacement = EA_DISP_8;
706 insn->displacementSize = 1;
707 if (readDisplacement(insn))
708 return -1;
709 break;
710 case 0x2:
711 insn->eaBase = (EABase)(eaBaseBase + rm);
712 insn->eaDisplacement = EA_DISP_16;
713 if (readDisplacement(insn))
714 return -1;
715 break;
716 case 0x3:
717 insn->eaBase = (EABase)(insn->eaRegBase + rm);
718 if (readDisplacement(insn))
719 return -1;
720 break;
721 }
722 break;
723 }
724 case 4:
725 case 8: {
726 EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
727
728 switch (mod) {
729 case 0x0:
730 insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
731 // In determining whether RIP-relative mode is used (rm=5),
732 // or whether a SIB byte is present (rm=4),
733 // the extension bits (REX.b and EVEX.x) are ignored.
734 switch (rm & 7) {
735 case 0x4: // SIB byte is present
736 insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
737 if (readSIB(insn) || readDisplacement(insn))
738 return -1;
739 break;
740 case 0x5: // RIP-relative
741 insn->eaBase = EA_BASE_NONE;
742 insn->eaDisplacement = EA_DISP_32;
743 if (readDisplacement(insn))
744 return -1;
745 break;
746 default:
747 insn->eaBase = (EABase)(eaBaseBase + rm);
748 break;
749 }
750 break;
751 case 0x1:
752 insn->displacementSize = 1;
753 [[fallthrough]];
754 case 0x2:
755 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
756 switch (rm & 7) {
757 case 0x4: // SIB byte is present
758 insn->eaBase = EA_BASE_sib;
759 if (readSIB(insn) || readDisplacement(insn))
760 return -1;
761 break;
762 default:
763 insn->eaBase = (EABase)(eaBaseBase + rm);
764 if (readDisplacement(insn))
765 return -1;
766 break;
767 }
768 break;
769 case 0x3:
770 insn->eaDisplacement = EA_DISP_NONE;
771 insn->eaBase = (EABase)(insn->eaRegBase + rm);
772 break;
773 }
774 break;
775 }
776 } // switch (insn->addressSize)
777
778 return 0;
779}
780
781#define GENERIC_FIXUP_FUNC(name, base, prefix) \
782 static uint16_t name(struct InternalInstruction *insn, OperandType type, \
783 uint8_t index, uint8_t *valid) { \
784 *valid = 1; \
785 switch (type) { \
786 default: \
787 debug("Unhandled register type"); \
788 *valid = 0; \
789 return 0; \
790 case TYPE_Rv: \
791 return base + index; \
792 case TYPE_R8: \
793 if (insn->rexPrefix && index >= 4 && index <= 7) \
794 return prefix##_SPL + (index - 4); \
795 else \
796 return prefix##_AL + index; \
797 case TYPE_R16: \
798 return prefix##_AX + index; \
799 case TYPE_R32: \
800 return prefix##_EAX + index; \
801 case TYPE_R64: \
802 return prefix##_RAX + index; \
803 case TYPE_ZMM: \
804 return prefix##_ZMM0 + index; \
805 case TYPE_YMM: \
806 return prefix##_YMM0 + index; \
807 case TYPE_XMM: \
808 return prefix##_XMM0 + index; \
809 case TYPE_TMM: \
810 if (index > 7) \
811 *valid = 0; \
812 return prefix##_TMM0 + index; \
813 case TYPE_VK: \
814 index &= 0xf; \
815 if (index > 7) \
816 *valid = 0; \
817 return prefix##_K0 + index; \
818 case TYPE_VK_PAIR: \
819 if (index > 7) \
820 *valid = 0; \
821 return prefix##_K0_K1 + (index / 2); \
822 case TYPE_MM64: \
823 return prefix##_MM0 + (index & 0x7); \
824 case TYPE_SEGMENTREG: \
825 if ((index & 7) > 5) \
826 *valid = 0; \
827 return prefix##_ES + (index & 7); \
828 case TYPE_DEBUGREG: \
829 if (index > 15) \
830 *valid = 0; \
831 return prefix##_DR0 + index; \
832 case TYPE_CONTROLREG: \
833 if (index > 15) \
834 *valid = 0; \
835 return prefix##_CR0 + index; \
836 case TYPE_MVSIBX: \
837 return prefix##_XMM0 + index; \
838 case TYPE_MVSIBY: \
839 return prefix##_YMM0 + index; \
840 case TYPE_MVSIBZ: \
841 return prefix##_ZMM0 + index; \
842 } \
843 }
844
845// Consult an operand type to determine the meaning of the reg or R/M field. If
846// the operand is an XMM operand, for example, an operand would be XMM0 instead
847// of AX, which readModRM() would otherwise misinterpret it as.
848//
849// @param insn - The instruction containing the operand.
850// @param type - The operand type.
851// @param index - The existing value of the field as reported by readModRM().
852// @param valid - The address of a uint8_t. The target is set to 1 if the
853// field is valid for the register class; 0 if not.
854// @return - The proper value.
855GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
856GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
857
858// Consult an operand specifier to determine which of the fixup*Value functions
859// to use in correcting readModRM()'ss interpretation.
860//
861// @param insn - See fixup*Value().
862// @param op - The operand specifier.
863// @return - 0 if fixup was successful; -1 if the register returned was
864// invalid for its class.
865static int fixupReg(struct InternalInstruction *insn,
866 const struct OperandSpecifier *op) {
867 uint8_t valid;
868 LLVM_DEBUG(dbgs() << "fixupReg()");
869
870 switch ((OperandEncoding)op->encoding) {
871 default:
872 debug("Expected a REG or R/M encoding in fixupReg");
873 return -1;
874 case ENCODING_VVVV:
875 insn->vvvv =
876 (Reg)fixupRegValue(insn, type: (OperandType)op->type, index: insn->vvvv, valid: &valid);
877 if (!valid)
878 return -1;
879 break;
880 case ENCODING_REG:
881 insn->reg = (Reg)fixupRegValue(insn, type: (OperandType)op->type,
882 index: insn->reg - insn->regBase, valid: &valid);
883 if (!valid)
884 return -1;
885 break;
886 CASE_ENCODING_RM:
887 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
888 modFromModRM(insn->modRM) == 3) {
889 // EVEX_X can extend the register id to 32 for a non-GPR register that is
890 // encoded in RM.
891 // mode : MODE_64_BIT
892 // Only 8 vector registers are available in 32 bit mode
893 // mod : 3
894 // RM encodes a register
895 switch (op->type) {
896 case TYPE_Rv:
897 case TYPE_R8:
898 case TYPE_R16:
899 case TYPE_R32:
900 case TYPE_R64:
901 break;
902 default:
903 insn->eaBase =
904 (EABase)(insn->eaBase +
905 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
906 break;
907 }
908 }
909 [[fallthrough]];
910 case ENCODING_SIB:
911 if (insn->eaBase >= insn->eaRegBase) {
912 insn->eaBase = (EABase)fixupRMValue(
913 insn, type: (OperandType)op->type, index: insn->eaBase - insn->eaRegBase, valid: &valid);
914 if (!valid)
915 return -1;
916 }
917 break;
918 }
919
920 return 0;
921}
922
923// Read the opcode (except the ModR/M byte in the case of extended or escape
924// opcodes).
925static bool readOpcode(struct InternalInstruction *insn) {
926 uint8_t current;
927 LLVM_DEBUG(dbgs() << "readOpcode()");
928
929 insn->opcodeType = ONEBYTE;
930 if (insn->vectorExtensionType == TYPE_EVEX) {
931 switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
932 default:
933 LLVM_DEBUG(
934 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
935 mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
936 return true;
937 case VEX_LOB_0F:
938 insn->opcodeType = TWOBYTE;
939 return consume(insn, ptr&: insn->opcode);
940 case VEX_LOB_0F38:
941 insn->opcodeType = THREEBYTE_38;
942 return consume(insn, ptr&: insn->opcode);
943 case VEX_LOB_0F3A:
944 insn->opcodeType = THREEBYTE_3A;
945 return consume(insn, ptr&: insn->opcode);
946 case VEX_LOB_MAP4:
947 insn->opcodeType = MAP4;
948 return consume(insn, ptr&: insn->opcode);
949 case VEX_LOB_MAP5:
950 insn->opcodeType = MAP5;
951 return consume(insn, ptr&: insn->opcode);
952 case VEX_LOB_MAP6:
953 insn->opcodeType = MAP6;
954 return consume(insn, ptr&: insn->opcode);
955 case VEX_LOB_MAP7:
956 insn->opcodeType = MAP7;
957 return consume(insn, ptr&: insn->opcode);
958 }
959 } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
960 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
961 default:
962 LLVM_DEBUG(
963 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
964 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
965 return true;
966 case VEX_LOB_0F:
967 insn->opcodeType = TWOBYTE;
968 return consume(insn, ptr&: insn->opcode);
969 case VEX_LOB_0F38:
970 insn->opcodeType = THREEBYTE_38;
971 return consume(insn, ptr&: insn->opcode);
972 case VEX_LOB_0F3A:
973 insn->opcodeType = THREEBYTE_3A;
974 return consume(insn, ptr&: insn->opcode);
975 case VEX_LOB_MAP5:
976 insn->opcodeType = MAP5;
977 return consume(insn, ptr&: insn->opcode);
978 case VEX_LOB_MAP6:
979 insn->opcodeType = MAP6;
980 return consume(insn, ptr&: insn->opcode);
981 case VEX_LOB_MAP7:
982 insn->opcodeType = MAP7;
983 return consume(insn, ptr&: insn->opcode);
984 }
985 } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
986 insn->opcodeType = TWOBYTE;
987 return consume(insn, ptr&: insn->opcode);
988 } else if (insn->vectorExtensionType == TYPE_XOP) {
989 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
990 default:
991 LLVM_DEBUG(
992 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
993 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
994 return true;
995 case XOP_MAP_SELECT_8:
996 insn->opcodeType = XOP8_MAP;
997 return consume(insn, ptr&: insn->opcode);
998 case XOP_MAP_SELECT_9:
999 insn->opcodeType = XOP9_MAP;
1000 return consume(insn, ptr&: insn->opcode);
1001 case XOP_MAP_SELECT_A:
1002 insn->opcodeType = XOPA_MAP;
1003 return consume(insn, ptr&: insn->opcode);
1004 }
1005 } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {
1006 // m bit indicates opcode map 1
1007 insn->opcodeType = TWOBYTE;
1008 return consume(insn, ptr&: insn->opcode);
1009 }
1010
1011 if (consume(insn, ptr&: current))
1012 return true;
1013
1014 if (current == 0x0f) {
1015 LLVM_DEBUG(
1016 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
1017 if (consume(insn, ptr&: current))
1018 return true;
1019
1020 if (current == 0x38) {
1021 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1022 current));
1023 if (consume(insn, ptr&: current))
1024 return true;
1025
1026 insn->opcodeType = THREEBYTE_38;
1027 } else if (current == 0x3a) {
1028 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1029 current));
1030 if (consume(insn, ptr&: current))
1031 return true;
1032
1033 insn->opcodeType = THREEBYTE_3A;
1034 } else if (current == 0x0f) {
1035 LLVM_DEBUG(
1036 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
1037
1038 // Consume operands before the opcode to comply with the 3DNow encoding
1039 if (readModRM(insn))
1040 return true;
1041
1042 if (consume(insn, ptr&: current))
1043 return true;
1044
1045 insn->opcodeType = THREEDNOW_MAP;
1046 } else {
1047 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
1048 insn->opcodeType = TWOBYTE;
1049 }
1050 } else if (insn->mandatoryPrefix)
1051 // The opcode with mandatory prefix must start with opcode escape.
1052 // If not it's legacy repeat prefix
1053 insn->mandatoryPrefix = 0;
1054
1055 // At this point we have consumed the full opcode.
1056 // Anything we consume from here on must be unconsumed.
1057 insn->opcode = current;
1058
1059 return false;
1060}
1061
// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
//
// The two names must have the same length and match character by character,
// except that the following substitutions (orig -> equiv) are tolerated at any
// position: 'Q' or 'L' -> 'W', '6' or '3' -> '1', and '4' or '2' -> '6'.
//
// @param orig  - NUL-terminated name of the wider instruction.
// @param equiv - NUL-terminated name of the candidate 16-bit instruction.
// @return      - true if equiv matches orig under the rules above.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *wide = orig;
  const char *narrow = equiv;

  while (*wide != '\0' && *narrow != '\0') {
    const char w = *wide++;
    const char n = *narrow++;
    if (w == n)
      continue;

    const bool suffixLetter = n == 'W' && (w == 'Q' || w == 'L');
    const bool firstDigit = n == '1' && (w == '6' || w == '3');
    const bool secondDigit = n == '6' && (w == '4' || w == '2');
    if (!suffixLetter && !firstDigit && !secondDigit)
      return false;
  }

  // Equivalent only if both names ended at the same position.
  return *wide == '\0' && *narrow == '\0';
}
1080
// Determine whether this instruction is a 64-bit instruction.
//
// The test is purely textual: the name counts as 64-bit when it contains the
// two-character sequence "64" anywhere.
//
// @param name - NUL-terminated instruction name.
// @return     - true if "64" occurs in name; false otherwise.
static bool is64Bit(const char *name) {
  for (const char *cursor = name; *cursor != '\0'; ++cursor)
    if (cursor[0] == '6' && cursor[1] == '4')
      return true;
  return false;
}
1090
1091// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1092// for extended and escape opcodes, and using a supplied attribute mask.
1093static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1094 struct InternalInstruction *insn,
1095 uint16_t attrMask) {
1096 auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1097 const ContextDecision *decision;
1098 switch (insn->opcodeType) {
1099 case ONEBYTE:
1100 decision = &ONEBYTE_SYM;
1101 break;
1102 case TWOBYTE:
1103 decision = &TWOBYTE_SYM;
1104 break;
1105 case THREEBYTE_38:
1106 decision = &THREEBYTE38_SYM;
1107 break;
1108 case THREEBYTE_3A:
1109 decision = &THREEBYTE3A_SYM;
1110 break;
1111 case XOP8_MAP:
1112 decision = &XOP8_MAP_SYM;
1113 break;
1114 case XOP9_MAP:
1115 decision = &XOP9_MAP_SYM;
1116 break;
1117 case XOPA_MAP:
1118 decision = &XOPA_MAP_SYM;
1119 break;
1120 case THREEDNOW_MAP:
1121 decision = &THREEDNOW_MAP_SYM;
1122 break;
1123 case MAP4:
1124 decision = &MAP4_SYM;
1125 break;
1126 case MAP5:
1127 decision = &MAP5_SYM;
1128 break;
1129 case MAP6:
1130 decision = &MAP6_SYM;
1131 break;
1132 case MAP7:
1133 decision = &MAP7_SYM;
1134 break;
1135 }
1136
1137 if (decision->opcodeDecisions[insnCtx]
1138 .modRMDecisions[insn->opcode]
1139 .modrm_type != MODRM_ONEENTRY) {
1140 if (readModRM(insn))
1141 return -1;
1142 *instructionID =
1143 decode(type: insn->opcodeType, insnContext: insnCtx, opcode: insn->opcode, modRM: insn->modRM);
1144 } else {
1145 *instructionID = decode(type: insn->opcodeType, insnContext: insnCtx, opcode: insn->opcode, modRM: 0);
1146 }
1147
1148 return 0;
1149}
1150
1151static bool isCCMPOrCTEST(InternalInstruction *insn) {
1152 if (insn->opcodeType != MAP4)
1153 return false;
1154 if (insn->opcode == 0x83 && regFromModRM(insn->modRM) == 7)
1155 return true;
1156 switch (insn->opcode & 0xfe) {
1157 default:
1158 return false;
1159 case 0x38:
1160 case 0x3a:
1161 case 0x84:
1162 return true;
1163 case 0x80:
1164 return regFromModRM(insn->modRM) == 7;
1165 case 0xf6:
1166 return regFromModRM(insn->modRM) == 0;
1167 }
1168}
1169
1170static bool isNF(InternalInstruction *insn) {
1171 if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1172 return false;
1173 if (insn->opcodeType == MAP4)
1174 return true;
1175 // Below NF instructions are not in map4.
1176 if (insn->opcodeType == THREEBYTE_38 &&
1177 ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE) {
1178 switch (insn->opcode) {
1179 case 0xf2: // ANDN
1180 case 0xf3: // BLSI, BLSR, BLSMSK
1181 case 0xf5: // BZHI
1182 case 0xf7: // BEXTR
1183 return true;
1184 default:
1185 break;
1186 }
1187 }
1188 return false;
1189}
1190
1191// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1192// for extended and escape opcodes. Determines the attributes and context for
1193// the instruction before doing so.
1194static int getInstructionID(struct InternalInstruction *insn,
1195 const MCInstrInfo *mii) {
1196 uint16_t attrMask;
1197 uint16_t instructionID;
1198
1199 LLVM_DEBUG(dbgs() << "getID()");
1200
1201 attrMask = ATTR_NONE;
1202
1203 if (insn->mode == MODE_64BIT)
1204 attrMask |= ATTR_64BIT;
1205
1206 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1207 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1208
1209 if (insn->vectorExtensionType == TYPE_EVEX) {
1210 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1211 case VEX_PREFIX_66:
1212 attrMask |= ATTR_OPSIZE;
1213 break;
1214 case VEX_PREFIX_F3:
1215 attrMask |= ATTR_XS;
1216 break;
1217 case VEX_PREFIX_F2:
1218 attrMask |= ATTR_XD;
1219 break;
1220 }
1221
1222 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1223 attrMask |= ATTR_EVEXKZ;
1224 if (isNF(insn) && !readModRM(insn) &&
1225 !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
1226 attrMask |= ATTR_EVEXNF;
1227 // aaa is not used a opmask in MAP4
1228 else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&
1229 (insn->opcodeType != MAP4))
1230 attrMask |= ATTR_EVEXK;
1231 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) {
1232 attrMask |= ATTR_EVEXB;
1233 if (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) && !readModRM(insn) &&
1234 modFromModRM(insn->modRM) == 3)
1235 attrMask |= ATTR_EVEXU;
1236 }
1237 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1238 attrMask |= ATTR_VEXL;
1239 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1240 attrMask |= ATTR_EVEXL2;
1241 } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1242 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1243 case VEX_PREFIX_66:
1244 attrMask |= ATTR_OPSIZE;
1245 break;
1246 case VEX_PREFIX_F3:
1247 attrMask |= ATTR_XS;
1248 break;
1249 case VEX_PREFIX_F2:
1250 attrMask |= ATTR_XD;
1251 break;
1252 }
1253
1254 if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1255 attrMask |= ATTR_VEXL;
1256 } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1257 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1258 case VEX_PREFIX_66:
1259 attrMask |= ATTR_OPSIZE;
1260 if (insn->hasAdSize)
1261 attrMask |= ATTR_ADSIZE;
1262 break;
1263 case VEX_PREFIX_F3:
1264 attrMask |= ATTR_XS;
1265 break;
1266 case VEX_PREFIX_F2:
1267 attrMask |= ATTR_XD;
1268 break;
1269 }
1270
1271 if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1272 attrMask |= ATTR_VEXL;
1273 } else if (insn->vectorExtensionType == TYPE_XOP) {
1274 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1275 case VEX_PREFIX_66:
1276 attrMask |= ATTR_OPSIZE;
1277 break;
1278 case VEX_PREFIX_F3:
1279 attrMask |= ATTR_XS;
1280 break;
1281 case VEX_PREFIX_F2:
1282 attrMask |= ATTR_XD;
1283 break;
1284 }
1285
1286 if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1287 attrMask |= ATTR_VEXL;
1288 } else {
1289 return -1;
1290 }
1291 } else if (!insn->mandatoryPrefix) {
1292 // If we don't have mandatory prefix we should use legacy prefixes here
1293 if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1294 attrMask |= ATTR_OPSIZE;
1295 if (insn->hasAdSize)
1296 attrMask |= ATTR_ADSIZE;
1297 if (insn->opcodeType == ONEBYTE) {
1298 if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
1299 // Special support for PAUSE
1300 attrMask |= ATTR_XS;
1301 } else {
1302 if (insn->repeatPrefix == 0xf2)
1303 attrMask |= ATTR_XD;
1304 else if (insn->repeatPrefix == 0xf3)
1305 attrMask |= ATTR_XS;
1306 }
1307 } else {
1308 switch (insn->mandatoryPrefix) {
1309 case 0xf2:
1310 attrMask |= ATTR_XD;
1311 break;
1312 case 0xf3:
1313 attrMask |= ATTR_XS;
1314 break;
1315 case 0x66:
1316 if (insn->mode != MODE_16BIT)
1317 attrMask |= ATTR_OPSIZE;
1318 if (insn->hasAdSize)
1319 attrMask |= ATTR_ADSIZE;
1320 break;
1321 case 0x67:
1322 attrMask |= ATTR_ADSIZE;
1323 break;
1324 }
1325 }
1326
1327 if (insn->rexPrefix & 0x08) {
1328 attrMask |= ATTR_REXW;
1329 attrMask &= ~ATTR_ADSIZE;
1330 }
1331
1332 // Absolute jump and pushp/popp need special handling
1333 if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
1334 (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
1335 attrMask |= ATTR_REX2;
1336
1337 if (insn->mode == MODE_16BIT) {
1338 // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1339 // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1340 if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
1341 attrMask ^= ATTR_ADSIZE;
1342 // If we're in 16-bit mode and this is one of the relative jumps and opsize
1343 // prefix isn't present, we need to force the opsize attribute since the
1344 // prefix is inverted relative to 32-bit mode.
1345 if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1346 (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1347 attrMask |= ATTR_OPSIZE;
1348
1349 if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1350 insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1351 attrMask |= ATTR_OPSIZE;
1352 }
1353
1354
1355 if (getInstructionIDWithAttrMask(instructionID: &instructionID, insn, attrMask))
1356 return -1;
1357
1358 // The following clauses compensate for limitations of the tables.
1359
1360 if (insn->mode != MODE_64BIT &&
1361 insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1362 // The tables can't distinquish between cases where the W-bit is used to
1363 // select register size and cases where its a required part of the opcode.
1364 if ((insn->vectorExtensionType == TYPE_EVEX &&
1365 wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1366 (insn->vectorExtensionType == TYPE_VEX_3B &&
1367 wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1368 (insn->vectorExtensionType == TYPE_XOP &&
1369 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1370
1371 uint16_t instructionIDWithREXW;
1372 if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithREXW, insn,
1373 attrMask: attrMask | ATTR_REXW)) {
1374 insn->instructionID = instructionID;
1375 insn->spec = &INSTRUCTIONS_SYM[instructionID];
1376 return 0;
1377 }
1378
1379 auto SpecName = mii->getName(Opcode: instructionIDWithREXW);
1380 // If not a 64-bit instruction. Switch the opcode.
1381 if (!is64Bit(name: SpecName.data())) {
1382 insn->instructionID = instructionIDWithREXW;
1383 insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1384 return 0;
1385 }
1386 }
1387 }
1388
1389 // Absolute moves, umonitor, and movdir64b need special handling.
1390 // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1391 // inverted w.r.t.
1392 // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1393 // any position.
1394 if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1395 (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
1396 (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
1397 (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
1398 // Make sure we observed the prefixes in any position.
1399 if (insn->hasAdSize)
1400 attrMask |= ATTR_ADSIZE;
1401 if (insn->hasOpSize)
1402 attrMask |= ATTR_OPSIZE;
1403
1404 // In 16-bit, invert the attributes.
1405 if (insn->mode == MODE_16BIT) {
1406 attrMask ^= ATTR_ADSIZE;
1407
1408 // The OpSize attribute is only valid with the absolute moves.
1409 if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1410 attrMask ^= ATTR_OPSIZE;
1411 }
1412
1413 if (getInstructionIDWithAttrMask(instructionID: &instructionID, insn, attrMask))
1414 return -1;
1415
1416 insn->instructionID = instructionID;
1417 insn->spec = &INSTRUCTIONS_SYM[instructionID];
1418 return 0;
1419 }
1420
1421 if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1422 !(attrMask & ATTR_OPSIZE)) {
1423 // The instruction tables make no distinction between instructions that
1424 // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1425 // particular spot (i.e., many MMX operations). In general we're
1426 // conservative, but in the specific case where OpSize is present but not in
1427 // the right place we check if there's a 16-bit operation.
1428 const struct InstructionSpecifier *spec;
1429 uint16_t instructionIDWithOpsize;
1430 llvm::StringRef specName, specWithOpSizeName;
1431
1432 spec = &INSTRUCTIONS_SYM[instructionID];
1433
1434 if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithOpsize, insn,
1435 attrMask: attrMask | ATTR_OPSIZE)) {
1436 // ModRM required with OpSize but not present. Give up and return the
1437 // version without OpSize set.
1438 insn->instructionID = instructionID;
1439 insn->spec = spec;
1440 return 0;
1441 }
1442
1443 specName = mii->getName(Opcode: instructionID);
1444 specWithOpSizeName = mii->getName(Opcode: instructionIDWithOpsize);
1445
1446 if (is16BitEquivalent(orig: specName.data(), equiv: specWithOpSizeName.data()) &&
1447 (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1448 insn->instructionID = instructionIDWithOpsize;
1449 insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1450 } else {
1451 insn->instructionID = instructionID;
1452 insn->spec = spec;
1453 }
1454 return 0;
1455 }
1456
1457 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1458 insn->rexPrefix & 0x01) {
1459 // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1460 // as XCHG %r8, %eax.
1461 const struct InstructionSpecifier *spec;
1462 uint16_t instructionIDWithNewOpcode;
1463 const struct InstructionSpecifier *specWithNewOpcode;
1464
1465 spec = &INSTRUCTIONS_SYM[instructionID];
1466
1467 // Borrow opcode from one of the other XCHGar opcodes
1468 insn->opcode = 0x91;
1469
1470 if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithNewOpcode, insn,
1471 attrMask)) {
1472 insn->opcode = 0x90;
1473
1474 insn->instructionID = instructionID;
1475 insn->spec = spec;
1476 return 0;
1477 }
1478
1479 specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1480
1481 // Change back
1482 insn->opcode = 0x90;
1483
1484 insn->instructionID = instructionIDWithNewOpcode;
1485 insn->spec = specWithNewOpcode;
1486
1487 return 0;
1488 }
1489
1490 insn->instructionID = instructionID;
1491 insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1492
1493 return 0;
1494}
1495
1496// Read an operand from the opcode field of an instruction and interprets it
1497// appropriately given the operand width. Handles AddRegFrm instructions.
1498//
1499// @param insn - the instruction whose opcode field is to be read.
1500// @param size - The width (in bytes) of the register being specified.
1501// 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1502// RAX.
1503// @return - 0 on success; nonzero otherwise.
1504static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1505 LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1506
1507 if (size == 0)
1508 size = insn->registerSize;
1509
1510 auto setOpcodeRegister = [&](unsigned base) {
1511 insn->opcodeRegister =
1512 (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |
1513 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |
1514 (insn->opcode & 7)));
1515 };
1516
1517 switch (size) {
1518 case 1:
1519 setOpcodeRegister(MODRM_REG_AL);
1520 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1521 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1522 insn->opcodeRegister =
1523 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1524 }
1525
1526 break;
1527 case 2:
1528 setOpcodeRegister(MODRM_REG_AX);
1529 break;
1530 case 4:
1531 setOpcodeRegister(MODRM_REG_EAX);
1532 break;
1533 case 8:
1534 setOpcodeRegister(MODRM_REG_RAX);
1535 break;
1536 }
1537
1538 return 0;
1539}
1540
1541// Consume an immediate operand from an instruction, given the desired operand
1542// size.
1543//
1544// @param insn - The instruction whose operand is to be read.
1545// @param size - The width (in bytes) of the operand.
1546// @return - 0 if the immediate was successfully consumed; nonzero
1547// otherwise.
1548static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1549 uint8_t imm8;
1550 uint16_t imm16;
1551 uint32_t imm32;
1552 uint64_t imm64;
1553
1554 LLVM_DEBUG(dbgs() << "readImmediate()");
1555
1556 assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
1557
1558 insn->immediateSize = size;
1559 insn->immediateOffset = insn->readerCursor - insn->startLocation;
1560
1561 switch (size) {
1562 case 1:
1563 if (consume(insn, ptr&: imm8))
1564 return -1;
1565 insn->immediates[insn->numImmediatesConsumed] = imm8;
1566 break;
1567 case 2:
1568 if (consume(insn, ptr&: imm16))
1569 return -1;
1570 insn->immediates[insn->numImmediatesConsumed] = imm16;
1571 break;
1572 case 4:
1573 if (consume(insn, ptr&: imm32))
1574 return -1;
1575 insn->immediates[insn->numImmediatesConsumed] = imm32;
1576 break;
1577 case 8:
1578 if (consume(insn, ptr&: imm64))
1579 return -1;
1580 insn->immediates[insn->numImmediatesConsumed] = imm64;
1581 break;
1582 default:
1583 llvm_unreachable("invalid size");
1584 }
1585
1586 insn->numImmediatesConsumed++;
1587
1588 return 0;
1589}
1590
1591// Consume vvvv from an instruction if it has a VEX prefix.
1592static int readVVVV(struct InternalInstruction *insn) {
1593 LLVM_DEBUG(dbgs() << "readVVVV()");
1594
1595 int vvvv;
1596 if (insn->vectorExtensionType == TYPE_EVEX)
1597 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1598 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
1599 else if (insn->vectorExtensionType == TYPE_VEX_3B)
1600 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1601 else if (insn->vectorExtensionType == TYPE_VEX_2B)
1602 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1603 else if (insn->vectorExtensionType == TYPE_XOP)
1604 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1605 else
1606 return -1;
1607
1608 if (insn->mode != MODE_64BIT)
1609 vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1610
1611 insn->vvvv = static_cast<Reg>(vvvv);
1612 return 0;
1613}
1614
1615// Read an mask register from the opcode field of an instruction.
1616//
1617// @param insn - The instruction whose opcode field is to be read.
1618// @return - 0 on success; nonzero otherwise.
1619static int readMaskRegister(struct InternalInstruction *insn) {
1620 LLVM_DEBUG(dbgs() << "readMaskRegister()");
1621
1622 if (insn->vectorExtensionType != TYPE_EVEX)
1623 return -1;
1624
1625 insn->writemask =
1626 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1627 return 0;
1628}
1629
1630// Consults the specifier for an instruction and consumes all
1631// operands for that instruction, interpreting them as it goes.
1632static int readOperands(struct InternalInstruction *insn) {
1633 int hasVVVV, needVVVV;
1634 int sawRegImm = 0;
1635
1636 LLVM_DEBUG(dbgs() << "readOperands()");
1637
1638 // If non-zero vvvv specified, make sure one of the operands uses it.
1639 hasVVVV = !readVVVV(insn);
1640 needVVVV = hasVVVV && (insn->vvvv != 0);
1641
1642 for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1643 switch (Op.encoding) {
1644 case ENCODING_NONE:
1645 case ENCODING_SI:
1646 case ENCODING_DI:
1647 break;
1648 CASE_ENCODING_VSIB:
1649 // VSIB can use the V2 bit so check only the other bits.
1650 if (needVVVV)
1651 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1652 if (readModRM(insn))
1653 return -1;
1654
1655 // Reject if SIB wasn't used.
1656 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1657 return -1;
1658
1659 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1660 if (insn->sibIndex == SIB_INDEX_NONE)
1661 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1662
1663 // If EVEX.v2 is set this is one of the 16-31 registers.
1664 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1665 v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1666 insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1667
1668 // Adjust the index register to the correct size.
1669 switch ((OperandType)Op.type) {
1670 default:
1671 debug("Unhandled VSIB index type");
1672 return -1;
1673 case TYPE_MVSIBX:
1674 insn->sibIndex =
1675 (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1676 break;
1677 case TYPE_MVSIBY:
1678 insn->sibIndex =
1679 (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1680 break;
1681 case TYPE_MVSIBZ:
1682 insn->sibIndex =
1683 (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1684 break;
1685 }
1686
1687 // Apply the AVX512 compressed displacement scaling factor.
1688 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1689 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1690 break;
1691 case ENCODING_SIB:
1692 // Reject if SIB wasn't used.
1693 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1694 return -1;
1695 if (readModRM(insn))
1696 return -1;
1697 if (fixupReg(insn, op: &Op))
1698 return -1;
1699 break;
1700 case ENCODING_REG:
1701 CASE_ENCODING_RM:
1702 if (readModRM(insn))
1703 return -1;
1704 if (fixupReg(insn, op: &Op))
1705 return -1;
1706 // Apply the AVX512 compressed displacement scaling factor.
1707 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1708 insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1709 break;
1710 case ENCODING_IB:
1711 if (sawRegImm) {
1712 // Saw a register immediate so don't read again and instead split the
1713 // previous immediate. FIXME: This is a hack.
1714 insn->immediates[insn->numImmediatesConsumed] =
1715 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1716 ++insn->numImmediatesConsumed;
1717 break;
1718 }
1719 if (readImmediate(insn, size: 1))
1720 return -1;
1721 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1722 sawRegImm = 1;
1723 break;
1724 case ENCODING_IW:
1725 if (readImmediate(insn, size: 2))
1726 return -1;
1727 break;
1728 case ENCODING_ID:
1729 if (readImmediate(insn, size: 4))
1730 return -1;
1731 break;
1732 case ENCODING_IO:
1733 if (readImmediate(insn, size: 8))
1734 return -1;
1735 break;
1736 case ENCODING_Iv:
1737 if (readImmediate(insn, size: insn->immediateSize))
1738 return -1;
1739 break;
1740 case ENCODING_Ia:
1741 if (readImmediate(insn, size: insn->addressSize))
1742 return -1;
1743 break;
1744 case ENCODING_IRC:
1745 insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1746 lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1747 break;
1748 case ENCODING_RB:
1749 if (readOpcodeRegister(insn, size: 1))
1750 return -1;
1751 break;
1752 case ENCODING_RW:
1753 if (readOpcodeRegister(insn, size: 2))
1754 return -1;
1755 break;
1756 case ENCODING_RD:
1757 if (readOpcodeRegister(insn, size: 4))
1758 return -1;
1759 break;
1760 case ENCODING_RO:
1761 if (readOpcodeRegister(insn, size: 8))
1762 return -1;
1763 break;
1764 case ENCODING_Rv:
1765 if (readOpcodeRegister(insn, size: 0))
1766 return -1;
1767 break;
1768 case ENCODING_CF:
1769 insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]);
1770 needVVVV = false; // oszc shares the same bits with VVVV
1771 break;
1772 case ENCODING_CC:
1773 if (isCCMPOrCTEST(insn))
1774 insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1775 else
1776 insn->immediates[1] = insn->opcode & 0xf;
1777 break;
1778 case ENCODING_FP:
1779 break;
1780 case ENCODING_VVVV:
1781 needVVVV = 0; // Mark that we have found a VVVV operand.
1782 if (!hasVVVV)
1783 return -1;
1784 if (insn->mode != MODE_64BIT)
1785 insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1786 if (fixupReg(insn, op: &Op))
1787 return -1;
1788 break;
1789 case ENCODING_WRITEMASK:
1790 if (readMaskRegister(insn))
1791 return -1;
1792 break;
1793 case ENCODING_DUP:
1794 break;
1795 default:
1796 LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1797 return -1;
1798 }
1799 }
1800
1801 // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1802 if (needVVVV)
1803 return -1;
1804
1805 return 0;
1806}
1807
// Placeholder enumerators that exist only so that an automatically generated
// switch statement compiles. Nothing ever stores these values; they merely
// give the names below a definition inside llvm::X86.
namespace llvm::X86 {
enum {
  BX_SI = 500,
  BX_DI = 501,
  BP_SI = 502,
  BP_DI = 503,
  sib = 504,
  sib64 = 505
};
} // namespace llvm::X86
1825
// Declared ahead of its definition so that
// X86GenericDisassembler::getInstruction can call it. That caller treats a
// false return value as success and a true return value as failure.
static bool translateInstruction(MCInst &target,
                                 InternalInstruction &source,
                                 const MCDisassembler *Dis);
1829
1830namespace {
1831
1832/// Generic disassembler for all X86 platforms. All each platform class should
1833/// have to do is subclass the constructor, and provide a different
1834/// disassemblerMode value.
1835class X86GenericDisassembler : public MCDisassembler {
1836 std::unique_ptr<const MCInstrInfo> MII;
1837public:
1838 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1839 std::unique_ptr<const MCInstrInfo> MII);
1840public:
1841 DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1842 ArrayRef<uint8_t> Bytes, uint64_t Address,
1843 raw_ostream &cStream) const override;
1844
1845private:
1846 DisassemblerMode fMode;
1847};
1848
1849} // namespace
1850
1851X86GenericDisassembler::X86GenericDisassembler(
1852 const MCSubtargetInfo &STI,
1853 MCContext &Ctx,
1854 std::unique_ptr<const MCInstrInfo> MII)
1855 : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
1856 const FeatureBitset &FB = STI.getFeatureBits();
1857 if (FB[X86::Is16Bit]) {
1858 fMode = MODE_16BIT;
1859 return;
1860 } else if (FB[X86::Is32Bit]) {
1861 fMode = MODE_32BIT;
1862 return;
1863 } else if (FB[X86::Is64Bit]) {
1864 fMode = MODE_64BIT;
1865 return;
1866 }
1867
1868 llvm_unreachable("Invalid CPU mode");
1869}
1870
1871MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1872 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1873 raw_ostream &CStream) const {
1874 CommentStream = &CStream;
1875
1876 InternalInstruction Insn;
1877 memset(s: &Insn, c: 0, n: sizeof(InternalInstruction));
1878 Insn.bytes = Bytes;
1879 Insn.startLocation = Address;
1880 Insn.readerCursor = Address;
1881 Insn.mode = fMode;
1882
1883 if (Bytes.empty() || readPrefixes(insn: &Insn) || readOpcode(insn: &Insn) ||
1884 getInstructionID(insn: &Insn, mii: MII.get()) || Insn.instructionID == 0 ||
1885 readOperands(insn: &Insn)) {
1886 Size = Insn.readerCursor - Address;
1887 return Fail;
1888 }
1889
1890 Insn.operands = x86OperandSets[Insn.spec->operands];
1891 Insn.length = Insn.readerCursor - Insn.startLocation;
1892 Size = Insn.length;
1893 if (Size > 15)
1894 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1895
1896 bool Ret = translateInstruction(target&: Instr, source&: Insn, Dis: this);
1897 if (!Ret) {
1898 unsigned Flags = X86::IP_NO_PREFIX;
1899 if (Insn.hasAdSize)
1900 Flags |= X86::IP_HAS_AD_SIZE;
1901 if (!Insn.mandatoryPrefix) {
1902 if (Insn.hasOpSize)
1903 Flags |= X86::IP_HAS_OP_SIZE;
1904 if (Insn.repeatPrefix == 0xf2)
1905 Flags |= X86::IP_HAS_REPEAT_NE;
1906 else if (Insn.repeatPrefix == 0xf3 &&
1907 // It should not be 'pause' f3 90
1908 Insn.opcode != 0x90)
1909 Flags |= X86::IP_HAS_REPEAT;
1910 if (Insn.hasLockPrefix)
1911 Flags |= X86::IP_HAS_LOCK;
1912 }
1913 Instr.setFlags(Flags);
1914 }
1915 return (!Ret) ? Success : Fail;
1916}
1917
1918//
1919// Private code that translates from struct InternalInstructions to MCInsts.
1920//
1921
1922/// translateRegister - Translates an internal register to the appropriate LLVM
1923/// register, and appends it as an operand to an MCInst.
1924///
1925/// @param mcInst - The MCInst to append to.
1926/// @param reg - The Reg to append.
1927static void translateRegister(MCInst &mcInst, Reg reg) {
1928#define ENTRY(x) X86::x,
1929 static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1930#undef ENTRY
1931
1932 MCPhysReg llvmRegnum = llvmRegnums[reg];
1933 mcInst.addOperand(Op: MCOperand::createReg(Reg: llvmRegnum));
1934}
1935
// Lookup table from a segment-override value to the LLVM segment register it
// selects. It is indexed with InternalInstruction::segmentOverride; entry 0
// means that no override is present.
static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
  0,        // SEG_OVERRIDE_NONE
  X86::CS,
  X86::SS,
  X86::DS,
  X86::ES,
  X86::FS,
  X86::GS
};
1945
1946/// translateSrcIndex - Appends a source index operand to an MCInst.
1947///
1948/// @param mcInst - The MCInst to append to.
1949/// @param insn - The internal instruction.
1950static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1951 unsigned baseRegNo;
1952
1953 if (insn.mode == MODE_64BIT)
1954 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1955 else if (insn.mode == MODE_32BIT)
1956 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1957 else {
1958 assert(insn.mode == MODE_16BIT);
1959 baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1960 }
1961 MCOperand baseReg = MCOperand::createReg(Reg: baseRegNo);
1962 mcInst.addOperand(Op: baseReg);
1963
1964 MCOperand segmentReg;
1965 segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
1966 mcInst.addOperand(Op: segmentReg);
1967 return false;
1968}
1969
1970/// translateDstIndex - Appends a destination index operand to an MCInst.
1971///
1972/// @param mcInst - The MCInst to append to.
1973/// @param insn - The internal instruction.
1974
1975static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1976 unsigned baseRegNo;
1977
1978 if (insn.mode == MODE_64BIT)
1979 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1980 else if (insn.mode == MODE_32BIT)
1981 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1982 else {
1983 assert(insn.mode == MODE_16BIT);
1984 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1985 }
1986 MCOperand baseReg = MCOperand::createReg(Reg: baseRegNo);
1987 mcInst.addOperand(Op: baseReg);
1988 return false;
1989}
1990
1991/// translateImmediate - Appends an immediate operand to an MCInst.
1992///
1993/// @param mcInst - The MCInst to append to.
1994/// @param immediate - The immediate value to append.
1995/// @param operand - The operand, as stored in the descriptor table.
1996/// @param insn - The internal instruction.
1997static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1998 const OperandSpecifier &operand,
1999 InternalInstruction &insn,
2000 const MCDisassembler *Dis) {
2001 // Sign-extend the immediate if necessary.
2002
2003 OperandType type = (OperandType)operand.type;
2004
2005 bool isBranch = false;
2006 uint64_t pcrel = 0;
2007 if (type == TYPE_REL) {
2008 isBranch = true;
2009 pcrel = insn.startLocation + insn.length;
2010 switch (operand.encoding) {
2011 default:
2012 break;
2013 case ENCODING_Iv:
2014 switch (insn.displacementSize) {
2015 default:
2016 break;
2017 case 1:
2018 if(immediate & 0x80)
2019 immediate |= ~(0xffull);
2020 break;
2021 case 2:
2022 if(immediate & 0x8000)
2023 immediate |= ~(0xffffull);
2024 break;
2025 case 4:
2026 if(immediate & 0x80000000)
2027 immediate |= ~(0xffffffffull);
2028 break;
2029 case 8:
2030 break;
2031 }
2032 break;
2033 case ENCODING_IB:
2034 if(immediate & 0x80)
2035 immediate |= ~(0xffull);
2036 break;
2037 case ENCODING_IW:
2038 if(immediate & 0x8000)
2039 immediate |= ~(0xffffull);
2040 break;
2041 case ENCODING_ID:
2042 if(immediate & 0x80000000)
2043 immediate |= ~(0xffffffffull);
2044 break;
2045 }
2046 }
2047 // By default sign-extend all X86 immediates based on their encoding.
2048 else if (type == TYPE_IMM) {
2049 switch (operand.encoding) {
2050 default:
2051 break;
2052 case ENCODING_IB:
2053 if(immediate & 0x80)
2054 immediate |= ~(0xffull);
2055 break;
2056 case ENCODING_IW:
2057 if(immediate & 0x8000)
2058 immediate |= ~(0xffffull);
2059 break;
2060 case ENCODING_ID:
2061 if(immediate & 0x80000000)
2062 immediate |= ~(0xffffffffull);
2063 break;
2064 case ENCODING_IO:
2065 break;
2066 }
2067 }
2068
2069 switch (type) {
2070 case TYPE_XMM:
2071 mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::XMM0 + (immediate >> 4)));
2072 return;
2073 case TYPE_YMM:
2074 mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::YMM0 + (immediate >> 4)));
2075 return;
2076 case TYPE_ZMM:
2077 mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::ZMM0 + (immediate >> 4)));
2078 return;
2079 default:
2080 // operand is 64 bits wide. Do nothing.
2081 break;
2082 }
2083
2084 if (!Dis->tryAddingSymbolicOperand(
2085 Inst&: mcInst, Value: immediate + pcrel, Address: insn.startLocation, IsBranch: isBranch,
2086 Offset: insn.immediateOffset, OpSize: insn.immediateSize, InstSize: insn.length))
2087 mcInst.addOperand(Op: MCOperand::createImm(Val: immediate));
2088
2089 if (type == TYPE_MOFFS) {
2090 MCOperand segmentReg;
2091 segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
2092 mcInst.addOperand(Op: segmentReg);
2093 }
2094}
2095
2096/// translateRMRegister - Translates a register stored in the R/M field of the
2097/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
2098/// @param mcInst - The MCInst to append to.
2099/// @param insn - The internal instruction to extract the R/M field
2100/// from.
2101/// @return - 0 on success; -1 otherwise
2102static bool translateRMRegister(MCInst &mcInst,
2103 InternalInstruction &insn) {
2104 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2105 debug("A R/M register operand may not have a SIB byte");
2106 return true;
2107 }
2108
2109 switch (insn.eaBase) {
2110 default:
2111 debug("Unexpected EA base register");
2112 return true;
2113 case EA_BASE_NONE:
2114 debug("EA_BASE_NONE for ModR/M base");
2115 return true;
2116#define ENTRY(x) case EA_BASE_##x:
2117 ALL_EA_BASES
2118#undef ENTRY
2119 debug("A R/M register operand may not have a base; "
2120 "the operand must be a register.");
2121 return true;
2122#define ENTRY(x) \
2123 case EA_REG_##x: \
2124 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2125 ALL_REGS
2126#undef ENTRY
2127 }
2128
2129 return false;
2130}
2131
2132/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2133/// fields of an internal instruction (and possibly its SIB byte) to a memory
2134/// operand in LLVM's format, and appends it to an MCInst.
2135///
2136/// @param mcInst - The MCInst to append to.
2137/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2138/// from.
2139/// @param ForceSIB - The instruction must use SIB.
2140/// @return - 0 on success; nonzero otherwise
2141static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2142 const MCDisassembler *Dis,
2143 bool ForceSIB = false) {
2144 // Addresses in an MCInst are represented as five operands:
2145 // 1. basereg (register) The R/M base, or (if there is a SIB) the
2146 // SIB base
2147 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2148 // scale amount
2149 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2150 // the index (which is multiplied by the
2151 // scale amount)
2152 // 4. displacement (immediate) 0, or the displacement if there is one
2153 // 5. segmentreg (register) x86_registerNONE for now, but could be set
2154 // if we have segment overrides
2155
2156 MCOperand baseReg;
2157 MCOperand scaleAmount;
2158 MCOperand indexReg;
2159 MCOperand displacement;
2160 MCOperand segmentReg;
2161 uint64_t pcrel = 0;
2162
2163 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2164 if (insn.sibBase != SIB_BASE_NONE) {
2165 switch (insn.sibBase) {
2166 default:
2167 debug("Unexpected sibBase");
2168 return true;
2169#define ENTRY(x) \
2170 case SIB_BASE_##x: \
2171 baseReg = MCOperand::createReg(X86::x); break;
2172 ALL_SIB_BASES
2173#undef ENTRY
2174 }
2175 } else {
2176 baseReg = MCOperand::createReg(Reg: X86::NoRegister);
2177 }
2178
2179 if (insn.sibIndex != SIB_INDEX_NONE) {
2180 switch (insn.sibIndex) {
2181 default:
2182 debug("Unexpected sibIndex");
2183 return true;
2184#define ENTRY(x) \
2185 case SIB_INDEX_##x: \
2186 indexReg = MCOperand::createReg(X86::x); break;
2187 EA_BASES_32BIT
2188 EA_BASES_64BIT
2189 REGS_XMM
2190 REGS_YMM
2191 REGS_ZMM
2192#undef ENTRY
2193 }
2194 } else {
2195 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2196 // but no index is used and modrm alone should have been enough.
2197 // -No base register in 32-bit mode. In 64-bit mode this is used to
2198 // avoid rip-relative addressing.
2199 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2200 // base always requires a SIB byte.
2201 // -A scale other than 1 is used.
2202 if (!ForceSIB &&
2203 (insn.sibScale != 1 ||
2204 (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
2205 (insn.sibBase != SIB_BASE_NONE &&
2206 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2207 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2208 indexReg = MCOperand::createReg(Reg: insn.addressSize == 4 ? X86::EIZ :
2209 X86::RIZ);
2210 } else
2211 indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2212 }
2213
2214 scaleAmount = MCOperand::createImm(Val: insn.sibScale);
2215 } else {
2216 switch (insn.eaBase) {
2217 case EA_BASE_NONE:
2218 if (insn.eaDisplacement == EA_DISP_NONE) {
2219 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2220 return true;
2221 }
2222 if (insn.mode == MODE_64BIT){
2223 pcrel = insn.startLocation + insn.length;
2224 Dis->tryAddingPcLoadReferenceComment(Value: insn.displacement + pcrel,
2225 Address: insn.startLocation +
2226 insn.displacementOffset);
2227 // Section 2.2.1.6
2228 baseReg = MCOperand::createReg(Reg: insn.addressSize == 4 ? X86::EIP :
2229 X86::RIP);
2230 }
2231 else
2232 baseReg = MCOperand::createReg(Reg: X86::NoRegister);
2233
2234 indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2235 break;
2236 case EA_BASE_BX_SI:
2237 baseReg = MCOperand::createReg(Reg: X86::BX);
2238 indexReg = MCOperand::createReg(Reg: X86::SI);
2239 break;
2240 case EA_BASE_BX_DI:
2241 baseReg = MCOperand::createReg(Reg: X86::BX);
2242 indexReg = MCOperand::createReg(Reg: X86::DI);
2243 break;
2244 case EA_BASE_BP_SI:
2245 baseReg = MCOperand::createReg(Reg: X86::BP);
2246 indexReg = MCOperand::createReg(Reg: X86::SI);
2247 break;
2248 case EA_BASE_BP_DI:
2249 baseReg = MCOperand::createReg(Reg: X86::BP);
2250 indexReg = MCOperand::createReg(Reg: X86::DI);
2251 break;
2252 default:
2253 indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2254 switch (insn.eaBase) {
2255 default:
2256 debug("Unexpected eaBase");
2257 return true;
2258 // Here, we will use the fill-ins defined above. However,
2259 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2260 // sib and sib64 were handled in the top-level if, so they're only
2261 // placeholders to keep the compiler happy.
2262#define ENTRY(x) \
2263 case EA_BASE_##x: \
2264 baseReg = MCOperand::createReg(X86::x); break;
2265 ALL_EA_BASES
2266#undef ENTRY
2267#define ENTRY(x) case EA_REG_##x:
2268 ALL_REGS
2269#undef ENTRY
2270 debug("A R/M memory operand may not be a register; "
2271 "the base field must be a base.");
2272 return true;
2273 }
2274 }
2275
2276 scaleAmount = MCOperand::createImm(Val: 1);
2277 }
2278
2279 displacement = MCOperand::createImm(Val: insn.displacement);
2280
2281 segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
2282
2283 mcInst.addOperand(Op: baseReg);
2284 mcInst.addOperand(Op: scaleAmount);
2285 mcInst.addOperand(Op: indexReg);
2286
2287 const uint8_t dispSize =
2288 (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
2289
2290 if (!Dis->tryAddingSymbolicOperand(
2291 Inst&: mcInst, Value: insn.displacement + pcrel, Address: insn.startLocation, IsBranch: false,
2292 Offset: insn.displacementOffset, OpSize: dispSize, InstSize: insn.length))
2293 mcInst.addOperand(Op: displacement);
2294 mcInst.addOperand(Op: segmentReg);
2295 return false;
2296}
2297
2298/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2299/// byte of an instruction to LLVM form, and appends it to an MCInst.
2300///
2301/// @param mcInst - The MCInst to append to.
2302/// @param operand - The operand, as stored in the descriptor table.
2303/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2304/// from.
2305/// @return - 0 on success; nonzero otherwise
2306static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2307 InternalInstruction &insn, const MCDisassembler *Dis) {
2308 switch (operand.type) {
2309 default:
2310 debug("Unexpected type for a R/M operand");
2311 return true;
2312 case TYPE_R8:
2313 case TYPE_R16:
2314 case TYPE_R32:
2315 case TYPE_R64:
2316 case TYPE_Rv:
2317 case TYPE_MM64:
2318 case TYPE_XMM:
2319 case TYPE_YMM:
2320 case TYPE_ZMM:
2321 case TYPE_TMM:
2322 case TYPE_VK_PAIR:
2323 case TYPE_VK:
2324 case TYPE_DEBUGREG:
2325 case TYPE_CONTROLREG:
2326 case TYPE_BNDR:
2327 return translateRMRegister(mcInst, insn);
2328 case TYPE_M:
2329 case TYPE_MVSIBX:
2330 case TYPE_MVSIBY:
2331 case TYPE_MVSIBZ:
2332 return translateRMMemory(mcInst, insn, Dis);
2333 case TYPE_MSIB:
2334 return translateRMMemory(mcInst, insn, Dis, ForceSIB: true);
2335 }
2336}
2337
2338/// translateFPRegister - Translates a stack position on the FPU stack to its
2339/// LLVM form, and appends it to an MCInst.
2340///
2341/// @param mcInst - The MCInst to append to.
2342/// @param stackPos - The stack position to translate.
2343static void translateFPRegister(MCInst &mcInst,
2344 uint8_t stackPos) {
2345 mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::ST0 + stackPos));
2346}
2347
2348/// translateMaskRegister - Translates a 3-bit mask register number to
2349/// LLVM form, and appends it to an MCInst.
2350///
2351/// @param mcInst - The MCInst to append to.
2352/// @param maskRegNum - Number of mask register from 0 to 7.
2353/// @return - false on success; true otherwise.
2354static bool translateMaskRegister(MCInst &mcInst,
2355 uint8_t maskRegNum) {
2356 if (maskRegNum >= 8) {
2357 debug("Invalid mask register number");
2358 return true;
2359 }
2360
2361 mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::K0 + maskRegNum));
2362 return false;
2363}
2364
2365/// translateOperand - Translates an operand stored in an internal instruction
2366/// to LLVM's format and appends it to an MCInst.
2367///
2368/// @param mcInst - The MCInst to append to.
2369/// @param operand - The operand, as stored in the descriptor table.
2370/// @param insn - The internal instruction.
2371/// @return - false on success; true otherwise.
2372static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2373 InternalInstruction &insn,
2374 const MCDisassembler *Dis) {
2375 switch (operand.encoding) {
2376 default:
2377 debug("Unhandled operand encoding during translation");
2378 return true;
2379 case ENCODING_REG:
2380 translateRegister(mcInst, reg: insn.reg);
2381 return false;
2382 case ENCODING_WRITEMASK:
2383 return translateMaskRegister(mcInst, maskRegNum: insn.writemask);
2384 case ENCODING_SIB:
2385 CASE_ENCODING_RM:
2386 CASE_ENCODING_VSIB:
2387 return translateRM(mcInst, operand, insn, Dis);
2388 case ENCODING_IB:
2389 case ENCODING_IW:
2390 case ENCODING_ID:
2391 case ENCODING_IO:
2392 case ENCODING_Iv:
2393 case ENCODING_Ia:
2394 translateImmediate(mcInst,
2395 immediate: insn.immediates[insn.numImmediatesTranslated++],
2396 operand,
2397 insn,
2398 Dis);
2399 return false;
2400 case ENCODING_IRC:
2401 mcInst.addOperand(Op: MCOperand::createImm(Val: insn.RC));
2402 return false;
2403 case ENCODING_SI:
2404 return translateSrcIndex(mcInst, insn);
2405 case ENCODING_DI:
2406 return translateDstIndex(mcInst, insn);
2407 case ENCODING_RB:
2408 case ENCODING_RW:
2409 case ENCODING_RD:
2410 case ENCODING_RO:
2411 case ENCODING_Rv:
2412 translateRegister(mcInst, reg: insn.opcodeRegister);
2413 return false;
2414 case ENCODING_CF:
2415 mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[1]));
2416 return false;
2417 case ENCODING_CC:
2418 if (isCCMPOrCTEST(insn: &insn))
2419 mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[2]));
2420 else
2421 mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[1]));
2422 return false;
2423 case ENCODING_FP:
2424 translateFPRegister(mcInst, stackPos: insn.modRM & 7);
2425 return false;
2426 case ENCODING_VVVV:
2427 translateRegister(mcInst, reg: insn.vvvv);
2428 return false;
2429 case ENCODING_DUP:
2430 return translateOperand(mcInst, operand: insn.operands[operand.type - TYPE_DUP0],
2431 insn, Dis);
2432 }
2433}
2434
2435/// translateInstruction - Translates an internal instruction and all its
2436/// operands to an MCInst.
2437///
2438/// @param mcInst - The MCInst to populate with the instruction's data.
2439/// @param insn - The internal instruction.
2440/// @return - false on success; true otherwise.
2441static bool translateInstruction(MCInst &mcInst,
2442 InternalInstruction &insn,
2443 const MCDisassembler *Dis) {
2444 if (!insn.spec) {
2445 debug("Instruction has no specification");
2446 return true;
2447 }
2448
2449 mcInst.clear();
2450 mcInst.setOpcode(insn.instructionID);
2451 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2452 // prefix bytes should be disassembled as xrelease and xacquire then set the
2453 // opcode to those instead of the rep and repne opcodes.
2454 if (insn.xAcquireRelease) {
2455 if(mcInst.getOpcode() == X86::REP_PREFIX)
2456 mcInst.setOpcode(X86::XRELEASE_PREFIX);
2457 else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2458 mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2459 }
2460
2461 insn.numImmediatesTranslated = 0;
2462
2463 for (const auto &Op : insn.operands) {
2464 if (Op.encoding != ENCODING_NONE) {
2465 if (translateOperand(mcInst, operand: Op, insn, Dis)) {
2466 return true;
2467 }
2468 }
2469 }
2470
2471 return false;
2472}
2473
2474static MCDisassembler *createX86Disassembler(const Target &T,
2475 const MCSubtargetInfo &STI,
2476 MCContext &Ctx) {
2477 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2478 return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2479}
2480
2481extern "C" LLVM_C_ABI void LLVMInitializeX86Disassembler() {
2482 // Register the disassembler.
2483 TargetRegistry::RegisterMCDisassembler(T&: getTheX86_32Target(),
2484 Fn: createX86Disassembler);
2485 TargetRegistry::RegisterMCDisassembler(T&: getTheX86_64Target(),
2486 Fn: createX86Disassembler);
2487}
2488