X86Disassembler.cpp source code [llvm_projects/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp]

1	//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file is part of the X86 Disassembler.
10	// It contains code to translate the data produced by the decoder into
11	// MCInsts.
12	//
13	//
14	// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15	// 64-bit X86 instruction sets. The main decode sequence for an assembly
16	// instruction in this disassembler is:
17	//
18	// 1. Read the prefix bytes and determine the attributes of the instruction.
19	// These attributes, recorded in enum attributeBits
20	// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21	// provides a mapping from bitmasks to contexts, which are represented by
22	// enum InstructionContext (ibid.).
23	//
24	// 2. Read the opcode, and determine what kind of opcode it is. The
25	// disassembler distinguishes four kinds of opcodes, which are enumerated in
26	// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27	// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28	// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29	//
30	// 3. Depending on the opcode type, look in one of four ClassDecision structures
31	// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32	// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
33	// a ModRMDecision (ibid.).
34	//
35	// 4. Some instructions, such as escape opcodes or extended opcodes, or even
36	// instructions that have ModRMReg / ModRMMem forms in LLVM, need the
37	// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38	// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39	// ModR/M byte is required and how to interpret it.
40	//
41	// 5. After resolving the ModRMDecision, the disassembler has a unique ID
42	// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43	// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44	// meanings of its operands.
45	//
46	// 6. For each operand, its encoding is an entry from OperandEncoding
47	// (X86DisassemblerDecoderCommon.h) and its type is an entry from
48	// OperandType (ibid.). The encoding indicates how to read it from the
49	// instruction; the type indicates how to interpret the value once it has
50	// been read. For example, a register operand could be stored in the R/M
51	// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52	// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53	// register, for instance). Given this information, the operands can be
54	// extracted and interpreted.
55	//
56	// 7. As the last step, the disassembler translates the instruction information
57	// and operands into a format understandable by the client - in this case, an
58	// MCInst for use by the MC infrastructure.
59	//
60	// The disassembler is broken broadly into two parts: the table emitter that
61	// emits the instruction decode tables discussed above during compilation, and
62	// the disassembler itself. The table emitter is documented in more detail in
63	// utils/TableGen/X86DisassemblerEmitter.h.
64	//
65	// X86Disassembler.cpp contains the code responsible for step 7, and for
66	// invoking the decoder to execute steps 1-6.
67	// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68	// table emitter and the disassembler.
69	// X86DisassemblerDecoder.h contains the public interface of the decoder,
70	// factored out into C for possible use by other projects.
71	// X86DisassemblerDecoder.c contains the source code of the decoder, which is
72	// responsible for steps 1-6.
73	//
74	//===----------------------------------------------------------------------===//
75
76	#include "MCTargetDesc/X86BaseInfo.h"
77	#include "MCTargetDesc/X86MCTargetDesc.h"
78	#include "TargetInfo/X86TargetInfo.h"
79	#include "X86DisassemblerDecoder.h"
80	#include "llvm/MC/MCContext.h"
81	#include "llvm/MC/MCDisassembler/MCDisassembler.h"
82	#include "llvm/MC/MCExpr.h"
83	#include "llvm/MC/MCInst.h"
84	#include "llvm/MC/MCInstrInfo.h"
85	#include "llvm/MC/MCSubtargetInfo.h"
86	#include "llvm/MC/TargetRegistry.h"
87	#include "llvm/Support/Debug.h"
88	#include "llvm/Support/Format.h"
89	#include "llvm/Support/raw_ostream.h"
90
91	using namespace llvm;
92	using namespace llvm::X86Disassembler;
93
94	#define DEBUG_TYPE "x86-disassembler"
95
// Debug-only logging helper: emits the message through LLVM_DEBUG, prefixed
// with the source line number of the place where the macro is expanded. Note
// that the expansion already ends in a semicolon.
96	#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97
98	// Specifies whether a ModR/M byte is needed and (if so) which
99	// instruction each possible value of the ModR/M byte corresponds to. Once
100	// this information is known, we have narrowed down to a single instruction.
101	struct ModRMDecision {
102	uint8_t modrm_type; // MODRM_* kind that decode() switches on to pick the lookup scheme.
103	uint16_t instructionIDs; // Index of this decision's first entry in modRMTable (see decode()).
104	};
105
106	// Specifies which set of ModR/M->instruction tables to look at
107	// given a particular opcode.
108	struct OpcodeDecision {
109	ModRMDecision modRMDecisions[`256`]; // Indexed by the uint8_t opcode byte in decode().
110	};
111
112	// Specifies which opcode->instruction tables to look at given
113	// a particular context (set of attributes). Since there are many possible
114	// contexts, the decoder first uses CONTEXTS_SYM to determine which context
115	// applies given a specific set of attributes. Hence there are only IC_max
116	// entries in this table, rather than 2^(ATTR_max).
117	struct ContextDecision {
118	OpcodeDecision opcodeDecisions[IC_max]; // Indexed by InstructionContext in decode().
119	};
120
121	#include "X86GenDisassemblerTables.inc"
122
123	static InstrUID decode(OpcodeType type, InstructionContext insnContext,
124	uint8_t opcode, uint8_t modRM) {
125	const struct ModRMDecision *dec;
126
127	switch (type) {
128	case ONEBYTE:
129	dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130	break;
131	case TWOBYTE:
132	dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133	break;
134	case THREEBYTE_38:
135	dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136	break;
137	case THREEBYTE_3A:
138	dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139	break;
140	case XOP8_MAP:
141	dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142	break;
143	case XOP9_MAP:
144	dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145	break;
146	case XOPA_MAP:
147	dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148	break;
149	case THREEDNOW_MAP:
150	dec =
151	&THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152	break;
153	case MAP4:
154	dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155	break;
156	case MAP5:
157	dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158	break;
159	case MAP6:
160	dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
161	break;
162	case MAP7:
163	dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
164	break;
165	}
166
167	switch (dec->modrm_type) {
168	default:
169	llvm_unreachable("Corrupt table! Unknown modrm_type");
170	return `0`;
171	case MODRM_ONEENTRY:
172	return modRMTable[dec->instructionIDs];
173	case MODRM_SPLITRM:
174	if (modFromModRM(modRM) == `0x3`)
175	return modRMTable[dec->instructionIDs + `1`];
176	return modRMTable[dec->instructionIDs];
177	case MODRM_SPLITREG:
178	if (modFromModRM(modRM) == `0x3`)
179	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`) + `8`];
180	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`)];
181	case MODRM_SPLITMISC:
182	if (modFromModRM(modRM) == `0x3`)
183	return modRMTable[dec->instructionIDs + (modRM & `0x3f`) + `8`];
184	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`)];
185	case MODRM_FULL:
186	return modRMTable[dec->instructionIDs + modRM];
187	}
188	}
189
190	static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
191	uint64_t offset = insn->readerCursor - insn->startLocation;
192	if (offset >= insn->bytes.size())
193	return true;
194	byte = insn->bytes [offset];
195	return false;
196	}
197
198	template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
199	auto r = insn->bytes;
200	uint64_t offset = insn->readerCursor - insn->startLocation;
201	if (offset + sizeof(T) > r.size())
202	return true;
203	ptr = support::endian::read<T>(&r [offset], llvm::endianness::little);
204	insn->readerCursor += sizeof(T);
205	return false;
206	}
207
208	static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
209	return insn->mode == MODE_64BIT && prefix >= `0x40` && prefix <= `0x4f`;
210	}
211
212	static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
213	return insn->mode == MODE_64BIT && prefix == `0xd5`;
214	}
215
216	// Consumes all of an instruction's prefix bytes, and marks the
217	// instruction as having them. Also sets the instruction's default operand,
218	// address, and other relevant data sizes to report operands correctly.
219	//
220	// insn must not be empty.
221	static int readPrefixes(struct InternalInstruction *insn) {
222	bool isPrefix = true;
223	uint8_t byte = `0`;
224	uint8_t nextByte;
225
226	LLVM_DEBUG(dbgs() << "readPrefixes()");
227
228	while (isPrefix) {
229	// If we fail reading prefixes, just stop here and let the opcode reader
230	// deal with it.
231	if (consume(insn, ptr&: byte))
232	break;
233
234	// If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
235	// break and let it be disassembled as a normal "instruction".
236	if (insn->readerCursor - `1` == insn->startLocation && byte == `0xf0`) // LOCK
237	break;
238
239	if ((byte == `0xf2` \|\| byte == `0xf3`) && !peek(insn, byte&: nextByte)) {
240	// If the byte is 0xf2 or 0xf3, and any of the following conditions are
241	// met:
242	// - it is followed by a LOCK (0xf0) prefix
243	// - it is followed by an xchg instruction
244	// then it should be disassembled as a xacquire/xrelease not repne/rep.
245	if (((nextByte == `0xf0`) \|\|
246	((nextByte & `0xfe`) == `0x86` \|\| (nextByte & `0xf8`) == `0x90`))) {
247	insn->xAcquireRelease = true;
248	if (!(byte == `0xf3` && nextByte == `0x90`)) // PAUSE instruction support
249	break;
250	}
251	// Also if the byte is 0xf3, and the following condition is met:
252	// - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
253	// "mov mem, imm" (opcode 0xc6/0xc7) instructions.
254	// then it should be disassembled as an xrelease not rep.
255	if (byte == `0xf3` && (nextByte == `0x88` \|\| nextByte == `0x89` \|\|
256	nextByte == `0xc6` \|\| nextByte == `0xc7`)) {
257	insn->xAcquireRelease = true;
258	break;
259	}
260	if (isREX(insn, prefix: nextByte)) {
261	uint8_t nnextByte;
262	// Go to REX prefix after the current one
263	if (consume(insn, ptr&: nnextByte))
264	return -`1`;
265	// We should be able to read next byte after REX prefix
266	if (peek(insn, byte&: nnextByte))
267	return -`1`;
268	--insn->readerCursor;
269	}
270	}
271
272	switch (byte) {
273	case `0xf0`: // LOCK
274	insn->hasLockPrefix = true;
275	break;
276	case `0xf2`: // REPNE/REPNZ
277	case `0xf3`: { // REP or REPE/REPZ
278	uint8_t nextByte;
279	if (peek(insn, byte&: nextByte))
280	break;
281	// TODO:
282	// 1. There could be several 0x66
283	// 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
284	// it's not mandatory prefix
285	// 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
286	// 0x0f exactly after it to be mandatory prefix
287	// 4. if (nextByte == 0xd5) it's REX2 and we need
288	// 0x0f exactly after it to be mandatory prefix
289	if (isREX(insn, prefix: nextByte) \|\| isREX2(insn, prefix: nextByte) \|\| nextByte == `0x0f` \|\|
290	nextByte == `0x66`)
291	// The last of 0xf2 /0xf3 is mandatory prefix
292	insn->mandatoryPrefix = byte;
293	insn->repeatPrefix = byte;
294	break;
295	}
296	case `0x2e`: // CS segment override -OR- Branch not taken
297	insn->segmentOverride = SEG_OVERRIDE_CS;
298	break;
299	case `0x36`: // SS segment override -OR- Branch taken
300	insn->segmentOverride = SEG_OVERRIDE_SS;
301	break;
302	case `0x3e`: // DS segment override
303	insn->segmentOverride = SEG_OVERRIDE_DS;
304	break;
305	case `0x26`: // ES segment override
306	insn->segmentOverride = SEG_OVERRIDE_ES;
307	break;
308	case `0x64`: // FS segment override
309	insn->segmentOverride = SEG_OVERRIDE_FS;
310	break;
311	case `0x65`: // GS segment override
312	insn->segmentOverride = SEG_OVERRIDE_GS;
313	break;
314	case `0x66`: { // Operand-size override {
315	uint8_t nextByte;
316	insn->hasOpSize = true;
317	if (peek(insn, byte&: nextByte))
318	break;
319	// 0x66 can't overwrite existing mandatory prefix and should be ignored
320	if (!insn->mandatoryPrefix && (nextByte == `0x0f` \|\| isREX(insn, prefix: nextByte)))
321	insn->mandatoryPrefix = byte;
322	break;
323	}
324	case `0x67`: // Address-size override
325	insn->hasAdSize = true;
326	break;
327	default: // Not a prefix byte
328	isPrefix = false;
329	break;
330	}
331
332	if (isREX(insn, prefix: byte)) {
333	insn->rexPrefix = byte;
334	isPrefix = true;
335	LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
336	} else if (isPrefix) {
337	insn->rexPrefix = `0`;
338	}
339
340	if (isPrefix)
341	LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
342	}
343
344	insn->vectorExtensionType = TYPE_NO_VEX_XOP;
345
346	if (byte == `0x62`) {
347	uint8_t byte1, byte2;
348	if (consume(insn, ptr&: byte1)) {
349	LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
350	return -`1`;
351	}
352
353	if (peek(insn, byte&: byte2)) {
354	LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
355	return -`1`;
356	}
357
358	if ((insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)) {
359	insn->vectorExtensionType = TYPE_EVEX;
360	} else {
361	--insn->readerCursor; // unconsume byte1
362	--insn->readerCursor; // unconsume byte
363	}
364
365	if (insn->vectorExtensionType == TYPE_EVEX) {
366	insn->vectorExtensionPrefix[`0`] = byte;
367	insn->vectorExtensionPrefix[`1`] = byte1;
368	if (consume(insn, ptr&: insn->vectorExtensionPrefix[`2`])) {
369	LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
370	return -`1`;
371	}
372	if (consume(insn, ptr&: insn->vectorExtensionPrefix[`3`])) {
373	LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
374	return -`1`;
375	}
376
377	if (insn->mode == MODE_64BIT) {
378	// We simulate the REX prefix for simplicity's sake
379	insn->rexPrefix = `0x40` \|
380	(wFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) << `3`) \|
381	(rFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `2`) \|
382	(xFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `1`) \|
383	(bFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `0`);
384
385	// We simulate the REX2 prefix for simplicity's sake
386	insn->rex2ExtensionPrefix[`1`] =
387	(r2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `6`) \|
388	(uFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) << `5`) \|
389	(b2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`);
390	}
391
392	LLVM_DEBUG(
393	dbgs() << format(
394	"Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
395	insn->vectorExtensionPrefix[`0`], insn->vectorExtensionPrefix[`1`],
396	insn->vectorExtensionPrefix[`2`], insn->vectorExtensionPrefix[`3`]));
397	}
398	} else if (byte == `0xc4`) {
399	uint8_t byte1;
400	if (peek(insn, byte&: byte1)) {
401	LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
402	return -`1`;
403	}
404
405	if (insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)
406	insn->vectorExtensionType = TYPE_VEX_3B;
407	else
408	--insn->readerCursor;
409
410	if (insn->vectorExtensionType == TYPE_VEX_3B) {
411	insn->vectorExtensionPrefix[`0`] = byte;
412	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
413	consume(insn, ptr&: insn->vectorExtensionPrefix[`2`]);
414
415	// We simulate the REX prefix for simplicity's sake
416
417	if (insn->mode == MODE_64BIT)
418	insn->rexPrefix = `0x40` \|
419	(wFromVEX3of3(insn->vectorExtensionPrefix[`2`]) << `3`) \|
420	(rFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `2`) \|
421	(xFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `1`) \|
422	(bFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `0`);
423
424	LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
425	insn->vectorExtensionPrefix[`0`],
426	insn->vectorExtensionPrefix[`1`],
427	insn->vectorExtensionPrefix[`2`]));
428	}
429	} else if (byte == `0xc5`) {
430	uint8_t byte1;
431	if (peek(insn, byte&: byte1)) {
432	LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
433	return -`1`;
434	}
435
436	if (insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)
437	insn->vectorExtensionType = TYPE_VEX_2B;
438	else
439	--insn->readerCursor;
440
441	if (insn->vectorExtensionType == TYPE_VEX_2B) {
442	insn->vectorExtensionPrefix[`0`] = byte;
443	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
444
445	if (insn->mode == MODE_64BIT)
446	insn->rexPrefix =
447	`0x40` \| (rFromVEX2of2(insn->vectorExtensionPrefix[`1`]) << `2`);
448
449	switch (ppFromVEX2of2(insn->vectorExtensionPrefix[`1`])) {
450	default:
451	break;
452	case VEX_PREFIX_66:
453	insn->hasOpSize = true;
454	break;
455	}
456
457	LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
458	insn->vectorExtensionPrefix[`0`],
459	insn->vectorExtensionPrefix[`1`]));
460	}
461	} else if (byte == `0x8f`) {
462	uint8_t byte1;
463	if (peek(insn, byte&: byte1)) {
464	LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
465	return -`1`;
466	}
467
468	if ((byte1 & `0x38`) != `0x0`) // 0 in these 3 bits is a POP instruction.
469	insn->vectorExtensionType = TYPE_XOP;
470	else
471	--insn->readerCursor;
472
473	if (insn->vectorExtensionType == TYPE_XOP) {
474	insn->vectorExtensionPrefix[`0`] = byte;
475	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
476	consume(insn, ptr&: insn->vectorExtensionPrefix[`2`]);
477
478	// We simulate the REX prefix for simplicity's sake
479
480	if (insn->mode == MODE_64BIT)
481	insn->rexPrefix = `0x40` \|
482	(wFromXOP3of3(insn->vectorExtensionPrefix[`2`]) << `3`) \|
483	(rFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `2`) \|
484	(xFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `1`) \|
485	(bFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `0`);
486
487	switch (ppFromXOP3of3(insn->vectorExtensionPrefix[`2`])) {
488	default:
489	break;
490	case VEX_PREFIX_66:
491	insn->hasOpSize = true;
492	break;
493	}
494
495	LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
496	insn->vectorExtensionPrefix[`0`],
497	insn->vectorExtensionPrefix[`1`],
498	insn->vectorExtensionPrefix[`2`]));
499	}
500	} else if (isREX2(insn, prefix: byte)) {
501	uint8_t byte1;
502	if (peek(insn, byte&: byte1)) {
503	LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
504	return -`1`;
505	}
506	insn->rex2ExtensionPrefix[`0`] = byte;
507	consume(insn, ptr&: insn->rex2ExtensionPrefix[`1`]);
508
509	// We simulate the REX prefix for simplicity's sake
510	insn->rexPrefix = `0x40` \| (wFromREX2(insn->rex2ExtensionPrefix[`1`]) << `3`) \|
511	(rFromREX2(insn->rex2ExtensionPrefix[`1`]) << `2`) \|
512	(xFromREX2(insn->rex2ExtensionPrefix[`1`]) << `1`) \|
513	(bFromREX2(insn->rex2ExtensionPrefix[`1`]) << `0`);
514	LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
515	insn->rex2ExtensionPrefix[`0`],
516	insn->rex2ExtensionPrefix[`1`]));
517	} else
518	--insn->readerCursor;
519
520	if (insn->mode == MODE_16BIT) {
521	insn->registerSize = (insn->hasOpSize ? `4` : `2`);
522	insn->addressSize = (insn->hasAdSize ? `4` : `2`);
523	insn->displacementSize = (insn->hasAdSize ? `4` : `2`);
524	insn->immediateSize = (insn->hasOpSize ? `4` : `2`);
525	} else if (insn->mode == MODE_32BIT) {
526	insn->registerSize = (insn->hasOpSize ? `2` : `4`);
527	insn->addressSize = (insn->hasAdSize ? `2` : `4`);
528	insn->displacementSize = (insn->hasAdSize ? `2` : `4`);
529	insn->immediateSize = (insn->hasOpSize ? `2` : `4`);
530	} else if (insn->mode == MODE_64BIT) {
531	insn->displacementSize = `4`;
532	if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
533	insn->registerSize = `8`;
534	insn->addressSize = (insn->hasAdSize ? `4` : `8`);
535	insn->immediateSize = `4`;
536	insn->hasOpSize = false;
537	} else {
538	insn->registerSize = (insn->hasOpSize ? `2` : `4`);
539	insn->addressSize = (insn->hasAdSize ? `4` : `8`);
540	insn->immediateSize = (insn->hasOpSize ? `2` : `4`);
541	}
542	}
543
544	return `0`;
545	}
546
547	// Consumes the SIB byte to determine addressing information.
548	static int readSIB(struct InternalInstruction *insn) {
549	SIBBase sibBaseBase = SIB_BASE_NONE;
550	uint8_t index, base;
551
552	LLVM_DEBUG(dbgs() << "readSIB()");
553	switch (insn->addressSize) {
554	case `2`:
555	default:
556	llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
557	case `4`:
558	insn->sibIndexBase = SIB_INDEX_EAX;
559	sibBaseBase = SIB_BASE_EAX;
560	break;
561	case `8`:
562	insn->sibIndexBase = SIB_INDEX_RAX;
563	sibBaseBase = SIB_BASE_RAX;
564	break;
565	}
566
567	if (consume(insn, ptr&: insn->sib))
568	return -`1`;
569
570	index = indexFromSIB(insn->sib) \| (xFromREX(insn->rexPrefix) << `3`) \|
571	(x2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
572
573	if (index == `0x4`) {
574	insn->sibIndex = SIB_INDEX_NONE;
575	} else {
576	insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
577	}
578
579	insn->sibScale = `1` << scaleFromSIB(insn->sib);
580
581	base = baseFromSIB(insn->sib) \| (bFromREX(insn->rexPrefix) << `3`) \|
582	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
583
584	switch (base) {
585	case `0x5`:
586	case `0xd`:
587	switch (modFromModRM(insn->modRM)) {
588	case `0x0`:
589	insn->eaDisplacement = EA_DISP_32;
590	insn->sibBase = SIB_BASE_NONE;
591	break;
592	case `0x1`:
593	insn->eaDisplacement = EA_DISP_8;
594	insn->sibBase = (SIBBase)(sibBaseBase + base);
595	break;
596	case `0x2`:
597	insn->eaDisplacement = EA_DISP_32;
598	insn->sibBase = (SIBBase)(sibBaseBase + base);
599	break;
600	default:
601	llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
602	}
603	break;
604	default:
605	insn->sibBase = (SIBBase)(sibBaseBase + base);
606	break;
607	}
608
609	return `0`;
610	}
611
612	static int readDisplacement(struct InternalInstruction *insn) {
613	int8_t d8;
614	int16_t d16;
615	int32_t d32;
616	LLVM_DEBUG(dbgs() << "readDisplacement()");
617
618	insn->displacementOffset = insn->readerCursor - insn->startLocation;
619	switch (insn->eaDisplacement) {
620	case EA_DISP_NONE:
621	break;
622	case EA_DISP_8:
623	if (consume(insn, ptr&: d8))
624	return -`1`;
625	insn->displacement = d8;
626	break;
627	case EA_DISP_16:
628	if (consume(insn, ptr&: d16))
629	return -`1`;
630	insn->displacement = d16;
631	break;
632	case EA_DISP_32:
633	if (consume(insn, ptr&: d32))
634	return -`1`;
635	insn->displacement = d32;
636	break;
637	}
638
639	return `0`;
640	}
641
642	// Consumes all addressing information (ModR/M byte, SIB byte, and displacement.
643	static int readModRM(struct InternalInstruction *insn) {
644	uint8_t mod, rm, reg;
645	LLVM_DEBUG(dbgs() << "readModRM()");
646
647	if (insn->consumedModRM)
648	return `0`;
649
650	if (consume(insn, ptr&: insn->modRM))
651	return -`1`;
652	insn->consumedModRM = true;
653
654	mod = modFromModRM(insn->modRM);
655	rm = rmFromModRM(insn->modRM);
656	reg = regFromModRM(insn->modRM);
657
658	// This goes by insn->registerSize to pick the correct register, which messes
659	// up if we're using (say) XMM or 8-bit register operands. That gets fixed in
660	// fixupReg().
661	switch (insn->registerSize) {
662	case `2`:
663	insn->regBase = MODRM_REG_AX;
664	insn->eaRegBase = EA_REG_AX;
665	break;
666	case `4`:
667	insn->regBase = MODRM_REG_EAX;
668	insn->eaRegBase = EA_REG_EAX;
669	break;
670	case `8`:
671	insn->regBase = MODRM_REG_RAX;
672	insn->eaRegBase = EA_REG_RAX;
673	break;
674	}
675
676	reg \|= (rFromREX(insn->rexPrefix) << `3`) \|
677	(r2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
678	rm \|= (bFromREX(insn->rexPrefix) << `3`) \|
679	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
680
681	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
682	reg \|= r2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`;
683
684	insn->reg = (Reg)(insn->regBase + reg);
685
686	switch (insn->addressSize) {
687	case `2`: {
688	EABase eaBaseBase = EA_BASE_BX_SI;
689
690	switch (mod) {
691	case `0x0`:
692	if (rm == `0x6`) {
693	insn->eaBase = EA_BASE_NONE;
694	insn->eaDisplacement = EA_DISP_16;
695	if (readDisplacement(insn))
696	return -`1`;
697	} else {
698	insn->eaBase = (EABase)(eaBaseBase + rm);
699	insn->eaDisplacement = EA_DISP_NONE;
700	}
701	break;
702	case `0x1`:
703	insn->eaBase = (EABase)(eaBaseBase + rm);
704	insn->eaDisplacement = EA_DISP_8;
705	insn->displacementSize = `1`;
706	if (readDisplacement(insn))
707	return -`1`;
708	break;
709	case `0x2`:
710	insn->eaBase = (EABase)(eaBaseBase + rm);
711	insn->eaDisplacement = EA_DISP_16;
712	if (readDisplacement(insn))
713	return -`1`;
714	break;
715	case `0x3`:
716	insn->eaBase = (EABase)(insn->eaRegBase + rm);
717	if (readDisplacement(insn))
718	return -`1`;
719	break;
720	}
721	break;
722	}
723	case `4`:
724	case `8`: {
725	EABase eaBaseBase = (insn->addressSize == `4` ? EA_BASE_EAX : EA_BASE_RAX);
726
727	switch (mod) {
728	case `0x0`:
729	insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
730	// In determining whether RIP-relative mode is used (rm=5),
731	// or whether a SIB byte is present (rm=4),
732	// the extension bits (REX.b and EVEX.x) are ignored.
733	switch (rm & `7`) {
734	case `0x4`: // SIB byte is present
735	insn->eaBase = (insn->addressSize == `4` ? EA_BASE_sib : EA_BASE_sib64);
736	if (readSIB(insn) \|\| readDisplacement(insn))
737	return -`1`;
738	break;
739	case `0x5`: // RIP-relative
740	insn->eaBase = EA_BASE_NONE;
741	insn->eaDisplacement = EA_DISP_32;
742	if (readDisplacement(insn))
743	return -`1`;
744	break;
745	default:
746	insn->eaBase = (EABase)(eaBaseBase + rm);
747	break;
748	}
749	break;
750	case `0x1`:
751	insn->displacementSize = `1`;
752	[[fallthrough]];
753	case `0x2`:
754	insn->eaDisplacement = (mod == `0x1` ? EA_DISP_8 : EA_DISP_32);
755	switch (rm & `7`) {
756	case `0x4`: // SIB byte is present
757	insn->eaBase = EA_BASE_sib;
758	if (readSIB(insn) \|\| readDisplacement(insn))
759	return -`1`;
760	break;
761	default:
762	insn->eaBase = (EABase)(eaBaseBase + rm);
763	if (readDisplacement(insn))
764	return -`1`;
765	break;
766	}
767	break;
768	case `0x3`:
769	insn->eaDisplacement = EA_DISP_NONE;
770	insn->eaBase = (EABase)(insn->eaRegBase + rm);
771	break;
772	}
773	break;
774	}
775	} // switch (insn->addressSize)
776
777	return `0`;
778	}
779
780	#define GENERIC_FIXUP_FUNC(name, base, prefix) \
781	static uint16_t name(struct InternalInstruction *insn, OperandType type, \
782	uint8_t index, uint8_t *valid) { \
783	*valid = 1; \
784	switch (type) { \
785	default: \
786	debug("Unhandled register type"); \
787	*valid = 0; \
788	return 0; \
789	case TYPE_Rv: \
790	return base + index; \
791	case TYPE_R8: \
792	if (insn->rexPrefix && index >= 4 && index <= 7) \
793	return prefix##_SPL + (index - 4); \
794	else \
795	return prefix##_AL + index; \
796	case TYPE_R16: \
797	return prefix##_AX + index; \
798	case TYPE_R32: \
799	return prefix##_EAX + index; \
800	case TYPE_R64: \
801	return prefix##_RAX + index; \
802	case TYPE_ZMM: \
803	return prefix##_ZMM0 + index; \
804	case TYPE_YMM: \
805	return prefix##_YMM0 + index; \
806	case TYPE_XMM: \
807	return prefix##_XMM0 + index; \
808	case TYPE_TMM: \
809	if (index > 7) \
810	*valid = 0; \
811	return prefix##_TMM0 + index; \
812	case TYPE_TMM_PAIR: \
813	if (index > 7) \
814	*valid = 0; \
815	return prefix##_TMM0_TMM1 + (index / 2); \
816	case TYPE_VK: \
817	index &= 0xf; \
818	if (index > 7) \
819	*valid = 0; \
820	return prefix##_K0 + index; \
821	case TYPE_VK_PAIR: \
822	if (index > 7) \
823	*valid = 0; \
824	return prefix##_K0_K1 + (index / 2); \
825	case TYPE_MM64: \
826	return prefix##_MM0 + (index & 0x7); \
827	case TYPE_SEGMENTREG: \
828	if ((index & 7) > 5) \
829	*valid = 0; \
830	return prefix##_ES + (index & 7); \
831	case TYPE_DEBUGREG: \
832	if (index > 15) \
833	*valid = 0; \
834	return prefix##_DR0 + index; \
835	case TYPE_CONTROLREG: \
836	if (index > 15) \
837	*valid = 0; \
838	return prefix##_CR0 + index; \
839	case TYPE_MVSIBX: \
840	return prefix##_XMM0 + index; \
841	case TYPE_MVSIBY: \
842	return prefix##_YMM0 + index; \
843	case TYPE_MVSIBZ: \
844	return prefix##_ZMM0 + index; \
845	} \
846	}
847
848	// Consult an operand type to determine the meaning of the reg or R/M field. If
849	// the operand is an XMM operand, for example, an operand would be XMM0 instead
850	// of AX, which readModRM() would otherwise misinterpret it as.
851	//
852	// @param insn - The instruction containing the operand.
853	// @param type - The operand type.
854	// @param index - The existing value of the field as reported by readModRM().
855	// @param valid - The address of a uint8_t. The target is set to 1 if the
856	// field is valid for the register class; 0 if not.
857	// @return - The proper value.
858	GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
859	GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
860
861	// Consult an operand specifier to determine which of the fixupValue functions*
862	// to use in correcting readModRM()'ss interpretation.
863	//
864	// @param insn - See fixupValue().*
865	// @param op - The operand specifier.
866	// @return - 0 if fixup was successful; -1 if the register returned was
867	// invalid for its class.
868	static int fixupReg(struct InternalInstruction *insn,
869	const struct OperandSpecifier *op) {
870	uint8_t valid;
871	LLVM_DEBUG(dbgs() << "fixupReg()");
872
873	switch ((OperandEncoding)op->encoding) {
874	default:
875	debug("Expected a REG or R/M encoding in fixupReg");
876	return -`1`;
877	case ENCODING_VVVV:
878	insn->vvvv =
879	(Reg)fixupRegValue(insn, type: (OperandType)op->type, index: insn->vvvv, valid: &valid);
880	if (!valid)
881	return -`1`;
882	break;
883	case ENCODING_REG:
884	insn->reg = (Reg)fixupRegValue(insn, type: (OperandType)op->type,
885	index: insn->reg - insn->regBase, valid: &valid);
886	if (!valid)
887	return -`1`;
888	break;
889	CASE_ENCODING_RM:
890	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
891	modFromModRM(insn->modRM) == `3`) {
892	// EVEX_X can extend the register id to 32 for a non-GPR register that is
893	// encoded in RM.
894	// mode : MODE_64_BIT
895	// Only 8 vector registers are available in 32 bit mode
896	// mod : 3
897	// RM encodes a register
898	switch (op->type) {
899	case TYPE_Rv:
900	case TYPE_R8:
901	case TYPE_R16:
902	case TYPE_R32:
903	case TYPE_R64:
904	break;
905	default:
906	insn->eaBase =
907	(EABase)(insn->eaBase +
908	(xFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`));
909	break;
910	}
911	}
912	[[fallthrough]];
913	case ENCODING_SIB:
914	if (insn->eaBase >= insn->eaRegBase) {
915	insn->eaBase = (EABase)fixupRMValue(
916	insn, type: (OperandType)op->type, index: insn->eaBase - insn->eaRegBase, valid: &valid);
917	if (!valid)
918	return -`1`;
919	}
920	break;
921	}
922
923	return `0`;
924	}
925
926	// Read the opcode (except the ModR/M byte in the case of extended or escape
927	// opcodes).
928	static bool readOpcode(struct InternalInstruction *insn) {
929	uint8_t current;
930	LLVM_DEBUG(dbgs() << "readOpcode()");
931
932	insn->opcodeType = ONEBYTE;
933	if (insn->vectorExtensionType == TYPE_EVEX) {
934	switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[`1`])) {
935	default:
936	LLVM_DEBUG(
937	dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
938	mmmFromEVEX2of4(insn->vectorExtensionPrefix[`1`])));
939	return true;
940	case VEX_LOB_0F:
941	insn->opcodeType = TWOBYTE;
942	return consume(insn, ptr&: insn->opcode);
943	case VEX_LOB_0F38:
944	insn->opcodeType = THREEBYTE_38;
945	return consume(insn, ptr&: insn->opcode);
946	case VEX_LOB_0F3A:
947	insn->opcodeType = THREEBYTE_3A;
948	return consume(insn, ptr&: insn->opcode);
949	case VEX_LOB_MAP4:
950	insn->opcodeType = MAP4;
951	return consume(insn, ptr&: insn->opcode);
952	case VEX_LOB_MAP5:
953	insn->opcodeType = MAP5;
954	return consume(insn, ptr&: insn->opcode);
955	case VEX_LOB_MAP6:
956	insn->opcodeType = MAP6;
957	return consume(insn, ptr&: insn->opcode);
958	case VEX_LOB_MAP7:
959	insn->opcodeType = MAP7;
960	return consume(insn, ptr&: insn->opcode);
961	}
962	} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
963	switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])) {
964	default:
965	LLVM_DEBUG(
966	dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
967	mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])));
968	return true;
969	case VEX_LOB_0F:
970	insn->opcodeType = TWOBYTE;
971	return consume(insn, ptr&: insn->opcode);
972	case VEX_LOB_0F38:
973	insn->opcodeType = THREEBYTE_38;
974	return consume(insn, ptr&: insn->opcode);
975	case VEX_LOB_0F3A:
976	insn->opcodeType = THREEBYTE_3A;
977	return consume(insn, ptr&: insn->opcode);
978	case VEX_LOB_MAP5:
979	insn->opcodeType = MAP5;
980	return consume(insn, ptr&: insn->opcode);
981	case VEX_LOB_MAP6:
982	insn->opcodeType = MAP6;
983	return consume(insn, ptr&: insn->opcode);
984	case VEX_LOB_MAP7:
985	insn->opcodeType = MAP7;
986	return consume(insn, ptr&: insn->opcode);
987	}
988	} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
989	insn->opcodeType = TWOBYTE;
990	return consume(insn, ptr&: insn->opcode);
991	} else if (insn->vectorExtensionType == TYPE_XOP) {
992	switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[`1`])) {
993	default:
994	LLVM_DEBUG(
995	dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
996	mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])));
997	return true;
998	case XOP_MAP_SELECT_8:
999	insn->opcodeType = XOP8_MAP;
1000	return consume(insn, ptr&: insn->opcode);
1001	case XOP_MAP_SELECT_9:
1002	insn->opcodeType = XOP9_MAP;
1003	return consume(insn, ptr&: insn->opcode);
1004	case XOP_MAP_SELECT_A:
1005	insn->opcodeType = XOPA_MAP;
1006	return consume(insn, ptr&: insn->opcode);
1007	}
1008	} else if (mFromREX2(insn->rex2ExtensionPrefix[`1`])) {
1009	// m bit indicates opcode map 1
1010	insn->opcodeType = TWOBYTE;
1011	return consume(insn, ptr&: insn->opcode);
1012	}
1013
1014	if (consume(insn, ptr&: current))
1015	return true;
1016
1017	if (current == `0x0f`) {
1018	LLVM_DEBUG(
1019	dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
1020	if (consume(insn, ptr&: current))
1021	return true;
1022
1023	if (current == `0x38`) {
1024	LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1025	current));
1026	if (consume(insn, ptr&: current))
1027	return true;
1028
1029	insn->opcodeType = THREEBYTE_38;
1030	} else if (current == `0x3a`) {
1031	LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1032	current));
1033	if (consume(insn, ptr&: current))
1034	return true;
1035
1036	insn->opcodeType = THREEBYTE_3A;
1037	} else if (current == `0x0f`) {
1038	LLVM_DEBUG(
1039	dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
1040
1041	// Consume operands before the opcode to comply with the 3DNow encoding
1042	if (readModRM(insn))
1043	return true;
1044
1045	if (consume(insn, ptr&: current))
1046	return true;
1047
1048	insn->opcodeType = THREEDNOW_MAP;
1049	} else {
1050	LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
1051	insn->opcodeType = TWOBYTE;
1052	}
1053	} else if (insn->mandatoryPrefix)
1054	// The opcode with mandatory prefix must start with opcode escape.
1055	// If not it's legacy repeat prefix
1056	insn->mandatoryPrefix = `0`;
1057
1058	// At this point we have consumed the full opcode.
1059	// Anything we consume from here on must be unconsumed.
1060	insn->opcode = current;
1061
1062	return false;
1063	}
1064
// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
//
// The two names must have the same length and agree character by character,
// except that a size marker in orig may be replaced by its 16-bit counterpart
// in equiv:
//   'Q' or 'L' -> 'W'    (suffix letters)
//   '6' or '3' -> '1'    (first digit of 64 / 32 -> 16)
//   '4' or '2' -> '6'    (second digit of 64 / 32 -> 16)
//
// @param orig  - NUL-terminated name of the 32- or 64-bit instruction.
// @param equiv - NUL-terminated name of the candidate 16-bit instruction.
// @return      - true if equiv matches orig under the substitutions above.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  for (;; ++orig, ++equiv) {
    const char o = *orig;
    const char e = *equiv;

    // Once either string ends, they match only if both ended together.
    if (o == '\0' || e == '\0')
      return o == e;

    if (o == e)
      continue;

    const bool suffixNarrowed = (o == 'Q' || o == 'L') && e == 'W';
    const bool firstDigitNarrowed = (o == '6' || o == '3') && e == '1';
    const bool secondDigitNarrowed = (o == '4' || o == '2') && e == '6';
    if (!suffixNarrowed && !firstDigitNarrowed && !secondDigitNarrowed)
      return false;
  }
}
1083
// Determine whether this instruction is a 64-bit instruction.
//
// The test is purely textual: the name counts as 64-bit if the character pair
// "64" appears anywhere in it.
//
// @param name - NUL-terminated instruction name.
// @return     - true if name contains "64"; false otherwise.
static bool is64Bit(const char *name) {
  // Reading cursor[1] is safe: cursor[0] is known not to be the terminator.
  for (const char *cursor = name; *cursor != '\0'; ++cursor)
    if (cursor[0] == '6' && cursor[1] == '4')
      return true;
  return false;
}
1093
1094	// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1095	// for extended and escape opcodes, and using a supplied attribute mask.
1096	static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1097	struct InternalInstruction *insn,
1098	uint16_t attrMask) {
1099	auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1100	const ContextDecision *decision;
1101	switch (insn->opcodeType) {
1102	case ONEBYTE:
1103	decision = &ONEBYTE_SYM;
1104	break;
1105	case TWOBYTE:
1106	decision = &TWOBYTE_SYM;
1107	break;
1108	case THREEBYTE_38:
1109	decision = &THREEBYTE38_SYM;
1110	break;
1111	case THREEBYTE_3A:
1112	decision = &THREEBYTE3A_SYM;
1113	break;
1114	case XOP8_MAP:
1115	decision = &XOP8_MAP_SYM;
1116	break;
1117	case XOP9_MAP:
1118	decision = &XOP9_MAP_SYM;
1119	break;
1120	case XOPA_MAP:
1121	decision = &XOPA_MAP_SYM;
1122	break;
1123	case THREEDNOW_MAP:
1124	decision = &THREEDNOW_MAP_SYM;
1125	break;
1126	case MAP4:
1127	decision = &MAP4_SYM;
1128	break;
1129	case MAP5:
1130	decision = &MAP5_SYM;
1131	break;
1132	case MAP6:
1133	decision = &MAP6_SYM;
1134	break;
1135	case MAP7:
1136	decision = &MAP7_SYM;
1137	break;
1138	}
1139
1140	if (decision->opcodeDecisions[insnCtx]
1141	.modRMDecisions[insn->opcode]
1142	.modrm_type != MODRM_ONEENTRY) {
1143	if (readModRM(insn))
1144	return -`1`;
1145	*instructionID =
1146	decode(type: insn->opcodeType, insnContext: insnCtx, opcode: insn->opcode, modRM: insn->modRM);
1147	} else {
1148	*instructionID = decode(type: insn->opcodeType, insnContext: insnCtx, opcode: insn->opcode, modRM: `0`);
1149	}
1150
1151	return `0`;
1152	}
1153
1154	static bool isCCMPOrCTEST(InternalInstruction *insn) {
1155	if (insn->opcodeType != MAP4)
1156	return false;
1157	if (insn->opcode == `0x83` && regFromModRM(insn->modRM) == `7`)
1158	return true;
1159	switch (insn->opcode & `0xfe`) {
1160	default:
1161	return false;
1162	case `0x38`:
1163	case `0x3a`:
1164	case `0x84`:
1165	return true;
1166	case `0x80`:
1167	return regFromModRM(insn->modRM) == `7`;
1168	case `0xf6`:
1169	return regFromModRM(insn->modRM) == `0`;
1170	}
1171	}
1172
1173	static bool isNF(InternalInstruction *insn) {
1174	if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1175	return false;
1176	if (insn->opcodeType == MAP4)
1177	return true;
1178	// Below NF instructions are not in map4.
1179	if (insn->opcodeType == THREEBYTE_38 &&
1180	ppFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) == VEX_PREFIX_NONE) {
1181	switch (insn->opcode) {
1182	case `0xf2`: // ANDN
1183	case `0xf3`: // BLSI, BLSR, BLSMSK
1184	case `0xf5`: // BZHI
1185	case `0xf7`: // BEXTR
1186	return true;
1187	default:
1188	break;
1189	}
1190	}
1191	return false;
1192	}
1193
// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes. Determines the attributes and context for
// the instruction before doing so.
//
// The function builds an attribute mask from the mode and the prefixes seen,
// looks the instruction up with that mask, and then retries the lookup with an
// adjusted mask in a few cases the tables cannot express on their own.
//
// @param insn - The instruction being decoded. On success insn->instructionID
//               and insn->spec are set.
// @param mii  - Used to obtain instruction names for the name-based checks.
// @return     - 0 on success; -1 if a lookup failed or the vector extension
//               type is not one handled here.
static int getInstructionID(struct InternalInstruction *insn,
                            const MCInstrInfo *mii) {
  uint16_t attrMask;
  uint16_t instructionID;

  LLVM_DEBUG(dbgs() << "getID()");

  attrMask = ATTR_NONE;

  if (insn->mode == MODE_64BIT)
    attrMask |= ATTR_64BIT;

  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // Every non-EVEX vector prefix (VEX and XOP) is looked up as ATTR_VEX.
    attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

    if (insn->vectorExtensionType == TYPE_EVEX) {
      // The pp field stands in for the 66 / F3 / F2 legacy prefixes.
      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXKZ;
      // isCCMPOrCTEST() inspects the ModR/M byte, so it is read first.
      if (isNF(insn) && !readModRM(insn) &&
          !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
        attrMask |= ATTR_EVEXNF;
      // aaa is not used as an opmask in MAP4
      else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&
               (insn->opcodeType != MAP4))
        attrMask |= ATTR_EVEXK;
      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) {
        attrMask |= ATTR_EVEXB;
        // ATTR_EVEXU is only set for the register form (mod == 3).
        if (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) && !readModRM(insn) &&
            modFromModRM(insn->modRM) == 3)
          attrMask |= ATTR_EVEXU;
      }
      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_VEXL;
      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXL2;
    } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        // Unlike the other vector forms, this path also honours AdSize here.
        if (insn->hasAdSize)
          attrMask |= ATTR_ADSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_XOP) {
      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else {
      return -1;
    }
  } else if (!insn->mandatoryPrefix) {
    // If we don't have mandatory prefix we should use legacy prefixes here
    if (insn->hasOpSize && (insn->mode != MODE_16BIT))
      attrMask |= ATTR_OPSIZE;
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->opcodeType == ONEBYTE) {
      if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
        // Special support for PAUSE
        attrMask |= ATTR_XS;
    } else {
      if (insn->repeatPrefix == 0xf2)
        attrMask |= ATTR_XD;
      else if (insn->repeatPrefix == 0xf3)
        attrMask |= ATTR_XS;
    }
  } else {
    // A mandatory prefix was recorded; translate it directly.
    switch (insn->mandatoryPrefix) {
    case 0xf2:
      attrMask |= ATTR_XD;
      break;
    case 0xf3:
      attrMask |= ATTR_XS;
      break;
    case 0x66:
      if (insn->mode != MODE_16BIT)
        attrMask |= ATTR_OPSIZE;
      if (insn->hasAdSize)
        attrMask |= ATTR_ADSIZE;
      break;
    case 0x67:
      attrMask |= ATTR_ADSIZE;
      break;
    }
  }

  // REX.W (bit 3 of the REX prefix) selects the REXW context and drops AdSize.
  if (insn->rexPrefix & 0x08) {
    attrMask |= ATTR_REXW;
    attrMask &= ~ATTR_ADSIZE;
  }

  // Absolute jump and pushp/popp need special handling
  if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
      (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
    attrMask |= ATTR_REX2;

  if (insn->mode == MODE_16BIT) {
    // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
    // of the AdSize prefix is inverted w.r.t. 32-bit mode.
    if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
      attrMask ^= ATTR_ADSIZE;
    // If we're in 16-bit mode and this is one of the relative jumps and opsize
    // prefix isn't present, we need to force the opsize attribute since the
    // prefix is inverted relative to 32-bit mode.
    if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
        (insn->opcode == 0xE8 || insn->opcode == 0xE9))
      attrMask |= ATTR_OPSIZE;

    // Same forcing for the two-byte opcodes 0x80 through 0x8F.
    if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
        insn->opcode >= 0x80 && insn->opcode <= 0x8F)
      attrMask |= ATTR_OPSIZE;
  }


  // First lookup, using the attribute mask assembled above.
  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;

  // The following clauses compensate for limitations of the tables.

  if (insn->mode != MODE_64BIT &&
      insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // The tables can't distinguish between cases where the W-bit is used to
    // select register size and cases where it's a required part of the opcode.
    if ((insn->vectorExtensionType == TYPE_EVEX &&
         wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_VEX_3B &&
         wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_XOP &&
         wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {

      uint16_t instructionIDWithREXW;
      if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
                                       attrMask | ATTR_REXW)) {
        // The REXW lookup failed; keep the result of the first lookup.
        insn->instructionID = instructionID;
        insn->spec = &INSTRUCTIONS_SYM[instructionID];
        return 0;
      }

      auto SpecName = mii->getName(instructionIDWithREXW);
      // If not a 64-bit instruction. Switch the opcode.
      if (!is64Bit(SpecName.data())) {
        insn->instructionID = instructionIDWithREXW;
        insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
        return 0;
      }
    }
  }

  // Absolute moves, umonitor, and movdir64b need special handling.
  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
  //  inverted w.r.t. 32-bit mode.
  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
  //  any position.
  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
      (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
      (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
    // Make sure we observed the prefixes in any position.
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->hasOpSize)
      attrMask |= ATTR_OPSIZE;

    // In 16-bit, invert the attributes.
    if (insn->mode == MODE_16BIT) {
      attrMask ^= ATTR_ADSIZE;

      // The OpSize attribute is only valid with the absolute moves.
      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
        attrMask ^= ATTR_OPSIZE;
    }

    // Look the instruction up again with the corrected mask.
    if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
      return -1;

    insn->instructionID = instructionID;
    insn->spec = &INSTRUCTIONS_SYM[instructionID];
    return 0;
  }

  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
      !(attrMask & ATTR_OPSIZE)) {
    // The instruction tables make no distinction between instructions that
    // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    // particular spot (i.e., many MMX operations). In general we're
    // conservative, but in the specific case where OpSize is present but not in
    // the right place we check if there's a 16-bit operation.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithOpsize;
    llvm::StringRef specName, specWithOpSizeName;

    spec = &INSTRUCTIONS_SYM[instructionID];

    if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
                                     attrMask | ATTR_OPSIZE)) {
      // ModRM required with OpSize but not present. Give up and return the
      // version without OpSize set.
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specName = mii->getName(instructionID);
    specWithOpSizeName = mii->getName(instructionIDWithOpsize);

    // Take the OpSize variant only when its name is the 16-bit form of the
    // plain one and exactly one of "16-bit mode" / "OpSize prefix" holds.
    // '^' binds tighter than '&&', so the XOR is evaluated first.
    if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
        (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
      insn->instructionID = instructionIDWithOpsize;
      insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      insn->rexPrefix & 0x01) {
    // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
    // as XCHG %r8, %eax.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithNewOpcode;
    const struct InstructionSpecifier *specWithNewOpcode;

    spec = &INSTRUCTIONS_SYM[instructionID];

    // Borrow opcode from one of the other XCHGar opcodes
    insn->opcode = 0x91;

    if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
                                     attrMask)) {
      // Lookup with the borrowed opcode failed; restore and keep the NOOP.
      insn->opcode = 0x90;

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];

    // Change back
    insn->opcode = 0x90;

    insn->instructionID = instructionIDWithNewOpcode;
    insn->spec = specWithNewOpcode;

    return 0;
  }

  // No special case applied; use the result of the first lookup.
  insn->instructionID = instructionID;
  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];

  return 0;
}
1498
1499	// Read an operand from the opcode field of an instruction and interprets it
1500	// appropriately given the operand width. Handles AddRegFrm instructions.
1501	//
1502	// @param insn - the instruction whose opcode field is to be read.
1503	// @param size - The width (in bytes) of the register being specified.
1504	// 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1505	// RAX.
1506	// @return - 0 on success; nonzero otherwise.
1507	static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1508	LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1509
1510	if (size == `0`)
1511	size = insn->registerSize;
1512
1513	auto setOpcodeRegister = [&](unsigned base) {
1514	insn->opcodeRegister =
1515	(Reg)(base + ((bFromREX(insn->rexPrefix) << `3`) \|
1516	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`) \|
1517	(insn->opcode & `7`)));
1518	};
1519
1520	switch (size) {
1521	case `1`:
1522	setOpcodeRegister (MODRM_REG_AL);
1523	if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + `0x4` &&
1524	insn->opcodeRegister < MODRM_REG_AL + `0x8`) {
1525	insn->opcodeRegister =
1526	(Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - `4`));
1527	}
1528
1529	break;
1530	case `2`:
1531	setOpcodeRegister (MODRM_REG_AX);
1532	break;
1533	case `4`:
1534	setOpcodeRegister (MODRM_REG_EAX);
1535	break;
1536	case `8`:
1537	setOpcodeRegister (MODRM_REG_RAX);
1538	break;
1539	}
1540
1541	return `0`;
1542	}
1543
1544	// Consume an immediate operand from an instruction, given the desired operand
1545	// size.
1546	//
1547	// @param insn - The instruction whose operand is to be read.
1548	// @param size - The width (in bytes) of the operand.
1549	// @return - 0 if the immediate was successfully consumed; nonzero
1550	// otherwise.
1551	static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1552	uint8_t imm8;
1553	uint16_t imm16;
1554	uint32_t imm32;
1555	uint64_t imm64;
1556
1557	LLVM_DEBUG(dbgs() << "readImmediate()");
1558
1559	assert(insn->numImmediatesConsumed < `2` && "Already consumed two immediates");
1560
1561	insn->immediateSize = size;
1562	insn->immediateOffset = insn->readerCursor - insn->startLocation;
1563
1564	switch (size) {
1565	case `1`:
1566	if (consume(insn, ptr&: imm8))
1567	return -`1`;
1568	insn->immediates[insn->numImmediatesConsumed] = imm8;
1569	break;
1570	case `2`:
1571	if (consume(insn, ptr&: imm16))
1572	return -`1`;
1573	insn->immediates[insn->numImmediatesConsumed] = imm16;
1574	break;
1575	case `4`:
1576	if (consume(insn, ptr&: imm32))
1577	return -`1`;
1578	insn->immediates[insn->numImmediatesConsumed] = imm32;
1579	break;
1580	case `8`:
1581	if (consume(insn, ptr&: imm64))
1582	return -`1`;
1583	insn->immediates[insn->numImmediatesConsumed] = imm64;
1584	break;
1585	default:
1586	llvm_unreachable("invalid size");
1587	}
1588
1589	insn->numImmediatesConsumed++;
1590
1591	return `0`;
1592	}
1593
1594	// Consume vvvv from an instruction if it has a VEX prefix.
1595	static int readVVVV(struct InternalInstruction *insn) {
1596	LLVM_DEBUG(dbgs() << "readVVVV()");
1597
1598	int vvvv;
1599	if (insn->vectorExtensionType == TYPE_EVEX)
1600	vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]) << `4` \|
1601	vvvvFromEVEX3of4(insn->vectorExtensionPrefix[`2`]));
1602	else if (insn->vectorExtensionType == TYPE_VEX_3B)
1603	vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[`2`]);
1604	else if (insn->vectorExtensionType == TYPE_VEX_2B)
1605	vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[`1`]);
1606	else if (insn->vectorExtensionType == TYPE_XOP)
1607	vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[`2`]);
1608	else
1609	return -`1`;
1610
1611	if (insn->mode != MODE_64BIT)
1612	vvvv &= `0xf`; // Can only clear bit 4. Bit 3 must be cleared later.
1613
1614	insn->vvvv = static_cast<Reg>(vvvv);
1615	return `0`;
1616	}
1617
1618	// Read an mask register from the opcode field of an instruction.
1619	//
1620	// @param insn - The instruction whose opcode field is to be read.
1621	// @return - 0 on success; nonzero otherwise.
1622	static int readMaskRegister(struct InternalInstruction *insn) {
1623	LLVM_DEBUG(dbgs() << "readMaskRegister()");
1624
1625	if (insn->vectorExtensionType != TYPE_EVEX)
1626	return -`1`;
1627
1628	insn->writemask =
1629	static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[`3`]));
1630	return `0`;
1631	}
1632
1633	// Consults the specifier for an instruction and consumes all
1634	// operands for that instruction, interpreting them as it goes.
1635	static int readOperands(struct InternalInstruction *insn) {
1636	int hasVVVV, needVVVV;
1637	int sawRegImm = `0`;
1638
1639	LLVM_DEBUG(dbgs() << "readOperands()");
1640
1641	// If non-zero vvvv specified, make sure one of the operands uses it.
1642	hasVVVV = !readVVVV(insn);
1643	needVVVV = hasVVVV && (insn->vvvv != `0`);
1644
1645	for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1646	switch (Op.encoding) {
1647	case ENCODING_NONE:
1648	case ENCODING_SI:
1649	case ENCODING_DI:
1650	break;
1651	CASE_ENCODING_VSIB:
1652	// VSIB can use the V2 bit so check only the other bits.
1653	if (needVVVV)
1654	needVVVV = hasVVVV & ((insn->vvvv & `0xf`) != `0`);
1655	if (readModRM(insn))
1656	return -`1`;
1657
1658	// Reject if SIB wasn't used.
1659	if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1660	return -`1`;
1661
1662	// If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1663	if (insn->sibIndex == SIB_INDEX_NONE)
1664	insn->sibIndex = (SIBIndex)(insn->sibIndexBase + `4`);
1665
1666	// If EVEX.v2 is set this is one of the 16-31 registers.
1667	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1668	v2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1669	insn->sibIndex = (SIBIndex)(insn->sibIndex + `16`);
1670
1671	// Adjust the index register to the correct size.
1672	switch ((OperandType)Op.type) {
1673	default:
1674	debug("Unhandled VSIB index type");
1675	return -`1`;
1676	case TYPE_MVSIBX:
1677	insn->sibIndex =
1678	(SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1679	break;
1680	case TYPE_MVSIBY:
1681	insn->sibIndex =
1682	(SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1683	break;
1684	case TYPE_MVSIBZ:
1685	insn->sibIndex =
1686	(SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1687	break;
1688	}
1689
1690	// Apply the AVX512 compressed displacement scaling factor.
1691	if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1692	insn->displacement *= `1` << (Op.encoding - ENCODING_VSIB);
1693	break;
1694	case ENCODING_SIB:
1695	// Reject if SIB wasn't used.
1696	if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1697	return -`1`;
1698	if (readModRM(insn))
1699	return -`1`;
1700	if (fixupReg(insn, op: &Op))
1701	return -`1`;
1702	break;
1703	case ENCODING_REG:
1704	CASE_ENCODING_RM:
1705	if (readModRM(insn))
1706	return -`1`;
1707	if (fixupReg(insn, op: &Op))
1708	return -`1`;
1709	// Apply the AVX512 compressed displacement scaling factor.
1710	if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1711	insn->displacement *= `1` << (Op.encoding - ENCODING_RM);
1712	break;
1713	case ENCODING_IB:
1714	if (sawRegImm) {
1715	// Saw a register immediate so don't read again and instead split the
1716	// previous immediate. FIXME: This is a hack.
1717	insn->immediates[insn->numImmediatesConsumed] =
1718	insn->immediates[insn->numImmediatesConsumed - `1`] & `0xf`;
1719	++insn->numImmediatesConsumed;
1720	break;
1721	}
1722	if (readImmediate(insn, size: `1`))
1723	return -`1`;
1724	if (Op.type == TYPE_XMM \|\| Op.type == TYPE_YMM)
1725	sawRegImm = `1`;
1726	break;
1727	case ENCODING_IW:
1728	if (readImmediate(insn, size: `2`))
1729	return -`1`;
1730	break;
1731	case ENCODING_ID:
1732	if (readImmediate(insn, size: `4`))
1733	return -`1`;
1734	break;
1735	case ENCODING_IO:
1736	if (readImmediate(insn, size: `8`))
1737	return -`1`;
1738	break;
1739	case ENCODING_Iv:
1740	if (readImmediate(insn, size: insn->immediateSize))
1741	return -`1`;
1742	break;
1743	case ENCODING_Ia:
1744	if (readImmediate(insn, size: insn->addressSize))
1745	return -`1`;
1746	break;
1747	case ENCODING_IRC:
1748	insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]) << `1`) \|
1749	lFromEVEX4of4(insn->vectorExtensionPrefix[`3`]);
1750	break;
1751	case ENCODING_RB:
1752	if (readOpcodeRegister(insn, size: `1`))
1753	return -`1`;
1754	break;
1755	case ENCODING_RW:
1756	if (readOpcodeRegister(insn, size: `2`))
1757	return -`1`;
1758	break;
1759	case ENCODING_RD:
1760	if (readOpcodeRegister(insn, size: `4`))
1761	return -`1`;
1762	break;
1763	case ENCODING_RO:
1764	if (readOpcodeRegister(insn, size: `8`))
1765	return -`1`;
1766	break;
1767	case ENCODING_Rv:
1768	if (readOpcodeRegister(insn, size: `0`))
1769	return -`1`;
1770	break;
1771	case ENCODING_CF:
1772	insn->immediates[`1`] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[`2`]);
1773	needVVVV = false; // oszc shares the same bits with VVVV
1774	break;
1775	case ENCODING_CC:
1776	if (isCCMPOrCTEST(insn))
1777	insn->immediates[`2`] = scFromEVEX4of4(insn->vectorExtensionPrefix[`3`]);
1778	else
1779	insn->immediates[`1`] = insn->opcode & `0xf`;
1780	break;
1781	case ENCODING_FP:
1782	break;
1783	case ENCODING_VVVV:
1784	needVVVV = `0`; // Mark that we have found a VVVV operand.
1785	if (!hasVVVV)
1786	return -`1`;
1787	if (insn->mode != MODE_64BIT)
1788	insn->vvvv = static_cast<Reg>(insn->vvvv & `0x7`);
1789	if (fixupReg(insn, op: &Op))
1790	return -`1`;
1791	break;
1792	case ENCODING_WRITEMASK:
1793	if (readMaskRegister(insn))
1794	return -`1`;
1795	break;
1796	case ENCODING_DUP:
1797	break;
1798	default:
1799	LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1800	return -`1`;
1801	}
1802	}
1803
1804	// If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1805	if (needVVVV)
1806	return -`1`;
1807
1808	return `0`;
1809	}
1810
namespace llvm {

// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work. They give the names BX_SI, BX_DI, BP_SI, BP_DI, sib and
// sib64 a definition inside namespace X86, using the placeholder values
// 500-505.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI = 501,
    BP_SI = 502,
    BP_DI = 503,
    sib   = 504,
    sib64 = 505
  };
} // namespace X86

} // namespace llvm
1828
// Forward declaration; getInstruction() below calls this before its
// definition appears. getInstruction() treats a false return as success.
static bool translateInstruction(MCInst &target,
                                InternalInstruction &source,
                                const MCDisassembler *Dis);
1832
1833	namespace {
1834
1835	/// Generic disassembler for all X86 platforms. All each platform class should
1836	/// have to do is subclass the constructor, and provide a different
1837	/// disassemblerMode value.
1838	class X86GenericDisassembler : public MCDisassembler {
1839	std::unique_ptr<const MCInstrInfo> MII;
1840	public:
1841	X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1842	std::unique_ptr<const MCInstrInfo> MII);
1843	public:
1844	DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1845	ArrayRef<uint8_t> Bytes, uint64_t Address,
1846	raw_ostream &cStream) const override;
1847
1848	private:
1849	DisassemblerMode fMode;
1850	};
1851
1852	} // namespace
1853
1854	X86GenericDisassembler::X86GenericDisassembler(
1855	const MCSubtargetInfo &STI,
1856	MCContext &Ctx,
1857	std::unique_ptr<const MCInstrInfo> MII)
1858	: MCDisassembler (STI, Ctx), MII (std::move(MII)) {
1859	const FeatureBitset &FB = STI.getFeatureBits();
1860	if (FB [X86::Is16Bit]) {
1861	fMode = MODE_16BIT;
1862	return;
1863	} else if (FB [X86::Is32Bit]) {
1864	fMode = MODE_32BIT;
1865	return;
1866	} else if (FB [X86::Is64Bit]) {
1867	fMode = MODE_64BIT;
1868	return;
1869	}
1870
1871	llvm_unreachable("Invalid CPU mode");
1872	}
1873
1874	MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1875	MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1876	raw_ostream &CStream) const {
1877	CommentStream = &CStream;
1878
1879	InternalInstruction Insn;
1880	memset(s: &Insn, c: `0`, n: sizeof(InternalInstruction));
1881	Insn.bytes = Bytes;
1882	Insn.startLocation = Address;
1883	Insn.readerCursor = Address;
1884	Insn.mode = fMode;
1885
1886	if (Bytes.empty() \|\| readPrefixes(insn: &Insn) \|\| readOpcode(insn: &Insn) \|\|
1887	getInstructionID(insn: &Insn, mii: MII.get()) \|\| Insn.instructionID == `0` \|\|
1888	readOperands(insn: &Insn)) {
1889	Size = Insn.readerCursor - Address;
1890	return Fail;
1891	}
1892
1893	Insn.operands = x86OperandSets[Insn.spec->operands];
1894	Insn.length = Insn.readerCursor - Insn.startLocation;
1895	Size = Insn.length;
1896	if (Size > `15`)
1897	LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1898
1899	bool Ret = translateInstruction(target&: Instr, source&: Insn, Dis: this);
1900	if (!Ret) {
1901	unsigned Flags = X86::IP_NO_PREFIX;
1902	if (Insn.hasAdSize)
1903	Flags \|= X86::IP_HAS_AD_SIZE;
1904	if (!Insn.mandatoryPrefix) {
1905	if (Insn.hasOpSize)
1906	Flags \|= X86::IP_HAS_OP_SIZE;
1907	if (Insn.repeatPrefix == `0xf2`)
1908	Flags \|= X86::IP_HAS_REPEAT_NE;
1909	else if (Insn.repeatPrefix == `0xf3` &&
1910	// It should not be 'pause' f3 90
1911	Insn.opcode != `0x90`)
1912	Flags \|= X86::IP_HAS_REPEAT;
1913	if (Insn.hasLockPrefix)
1914	Flags \|= X86::IP_HAS_LOCK;
1915	}
1916	Instr.setFlags(Flags);
1917	}
1918	return (!Ret) ? Success : Fail;
1919	}
1920
1921	//
1922	// Private code that translates from struct InternalInstructions to MCInsts.
1923	//
1924
1925	/// translateRegister - Translates an internal register to the appropriate LLVM
1926	/// register, and appends it as an operand to an MCInst.
1927	///
1928	/// @param mcInst - The MCInst to append to.
1929	/// @param reg - The Reg to append.
1930	static void translateRegister(MCInst &mcInst, Reg reg) {
1931	#define ENTRY(x) X86::x,
1932	static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1933	#undef ENTRY
1934
1935	MCPhysReg llvmRegnum = llvmRegnums[reg];
1936	mcInst.addOperand(Op: MCOperand::createReg(Reg: llvmRegnum));
1937	}
1938
1939	static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1940	`0`, // SEG_OVERRIDE_NONE
1941	X86::CS,
1942	X86::SS,
1943	X86::DS,
1944	X86::ES,
1945	X86::FS,
1946	X86::GS
1947	};
1948
1949	/// translateSrcIndex - Appends a source index operand to an MCInst.
1950	///
1951	/// @param mcInst - The MCInst to append to.
1952	/// @param insn - The internal instruction.
1953	static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1954	unsigned baseRegNo;
1955
1956	if (insn.mode == MODE_64BIT)
1957	baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1958	else if (insn.mode == MODE_32BIT)
1959	baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1960	else {
1961	assert(insn.mode == MODE_16BIT);
1962	baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1963	}
1964	MCOperand baseReg = MCOperand::createReg(Reg: baseRegNo);
1965	mcInst.addOperand(Op: baseReg);
1966
1967	MCOperand segmentReg;
1968	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
1969	mcInst.addOperand(Op: segmentReg);
1970	return false;
1971	}
1972
1973	/// translateDstIndex - Appends a destination index operand to an MCInst.
1974	///
1975	/// @param mcInst - The MCInst to append to.
1976	/// @param insn - The internal instruction.
1977
1978	static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1979	unsigned baseRegNo;
1980
1981	if (insn.mode == MODE_64BIT)
1982	baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1983	else if (insn.mode == MODE_32BIT)
1984	baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1985	else {
1986	assert(insn.mode == MODE_16BIT);
1987	baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1988	}
1989	MCOperand baseReg = MCOperand::createReg(Reg: baseRegNo);
1990	mcInst.addOperand(Op: baseReg);
1991	return false;
1992	}
1993
1994	/// translateImmediate - Appends an immediate operand to an MCInst.
1995	///
1996	/// @param mcInst - The MCInst to append to.
1997	/// @param immediate - The immediate value to append.
1998	/// @param operand - The operand, as stored in the descriptor table.
1999	/// @param insn - The internal instruction.
2000	static void translateImmediate(MCInst &mcInst, uint64_t immediate,
2001	const OperandSpecifier &operand,
2002	InternalInstruction &insn,
2003	const MCDisassembler *Dis) {
2004	// Sign-extend the immediate if necessary.
2005
2006	OperandType type = (OperandType)operand.type;
2007
2008	bool isBranch = false;
2009	uint64_t pcrel = `0`;
2010	if (type == TYPE_REL) {
2011	isBranch = true;
2012	pcrel = insn.startLocation + insn.length;
2013	switch (operand.encoding) {
2014	default:
2015	break;
2016	case ENCODING_Iv:
2017	switch (insn.displacementSize) {
2018	default:
2019	break;
2020	case `1`:
2021	if(immediate & `0x80`)
2022	immediate \|= ~(`0xffull`);
2023	break;
2024	case `2`:
2025	if(immediate & `0x8000`)
2026	immediate \|= ~(`0xffffull`);
2027	break;
2028	case `4`:
2029	if(immediate & `0x80000000`)
2030	immediate \|= ~(`0xffffffffull`);
2031	break;
2032	case `8`:
2033	break;
2034	}
2035	break;
2036	case ENCODING_IB:
2037	if(immediate & `0x80`)
2038	immediate \|= ~(`0xffull`);
2039	break;
2040	case ENCODING_IW:
2041	if(immediate & `0x8000`)
2042	immediate \|= ~(`0xffffull`);
2043	break;
2044	case ENCODING_ID:
2045	if(immediate & `0x80000000`)
2046	immediate \|= ~(`0xffffffffull`);
2047	break;
2048	}
2049	}
2050	// By default sign-extend all X86 immediates based on their encoding.
2051	else if (type == TYPE_IMM) {
2052	switch (operand.encoding) {
2053	default:
2054	break;
2055	case ENCODING_IB:
2056	if(immediate & `0x80`)
2057	immediate \|= ~(`0xffull`);
2058	break;
2059	case ENCODING_IW:
2060	if(immediate & `0x8000`)
2061	immediate \|= ~(`0xffffull`);
2062	break;
2063	case ENCODING_ID:
2064	if(immediate & `0x80000000`)
2065	immediate \|= ~(`0xffffffffull`);
2066	break;
2067	case ENCODING_IO:
2068	break;
2069	}
2070	}
2071
2072	switch (type) {
2073	case TYPE_XMM:
2074	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::XMM0 + (immediate >> `4`)));
2075	return;
2076	case TYPE_YMM:
2077	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::YMM0 + (immediate >> `4`)));
2078	return;
2079	case TYPE_ZMM:
2080	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::ZMM0 + (immediate >> `4`)));
2081	return;
2082	default:
2083	// operand is 64 bits wide. Do nothing.
2084	break;
2085	}
2086
2087	if (!Dis->tryAddingSymbolicOperand(
2088	Inst&: mcInst, Value: immediate + pcrel, Address: insn.startLocation, IsBranch: isBranch,
2089	Offset: insn.immediateOffset, OpSize: insn.immediateSize, InstSize: insn.length))
2090	mcInst.addOperand(Op: MCOperand::createImm(Val: immediate));
2091
2092	if (type == TYPE_MOFFS) {
2093	MCOperand segmentReg;
2094	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
2095	mcInst.addOperand(Op: segmentReg);
2096	}
2097	}
2098
2099	/// translateRMRegister - Translates a register stored in the R/M field of the
2100	/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
2101	/// @param mcInst - The MCInst to append to.
2102	/// @param insn - The internal instruction to extract the R/M field
2103	/// from.
2104	/// @return - 0 on success; -1 otherwise
2105	static bool translateRMRegister(MCInst &mcInst,
2106	InternalInstruction &insn) {
2107	if (insn.eaBase == EA_BASE_sib \|\| insn.eaBase == EA_BASE_sib64) {
2108	debug("A R/M register operand may not have a SIB byte");
2109	return true;
2110	}
2111
2112	switch (insn.eaBase) {
2113	default:
2114	debug("Unexpected EA base register");
2115	return true;
2116	case EA_BASE_NONE:
2117	debug("EA_BASE_NONE for ModR/M base");
2118	return true;
2119	#define ENTRY(x) case EA_BASE_##x:
2120	ALL_EA_BASES
2121	#undef ENTRY
2122	debug("A R/M register operand may not have a base; "
2123	"the operand must be a register.");
2124	return true;
2125	#define ENTRY(x) \
2126	case EA_REG_##x: \
2127	mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2128	ALL_REGS
2129	#undef ENTRY
2130	}
2131
2132	return false;
2133	}
2134
2135	/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2136	/// fields of an internal instruction (and possibly its SIB byte) to a memory
2137	/// operand in LLVM's format, and appends it to an MCInst.
2138	///
2139	/// @param mcInst - The MCInst to append to.
2140	/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2141	/// from.
2142	/// @param ForceSIB - The instruction must use SIB.
2143	/// @return - 0 on success; nonzero otherwise
2144	static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2145	const MCDisassembler *Dis,
2146	bool ForceSIB = false) {
2147	// Addresses in an MCInst are represented as five operands:
2148	// 1. basereg (register) The R/M base, or (if there is a SIB) the
2149	// SIB base
2150	// 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2151	// scale amount
2152	// 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2153	// the index (which is multiplied by the
2154	// scale amount)
2155	// 4. displacement (immediate) 0, or the displacement if there is one
2156	// 5. segmentreg (register) x86_registerNONE for now, but could be set
2157	// if we have segment overrides
2158
2159	MCOperand baseReg;
2160	MCOperand scaleAmount;
2161	MCOperand indexReg;
2162	MCOperand displacement;
2163	MCOperand segmentReg;
2164	uint64_t pcrel = `0`;
2165
2166	if (insn.eaBase == EA_BASE_sib \|\| insn.eaBase == EA_BASE_sib64) {
2167	if (insn.sibBase != SIB_BASE_NONE) {
2168	switch (insn.sibBase) {
2169	default:
2170	debug("Unexpected sibBase");
2171	return true;
2172	#define ENTRY(x) \
2173	case SIB_BASE_##x: \
2174	baseReg = MCOperand::createReg(X86::x); break;
2175	ALL_SIB_BASES
2176	#undef ENTRY
2177	}
2178	} else {
2179	baseReg = MCOperand::createReg(Reg: X86::NoRegister);
2180	}
2181
2182	if (insn.sibIndex != SIB_INDEX_NONE) {
2183	switch (insn.sibIndex) {
2184	default:
2185	debug("Unexpected sibIndex");
2186	return true;
2187	#define ENTRY(x) \
2188	case SIB_INDEX_##x: \
2189	indexReg = MCOperand::createReg(X86::x); break;
2190	EA_BASES_32BIT
2191	EA_BASES_64BIT
2192	REGS_XMM
2193	REGS_YMM
2194	REGS_ZMM
2195	#undef ENTRY
2196	}
2197	} else {
2198	// Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2199	// but no index is used and modrm alone should have been enough.
2200	// -No base register in 32-bit mode. In 64-bit mode this is used to
2201	// avoid rip-relative addressing.
2202	// -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2203	// base always requires a SIB byte.
2204	// -A scale other than 1 is used.
2205	if (!ForceSIB &&
2206	(insn.sibScale != `1` \|\|
2207	(insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) \|\|
2208	(insn.sibBase != SIB_BASE_NONE &&
2209	insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2210	insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2211	indexReg = MCOperand::createReg(Reg: insn.addressSize == `4` ? X86::EIZ :
2212	X86::RIZ);
2213	} else
2214	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2215	}
2216
2217	scaleAmount = MCOperand::createImm(Val: insn.sibScale);
2218	} else {
2219	switch (insn.eaBase) {
2220	case EA_BASE_NONE:
2221	if (insn.eaDisplacement == EA_DISP_NONE) {
2222	debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2223	return true;
2224	}
2225	if (insn.mode == MODE_64BIT){
2226	pcrel = insn.startLocation + insn.length;
2227	Dis->tryAddingPcLoadReferenceComment(Value: insn.displacement + pcrel,
2228	Address: insn.startLocation +
2229	insn.displacementOffset);
2230	// Section 2.2.1.6
2231	baseReg = MCOperand::createReg(Reg: insn.addressSize == `4` ? X86::EIP :
2232	X86::RIP);
2233	}
2234	else
2235	baseReg = MCOperand::createReg(Reg: X86::NoRegister);
2236
2237	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2238	break;
2239	case EA_BASE_BX_SI:
2240	baseReg = MCOperand::createReg(Reg: X86::BX);
2241	indexReg = MCOperand::createReg(Reg: X86::SI);
2242	break;
2243	case EA_BASE_BX_DI:
2244	baseReg = MCOperand::createReg(Reg: X86::BX);
2245	indexReg = MCOperand::createReg(Reg: X86::DI);
2246	break;
2247	case EA_BASE_BP_SI:
2248	baseReg = MCOperand::createReg(Reg: X86::BP);
2249	indexReg = MCOperand::createReg(Reg: X86::SI);
2250	break;
2251	case EA_BASE_BP_DI:
2252	baseReg = MCOperand::createReg(Reg: X86::BP);
2253	indexReg = MCOperand::createReg(Reg: X86::DI);
2254	break;
2255	default:
2256	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2257	switch (insn.eaBase) {
2258	default:
2259	debug("Unexpected eaBase");
2260	return true;
2261	// Here, we will use the fill-ins defined above. However,
2262	// BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2263	// sib and sib64 were handled in the top-level if, so they're only
2264	// placeholders to keep the compiler happy.
2265	#define ENTRY(x) \
2266	case EA_BASE_##x: \
2267	baseReg = MCOperand::createReg(X86::x); break;
2268	ALL_EA_BASES
2269	#undef ENTRY
2270	#define ENTRY(x) case EA_REG_##x:
2271	ALL_REGS
2272	#undef ENTRY
2273	debug("A R/M memory operand may not be a register; "
2274	"the base field must be a base.");
2275	return true;
2276	}
2277	}
2278
2279	scaleAmount = MCOperand::createImm(Val: `1`);
2280	}
2281
2282	displacement = MCOperand::createImm(Val: insn.displacement);
2283
2284	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
2285
2286	mcInst.addOperand(Op: baseReg);
2287	mcInst.addOperand(Op: scaleAmount);
2288	mcInst.addOperand(Op: indexReg);
2289
2290	const uint8_t dispSize =
2291	(insn.eaDisplacement == EA_DISP_NONE) ? `0` : insn.displacementSize;
2292
2293	if (!Dis->tryAddingSymbolicOperand(
2294	Inst&: mcInst, Value: insn.displacement + pcrel, Address: insn.startLocation, IsBranch: false,
2295	Offset: insn.displacementOffset, OpSize: dispSize, InstSize: insn.length))
2296	mcInst.addOperand(Op: displacement);
2297	mcInst.addOperand(Op: segmentReg);
2298	return false;
2299	}
2300
2301	/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2302	/// byte of an instruction to LLVM form, and appends it to an MCInst.
2303	///
2304	/// @param mcInst - The MCInst to append to.
2305	/// @param operand - The operand, as stored in the descriptor table.
2306	/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2307	/// from.
2308	/// @return - 0 on success; nonzero otherwise
2309	static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2310	InternalInstruction &insn, const MCDisassembler *Dis) {
2311	switch (operand.type) {
2312	default:
2313	debug("Unexpected type for a R/M operand");
2314	return true;
2315	case TYPE_R8:
2316	case TYPE_R16:
2317	case TYPE_R32:
2318	case TYPE_R64:
2319	case TYPE_Rv:
2320	case TYPE_MM64:
2321	case TYPE_XMM:
2322	case TYPE_YMM:
2323	case TYPE_ZMM:
2324	case TYPE_TMM:
2325	case TYPE_TMM_PAIR:
2326	case TYPE_VK_PAIR:
2327	case TYPE_VK:
2328	case TYPE_DEBUGREG:
2329	case TYPE_CONTROLREG:
2330	case TYPE_BNDR:
2331	return translateRMRegister(mcInst, insn);
2332	case TYPE_M:
2333	case TYPE_MVSIBX:
2334	case TYPE_MVSIBY:
2335	case TYPE_MVSIBZ:
2336	return translateRMMemory(mcInst, insn, Dis);
2337	case TYPE_MSIB:
2338	return translateRMMemory(mcInst, insn, Dis, ForceSIB: true);
2339	}
2340	}
2341
2342	/// translateFPRegister - Translates a stack position on the FPU stack to its
2343	/// LLVM form, and appends it to an MCInst.
2344	///
2345	/// @param mcInst - The MCInst to append to.
2346	/// @param stackPos - The stack position to translate.
2347	static void translateFPRegister(MCInst &mcInst,
2348	uint8_t stackPos) {
2349	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::ST0 + stackPos));
2350	}
2351
2352	/// translateMaskRegister - Translates a 3-bit mask register number to
2353	/// LLVM form, and appends it to an MCInst.
2354	///
2355	/// @param mcInst - The MCInst to append to.
2356	/// @param maskRegNum - Number of mask register from 0 to 7.
2357	/// @return - false on success; true otherwise.
2358	static bool translateMaskRegister(MCInst &mcInst,
2359	uint8_t maskRegNum) {
2360	if (maskRegNum >= `8`) {
2361	debug("Invalid mask register number");
2362	return true;
2363	}
2364
2365	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::K0 + maskRegNum));
2366	return false;
2367	}
2368
2369	/// translateOperand - Translates an operand stored in an internal instruction
2370	/// to LLVM's format and appends it to an MCInst.
2371	///
2372	/// @param mcInst - The MCInst to append to.
2373	/// @param operand - The operand, as stored in the descriptor table.
2374	/// @param insn - The internal instruction.
2375	/// @return - false on success; true otherwise.
2376	static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2377	InternalInstruction &insn,
2378	const MCDisassembler *Dis) {
2379	switch (operand.encoding) {
2380	default:
2381	debug("Unhandled operand encoding during translation");
2382	return true;
2383	case ENCODING_REG:
2384	translateRegister(mcInst, reg: insn.reg);
2385	return false;
2386	case ENCODING_WRITEMASK:
2387	return translateMaskRegister(mcInst, maskRegNum: insn.writemask);
2388	case ENCODING_SIB:
2389	CASE_ENCODING_RM:
2390	CASE_ENCODING_VSIB:
2391	return translateRM(mcInst, operand, insn, Dis);
2392	case ENCODING_IB:
2393	case ENCODING_IW:
2394	case ENCODING_ID:
2395	case ENCODING_IO:
2396	case ENCODING_Iv:
2397	case ENCODING_Ia:
2398	translateImmediate(mcInst,
2399	immediate: insn.immediates[insn.numImmediatesTranslated++],
2400	operand,
2401	insn,
2402	Dis);
2403	return false;
2404	case ENCODING_IRC:
2405	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.RC));
2406	return false;
2407	case ENCODING_SI:
2408	return translateSrcIndex(mcInst, insn);
2409	case ENCODING_DI:
2410	return translateDstIndex(mcInst, insn);
2411	case ENCODING_RB:
2412	case ENCODING_RW:
2413	case ENCODING_RD:
2414	case ENCODING_RO:
2415	case ENCODING_Rv:
2416	translateRegister(mcInst, reg: insn.opcodeRegister);
2417	return false;
2418	case ENCODING_CF:
2419	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`1`]));
2420	return false;
2421	case ENCODING_CC:
2422	if (isCCMPOrCTEST(insn: &insn))
2423	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`2`]));
2424	else
2425	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`1`]));
2426	return false;
2427	case ENCODING_FP:
2428	translateFPRegister(mcInst, stackPos: insn.modRM & `7`);
2429	return false;
2430	case ENCODING_VVVV:
2431	translateRegister(mcInst, reg: insn.vvvv);
2432	return false;
2433	case ENCODING_DUP:
2434	return translateOperand(mcInst, operand: insn.operands [operand.type - TYPE_DUP0],
2435	insn, Dis);
2436	}
2437	}
2438
2439	/// translateInstruction - Translates an internal instruction and all its
2440	/// operands to an MCInst.
2441	///
2442	/// @param mcInst - The MCInst to populate with the instruction's data.
2443	/// @param insn - The internal instruction.
2444	/// @return - false on success; true otherwise.
2445	static bool translateInstruction(MCInst &mcInst,
2446	InternalInstruction &insn,
2447	const MCDisassembler *Dis) {
2448	if (!insn.spec) {
2449	debug("Instruction has no specification");
2450	return true;
2451	}
2452
2453	mcInst.clear();
2454	mcInst.setOpcode(insn.instructionID);
2455	// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2456	// prefix bytes should be disassembled as xrelease and xacquire then set the
2457	// opcode to those instead of the rep and repne opcodes.
2458	if (insn.xAcquireRelease) {
2459	if(mcInst.getOpcode() == X86::REP_PREFIX)
2460	mcInst.setOpcode(X86::XRELEASE_PREFIX);
2461	else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2462	mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2463	}
2464
2465	insn.numImmediatesTranslated = `0`;
2466
2467	for (const auto &Op : insn.operands) {
2468	if (Op.encoding != ENCODING_NONE) {
2469	if (translateOperand(mcInst, operand: Op, insn, Dis)) {
2470	return true;
2471	}
2472	}
2473	}
2474
2475	return false;
2476	}
2477
2478	static MCDisassembler createX86Disassembler(const* Target &T,
2479	const MCSubtargetInfo &STI,
2480	MCContext &Ctx) {
2481	std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2482	return new X86GenericDisassembler (STI, Ctx, std::move(MII));
2483	}
2484
2485	extern "C" LLVM_C_ABI void LLVMInitializeX86Disassembler() {
2486	// Register the disassembler.
2487	TargetRegistry::RegisterMCDisassembler(T&: getTheX86_32Target(),
2488	Fn: createX86Disassembler);
2489	TargetRegistry::RegisterMCDisassembler(T&: getTheX86_64Target(),
2490	Fn: createX86Disassembler);
2491	}
2492

Browse the source code of llvm_projects/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp