X86Disassembler.cpp source code [llvm_projects/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp]

1	//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file is part of the X86 Disassembler.
10	// It contains code to translate the data produced by the decoder into
11	// MCInsts.
12	//
13	//
14	// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15	// 64-bit X86 instruction sets. The main decode sequence for an assembly
16	// instruction in this disassembler is:
17	//
18	// 1. Read the prefix bytes and determine the attributes of the instruction.
19	// These attributes, recorded in enum attributeBits
20	// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21	// provides a mapping from bitmasks to contexts, which are represented by
22	// enum InstructionContext (ibid.).
23	//
24	// 2. Read the opcode, and determine what kind of opcode it is. The
25	// disassembler distinguishes four kinds of opcodes, which are enumerated in
26	// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27	// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28	// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29	//
30	// 3. Depending on the opcode type, look in one of four ClassDecision structures
31	// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32	// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
33	// a ModRMDecision (ibid.).
34	//
35	// 4. Some instructions, such as escape opcodes or extended opcodes, or even
36	// instructions that have ModRMReg / ModRMMem forms in LLVM, need the
37	// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38	// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39	// ModR/M byte is required and how to interpret it.
40	//
41	// 5. After resolving the ModRMDecision, the disassembler has a unique ID
42	// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43	// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44	// meanings of its operands.
45	//
46	// 6. For each operand, its encoding is an entry from OperandEncoding
47	// (X86DisassemblerDecoderCommon.h) and its type is an entry from
48	// OperandType (ibid.). The encoding indicates how to read it from the
49	// instruction; the type indicates how to interpret the value once it has
50	// been read. For example, a register operand could be stored in the R/M
51	// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52	// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53	// register, for instance). Given this information, the operands can be
54	// extracted and interpreted.
55	//
56	// 7. As the last step, the disassembler translates the instruction information
57	// and operands into a format understandable by the client - in this case, an
58	// MCInst for use by the MC infrastructure.
59	//
60	// The disassembler is broken broadly into two parts: the table emitter that
61	// emits the instruction decode tables discussed above during compilation, and
62	// the disassembler itself. The table emitter is documented in more detail in
63	// utils/TableGen/X86DisassemblerEmitter.h.
64	//
65	// X86Disassembler.cpp contains the code responsible for step 7, and for
66	// invoking the decoder to execute steps 1-6.
67	// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68	// table emitter and the disassembler.
69	// X86DisassemblerDecoder.h contains the public interface of the decoder,
70	// factored out into C for possible use by other projects.
71	// X86DisassemblerDecoder.c contains the source code of the decoder, which is
72	// responsible for steps 1-6.
73	//
74	//===----------------------------------------------------------------------===//
75
76	#include "MCTargetDesc/X86BaseInfo.h"
77	#include "MCTargetDesc/X86MCTargetDesc.h"
78	#include "TargetInfo/X86TargetInfo.h"
79	#include "X86DisassemblerDecoder.h"
80	#include "llvm/MC/MCContext.h"
81	#include "llvm/MC/MCDisassembler/MCDisassembler.h"
82	#include "llvm/MC/MCExpr.h"
83	#include "llvm/MC/MCInst.h"
84	#include "llvm/MC/MCInstrInfo.h"
85	#include "llvm/MC/MCSubtargetInfo.h"
86	#include "llvm/MC/TargetRegistry.h"
87	#include "llvm/Support/Debug.h"
88	#include "llvm/Support/Format.h"
89	#include "llvm/Support/raw_ostream.h"
90
91	using namespace llvm;
92	using namespace llvm::X86Disassembler;
93
#define DEBUG_TYPE "x86-disassembler"

// Streams `s` to dbgs() through LLVM_DEBUG, prefixed with the source line of
// the call site. Note that the expansion already ends with a semicolon, so a
// call written as `debug("...");` expands to a statement followed by an empty
// statement.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97
// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // Selects how decode() turns the ModR/M byte into an offset; decode()
  // compares it against MODRM_ONEENTRY, MODRM_SPLITRM, MODRM_SPLITREG,
  // MODRM_SPLITMISC and MODRM_FULL.
  uint8_t modrm_type;
  // Index of this decision's first entry in modRMTable; decode() adds the
  // modrm_type-dependent offset to it.
  uint16_t instructionIDs;
};
105
// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {
  // One entry per opcode byte value; decode() indexes this array directly
  // with the uint8_t opcode.
  ModRMDecision modRMDecisions[256];
};
111
// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes). Since there are many possible
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
// applies given a specific set of attributes. Hence there are only IC_max
// entries in this table, rather than 2^(ATTR_max).
struct ContextDecision {
  // One entry per instruction context; decode() indexes this array with an
  // InstructionContext value.
  OpcodeDecision opcodeDecisions[IC_max];
};
120
121	#include "X86GenDisassemblerTables.inc"
122
123	static InstrUID decode(OpcodeType type, InstructionContext insnContext,
124	uint8_t opcode, uint8_t modRM) {
125	const struct ModRMDecision *dec;
126
127	switch (type) {
128	case ONEBYTE:
129	dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130	break;
131	case TWOBYTE:
132	dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133	break;
134	case THREEBYTE_38:
135	dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136	break;
137	case THREEBYTE_3A:
138	dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139	break;
140	case XOP8_MAP:
141	dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142	break;
143	case XOP9_MAP:
144	dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145	break;
146	case XOPA_MAP:
147	dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148	break;
149	case THREEDNOW_MAP:
150	dec =
151	&THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152	break;
153	case MAP4:
154	dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155	break;
156	case MAP5:
157	dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158	break;
159	case MAP6:
160	dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
161	break;
162	case MAP7:
163	dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
164	break;
165	}
166
167	switch (dec->modrm_type) {
168	default:
169	llvm_unreachable("Corrupt table! Unknown modrm_type");
170	return `0`;
171	case MODRM_ONEENTRY:
172	return modRMTable[dec->instructionIDs];
173	case MODRM_SPLITRM:
174	if (modFromModRM(modRM) == `0x3`)
175	return modRMTable[dec->instructionIDs + `1`];
176	return modRMTable[dec->instructionIDs];
177	case MODRM_SPLITREG:
178	if (modFromModRM(modRM) == `0x3`)
179	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`) + `8`];
180	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`)];
181	case MODRM_SPLITMISC:
182	if (modFromModRM(modRM) == `0x3`)
183	return modRMTable[dec->instructionIDs + (modRM & `0x3f`) + `8`];
184	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`)];
185	case MODRM_FULL:
186	return modRMTable[dec->instructionIDs + modRM];
187	}
188	}
189
190	static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
191	uint64_t offset = insn->readerCursor - insn->startLocation;
192	if (offset >= insn->bytes.size())
193	return true;
194	byte = insn->bytes [offset];
195	return false;
196	}
197
198	template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
199	auto r = insn->bytes;
200	uint64_t offset = insn->readerCursor - insn->startLocation;
201	if (offset + sizeof(T) > r.size())
202	return true;
203	ptr = support::endian::read<T>(&r [offset], llvm::endianness::little);
204	insn->readerCursor += sizeof(T);
205	return false;
206	}
207
208	static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
209	return insn->mode == MODE_64BIT && prefix >= `0x40` && prefix <= `0x4f`;
210	}
211
212	static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
213	return insn->mode == MODE_64BIT && prefix == `0xd5`;
214	}
215
216	// Consumes all of an instruction's prefix bytes, and marks the
217	// instruction as having them. Also sets the instruction's default operand,
218	// address, and other relevant data sizes to report operands correctly.
219	//
220	// insn must not be empty.
221	static int readPrefixes(struct InternalInstruction *insn) {
222	bool isPrefix = true;
223	uint8_t byte = `0`;
224	uint8_t nextByte;
225
226	LLVM_DEBUG(dbgs() << "readPrefixes()");
227
228	while (isPrefix) {
229	// If we fail reading prefixes, just stop here and let the opcode reader
230	// deal with it.
231	if (consume(insn, ptr&: byte))
232	break;
233
234	// If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
235	// break and let it be disassembled as a normal "instruction".
236	if (insn->readerCursor - `1` == insn->startLocation && byte == `0xf0`) // LOCK
237	break;
238
239	if ((byte == `0xf2` \|\| byte == `0xf3`) && !peek(insn, byte&: nextByte)) {
240	// If the byte is 0xf2 or 0xf3, and any of the following conditions are
241	// met:
242	// - it is followed by a LOCK (0xf0) prefix
243	// - it is followed by an xchg instruction
244	// then it should be disassembled as a xacquire/xrelease not repne/rep.
245	if (((nextByte == `0xf0`) \|\|
246	((nextByte & `0xfe`) == `0x86` \|\| (nextByte & `0xf8`) == `0x90`))) {
247	insn->xAcquireRelease = true;
248	if (!(byte == `0xf3` && nextByte == `0x90`)) // PAUSE instruction support
249	break;
250	}
251	// Also if the byte is 0xf3, and the following condition is met:
252	// - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
253	// "mov mem, imm" (opcode 0xc6/0xc7) instructions.
254	// then it should be disassembled as an xrelease not rep.
255	if (byte == `0xf3` && (nextByte == `0x88` \|\| nextByte == `0x89` \|\|
256	nextByte == `0xc6` \|\| nextByte == `0xc7`)) {
257	insn->xAcquireRelease = true;
258	break;
259	}
260	if (isREX(insn, prefix: nextByte)) {
261	uint8_t nnextByte;
262	// Go to REX prefix after the current one
263	if (consume(insn, ptr&: nnextByte))
264	return -`1`;
265	// We should be able to read next byte after REX prefix
266	if (peek(insn, byte&: nnextByte))
267	return -`1`;
268	--insn->readerCursor;
269	}
270	}
271
272	switch (byte) {
273	case `0xf0`: // LOCK
274	insn->hasLockPrefix = true;
275	break;
276	case `0xf2`: // REPNE/REPNZ
277	case `0xf3`: { // REP or REPE/REPZ
278	uint8_t nextByte;
279	if (peek(insn, byte&: nextByte))
280	break;
281	// TODO:
282	// 1. There could be several 0x66
283	// 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
284	// it's not mandatory prefix
285	// 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
286	// 0x0f exactly after it to be mandatory prefix
287	// 4. if (nextByte == 0xd5) it's REX2 and we need
288	// 0x0f exactly after it to be mandatory prefix
289	if (isREX(insn, prefix: nextByte) \|\| isREX2(insn, prefix: nextByte) \|\| nextByte == `0x0f` \|\|
290	nextByte == `0x66`)
291	// The last of 0xf2 /0xf3 is mandatory prefix
292	insn->mandatoryPrefix = byte;
293	insn->repeatPrefix = byte;
294	break;
295	}
296	case `0x2e`: // CS segment override -OR- Branch not taken
297	insn->segmentOverride = SEG_OVERRIDE_CS;
298	break;
299	case `0x36`: // SS segment override -OR- Branch taken
300	insn->segmentOverride = SEG_OVERRIDE_SS;
301	break;
302	case `0x3e`: // DS segment override
303	insn->segmentOverride = SEG_OVERRIDE_DS;
304	break;
305	case `0x26`: // ES segment override
306	insn->segmentOverride = SEG_OVERRIDE_ES;
307	break;
308	case `0x64`: // FS segment override
309	insn->segmentOverride = SEG_OVERRIDE_FS;
310	break;
311	case `0x65`: // GS segment override
312	insn->segmentOverride = SEG_OVERRIDE_GS;
313	break;
314	case `0x66`: { // Operand-size override {
315	uint8_t nextByte;
316	insn->hasOpSize = true;
317	if (peek(insn, byte&: nextByte))
318	break;
319	// 0x66 can't overwrite existing mandatory prefix and should be ignored
320	if (!insn->mandatoryPrefix && (nextByte == `0x0f` \|\| isREX(insn, prefix: nextByte)))
321	insn->mandatoryPrefix = byte;
322	break;
323	}
324	case `0x67`: // Address-size override
325	insn->hasAdSize = true;
326	break;
327	default: // Not a prefix byte
328	isPrefix = false;
329	break;
330	}
331
332	if (isPrefix)
333	LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
334	}
335
336	insn->vectorExtensionType = TYPE_NO_VEX_XOP;
337
338	if (byte == `0x62`) {
339	uint8_t byte1, byte2;
340	if (consume(insn, ptr&: byte1)) {
341	LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
342	return -`1`;
343	}
344
345	if (peek(insn, byte&: byte2)) {
346	LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
347	return -`1`;
348	}
349
350	if ((insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)) {
351	insn->vectorExtensionType = TYPE_EVEX;
352	} else {
353	--insn->readerCursor; // unconsume byte1
354	--insn->readerCursor; // unconsume byte
355	}
356
357	if (insn->vectorExtensionType == TYPE_EVEX) {
358	insn->vectorExtensionPrefix[`0`] = byte;
359	insn->vectorExtensionPrefix[`1`] = byte1;
360	if (consume(insn, ptr&: insn->vectorExtensionPrefix[`2`])) {
361	LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
362	return -`1`;
363	}
364	if (consume(insn, ptr&: insn->vectorExtensionPrefix[`3`])) {
365	LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
366	return -`1`;
367	}
368
369	if (insn->mode == MODE_64BIT) {
370	// We simulate the REX prefix for simplicity's sake
371	insn->rexPrefix = `0x40` \|
372	(wFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) << `3`) \|
373	(rFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `2`) \|
374	(xFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `1`) \|
375	(bFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `0`);
376
377	// We simulate the REX2 prefix for simplicity's sake
378	insn->rex2ExtensionPrefix[`1`] =
379	(r2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `6`) \|
380	(x2FromEVEX3of4(insn->vectorExtensionPrefix[`2`]) << `5`) \|
381	(b2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`);
382	}
383
384	LLVM_DEBUG(
385	dbgs() << format(
386	"Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
387	insn->vectorExtensionPrefix[`0`], insn->vectorExtensionPrefix[`1`],
388	insn->vectorExtensionPrefix[`2`], insn->vectorExtensionPrefix[`3`]));
389	}
390	} else if (byte == `0xc4`) {
391	uint8_t byte1;
392	if (peek(insn, byte&: byte1)) {
393	LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
394	return -`1`;
395	}
396
397	if (insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)
398	insn->vectorExtensionType = TYPE_VEX_3B;
399	else
400	--insn->readerCursor;
401
402	if (insn->vectorExtensionType == TYPE_VEX_3B) {
403	insn->vectorExtensionPrefix[`0`] = byte;
404	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
405	consume(insn, ptr&: insn->vectorExtensionPrefix[`2`]);
406
407	// We simulate the REX prefix for simplicity's sake
408
409	if (insn->mode == MODE_64BIT)
410	insn->rexPrefix = `0x40` \|
411	(wFromVEX3of3(insn->vectorExtensionPrefix[`2`]) << `3`) \|
412	(rFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `2`) \|
413	(xFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `1`) \|
414	(bFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `0`);
415
416	LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
417	insn->vectorExtensionPrefix[`0`],
418	insn->vectorExtensionPrefix[`1`],
419	insn->vectorExtensionPrefix[`2`]));
420	}
421	} else if (byte == `0xc5`) {
422	uint8_t byte1;
423	if (peek(insn, byte&: byte1)) {
424	LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
425	return -`1`;
426	}
427
428	if (insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)
429	insn->vectorExtensionType = TYPE_VEX_2B;
430	else
431	--insn->readerCursor;
432
433	if (insn->vectorExtensionType == TYPE_VEX_2B) {
434	insn->vectorExtensionPrefix[`0`] = byte;
435	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
436
437	if (insn->mode == MODE_64BIT)
438	insn->rexPrefix =
439	`0x40` \| (rFromVEX2of2(insn->vectorExtensionPrefix[`1`]) << `2`);
440
441	switch (ppFromVEX2of2(insn->vectorExtensionPrefix[`1`])) {
442	default:
443	break;
444	case VEX_PREFIX_66:
445	insn->hasOpSize = true;
446	break;
447	}
448
449	LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
450	insn->vectorExtensionPrefix[`0`],
451	insn->vectorExtensionPrefix[`1`]));
452	}
453	} else if (byte == `0x8f`) {
454	uint8_t byte1;
455	if (peek(insn, byte&: byte1)) {
456	LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
457	return -`1`;
458	}
459
460	if ((byte1 & `0x38`) != `0x0`) // 0 in these 3 bits is a POP instruction.
461	insn->vectorExtensionType = TYPE_XOP;
462	else
463	--insn->readerCursor;
464
465	if (insn->vectorExtensionType == TYPE_XOP) {
466	insn->vectorExtensionPrefix[`0`] = byte;
467	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
468	consume(insn, ptr&: insn->vectorExtensionPrefix[`2`]);
469
470	// We simulate the REX prefix for simplicity's sake
471
472	if (insn->mode == MODE_64BIT)
473	insn->rexPrefix = `0x40` \|
474	(wFromXOP3of3(insn->vectorExtensionPrefix[`2`]) << `3`) \|
475	(rFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `2`) \|
476	(xFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `1`) \|
477	(bFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `0`);
478
479	switch (ppFromXOP3of3(insn->vectorExtensionPrefix[`2`])) {
480	default:
481	break;
482	case VEX_PREFIX_66:
483	insn->hasOpSize = true;
484	break;
485	}
486
487	LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
488	insn->vectorExtensionPrefix[`0`],
489	insn->vectorExtensionPrefix[`1`],
490	insn->vectorExtensionPrefix[`2`]));
491	}
492	} else if (isREX2(insn, prefix: byte)) {
493	uint8_t byte1;
494	if (peek(insn, byte&: byte1)) {
495	LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
496	return -`1`;
497	}
498	insn->rex2ExtensionPrefix[`0`] = byte;
499	consume(insn, ptr&: insn->rex2ExtensionPrefix[`1`]);
500
501	// We simulate the REX prefix for simplicity's sake
502	insn->rexPrefix = `0x40` \| (wFromREX2(insn->rex2ExtensionPrefix[`1`]) << `3`) \|
503	(rFromREX2(insn->rex2ExtensionPrefix[`1`]) << `2`) \|
504	(xFromREX2(insn->rex2ExtensionPrefix[`1`]) << `1`) \|
505	(bFromREX2(insn->rex2ExtensionPrefix[`1`]) << `0`);
506	LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
507	insn->rex2ExtensionPrefix[`0`],
508	insn->rex2ExtensionPrefix[`1`]));
509	} else if (isREX(insn, prefix: byte)) {
510	if (peek(insn, byte&: nextByte))
511	return -`1`;
512	insn->rexPrefix = byte;
513	LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
514	} else
515	--insn->readerCursor;
516
517	if (insn->mode == MODE_16BIT) {
518	insn->registerSize = (insn->hasOpSize ? `4` : `2`);
519	insn->addressSize = (insn->hasAdSize ? `4` : `2`);
520	insn->displacementSize = (insn->hasAdSize ? `4` : `2`);
521	insn->immediateSize = (insn->hasOpSize ? `4` : `2`);
522	} else if (insn->mode == MODE_32BIT) {
523	insn->registerSize = (insn->hasOpSize ? `2` : `4`);
524	insn->addressSize = (insn->hasAdSize ? `2` : `4`);
525	insn->displacementSize = (insn->hasAdSize ? `2` : `4`);
526	insn->immediateSize = (insn->hasOpSize ? `2` : `4`);
527	} else if (insn->mode == MODE_64BIT) {
528	insn->displacementSize = `4`;
529	if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
530	insn->registerSize = `8`;
531	insn->addressSize = (insn->hasAdSize ? `4` : `8`);
532	insn->immediateSize = `4`;
533	insn->hasOpSize = false;
534	} else {
535	insn->registerSize = (insn->hasOpSize ? `2` : `4`);
536	insn->addressSize = (insn->hasAdSize ? `4` : `8`);
537	insn->immediateSize = (insn->hasOpSize ? `2` : `4`);
538	}
539	}
540
541	return `0`;
542	}
543
544	// Consumes the SIB byte to determine addressing information.
545	static int readSIB(struct InternalInstruction *insn) {
546	SIBBase sibBaseBase = SIB_BASE_NONE;
547	uint8_t index, base;
548
549	LLVM_DEBUG(dbgs() << "readSIB()");
550	switch (insn->addressSize) {
551	case `2`:
552	default:
553	llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
554	case `4`:
555	insn->sibIndexBase = SIB_INDEX_EAX;
556	sibBaseBase = SIB_BASE_EAX;
557	break;
558	case `8`:
559	insn->sibIndexBase = SIB_INDEX_RAX;
560	sibBaseBase = SIB_BASE_RAX;
561	break;
562	}
563
564	if (consume(insn, ptr&: insn->sib))
565	return -`1`;
566
567	index = indexFromSIB(insn->sib) \| (xFromREX(insn->rexPrefix) << `3`) \|
568	(x2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
569
570	if (index == `0x4`) {
571	insn->sibIndex = SIB_INDEX_NONE;
572	} else {
573	insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
574	}
575
576	insn->sibScale = `1` << scaleFromSIB(insn->sib);
577
578	base = baseFromSIB(insn->sib) \| (bFromREX(insn->rexPrefix) << `3`) \|
579	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
580
581	switch (base) {
582	case `0x5`:
583	case `0xd`:
584	switch (modFromModRM(insn->modRM)) {
585	case `0x0`:
586	insn->eaDisplacement = EA_DISP_32;
587	insn->sibBase = SIB_BASE_NONE;
588	break;
589	case `0x1`:
590	insn->eaDisplacement = EA_DISP_8;
591	insn->sibBase = (SIBBase)(sibBaseBase + base);
592	break;
593	case `0x2`:
594	insn->eaDisplacement = EA_DISP_32;
595	insn->sibBase = (SIBBase)(sibBaseBase + base);
596	break;
597	default:
598	llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
599	}
600	break;
601	default:
602	insn->sibBase = (SIBBase)(sibBaseBase + base);
603	break;
604	}
605
606	return `0`;
607	}
608
609	static int readDisplacement(struct InternalInstruction *insn) {
610	int8_t d8;
611	int16_t d16;
612	int32_t d32;
613	LLVM_DEBUG(dbgs() << "readDisplacement()");
614
615	insn->displacementOffset = insn->readerCursor - insn->startLocation;
616	switch (insn->eaDisplacement) {
617	case EA_DISP_NONE:
618	break;
619	case EA_DISP_8:
620	if (consume(insn, ptr&: d8))
621	return -`1`;
622	insn->displacement = d8;
623	break;
624	case EA_DISP_16:
625	if (consume(insn, ptr&: d16))
626	return -`1`;
627	insn->displacement = d16;
628	break;
629	case EA_DISP_32:
630	if (consume(insn, ptr&: d32))
631	return -`1`;
632	insn->displacement = d32;
633	break;
634	}
635
636	return `0`;
637	}
638
639	// Consumes all addressing information (ModR/M byte, SIB byte, and displacement.
640	static int readModRM(struct InternalInstruction *insn) {
641	uint8_t mod, rm, reg;
642	LLVM_DEBUG(dbgs() << "readModRM()");
643
644	if (insn->consumedModRM)
645	return `0`;
646
647	if (consume(insn, ptr&: insn->modRM))
648	return -`1`;
649	insn->consumedModRM = true;
650
651	mod = modFromModRM(insn->modRM);
652	rm = rmFromModRM(insn->modRM);
653	reg = regFromModRM(insn->modRM);
654
655	// This goes by insn->registerSize to pick the correct register, which messes
656	// up if we're using (say) XMM or 8-bit register operands. That gets fixed in
657	// fixupReg().
658	switch (insn->registerSize) {
659	case `2`:
660	insn->regBase = MODRM_REG_AX;
661	insn->eaRegBase = EA_REG_AX;
662	break;
663	case `4`:
664	insn->regBase = MODRM_REG_EAX;
665	insn->eaRegBase = EA_REG_EAX;
666	break;
667	case `8`:
668	insn->regBase = MODRM_REG_RAX;
669	insn->eaRegBase = EA_REG_RAX;
670	break;
671	}
672
673	reg \|= (rFromREX(insn->rexPrefix) << `3`) \|
674	(r2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
675	rm \|= (bFromREX(insn->rexPrefix) << `3`) \|
676	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
677
678	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
679	reg \|= r2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`;
680
681	insn->reg = (Reg)(insn->regBase + reg);
682
683	switch (insn->addressSize) {
684	case `2`: {
685	EABase eaBaseBase = EA_BASE_BX_SI;
686
687	switch (mod) {
688	case `0x0`:
689	if (rm == `0x6`) {
690	insn->eaBase = EA_BASE_NONE;
691	insn->eaDisplacement = EA_DISP_16;
692	if (readDisplacement(insn))
693	return -`1`;
694	} else {
695	insn->eaBase = (EABase)(eaBaseBase + rm);
696	insn->eaDisplacement = EA_DISP_NONE;
697	}
698	break;
699	case `0x1`:
700	insn->eaBase = (EABase)(eaBaseBase + rm);
701	insn->eaDisplacement = EA_DISP_8;
702	insn->displacementSize = `1`;
703	if (readDisplacement(insn))
704	return -`1`;
705	break;
706	case `0x2`:
707	insn->eaBase = (EABase)(eaBaseBase + rm);
708	insn->eaDisplacement = EA_DISP_16;
709	if (readDisplacement(insn))
710	return -`1`;
711	break;
712	case `0x3`:
713	insn->eaBase = (EABase)(insn->eaRegBase + rm);
714	if (readDisplacement(insn))
715	return -`1`;
716	break;
717	}
718	break;
719	}
720	case `4`:
721	case `8`: {
722	EABase eaBaseBase = (insn->addressSize == `4` ? EA_BASE_EAX : EA_BASE_RAX);
723
724	switch (mod) {
725	case `0x0`:
726	insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
727	// In determining whether RIP-relative mode is used (rm=5),
728	// or whether a SIB byte is present (rm=4),
729	// the extension bits (REX.b and EVEX.x) are ignored.
730	switch (rm & `7`) {
731	case `0x4`: // SIB byte is present
732	insn->eaBase = (insn->addressSize == `4` ? EA_BASE_sib : EA_BASE_sib64);
733	if (readSIB(insn) \|\| readDisplacement(insn))
734	return -`1`;
735	break;
736	case `0x5`: // RIP-relative
737	insn->eaBase = EA_BASE_NONE;
738	insn->eaDisplacement = EA_DISP_32;
739	if (readDisplacement(insn))
740	return -`1`;
741	break;
742	default:
743	insn->eaBase = (EABase)(eaBaseBase + rm);
744	break;
745	}
746	break;
747	case `0x1`:
748	insn->displacementSize = `1`;
749	[[fallthrough]];
750	case `0x2`:
751	insn->eaDisplacement = (mod == `0x1` ? EA_DISP_8 : EA_DISP_32);
752	switch (rm & `7`) {
753	case `0x4`: // SIB byte is present
754	insn->eaBase = EA_BASE_sib;
755	if (readSIB(insn) \|\| readDisplacement(insn))
756	return -`1`;
757	break;
758	default:
759	insn->eaBase = (EABase)(eaBaseBase + rm);
760	if (readDisplacement(insn))
761	return -`1`;
762	break;
763	}
764	break;
765	case `0x3`:
766	insn->eaDisplacement = EA_DISP_NONE;
767	insn->eaBase = (EABase)(insn->eaRegBase + rm);
768	break;
769	}
770	break;
771	}
772	} // switch (insn->addressSize)
773
774	return `0`;
775	}
776
// Defines a static function `name` that converts a register index into the
// register enumerator appropriate for an operand type.
//
// Macro parameters:
//   name   - Name of the function to define.
//   base   - Expression giving the first register of the default family; it is
//            only used for TYPE_Rv.
//   prefix - Enumerator prefix (pasted in front of _AL, _EAX, _XMM0, ...) that
//            selects the register enum the result is taken from.
//
// Generated function:
//   insn  - The instruction being decoded; read for insn->rexPrefix (and by
//           `base`, which may refer to it).
//   type  - The operand type that decides the register family.
//   index - The register index within that family.
//   valid - Set to 1 on entry, then cleared to 0 when `type` is not handled or
//           `index` is out of range for the family.
//   Returns the register enumerator value, or 0 for an unhandled type.
#define GENERIC_FIXUP_FUNC(name, base, prefix)                                 \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      if (insn->rexPrefix && index >= 4 && index <= 7)                         \
        return prefix##_SPL + (index - 4);                                     \
      else                                                                     \
        return prefix##_AL + index;                                            \
    case TYPE_R16:                                                             \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      return prefix##_RAX + index;                                             \
    case TYPE_ZMM:                                                             \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_YMM:                                                             \
      return prefix##_YMM0 + index;                                            \
    case TYPE_XMM:                                                             \
      return prefix##_XMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0 + index;                                            \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5)                                                     \
        *valid = 0;                                                            \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_CR0 + index;                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    }                                                                          \
  }
840
// Consult an operand type to determine the meaning of the reg or R/M field. If
// the operand is an XMM operand, for example, the field should name XMM0
// rather than AX, which is how readModRM() would otherwise have interpreted
// it.
//
// fixupRegValue() yields MODRM_REG_* values (with insn->regBase as the default
// family); fixupRMValue() yields EA_REG_* values (with insn->eaRegBase as the
// default family).
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t. The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
853
854	// Consult an operand specifier to determine which of the fixupValue functions*
855	// to use in correcting readModRM()'ss interpretation.
856	//
857	// @param insn - See fixupValue().*
858	// @param op - The operand specifier.
859	// @return - 0 if fixup was successful; -1 if the register returned was
860	// invalid for its class.
861	static int fixupReg(struct InternalInstruction *insn,
862	const struct OperandSpecifier *op) {
863	uint8_t valid;
864	LLVM_DEBUG(dbgs() << "fixupReg()");
865
866	switch ((OperandEncoding)op->encoding) {
867	default:
868	debug("Expected a REG or R/M encoding in fixupReg");
869	return -`1`;
870	case ENCODING_VVVV:
871	insn->vvvv =
872	(Reg)fixupRegValue(insn, type: (OperandType)op->type, index: insn->vvvv, valid: &valid);
873	if (!valid)
874	return -`1`;
875	break;
876	case ENCODING_REG:
877	insn->reg = (Reg)fixupRegValue(insn, type: (OperandType)op->type,
878	index: insn->reg - insn->regBase, valid: &valid);
879	if (!valid)
880	return -`1`;
881	break;
882	CASE_ENCODING_RM:
883	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
884	modFromModRM(insn->modRM) == `3`) {
885	// EVEX_X can extend the register id to 32 for a non-GPR register that is
886	// encoded in RM.
887	// mode : MODE_64_BIT
888	// Only 8 vector registers are available in 32 bit mode
889	// mod : 3
890	// RM encodes a register
891	switch (op->type) {
892	case TYPE_Rv:
893	case TYPE_R8:
894	case TYPE_R16:
895	case TYPE_R32:
896	case TYPE_R64:
897	break;
898	default:
899	insn->eaBase =
900	(EABase)(insn->eaBase +
901	(xFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`));
902	break;
903	}
904	}
905	[[fallthrough]];
906	case ENCODING_SIB:
907	if (insn->eaBase >= insn->eaRegBase) {
908	insn->eaBase = (EABase)fixupRMValue(
909	insn, type: (OperandType)op->type, index: insn->eaBase - insn->eaRegBase, valid: &valid);
910	if (!valid)
911	return -`1`;
912	}
913	break;
914	}
915
916	return `0`;
917	}
918
919	// Read the opcode (except the ModR/M byte in the case of extended or escape
920	// opcodes).
921	static bool readOpcode(struct InternalInstruction *insn) {
922	uint8_t current;
923	LLVM_DEBUG(dbgs() << "readOpcode()");
924
925	insn->opcodeType = ONEBYTE;
926	if (insn->vectorExtensionType == TYPE_EVEX) {
927	switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[`1`])) {
928	default:
929	LLVM_DEBUG(
930	dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
931	mmmFromEVEX2of4(insn->vectorExtensionPrefix[`1`])));
932	return true;
933	case VEX_LOB_0F:
934	insn->opcodeType = TWOBYTE;
935	return consume(insn, ptr&: insn->opcode);
936	case VEX_LOB_0F38:
937	insn->opcodeType = THREEBYTE_38;
938	return consume(insn, ptr&: insn->opcode);
939	case VEX_LOB_0F3A:
940	insn->opcodeType = THREEBYTE_3A;
941	return consume(insn, ptr&: insn->opcode);
942	case VEX_LOB_MAP4:
943	insn->opcodeType = MAP4;
944	return consume(insn, ptr&: insn->opcode);
945	case VEX_LOB_MAP5:
946	insn->opcodeType = MAP5;
947	return consume(insn, ptr&: insn->opcode);
948	case VEX_LOB_MAP6:
949	insn->opcodeType = MAP6;
950	return consume(insn, ptr&: insn->opcode);
951	case VEX_LOB_MAP7:
952	insn->opcodeType = MAP7;
953	return consume(insn, ptr&: insn->opcode);
954	}
955	} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
956	switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])) {
957	default:
958	LLVM_DEBUG(
959	dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
960	mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])));
961	return true;
962	case VEX_LOB_0F:
963	insn->opcodeType = TWOBYTE;
964	return consume(insn, ptr&: insn->opcode);
965	case VEX_LOB_0F38:
966	insn->opcodeType = THREEBYTE_38;
967	return consume(insn, ptr&: insn->opcode);
968	case VEX_LOB_0F3A:
969	insn->opcodeType = THREEBYTE_3A;
970	return consume(insn, ptr&: insn->opcode);
971	case VEX_LOB_MAP5:
972	insn->opcodeType = MAP5;
973	return consume(insn, ptr&: insn->opcode);
974	case VEX_LOB_MAP6:
975	insn->opcodeType = MAP6;
976	return consume(insn, ptr&: insn->opcode);
977	case VEX_LOB_MAP7:
978	insn->opcodeType = MAP7;
979	return consume(insn, ptr&: insn->opcode);
980	}
981	} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
982	insn->opcodeType = TWOBYTE;
983	return consume(insn, ptr&: insn->opcode);
984	} else if (insn->vectorExtensionType == TYPE_XOP) {
985	switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[`1`])) {
986	default:
987	LLVM_DEBUG(
988	dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
989	mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])));
990	return true;
991	case XOP_MAP_SELECT_8:
992	insn->opcodeType = XOP8_MAP;
993	return consume(insn, ptr&: insn->opcode);
994	case XOP_MAP_SELECT_9:
995	insn->opcodeType = XOP9_MAP;
996	return consume(insn, ptr&: insn->opcode);
997	case XOP_MAP_SELECT_A:
998	insn->opcodeType = XOPA_MAP;
999	return consume(insn, ptr&: insn->opcode);
1000	}
1001	} else if (mFromREX2(insn->rex2ExtensionPrefix[`1`])) {
1002	// m bit indicates opcode map 1
1003	insn->opcodeType = TWOBYTE;
1004	return consume(insn, ptr&: insn->opcode);
1005	}
1006
1007	if (consume(insn, ptr&: current))
1008	return true;
1009
1010	if (current == `0x0f`) {
1011	LLVM_DEBUG(
1012	dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
1013	if (consume(insn, ptr&: current))
1014	return true;
1015
1016	if (current == `0x38`) {
1017	LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1018	current));
1019	if (consume(insn, ptr&: current))
1020	return true;
1021
1022	insn->opcodeType = THREEBYTE_38;
1023	} else if (current == `0x3a`) {
1024	LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1025	current));
1026	if (consume(insn, ptr&: current))
1027	return true;
1028
1029	insn->opcodeType = THREEBYTE_3A;
1030	} else if (current == `0x0f`) {
1031	LLVM_DEBUG(
1032	dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
1033
1034	// Consume operands before the opcode to comply with the 3DNow encoding
1035	if (readModRM(insn))
1036	return true;
1037
1038	if (consume(insn, ptr&: current))
1039	return true;
1040
1041	insn->opcodeType = THREEDNOW_MAP;
1042	} else {
1043	LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
1044	insn->opcodeType = TWOBYTE;
1045	}
1046	} else if (insn->mandatoryPrefix)
1047	// The opcode with mandatory prefix must start with opcode escape.
1048	// If not it's legacy repeat prefix
1049	insn->mandatoryPrefix = `0`;
1050
1051	// At this point we have consumed the full opcode.
1052	// Anything we consume from here on must be unconsumed.
1053	insn->opcode = current;
1054
1055	return false;
1056	}
1057
// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
//
// The two names must have the same length and may differ only at positions
// where a 32/64-bit marker in orig lines up with its 16-bit counterpart in
// equiv: 'Q' or 'L' against 'W', '6' or '3' against '1', and '4' or '2'
// against '6'.
//
// @param orig  - NUL-terminated name of the 32-bit or 64-bit instruction.
// @param equiv - NUL-terminated name of the candidate 16-bit instruction.
// @return      - true if equiv is the 16-bit spelling of orig.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  for (int i = 0;; i++) {
    const char o = orig[i];
    const char e = equiv[i];

    // Both names ended together: every position matched.
    if (o == '\0' && e == '\0')
      return true;
    // Only one name ended: the lengths differ.
    if (o == '\0' || e == '\0')
      return false;
    if (o == e)
      continue;

    const bool sizeSuffix = (o == 'Q' || o == 'L') && e == 'W';
    const bool firstDigit = (o == '6' || o == '3') && e == '1';
    const bool secondDigit = (o == '4' || o == '2') && e == '6';
    if (!(sizeSuffix || firstDigit || secondDigit))
      return false;
  }
}
1076
// Determine whether this instruction is a 64-bit instruction.
//
// @param name - NUL-terminated instruction name.
// @return     - true if the name contains the character sequence "64".
static bool is64Bit(const char *name) {
  for (const char *p = name; *p != '\0'; ++p) {
    // p[1] is at worst the terminating NUL, so the look-ahead stays in bounds.
    if (p[0] == '6' && p[1] == '4')
      return true;
  }
  return false;
}
1086
1087	// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1088	// for extended and escape opcodes, and using a supplied attribute mask.
1089	static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1090	struct InternalInstruction *insn,
1091	uint16_t attrMask) {
1092	auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1093	const ContextDecision *decision;
1094	switch (insn->opcodeType) {
1095	case ONEBYTE:
1096	decision = &ONEBYTE_SYM;
1097	break;
1098	case TWOBYTE:
1099	decision = &TWOBYTE_SYM;
1100	break;
1101	case THREEBYTE_38:
1102	decision = &THREEBYTE38_SYM;
1103	break;
1104	case THREEBYTE_3A:
1105	decision = &THREEBYTE3A_SYM;
1106	break;
1107	case XOP8_MAP:
1108	decision = &XOP8_MAP_SYM;
1109	break;
1110	case XOP9_MAP:
1111	decision = &XOP9_MAP_SYM;
1112	break;
1113	case XOPA_MAP:
1114	decision = &XOPA_MAP_SYM;
1115	break;
1116	case THREEDNOW_MAP:
1117	decision = &THREEDNOW_MAP_SYM;
1118	break;
1119	case MAP4:
1120	decision = &MAP4_SYM;
1121	break;
1122	case MAP5:
1123	decision = &MAP5_SYM;
1124	break;
1125	case MAP6:
1126	decision = &MAP6_SYM;
1127	break;
1128	case MAP7:
1129	decision = &MAP7_SYM;
1130	break;
1131	}
1132
1133	if (decision->opcodeDecisions[insnCtx]
1134	.modRMDecisions[insn->opcode]
1135	.modrm_type != MODRM_ONEENTRY) {
1136	if (readModRM(insn))
1137	return -`1`;
1138	*instructionID =
1139	decode(type: insn->opcodeType, insnContext: insnCtx, opcode: insn->opcode, modRM: insn->modRM);
1140	} else {
1141	*instructionID = decode(type: insn->opcodeType, insnContext: insnCtx, opcode: insn->opcode, modRM: `0`);
1142	}
1143
1144	return `0`;
1145	}
1146
1147	static bool isCCMPOrCTEST(InternalInstruction *insn) {
1148	if (insn->opcodeType != MAP4)
1149	return false;
1150	if (insn->opcode == `0x83` && regFromModRM(insn->modRM) == `7`)
1151	return true;
1152	switch (insn->opcode & `0xfe`) {
1153	default:
1154	return false;
1155	case `0x38`:
1156	case `0x3a`:
1157	case `0x84`:
1158	return true;
1159	case `0x80`:
1160	return regFromModRM(insn->modRM) == `7`;
1161	case `0xf6`:
1162	return regFromModRM(insn->modRM) == `0`;
1163	}
1164	}
1165
1166	static bool isNF(InternalInstruction *insn) {
1167	if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1168	return false;
1169	if (insn->opcodeType == MAP4)
1170	return true;
1171	// Below NF instructions are not in map4.
1172	if (insn->opcodeType == THREEBYTE_38 &&
1173	ppFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) == VEX_PREFIX_NONE) {
1174	switch (insn->opcode) {
1175	case `0xf2`: // ANDN
1176	case `0xf3`: // BLSI, BLSR, BLSMSK
1177	case `0xf5`: // BZHI
1178	case `0xf7`: // BEXTR
1179	return true;
1180	default:
1181	break;
1182	}
1183	}
1184	return false;
1185	}
1186
1187	// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1188	// for extended and escape opcodes. Determines the attributes and context for
1189	// the instruction before doing so.
1190	static int getInstructionID(struct InternalInstruction *insn,
1191	const MCInstrInfo *mii) {
1192	uint16_t attrMask;
1193	uint16_t instructionID;
1194
1195	LLVM_DEBUG(dbgs() << "getID()");
1196
1197	attrMask = ATTR_NONE;
1198
1199	if (insn->mode == MODE_64BIT)
1200	attrMask \|= ATTR_64BIT;
1201
1202	if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1203	attrMask \|= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1204
1205	if (insn->vectorExtensionType == TYPE_EVEX) {
1206	switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[`2`])) {
1207	case VEX_PREFIX_66:
1208	attrMask \|= ATTR_OPSIZE;
1209	break;
1210	case VEX_PREFIX_F3:
1211	attrMask \|= ATTR_XS;
1212	break;
1213	case VEX_PREFIX_F2:
1214	attrMask \|= ATTR_XD;
1215	break;
1216	}
1217
1218	if (zFromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1219	attrMask \|= ATTR_EVEXKZ;
1220	if (bFromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1221	attrMask \|= ATTR_EVEXB;
1222	if (isNF(insn) && !readModRM(insn) &&
1223	!isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
1224	attrMask \|= ATTR_EVEXNF;
1225	// aaa is not used a opmask in MAP4
1226	else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[`3`]) &&
1227	(insn->opcodeType != MAP4))
1228	attrMask \|= ATTR_EVEXK;
1229	if (lFromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1230	attrMask \|= ATTR_VEXL;
1231	if (l2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1232	attrMask \|= ATTR_EVEXL2;
1233	} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1234	switch (ppFromVEX3of3(insn->vectorExtensionPrefix[`2`])) {
1235	case VEX_PREFIX_66:
1236	attrMask \|= ATTR_OPSIZE;
1237	break;
1238	case VEX_PREFIX_F3:
1239	attrMask \|= ATTR_XS;
1240	break;
1241	case VEX_PREFIX_F2:
1242	attrMask \|= ATTR_XD;
1243	break;
1244	}
1245
1246	if (lFromVEX3of3(insn->vectorExtensionPrefix[`2`]))
1247	attrMask \|= ATTR_VEXL;
1248	} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1249	switch (ppFromVEX2of2(insn->vectorExtensionPrefix[`1`])) {
1250	case VEX_PREFIX_66:
1251	attrMask \|= ATTR_OPSIZE;
1252	if (insn->hasAdSize)
1253	attrMask \|= ATTR_ADSIZE;
1254	break;
1255	case VEX_PREFIX_F3:
1256	attrMask \|= ATTR_XS;
1257	break;
1258	case VEX_PREFIX_F2:
1259	attrMask \|= ATTR_XD;
1260	break;
1261	}
1262
1263	if (lFromVEX2of2(insn->vectorExtensionPrefix[`1`]))
1264	attrMask \|= ATTR_VEXL;
1265	} else if (insn->vectorExtensionType == TYPE_XOP) {
1266	switch (ppFromXOP3of3(insn->vectorExtensionPrefix[`2`])) {
1267	case VEX_PREFIX_66:
1268	attrMask \|= ATTR_OPSIZE;
1269	break;
1270	case VEX_PREFIX_F3:
1271	attrMask \|= ATTR_XS;
1272	break;
1273	case VEX_PREFIX_F2:
1274	attrMask \|= ATTR_XD;
1275	break;
1276	}
1277
1278	if (lFromXOP3of3(insn->vectorExtensionPrefix[`2`]))
1279	attrMask \|= ATTR_VEXL;
1280	} else {
1281	return -`1`;
1282	}
1283	} else if (!insn->mandatoryPrefix) {
1284	// If we don't have mandatory prefix we should use legacy prefixes here
1285	if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1286	attrMask \|= ATTR_OPSIZE;
1287	if (insn->hasAdSize)
1288	attrMask \|= ATTR_ADSIZE;
1289	if (insn->opcodeType == ONEBYTE) {
1290	if (insn->repeatPrefix == `0xf3` && (insn->opcode == `0x90`))
1291	// Special support for PAUSE
1292	attrMask \|= ATTR_XS;
1293	} else {
1294	if (insn->repeatPrefix == `0xf2`)
1295	attrMask \|= ATTR_XD;
1296	else if (insn->repeatPrefix == `0xf3`)
1297	attrMask \|= ATTR_XS;
1298	}
1299	} else {
1300	switch (insn->mandatoryPrefix) {
1301	case `0xf2`:
1302	attrMask \|= ATTR_XD;
1303	break;
1304	case `0xf3`:
1305	attrMask \|= ATTR_XS;
1306	break;
1307	case `0x66`:
1308	if (insn->mode != MODE_16BIT)
1309	attrMask \|= ATTR_OPSIZE;
1310	if (insn->hasAdSize)
1311	attrMask \|= ATTR_ADSIZE;
1312	break;
1313	case `0x67`:
1314	attrMask \|= ATTR_ADSIZE;
1315	break;
1316	}
1317	}
1318
1319	if (insn->rexPrefix & `0x08`) {
1320	attrMask \|= ATTR_REXW;
1321	attrMask &= ~ATTR_ADSIZE;
1322	}
1323
1324	// Absolute jump and pushp/popp need special handling
1325	if (insn->rex2ExtensionPrefix[`0`] == `0xd5` && insn->opcodeType == ONEBYTE &&
1326	(insn->opcode == `0xA1` \|\| (insn->opcode & `0xf0`) == `0x50`))
1327	attrMask \|= ATTR_REX2;
1328
1329	if (insn->mode == MODE_16BIT) {
1330	// JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1331	// of the AdSize prefix is inverted w.r.t. 32-bit mode.
1332	if (insn->opcodeType == ONEBYTE && insn->opcode == `0xE3`)
1333	attrMask ^= ATTR_ADSIZE;
1334	// If we're in 16-bit mode and this is one of the relative jumps and opsize
1335	// prefix isn't present, we need to force the opsize attribute since the
1336	// prefix is inverted relative to 32-bit mode.
1337	if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1338	(insn->opcode == `0xE8` \|\| insn->opcode == `0xE9`))
1339	attrMask \|= ATTR_OPSIZE;
1340
1341	if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1342	insn->opcode >= `0x80` && insn->opcode <= `0x8F`)
1343	attrMask \|= ATTR_OPSIZE;
1344	}
1345
1346
1347	if (getInstructionIDWithAttrMask(instructionID: &instructionID, insn, attrMask))
1348	return -`1`;
1349
1350	// The following clauses compensate for limitations of the tables.
1351
1352	if (insn->mode != MODE_64BIT &&
1353	insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1354	// The tables can't distinquish between cases where the W-bit is used to
1355	// select register size and cases where its a required part of the opcode.
1356	if ((insn->vectorExtensionType == TYPE_EVEX &&
1357	wFromEVEX3of4(insn->vectorExtensionPrefix[`2`])) \|\|
1358	(insn->vectorExtensionType == TYPE_VEX_3B &&
1359	wFromVEX3of3(insn->vectorExtensionPrefix[`2`])) \|\|
1360	(insn->vectorExtensionType == TYPE_XOP &&
1361	wFromXOP3of3(insn->vectorExtensionPrefix[`2`]))) {
1362
1363	uint16_t instructionIDWithREXW;
1364	if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithREXW, insn,
1365	attrMask: attrMask \| ATTR_REXW)) {
1366	insn->instructionID = instructionID;
1367	insn->spec = &INSTRUCTIONS_SYM[instructionID];
1368	return `0`;
1369	}
1370
1371	auto SpecName = mii->getName(Opcode: instructionIDWithREXW);
1372	// If not a 64-bit instruction. Switch the opcode.
1373	if (!is64Bit(name: SpecName.data())) {
1374	insn->instructionID = instructionIDWithREXW;
1375	insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1376	return `0`;
1377	}
1378	}
1379	}
1380
1381	// Absolute moves, umonitor, and movdir64b need special handling.
1382	// -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1383	// inverted w.r.t.
1384	// -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1385	// any position.
1386	if ((insn->opcodeType == ONEBYTE && ((insn->opcode & `0xFC`) == `0xA0`)) \|\|
1387	(insn->opcodeType == TWOBYTE && (insn->opcode == `0xAE`)) \|\|
1388	(insn->opcodeType == THREEBYTE_38 && insn->opcode == `0xF8`) \|\|
1389	(insn->opcodeType == MAP4 && insn->opcode == `0xF8`)) {
1390	// Make sure we observed the prefixes in any position.
1391	if (insn->hasAdSize)
1392	attrMask \|= ATTR_ADSIZE;
1393	if (insn->hasOpSize)
1394	attrMask \|= ATTR_OPSIZE;
1395
1396	// In 16-bit, invert the attributes.
1397	if (insn->mode == MODE_16BIT) {
1398	attrMask ^= ATTR_ADSIZE;
1399
1400	// The OpSize attribute is only valid with the absolute moves.
1401	if (insn->opcodeType == ONEBYTE && ((insn->opcode & `0xFC`) == `0xA0`))
1402	attrMask ^= ATTR_OPSIZE;
1403	}
1404
1405	if (getInstructionIDWithAttrMask(instructionID: &instructionID, insn, attrMask))
1406	return -`1`;
1407
1408	insn->instructionID = instructionID;
1409	insn->spec = &INSTRUCTIONS_SYM[instructionID];
1410	return `0`;
1411	}
1412
1413	if ((insn->mode == MODE_16BIT \|\| insn->hasOpSize) &&
1414	!(attrMask & ATTR_OPSIZE)) {
1415	// The instruction tables make no distinction between instructions that
1416	// allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1417	// particular spot (i.e., many MMX operations). In general we're
1418	// conservative, but in the specific case where OpSize is present but not in
1419	// the right place we check if there's a 16-bit operation.
1420	const struct InstructionSpecifier *spec;
1421	uint16_t instructionIDWithOpsize;
1422	llvm::StringRef specName, specWithOpSizeName;
1423
1424	spec = &INSTRUCTIONS_SYM[instructionID];
1425
1426	if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithOpsize, insn,
1427	attrMask: attrMask \| ATTR_OPSIZE)) {
1428	// ModRM required with OpSize but not present. Give up and return the
1429	// version without OpSize set.
1430	insn->instructionID = instructionID;
1431	insn->spec = spec;
1432	return `0`;
1433	}
1434
1435	specName = mii->getName(Opcode: instructionID);
1436	specWithOpSizeName = mii->getName(Opcode: instructionIDWithOpsize);
1437
1438	if (is16BitEquivalent(orig: specName.data(), equiv: specWithOpSizeName.data()) &&
1439	(insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1440	insn->instructionID = instructionIDWithOpsize;
1441	insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1442	} else {
1443	insn->instructionID = instructionID;
1444	insn->spec = spec;
1445	}
1446	return `0`;
1447	}
1448
1449	if (insn->opcodeType == ONEBYTE && insn->opcode == `0x90` &&
1450	insn->rexPrefix & `0x01`) {
1451	// NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1452	// as XCHG %r8, %eax.
1453	const struct InstructionSpecifier *spec;
1454	uint16_t instructionIDWithNewOpcode;
1455	const struct InstructionSpecifier *specWithNewOpcode;
1456
1457	spec = &INSTRUCTIONS_SYM[instructionID];
1458
1459	// Borrow opcode from one of the other XCHGar opcodes
1460	insn->opcode = `0x91`;
1461
1462	if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithNewOpcode, insn,
1463	attrMask)) {
1464	insn->opcode = `0x90`;
1465
1466	insn->instructionID = instructionID;
1467	insn->spec = spec;
1468	return `0`;
1469	}
1470
1471	specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1472
1473	// Change back
1474	insn->opcode = `0x90`;
1475
1476	insn->instructionID = instructionIDWithNewOpcode;
1477	insn->spec = specWithNewOpcode;
1478
1479	return `0`;
1480	}
1481
1482	insn->instructionID = instructionID;
1483	insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1484
1485	return `0`;
1486	}
1487
1488	// Read an operand from the opcode field of an instruction and interprets it
1489	// appropriately given the operand width. Handles AddRegFrm instructions.
1490	//
1491	// @param insn - the instruction whose opcode field is to be read.
1492	// @param size - The width (in bytes) of the register being specified.
1493	// 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1494	// RAX.
1495	// @return - 0 on success; nonzero otherwise.
1496	static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1497	LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1498
1499	if (size == `0`)
1500	size = insn->registerSize;
1501
1502	auto setOpcodeRegister = [&](unsigned base) {
1503	insn->opcodeRegister =
1504	(Reg)(base + ((bFromREX(insn->rexPrefix) << `3`) \|
1505	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`) \|
1506	(insn->opcode & `7`)));
1507	};
1508
1509	switch (size) {
1510	case `1`:
1511	setOpcodeRegister (MODRM_REG_AL);
1512	if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + `0x4` &&
1513	insn->opcodeRegister < MODRM_REG_AL + `0x8`) {
1514	insn->opcodeRegister =
1515	(Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - `4`));
1516	}
1517
1518	break;
1519	case `2`:
1520	setOpcodeRegister (MODRM_REG_AX);
1521	break;
1522	case `4`:
1523	setOpcodeRegister (MODRM_REG_EAX);
1524	break;
1525	case `8`:
1526	setOpcodeRegister (MODRM_REG_RAX);
1527	break;
1528	}
1529
1530	return `0`;
1531	}
1532
1533	// Consume an immediate operand from an instruction, given the desired operand
1534	// size.
1535	//
1536	// @param insn - The instruction whose operand is to be read.
1537	// @param size - The width (in bytes) of the operand.
1538	// @return - 0 if the immediate was successfully consumed; nonzero
1539	// otherwise.
1540	static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1541	uint8_t imm8;
1542	uint16_t imm16;
1543	uint32_t imm32;
1544	uint64_t imm64;
1545
1546	LLVM_DEBUG(dbgs() << "readImmediate()");
1547
1548	assert(insn->numImmediatesConsumed < `2` && "Already consumed two immediates");
1549
1550	insn->immediateSize = size;
1551	insn->immediateOffset = insn->readerCursor - insn->startLocation;
1552
1553	switch (size) {
1554	case `1`:
1555	if (consume(insn, ptr&: imm8))
1556	return -`1`;
1557	insn->immediates[insn->numImmediatesConsumed] = imm8;
1558	break;
1559	case `2`:
1560	if (consume(insn, ptr&: imm16))
1561	return -`1`;
1562	insn->immediates[insn->numImmediatesConsumed] = imm16;
1563	break;
1564	case `4`:
1565	if (consume(insn, ptr&: imm32))
1566	return -`1`;
1567	insn->immediates[insn->numImmediatesConsumed] = imm32;
1568	break;
1569	case `8`:
1570	if (consume(insn, ptr&: imm64))
1571	return -`1`;
1572	insn->immediates[insn->numImmediatesConsumed] = imm64;
1573	break;
1574	default:
1575	llvm_unreachable("invalid size");
1576	}
1577
1578	insn->numImmediatesConsumed++;
1579
1580	return `0`;
1581	}
1582
1583	// Consume vvvv from an instruction if it has a VEX prefix.
1584	static int readVVVV(struct InternalInstruction *insn) {
1585	LLVM_DEBUG(dbgs() << "readVVVV()");
1586
1587	int vvvv;
1588	if (insn->vectorExtensionType == TYPE_EVEX)
1589	vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]) << `4` \|
1590	vvvvFromEVEX3of4(insn->vectorExtensionPrefix[`2`]));
1591	else if (insn->vectorExtensionType == TYPE_VEX_3B)
1592	vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[`2`]);
1593	else if (insn->vectorExtensionType == TYPE_VEX_2B)
1594	vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[`1`]);
1595	else if (insn->vectorExtensionType == TYPE_XOP)
1596	vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[`2`]);
1597	else
1598	return -`1`;
1599
1600	if (insn->mode != MODE_64BIT)
1601	vvvv &= `0xf`; // Can only clear bit 4. Bit 3 must be cleared later.
1602
1603	insn->vvvv = static_cast<Reg>(vvvv);
1604	return `0`;
1605	}
1606
1607	// Read an mask register from the opcode field of an instruction.
1608	//
1609	// @param insn - The instruction whose opcode field is to be read.
1610	// @return - 0 on success; nonzero otherwise.
1611	static int readMaskRegister(struct InternalInstruction *insn) {
1612	LLVM_DEBUG(dbgs() << "readMaskRegister()");
1613
1614	if (insn->vectorExtensionType != TYPE_EVEX)
1615	return -`1`;
1616
1617	insn->writemask =
1618	static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[`3`]));
1619	return `0`;
1620	}
1621
1622	// Consults the specifier for an instruction and consumes all
1623	// operands for that instruction, interpreting them as it goes.
1624	static int readOperands(struct InternalInstruction *insn) {
1625	int hasVVVV, needVVVV;
1626	int sawRegImm = `0`;
1627
1628	LLVM_DEBUG(dbgs() << "readOperands()");
1629
1630	// If non-zero vvvv specified, make sure one of the operands uses it.
1631	hasVVVV = !readVVVV(insn);
1632	needVVVV = hasVVVV && (insn->vvvv != `0`);
1633
1634	for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1635	switch (Op.encoding) {
1636	case ENCODING_NONE:
1637	case ENCODING_SI:
1638	case ENCODING_DI:
1639	break;
1640	CASE_ENCODING_VSIB:
1641	// VSIB can use the V2 bit so check only the other bits.
1642	if (needVVVV)
1643	needVVVV = hasVVVV & ((insn->vvvv & `0xf`) != `0`);
1644	if (readModRM(insn))
1645	return -`1`;
1646
1647	// Reject if SIB wasn't used.
1648	if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1649	return -`1`;
1650
1651	// If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1652	if (insn->sibIndex == SIB_INDEX_NONE)
1653	insn->sibIndex = (SIBIndex)(insn->sibIndexBase + `4`);
1654
1655	// If EVEX.v2 is set this is one of the 16-31 registers.
1656	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1657	v2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1658	insn->sibIndex = (SIBIndex)(insn->sibIndex + `16`);
1659
1660	// Adjust the index register to the correct size.
1661	switch ((OperandType)Op.type) {
1662	default:
1663	debug("Unhandled VSIB index type");
1664	return -`1`;
1665	case TYPE_MVSIBX:
1666	insn->sibIndex =
1667	(SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1668	break;
1669	case TYPE_MVSIBY:
1670	insn->sibIndex =
1671	(SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1672	break;
1673	case TYPE_MVSIBZ:
1674	insn->sibIndex =
1675	(SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1676	break;
1677	}
1678
1679	// Apply the AVX512 compressed displacement scaling factor.
1680	if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1681	insn->displacement *= `1` << (Op.encoding - ENCODING_VSIB);
1682	break;
1683	case ENCODING_SIB:
1684	// Reject if SIB wasn't used.
1685	if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1686	return -`1`;
1687	if (readModRM(insn))
1688	return -`1`;
1689	if (fixupReg(insn, op: &Op))
1690	return -`1`;
1691	break;
1692	case ENCODING_REG:
1693	CASE_ENCODING_RM:
1694	if (readModRM(insn))
1695	return -`1`;
1696	if (fixupReg(insn, op: &Op))
1697	return -`1`;
1698	// Apply the AVX512 compressed displacement scaling factor.
1699	if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1700	insn->displacement *= `1` << (Op.encoding - ENCODING_RM);
1701	break;
1702	case ENCODING_IB:
1703	if (sawRegImm) {
1704	// Saw a register immediate so don't read again and instead split the
1705	// previous immediate. FIXME: This is a hack.
1706	insn->immediates[insn->numImmediatesConsumed] =
1707	insn->immediates[insn->numImmediatesConsumed - `1`] & `0xf`;
1708	++insn->numImmediatesConsumed;
1709	break;
1710	}
1711	if (readImmediate(insn, size: `1`))
1712	return -`1`;
1713	if (Op.type == TYPE_XMM \|\| Op.type == TYPE_YMM)
1714	sawRegImm = `1`;
1715	break;
1716	case ENCODING_IW:
1717	if (readImmediate(insn, size: `2`))
1718	return -`1`;
1719	break;
1720	case ENCODING_ID:
1721	if (readImmediate(insn, size: `4`))
1722	return -`1`;
1723	break;
1724	case ENCODING_IO:
1725	if (readImmediate(insn, size: `8`))
1726	return -`1`;
1727	break;
1728	case ENCODING_Iv:
1729	if (readImmediate(insn, size: insn->immediateSize))
1730	return -`1`;
1731	break;
1732	case ENCODING_Ia:
1733	if (readImmediate(insn, size: insn->addressSize))
1734	return -`1`;
1735	break;
1736	case ENCODING_IRC:
1737	insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]) << `1`) \|
1738	lFromEVEX4of4(insn->vectorExtensionPrefix[`3`]);
1739	break;
1740	case ENCODING_RB:
1741	if (readOpcodeRegister(insn, size: `1`))
1742	return -`1`;
1743	break;
1744	case ENCODING_RW:
1745	if (readOpcodeRegister(insn, size: `2`))
1746	return -`1`;
1747	break;
1748	case ENCODING_RD:
1749	if (readOpcodeRegister(insn, size: `4`))
1750	return -`1`;
1751	break;
1752	case ENCODING_RO:
1753	if (readOpcodeRegister(insn, size: `8`))
1754	return -`1`;
1755	break;
1756	case ENCODING_Rv:
1757	if (readOpcodeRegister(insn, size: `0`))
1758	return -`1`;
1759	break;
1760	case ENCODING_CF:
1761	insn->immediates[`1`] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[`2`]);
1762	needVVVV = false; // oszc shares the same bits with VVVV
1763	break;
1764	case ENCODING_CC:
1765	if (isCCMPOrCTEST(insn))
1766	insn->immediates[`2`] = scFromEVEX4of4(insn->vectorExtensionPrefix[`3`]);
1767	else
1768	insn->immediates[`1`] = insn->opcode & `0xf`;
1769	break;
1770	case ENCODING_FP:
1771	break;
1772	case ENCODING_VVVV:
1773	needVVVV = `0`; // Mark that we have found a VVVV operand.
1774	if (!hasVVVV)
1775	return -`1`;
1776	if (insn->mode != MODE_64BIT)
1777	insn->vvvv = static_cast<Reg>(insn->vvvv & `0x7`);
1778	if (fixupReg(insn, op: &Op))
1779	return -`1`;
1780	break;
1781	case ENCODING_WRITEMASK:
1782	if (readMaskRegister(insn))
1783	return -`1`;
1784	break;
1785	case ENCODING_DUP:
1786	break;
1787	default:
1788	LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1789	return -`1`;
1790	}
1791	}
1792
1793	// If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1794	if (needVVVV)
1795	return -`1`;
1796
1797	return `0`;
1798	}
1799
namespace llvm::X86 {

// Placeholder register numbers. These constants are never actually assigned;
// they exist only so that an automatically-generated switch statement
// compiles.
enum {
  BX_SI = 500,
  BX_DI = 501,
  BP_SI = 502,
  BP_DI = 503,
  sib = 504,
  sib64 = 505
};

} // namespace llvm::X86
1817
1818	static bool translateInstruction(MCInst &target,
1819	InternalInstruction &source,
1820	const MCDisassembler *Dis);
1821
1822	namespace {
1823
1824	/// Generic disassembler for all X86 platforms. All each platform class should
1825	/// have to do is subclass the constructor, and provide a different
1826	/// disassemblerMode value.
1827	class X86GenericDisassembler : public MCDisassembler {
1828	std::unique_ptr<const MCInstrInfo> MII;
1829	public:
1830	X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1831	std::unique_ptr<const MCInstrInfo> MII);
1832	public:
1833	DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1834	ArrayRef<uint8_t> Bytes, uint64_t Address,
1835	raw_ostream &cStream) const override;
1836
1837	private:
1838	DisassemblerMode fMode;
1839	};
1840
1841	} // namespace
1842
1843	X86GenericDisassembler::X86GenericDisassembler(
1844	const MCSubtargetInfo &STI,
1845	MCContext &Ctx,
1846	std::unique_ptr<const MCInstrInfo> MII)
1847	: MCDisassembler (STI, Ctx), MII (std::move(MII)) {
1848	const FeatureBitset &FB = STI.getFeatureBits();
1849	if (FB [X86::Is16Bit]) {
1850	fMode = MODE_16BIT;
1851	return;
1852	} else if (FB [X86::Is32Bit]) {
1853	fMode = MODE_32BIT;
1854	return;
1855	} else if (FB [X86::Is64Bit]) {
1856	fMode = MODE_64BIT;
1857	return;
1858	}
1859
1860	llvm_unreachable("Invalid CPU mode");
1861	}
1862
1863	MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1864	MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1865	raw_ostream &CStream) const {
1866	CommentStream = &CStream;
1867
1868	InternalInstruction Insn;
1869	memset(s: &Insn, c: `0`, n: sizeof(InternalInstruction));
1870	Insn.bytes = Bytes;
1871	Insn.startLocation = Address;
1872	Insn.readerCursor = Address;
1873	Insn.mode = fMode;
1874
1875	if (Bytes.empty() \|\| readPrefixes(insn: &Insn) \|\| readOpcode(insn: &Insn) \|\|
1876	getInstructionID(insn: &Insn, mii: MII.get()) \|\| Insn.instructionID == `0` \|\|
1877	readOperands(insn: &Insn)) {
1878	Size = Insn.readerCursor - Address;
1879	return Fail;
1880	}
1881
1882	Insn.operands = x86OperandSets[Insn.spec->operands];
1883	Insn.length = Insn.readerCursor - Insn.startLocation;
1884	Size = Insn.length;
1885	if (Size > `15`)
1886	LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1887
1888	bool Ret = translateInstruction(target&: Instr, source&: Insn, Dis: this);
1889	if (!Ret) {
1890	unsigned Flags = X86::IP_NO_PREFIX;
1891	if (Insn.hasAdSize)
1892	Flags \|= X86::IP_HAS_AD_SIZE;
1893	if (!Insn.mandatoryPrefix) {
1894	if (Insn.hasOpSize)
1895	Flags \|= X86::IP_HAS_OP_SIZE;
1896	if (Insn.repeatPrefix == `0xf2`)
1897	Flags \|= X86::IP_HAS_REPEAT_NE;
1898	else if (Insn.repeatPrefix == `0xf3` &&
1899	// It should not be 'pause' f3 90
1900	Insn.opcode != `0x90`)
1901	Flags \|= X86::IP_HAS_REPEAT;
1902	if (Insn.hasLockPrefix)
1903	Flags \|= X86::IP_HAS_LOCK;
1904	}
1905	Instr.setFlags(Flags);
1906	}
1907	return (!Ret) ? Success : Fail;
1908	}
1909
1910	//
1911	// Private code that translates from struct InternalInstructions to MCInsts.
1912	//
1913
1914	/// translateRegister - Translates an internal register to the appropriate LLVM
1915	/// register, and appends it as an operand to an MCInst.
1916	///
1917	/// @param mcInst - The MCInst to append to.
1918	/// @param reg - The Reg to append.
1919	static void translateRegister(MCInst &mcInst, Reg reg) {
1920	#define ENTRY(x) X86::x,
1921	static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1922	#undef ENTRY
1923
1924	MCPhysReg llvmRegnum = llvmRegnums[reg];
1925	mcInst.addOperand(Op: MCOperand::createReg(Reg: llvmRegnum));
1926	}
1927
1928	static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1929	`0`, // SEG_OVERRIDE_NONE
1930	X86::CS,
1931	X86::SS,
1932	X86::DS,
1933	X86::ES,
1934	X86::FS,
1935	X86::GS
1936	};
1937
1938	/// translateSrcIndex - Appends a source index operand to an MCInst.
1939	///
1940	/// @param mcInst - The MCInst to append to.
1941	/// @param insn - The internal instruction.
1942	static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1943	unsigned baseRegNo;
1944
1945	if (insn.mode == MODE_64BIT)
1946	baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1947	else if (insn.mode == MODE_32BIT)
1948	baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1949	else {
1950	assert(insn.mode == MODE_16BIT);
1951	baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1952	}
1953	MCOperand baseReg = MCOperand::createReg(Reg: baseRegNo);
1954	mcInst.addOperand(Op: baseReg);
1955
1956	MCOperand segmentReg;
1957	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
1958	mcInst.addOperand(Op: segmentReg);
1959	return false;
1960	}
1961
1962	/// translateDstIndex - Appends a destination index operand to an MCInst.
1963	///
1964	/// @param mcInst - The MCInst to append to.
1965	/// @param insn - The internal instruction.
1966
1967	static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1968	unsigned baseRegNo;
1969
1970	if (insn.mode == MODE_64BIT)
1971	baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1972	else if (insn.mode == MODE_32BIT)
1973	baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1974	else {
1975	assert(insn.mode == MODE_16BIT);
1976	baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1977	}
1978	MCOperand baseReg = MCOperand::createReg(Reg: baseRegNo);
1979	mcInst.addOperand(Op: baseReg);
1980	return false;
1981	}
1982
1983	/// translateImmediate - Appends an immediate operand to an MCInst.
1984	///
1985	/// @param mcInst - The MCInst to append to.
1986	/// @param immediate - The immediate value to append.
1987	/// @param operand - The operand, as stored in the descriptor table.
1988	/// @param insn - The internal instruction.
1989	static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1990	const OperandSpecifier &operand,
1991	InternalInstruction &insn,
1992	const MCDisassembler *Dis) {
1993	// Sign-extend the immediate if necessary.
1994
1995	OperandType type = (OperandType)operand.type;
1996
1997	bool isBranch = false;
1998	uint64_t pcrel = `0`;
1999	if (type == TYPE_REL) {
2000	isBranch = true;
2001	pcrel = insn.startLocation + insn.length;
2002	switch (operand.encoding) {
2003	default:
2004	break;
2005	case ENCODING_Iv:
2006	switch (insn.displacementSize) {
2007	default:
2008	break;
2009	case `1`:
2010	if(immediate & `0x80`)
2011	immediate \|= ~(`0xffull`);
2012	break;
2013	case `2`:
2014	if(immediate & `0x8000`)
2015	immediate \|= ~(`0xffffull`);
2016	break;
2017	case `4`:
2018	if(immediate & `0x80000000`)
2019	immediate \|= ~(`0xffffffffull`);
2020	break;
2021	case `8`:
2022	break;
2023	}
2024	break;
2025	case ENCODING_IB:
2026	if(immediate & `0x80`)
2027	immediate \|= ~(`0xffull`);
2028	break;
2029	case ENCODING_IW:
2030	if(immediate & `0x8000`)
2031	immediate \|= ~(`0xffffull`);
2032	break;
2033	case ENCODING_ID:
2034	if(immediate & `0x80000000`)
2035	immediate \|= ~(`0xffffffffull`);
2036	break;
2037	}
2038	}
2039	// By default sign-extend all X86 immediates based on their encoding.
2040	else if (type == TYPE_IMM) {
2041	switch (operand.encoding) {
2042	default:
2043	break;
2044	case ENCODING_IB:
2045	if(immediate & `0x80`)
2046	immediate \|= ~(`0xffull`);
2047	break;
2048	case ENCODING_IW:
2049	if(immediate & `0x8000`)
2050	immediate \|= ~(`0xffffull`);
2051	break;
2052	case ENCODING_ID:
2053	if(immediate & `0x80000000`)
2054	immediate \|= ~(`0xffffffffull`);
2055	break;
2056	case ENCODING_IO:
2057	break;
2058	}
2059	}
2060
2061	switch (type) {
2062	case TYPE_XMM:
2063	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::XMM0 + (immediate >> `4`)));
2064	return;
2065	case TYPE_YMM:
2066	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::YMM0 + (immediate >> `4`)));
2067	return;
2068	case TYPE_ZMM:
2069	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::ZMM0 + (immediate >> `4`)));
2070	return;
2071	default:
2072	// operand is 64 bits wide. Do nothing.
2073	break;
2074	}
2075
2076	if (!Dis->tryAddingSymbolicOperand(
2077	Inst&: mcInst, Value: immediate + pcrel, Address: insn.startLocation, IsBranch: isBranch,
2078	Offset: insn.immediateOffset, OpSize: insn.immediateSize, InstSize: insn.length))
2079	mcInst.addOperand(Op: MCOperand::createImm(Val: immediate));
2080
2081	if (type == TYPE_MOFFS) {
2082	MCOperand segmentReg;
2083	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
2084	mcInst.addOperand(Op: segmentReg);
2085	}
2086	}
2087
2088	/// translateRMRegister - Translates a register stored in the R/M field of the
2089	/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
2090	/// @param mcInst - The MCInst to append to.
2091	/// @param insn - The internal instruction to extract the R/M field
2092	/// from.
2093	/// @return - 0 on success; -1 otherwise
2094	static bool translateRMRegister(MCInst &mcInst,
2095	InternalInstruction &insn) {
2096	if (insn.eaBase == EA_BASE_sib \|\| insn.eaBase == EA_BASE_sib64) {
2097	debug("A R/M register operand may not have a SIB byte");
2098	return true;
2099	}
2100
2101	switch (insn.eaBase) {
2102	default:
2103	debug("Unexpected EA base register");
2104	return true;
2105	case EA_BASE_NONE:
2106	debug("EA_BASE_NONE for ModR/M base");
2107	return true;
2108	#define ENTRY(x) case EA_BASE_##x:
2109	ALL_EA_BASES
2110	#undef ENTRY
2111	debug("A R/M register operand may not have a base; "
2112	"the operand must be a register.");
2113	return true;
2114	#define ENTRY(x) \
2115	case EA_REG_##x: \
2116	mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2117	ALL_REGS
2118	#undef ENTRY
2119	}
2120
2121	return false;
2122	}
2123
2124	/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2125	/// fields of an internal instruction (and possibly its SIB byte) to a memory
2126	/// operand in LLVM's format, and appends it to an MCInst.
2127	///
2128	/// @param mcInst - The MCInst to append to.
2129	/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2130	/// from.
2131	/// @param ForceSIB - The instruction must use SIB.
2132	/// @return - 0 on success; nonzero otherwise
2133	static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2134	const MCDisassembler *Dis,
2135	bool ForceSIB = false) {
2136	// Addresses in an MCInst are represented as five operands:
2137	// 1. basereg (register) The R/M base, or (if there is a SIB) the
2138	// SIB base
2139	// 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2140	// scale amount
2141	// 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2142	// the index (which is multiplied by the
2143	// scale amount)
2144	// 4. displacement (immediate) 0, or the displacement if there is one
2145	// 5. segmentreg (register) x86_registerNONE for now, but could be set
2146	// if we have segment overrides
2147
2148	MCOperand baseReg;
2149	MCOperand scaleAmount;
2150	MCOperand indexReg;
2151	MCOperand displacement;
2152	MCOperand segmentReg;
2153	uint64_t pcrel = `0`;
2154
2155	if (insn.eaBase == EA_BASE_sib \|\| insn.eaBase == EA_BASE_sib64) {
2156	if (insn.sibBase != SIB_BASE_NONE) {
2157	switch (insn.sibBase) {
2158	default:
2159	debug("Unexpected sibBase");
2160	return true;
2161	#define ENTRY(x) \
2162	case SIB_BASE_##x: \
2163	baseReg = MCOperand::createReg(X86::x); break;
2164	ALL_SIB_BASES
2165	#undef ENTRY
2166	}
2167	} else {
2168	baseReg = MCOperand::createReg(Reg: X86::NoRegister);
2169	}
2170
2171	if (insn.sibIndex != SIB_INDEX_NONE) {
2172	switch (insn.sibIndex) {
2173	default:
2174	debug("Unexpected sibIndex");
2175	return true;
2176	#define ENTRY(x) \
2177	case SIB_INDEX_##x: \
2178	indexReg = MCOperand::createReg(X86::x); break;
2179	EA_BASES_32BIT
2180	EA_BASES_64BIT
2181	REGS_XMM
2182	REGS_YMM
2183	REGS_ZMM
2184	#undef ENTRY
2185	}
2186	} else {
2187	// Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2188	// but no index is used and modrm alone should have been enough.
2189	// -No base register in 32-bit mode. In 64-bit mode this is used to
2190	// avoid rip-relative addressing.
2191	// -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2192	// base always requires a SIB byte.
2193	// -A scale other than 1 is used.
2194	if (!ForceSIB &&
2195	(insn.sibScale != `1` \|\|
2196	(insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) \|\|
2197	(insn.sibBase != SIB_BASE_NONE &&
2198	insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2199	insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2200	indexReg = MCOperand::createReg(Reg: insn.addressSize == `4` ? X86::EIZ :
2201	X86::RIZ);
2202	} else
2203	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2204	}
2205
2206	scaleAmount = MCOperand::createImm(Val: insn.sibScale);
2207	} else {
2208	switch (insn.eaBase) {
2209	case EA_BASE_NONE:
2210	if (insn.eaDisplacement == EA_DISP_NONE) {
2211	debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2212	return true;
2213	}
2214	if (insn.mode == MODE_64BIT){
2215	pcrel = insn.startLocation + insn.length;
2216	Dis->tryAddingPcLoadReferenceComment(Value: insn.displacement + pcrel,
2217	Address: insn.startLocation +
2218	insn.displacementOffset);
2219	// Section 2.2.1.6
2220	baseReg = MCOperand::createReg(Reg: insn.addressSize == `4` ? X86::EIP :
2221	X86::RIP);
2222	}
2223	else
2224	baseReg = MCOperand::createReg(Reg: X86::NoRegister);
2225
2226	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2227	break;
2228	case EA_BASE_BX_SI:
2229	baseReg = MCOperand::createReg(Reg: X86::BX);
2230	indexReg = MCOperand::createReg(Reg: X86::SI);
2231	break;
2232	case EA_BASE_BX_DI:
2233	baseReg = MCOperand::createReg(Reg: X86::BX);
2234	indexReg = MCOperand::createReg(Reg: X86::DI);
2235	break;
2236	case EA_BASE_BP_SI:
2237	baseReg = MCOperand::createReg(Reg: X86::BP);
2238	indexReg = MCOperand::createReg(Reg: X86::SI);
2239	break;
2240	case EA_BASE_BP_DI:
2241	baseReg = MCOperand::createReg(Reg: X86::BP);
2242	indexReg = MCOperand::createReg(Reg: X86::DI);
2243	break;
2244	default:
2245	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2246	switch (insn.eaBase) {
2247	default:
2248	debug("Unexpected eaBase");
2249	return true;
2250	// Here, we will use the fill-ins defined above. However,
2251	// BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2252	// sib and sib64 were handled in the top-level if, so they're only
2253	// placeholders to keep the compiler happy.
2254	#define ENTRY(x) \
2255	case EA_BASE_##x: \
2256	baseReg = MCOperand::createReg(X86::x); break;
2257	ALL_EA_BASES
2258	#undef ENTRY
2259	#define ENTRY(x) case EA_REG_##x:
2260	ALL_REGS
2261	#undef ENTRY
2262	debug("A R/M memory operand may not be a register; "
2263	"the base field must be a base.");
2264	return true;
2265	}
2266	}
2267
2268	scaleAmount = MCOperand::createImm(Val: `1`);
2269	}
2270
2271	displacement = MCOperand::createImm(Val: insn.displacement);
2272
2273	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
2274
2275	mcInst.addOperand(Op: baseReg);
2276	mcInst.addOperand(Op: scaleAmount);
2277	mcInst.addOperand(Op: indexReg);
2278
2279	const uint8_t dispSize =
2280	(insn.eaDisplacement == EA_DISP_NONE) ? `0` : insn.displacementSize;
2281
2282	if (!Dis->tryAddingSymbolicOperand(
2283	Inst&: mcInst, Value: insn.displacement + pcrel, Address: insn.startLocation, IsBranch: false,
2284	Offset: insn.displacementOffset, OpSize: dispSize, InstSize: insn.length))
2285	mcInst.addOperand(Op: displacement);
2286	mcInst.addOperand(Op: segmentReg);
2287	return false;
2288	}
2289
2290	/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2291	/// byte of an instruction to LLVM form, and appends it to an MCInst.
2292	///
2293	/// @param mcInst - The MCInst to append to.
2294	/// @param operand - The operand, as stored in the descriptor table.
2295	/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2296	/// from.
2297	/// @return - 0 on success; nonzero otherwise
2298	static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2299	InternalInstruction &insn, const MCDisassembler *Dis) {
2300	switch (operand.type) {
2301	default:
2302	debug("Unexpected type for a R/M operand");
2303	return true;
2304	case TYPE_R8:
2305	case TYPE_R16:
2306	case TYPE_R32:
2307	case TYPE_R64:
2308	case TYPE_Rv:
2309	case TYPE_MM64:
2310	case TYPE_XMM:
2311	case TYPE_YMM:
2312	case TYPE_ZMM:
2313	case TYPE_TMM:
2314	case TYPE_VK_PAIR:
2315	case TYPE_VK:
2316	case TYPE_DEBUGREG:
2317	case TYPE_CONTROLREG:
2318	case TYPE_BNDR:
2319	return translateRMRegister(mcInst, insn);
2320	case TYPE_M:
2321	case TYPE_MVSIBX:
2322	case TYPE_MVSIBY:
2323	case TYPE_MVSIBZ:
2324	return translateRMMemory(mcInst, insn, Dis);
2325	case TYPE_MSIB:
2326	return translateRMMemory(mcInst, insn, Dis, ForceSIB: true);
2327	}
2328	}
2329
2330	/// translateFPRegister - Translates a stack position on the FPU stack to its
2331	/// LLVM form, and appends it to an MCInst.
2332	///
2333	/// @param mcInst - The MCInst to append to.
2334	/// @param stackPos - The stack position to translate.
2335	static void translateFPRegister(MCInst &mcInst,
2336	uint8_t stackPos) {
2337	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::ST0 + stackPos));
2338	}
2339
2340	/// translateMaskRegister - Translates a 3-bit mask register number to
2341	/// LLVM form, and appends it to an MCInst.
2342	///
2343	/// @param mcInst - The MCInst to append to.
2344	/// @param maskRegNum - Number of mask register from 0 to 7.
2345	/// @return - false on success; true otherwise.
2346	static bool translateMaskRegister(MCInst &mcInst,
2347	uint8_t maskRegNum) {
2348	if (maskRegNum >= `8`) {
2349	debug("Invalid mask register number");
2350	return true;
2351	}
2352
2353	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::K0 + maskRegNum));
2354	return false;
2355	}
2356
2357	/// translateOperand - Translates an operand stored in an internal instruction
2358	/// to LLVM's format and appends it to an MCInst.
2359	///
2360	/// @param mcInst - The MCInst to append to.
2361	/// @param operand - The operand, as stored in the descriptor table.
2362	/// @param insn - The internal instruction.
2363	/// @return - false on success; true otherwise.
2364	static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2365	InternalInstruction &insn,
2366	const MCDisassembler *Dis) {
2367	switch (operand.encoding) {
2368	default:
2369	debug("Unhandled operand encoding during translation");
2370	return true;
2371	case ENCODING_REG:
2372	translateRegister(mcInst, reg: insn.reg);
2373	return false;
2374	case ENCODING_WRITEMASK:
2375	return translateMaskRegister(mcInst, maskRegNum: insn.writemask);
2376	case ENCODING_SIB:
2377	CASE_ENCODING_RM:
2378	CASE_ENCODING_VSIB:
2379	return translateRM(mcInst, operand, insn, Dis);
2380	case ENCODING_IB:
2381	case ENCODING_IW:
2382	case ENCODING_ID:
2383	case ENCODING_IO:
2384	case ENCODING_Iv:
2385	case ENCODING_Ia:
2386	translateImmediate(mcInst,
2387	immediate: insn.immediates[insn.numImmediatesTranslated++],
2388	operand,
2389	insn,
2390	Dis);
2391	return false;
2392	case ENCODING_IRC:
2393	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.RC));
2394	return false;
2395	case ENCODING_SI:
2396	return translateSrcIndex(mcInst, insn);
2397	case ENCODING_DI:
2398	return translateDstIndex(mcInst, insn);
2399	case ENCODING_RB:
2400	case ENCODING_RW:
2401	case ENCODING_RD:
2402	case ENCODING_RO:
2403	case ENCODING_Rv:
2404	translateRegister(mcInst, reg: insn.opcodeRegister);
2405	return false;
2406	case ENCODING_CF:
2407	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`1`]));
2408	return false;
2409	case ENCODING_CC:
2410	if (isCCMPOrCTEST(insn: &insn))
2411	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`2`]));
2412	else
2413	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`1`]));
2414	return false;
2415	case ENCODING_FP:
2416	translateFPRegister(mcInst, stackPos: insn.modRM & `7`);
2417	return false;
2418	case ENCODING_VVVV:
2419	translateRegister(mcInst, reg: insn.vvvv);
2420	return false;
2421	case ENCODING_DUP:
2422	return translateOperand(mcInst, operand: insn.operands [operand.type - TYPE_DUP0],
2423	insn, Dis);
2424	}
2425	}
2426
2427	/// translateInstruction - Translates an internal instruction and all its
2428	/// operands to an MCInst.
2429	///
2430	/// @param mcInst - The MCInst to populate with the instruction's data.
2431	/// @param insn - The internal instruction.
2432	/// @return - false on success; true otherwise.
2433	static bool translateInstruction(MCInst &mcInst,
2434	InternalInstruction &insn,
2435	const MCDisassembler *Dis) {
2436	if (!insn.spec) {
2437	debug("Instruction has no specification");
2438	return true;
2439	}
2440
2441	mcInst.clear();
2442	mcInst.setOpcode(insn.instructionID);
2443	// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2444	// prefix bytes should be disassembled as xrelease and xacquire then set the
2445	// opcode to those instead of the rep and repne opcodes.
2446	if (insn.xAcquireRelease) {
2447	if(mcInst.getOpcode() == X86::REP_PREFIX)
2448	mcInst.setOpcode(X86::XRELEASE_PREFIX);
2449	else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2450	mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2451	}
2452
2453	insn.numImmediatesTranslated = `0`;
2454
2455	for (const auto &Op : insn.operands) {
2456	if (Op.encoding != ENCODING_NONE) {
2457	if (translateOperand(mcInst, operand: Op, insn, Dis)) {
2458	return true;
2459	}
2460	}
2461	}
2462
2463	return false;
2464	}
2465
2466	static MCDisassembler createX86Disassembler(const* Target &T,
2467	const MCSubtargetInfo &STI,
2468	MCContext &Ctx) {
2469	std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2470	return new X86GenericDisassembler (STI, Ctx, std::move(MII));
2471	}
2472
2473	extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {
2474	// Register the disassembler.
2475	TargetRegistry::RegisterMCDisassembler(T&: getTheX86_32Target(),
2476	Fn: createX86Disassembler);
2477	TargetRegistry::RegisterMCDisassembler(T&: getTheX86_64Target(),
2478	Fn: createX86Disassembler);
2479	}
2480

Browse the source code of llvm_projects/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp