X86Disassembler.cpp source code [llvm_projects/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp]

1	//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file is part of the X86 Disassembler.
10	// It contains code to translate the data produced by the decoder into
11	// MCInsts.
12	//
13	//
14	// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15	// 64-bit X86 instruction sets. The main decode sequence for an assembly
16	// instruction in this disassembler is:
17	//
18	// 1. Read the prefix bytes and determine the attributes of the instruction.
19	// These attributes, recorded in enum attributeBits
20	// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21	// provides a mapping from bitmasks to contexts, which are represented by
22	// enum InstructionContext (ibid.).
23	//
24	// 2. Read the opcode, and determine what kind of opcode it is. The
25	// disassembler distinguishes four kinds of opcodes, which are enumerated in
26	// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27	// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28	// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29	//
30	// 3. Depending on the opcode type, look in one of four ClassDecision structures
31	// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32	// OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get
33	// a ModRMDecision (ibid.).
34	//
35	// 4. Some instructions, such as escape opcodes or extended opcodes, or even
36	// instructions that have ModRMReg / ModRMMem forms in LLVM, need the
37	// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38	// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39	// ModR/M byte is required and how to interpret it.
40	//
41	// 5. After resolving the ModRMDecision, the disassembler has a unique ID
42	// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43	// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44	// meanings of its operands.
45	//
46	// 6. For each operand, its encoding is an entry from OperandEncoding
47	// (X86DisassemblerDecoderCommon.h) and its type is an entry from
48	// OperandType (ibid.). The encoding indicates how to read it from the
49	// instruction; the type indicates how to interpret the value once it has
50	// been read. For example, a register operand could be stored in the R/M
51	// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52	// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53	// register, for instance). Given this information, the operands can be
54	// extracted and interpreted.
55	//
56	// 7. As the last step, the disassembler translates the instruction information
57	// and operands into a format understandable by the client - in this case, an
58	// MCInst for use by the MC infrastructure.
59	//
60	// The disassembler is broken broadly into two parts: the table emitter that
61	// emits the instruction decode tables discussed above during compilation, and
62	// the disassembler itself. The table emitter is documented in more detail in
63	// utils/TableGen/X86DisassemblerEmitter.h.
64	//
65	// X86Disassembler.cpp contains the code responsible for step 7, and for
66	// invoking the decoder to execute steps 1-6.
67	// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68	// table emitter and the disassembler.
69	// X86DisassemblerDecoder.h contains the public interface of the decoder,
70	// factored out into C for possible use by other projects.
71	// X86DisassemblerDecoder.c contains the source code of the decoder, which is
72	// responsible for steps 1-6.
73	//
74	//===----------------------------------------------------------------------===//
75
76	#include "MCTargetDesc/X86BaseInfo.h"
77	#include "MCTargetDesc/X86MCTargetDesc.h"
78	#include "TargetInfo/X86TargetInfo.h"
79	#include "X86DisassemblerDecoder.h"
80	#include "llvm-c/Visibility.h"
81	#include "llvm/MC/MCContext.h"
82	#include "llvm/MC/MCDisassembler/MCDisassembler.h"
83	#include "llvm/MC/MCExpr.h"
84	#include "llvm/MC/MCInst.h"
85	#include "llvm/MC/MCInstrInfo.h"
86	#include "llvm/MC/MCSubtargetInfo.h"
87	#include "llvm/MC/TargetRegistry.h"
88	#include "llvm/Support/Debug.h"
89	#include "llvm/Support/Format.h"
90	#include "llvm/Support/raw_ostream.h"
91
92	using namespace llvm;
93	using namespace llvm::X86Disassembler;
94
95	#define DEBUG_TYPE "x86-disassembler"
96
97	#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
98
// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // Selects how the ModR/M byte picks the instruction; decode() switches on
  // this against the MODRM_* values (MODRM_ONEENTRY, MODRM_SPLITRM, ...).
  uint8_t modrm_type;
  // Index of this decision's first entry in modRMTable; decode() adds a
  // ModR/M-derived offset to it.
  uint16_t instructionIDs;
};
106
107	// Specifies which set of ModR/M->instruction tables to look at
108	// given a particular opcode.
109	struct OpcodeDecision {
110	ModRMDecision modRMDecisions[`256`];
111	};
112
113	// Specifies which opcode->instruction tables to look at given
114	// a particular context (set of attributes). Since there are many possible
115	// contexts, the decoder first uses CONTEXTS_SYM to determine which context
116	// applies given a specific set of attributes. Hence there are only IC_max
117	// entries in this table, rather than 2^(ATTR_max).
118	struct ContextDecision {
119	OpcodeDecision opcodeDecisions[IC_max];
120	};
121
122	#include "X86GenDisassemblerTables.inc"
123
124	static InstrUID decode(OpcodeType type, InstructionContext insnContext,
125	uint8_t opcode, uint8_t modRM) {
126	const struct ModRMDecision *dec;
127
128	switch (type) {
129	case ONEBYTE:
130	dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
131	break;
132	case TWOBYTE:
133	dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
134	break;
135	case THREEBYTE_38:
136	dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
137	break;
138	case THREEBYTE_3A:
139	dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
140	break;
141	case XOP8_MAP:
142	dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
143	break;
144	case XOP9_MAP:
145	dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
146	break;
147	case XOPA_MAP:
148	dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
149	break;
150	case THREEDNOW_MAP:
151	dec =
152	&THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
153	break;
154	case MAP4:
155	dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
156	break;
157	case MAP5:
158	dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
159	break;
160	case MAP6:
161	dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
162	break;
163	case MAP7:
164	dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
165	break;
166	}
167
168	switch (dec->modrm_type) {
169	default:
170	llvm_unreachable("Corrupt table! Unknown modrm_type");
171	return `0`;
172	case MODRM_ONEENTRY:
173	return modRMTable[dec->instructionIDs];
174	case MODRM_SPLITRM:
175	if (modFromModRM(modRM) == `0x3`)
176	return modRMTable[dec->instructionIDs + `1`];
177	return modRMTable[dec->instructionIDs];
178	case MODRM_SPLITREG:
179	if (modFromModRM(modRM) == `0x3`)
180	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`) + `8`];
181	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`)];
182	case MODRM_SPLITMISC:
183	if (modFromModRM(modRM) == `0x3`)
184	return modRMTable[dec->instructionIDs + (modRM & `0x3f`) + `8`];
185	return modRMTable[dec->instructionIDs + ((modRM & `0x38`) >> `3`)];
186	case MODRM_FULL:
187	return modRMTable[dec->instructionIDs + modRM];
188	}
189	}
190
191	static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
192	uint64_t offset = insn->readerCursor - insn->startLocation;
193	if (offset >= insn->bytes.size())
194	return true;
195	byte = insn->bytes [offset];
196	return false;
197	}
198
199	template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
200	auto r = insn->bytes;
201	uint64_t offset = insn->readerCursor - insn->startLocation;
202	if (offset + sizeof(T) > r.size())
203	return true;
204	ptr = support::endian::read<T>(&r [offset], llvm::endianness::little);
205	insn->readerCursor += sizeof(T);
206	return false;
207	}
208
209	static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
210	return insn->mode == MODE_64BIT && prefix >= `0x40` && prefix <= `0x4f`;
211	}
212
213	static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
214	return insn->mode == MODE_64BIT && prefix == `0xd5`;
215	}
216
217	// Consumes all of an instruction's prefix bytes, and marks the
218	// instruction as having them. Also sets the instruction's default operand,
219	// address, and other relevant data sizes to report operands correctly.
220	//
221	// insn must not be empty.
222	static int readPrefixes(struct InternalInstruction *insn) {
223	bool isPrefix = true;
224	uint8_t byte = `0`;
225	uint8_t nextByte;
226
227	LLVM_DEBUG(dbgs() << "readPrefixes()");
228
229	while (isPrefix) {
230	// If we fail reading prefixes, just stop here and let the opcode reader
231	// deal with it.
232	if (consume(insn, ptr&: byte))
233	break;
234
235	// If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
236	// break and let it be disassembled as a normal "instruction".
237	if (insn->readerCursor - `1` == insn->startLocation && byte == `0xf0`) // LOCK
238	break;
239
240	if ((byte == `0xf2` \|\| byte == `0xf3`) && !peek(insn, byte&: nextByte)) {
241	// If the byte is 0xf2 or 0xf3, and any of the following conditions are
242	// met:
243	// - it is followed by a LOCK (0xf0) prefix
244	// - it is followed by an xchg instruction
245	// then it should be disassembled as a xacquire/xrelease not repne/rep.
246	if (((nextByte == `0xf0`) \|\|
247	((nextByte & `0xfe`) == `0x86` \|\| (nextByte & `0xf8`) == `0x90`))) {
248	insn->xAcquireRelease = true;
249	if (!(byte == `0xf3` && nextByte == `0x90`)) // PAUSE instruction support
250	break;
251	}
252	// Also if the byte is 0xf3, and the following condition is met:
253	// - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
254	// "mov mem, imm" (opcode 0xc6/0xc7) instructions.
255	// then it should be disassembled as an xrelease not rep.
256	if (byte == `0xf3` && (nextByte == `0x88` \|\| nextByte == `0x89` \|\|
257	nextByte == `0xc6` \|\| nextByte == `0xc7`)) {
258	insn->xAcquireRelease = true;
259	break;
260	}
261	if (isREX(insn, prefix: nextByte)) {
262	uint8_t nnextByte;
263	// Go to REX prefix after the current one
264	if (consume(insn, ptr&: nnextByte))
265	return -`1`;
266	// We should be able to read next byte after REX prefix
267	if (peek(insn, byte&: nnextByte))
268	return -`1`;
269	--insn->readerCursor;
270	}
271	}
272
273	switch (byte) {
274	case `0xf0`: // LOCK
275	insn->hasLockPrefix = true;
276	break;
277	case `0xf2`: // REPNE/REPNZ
278	case `0xf3`: { // REP or REPE/REPZ
279	uint8_t nextByte;
280	if (peek(insn, byte&: nextByte))
281	break;
282	// TODO:
283	// 1. There could be several 0x66
284	// 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
285	// it's not mandatory prefix
286	// 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
287	// 0x0f exactly after it to be mandatory prefix
288	// 4. if (nextByte == 0xd5) it's REX2 and we need
289	// 0x0f exactly after it to be mandatory prefix
290	if (isREX(insn, prefix: nextByte) \|\| isREX2(insn, prefix: nextByte) \|\| nextByte == `0x0f` \|\|
291	nextByte == `0x66`)
292	// The last of 0xf2 /0xf3 is mandatory prefix
293	insn->mandatoryPrefix = byte;
294	insn->repeatPrefix = byte;
295	break;
296	}
297	case `0x2e`: // CS segment override -OR- Branch not taken
298	insn->segmentOverride = SEG_OVERRIDE_CS;
299	break;
300	case `0x36`: // SS segment override -OR- Branch taken
301	insn->segmentOverride = SEG_OVERRIDE_SS;
302	break;
303	case `0x3e`: // DS segment override
304	insn->segmentOverride = SEG_OVERRIDE_DS;
305	break;
306	case `0x26`: // ES segment override
307	insn->segmentOverride = SEG_OVERRIDE_ES;
308	break;
309	case `0x64`: // FS segment override
310	insn->segmentOverride = SEG_OVERRIDE_FS;
311	break;
312	case `0x65`: // GS segment override
313	insn->segmentOverride = SEG_OVERRIDE_GS;
314	break;
315	case `0x66`: { // Operand-size override {
316	uint8_t nextByte;
317	insn->hasOpSize = true;
318	if (peek(insn, byte&: nextByte))
319	break;
320	// 0x66 can't overwrite existing mandatory prefix and should be ignored
321	if (!insn->mandatoryPrefix && (nextByte == `0x0f` \|\| isREX(insn, prefix: nextByte)))
322	insn->mandatoryPrefix = byte;
323	break;
324	}
325	case `0x67`: // Address-size override
326	insn->hasAdSize = true;
327	break;
328	default: // Not a prefix byte
329	isPrefix = false;
330	break;
331	}
332
333	if (isREX(insn, prefix: byte)) {
334	insn->rexPrefix = byte;
335	isPrefix = true;
336	LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
337	} else if (isPrefix) {
338	insn->rexPrefix = `0`;
339	}
340
341	if (isPrefix)
342	LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
343	}
344
345	insn->vectorExtensionType = TYPE_NO_VEX_XOP;
346
347	if (byte == `0x62`) {
348	uint8_t byte1, byte2;
349	if (consume(insn, ptr&: byte1)) {
350	LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
351	return -`1`;
352	}
353
354	if (peek(insn, byte&: byte2)) {
355	LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
356	return -`1`;
357	}
358
359	if ((insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)) {
360	insn->vectorExtensionType = TYPE_EVEX;
361	} else {
362	--insn->readerCursor; // unconsume byte1
363	--insn->readerCursor; // unconsume byte
364	}
365
366	if (insn->vectorExtensionType == TYPE_EVEX) {
367	insn->vectorExtensionPrefix[`0`] = byte;
368	insn->vectorExtensionPrefix[`1`] = byte1;
369	if (consume(insn, ptr&: insn->vectorExtensionPrefix[`2`])) {
370	LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
371	return -`1`;
372	}
373	if (consume(insn, ptr&: insn->vectorExtensionPrefix[`3`])) {
374	LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
375	return -`1`;
376	}
377
378	if (insn->mode == MODE_64BIT) {
379	// We simulate the REX prefix for simplicity's sake
380	insn->rexPrefix = `0x40` \|
381	(wFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) << `3`) \|
382	(rFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `2`) \|
383	(xFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `1`) \|
384	(bFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `0`);
385
386	// We simulate the REX2 prefix for simplicity's sake
387	insn->rex2ExtensionPrefix[`1`] =
388	(r2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `6`) \|
389	(uFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) << `5`) \|
390	(b2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`);
391	}
392
393	LLVM_DEBUG(
394	dbgs() << format(
395	"Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
396	insn->vectorExtensionPrefix[`0`], insn->vectorExtensionPrefix[`1`],
397	insn->vectorExtensionPrefix[`2`], insn->vectorExtensionPrefix[`3`]));
398	}
399	} else if (byte == `0xc4`) {
400	uint8_t byte1;
401	if (peek(insn, byte&: byte1)) {
402	LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
403	return -`1`;
404	}
405
406	if (insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)
407	insn->vectorExtensionType = TYPE_VEX_3B;
408	else
409	--insn->readerCursor;
410
411	if (insn->vectorExtensionType == TYPE_VEX_3B) {
412	insn->vectorExtensionPrefix[`0`] = byte;
413	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
414	consume(insn, ptr&: insn->vectorExtensionPrefix[`2`]);
415
416	// We simulate the REX prefix for simplicity's sake
417
418	if (insn->mode == MODE_64BIT)
419	insn->rexPrefix = `0x40` \|
420	(wFromVEX3of3(insn->vectorExtensionPrefix[`2`]) << `3`) \|
421	(rFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `2`) \|
422	(xFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `1`) \|
423	(bFromVEX2of3(insn->vectorExtensionPrefix[`1`]) << `0`);
424
425	LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
426	insn->vectorExtensionPrefix[`0`],
427	insn->vectorExtensionPrefix[`1`],
428	insn->vectorExtensionPrefix[`2`]));
429	}
430	} else if (byte == `0xc5`) {
431	uint8_t byte1;
432	if (peek(insn, byte&: byte1)) {
433	LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
434	return -`1`;
435	}
436
437	if (insn->mode == MODE_64BIT \|\| (byte1 & `0xc0`) == `0xc0`)
438	insn->vectorExtensionType = TYPE_VEX_2B;
439	else
440	--insn->readerCursor;
441
442	if (insn->vectorExtensionType == TYPE_VEX_2B) {
443	insn->vectorExtensionPrefix[`0`] = byte;
444	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
445
446	if (insn->mode == MODE_64BIT)
447	insn->rexPrefix =
448	`0x40` \| (rFromVEX2of2(insn->vectorExtensionPrefix[`1`]) << `2`);
449
450	switch (ppFromVEX2of2(insn->vectorExtensionPrefix[`1`])) {
451	default:
452	break;
453	case VEX_PREFIX_66:
454	insn->hasOpSize = true;
455	break;
456	}
457
458	LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
459	insn->vectorExtensionPrefix[`0`],
460	insn->vectorExtensionPrefix[`1`]));
461	}
462	} else if (byte == `0x8f`) {
463	uint8_t byte1;
464	if (peek(insn, byte&: byte1)) {
465	LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
466	return -`1`;
467	}
468
469	if ((byte1 & `0x38`) != `0x0`) // 0 in these 3 bits is a POP instruction.
470	insn->vectorExtensionType = TYPE_XOP;
471	else
472	--insn->readerCursor;
473
474	if (insn->vectorExtensionType == TYPE_XOP) {
475	insn->vectorExtensionPrefix[`0`] = byte;
476	consume(insn, ptr&: insn->vectorExtensionPrefix[`1`]);
477	consume(insn, ptr&: insn->vectorExtensionPrefix[`2`]);
478
479	// We simulate the REX prefix for simplicity's sake
480
481	if (insn->mode == MODE_64BIT)
482	insn->rexPrefix = `0x40` \|
483	(wFromXOP3of3(insn->vectorExtensionPrefix[`2`]) << `3`) \|
484	(rFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `2`) \|
485	(xFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `1`) \|
486	(bFromXOP2of3(insn->vectorExtensionPrefix[`1`]) << `0`);
487
488	switch (ppFromXOP3of3(insn->vectorExtensionPrefix[`2`])) {
489	default:
490	break;
491	case VEX_PREFIX_66:
492	insn->hasOpSize = true;
493	break;
494	}
495
496	LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
497	insn->vectorExtensionPrefix[`0`],
498	insn->vectorExtensionPrefix[`1`],
499	insn->vectorExtensionPrefix[`2`]));
500	}
501	} else if (isREX2(insn, prefix: byte)) {
502	uint8_t byte1;
503	if (peek(insn, byte&: byte1)) {
504	LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
505	return -`1`;
506	}
507	insn->rex2ExtensionPrefix[`0`] = byte;
508	consume(insn, ptr&: insn->rex2ExtensionPrefix[`1`]);
509
510	// We simulate the REX prefix for simplicity's sake
511	insn->rexPrefix = `0x40` \| (wFromREX2(insn->rex2ExtensionPrefix[`1`]) << `3`) \|
512	(rFromREX2(insn->rex2ExtensionPrefix[`1`]) << `2`) \|
513	(xFromREX2(insn->rex2ExtensionPrefix[`1`]) << `1`) \|
514	(bFromREX2(insn->rex2ExtensionPrefix[`1`]) << `0`);
515	LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
516	insn->rex2ExtensionPrefix[`0`],
517	insn->rex2ExtensionPrefix[`1`]));
518	} else
519	--insn->readerCursor;
520
521	if (insn->mode == MODE_16BIT) {
522	insn->registerSize = (insn->hasOpSize ? `4` : `2`);
523	insn->addressSize = (insn->hasAdSize ? `4` : `2`);
524	insn->displacementSize = (insn->hasAdSize ? `4` : `2`);
525	insn->immediateSize = (insn->hasOpSize ? `4` : `2`);
526	} else if (insn->mode == MODE_32BIT) {
527	insn->registerSize = (insn->hasOpSize ? `2` : `4`);
528	insn->addressSize = (insn->hasAdSize ? `2` : `4`);
529	insn->displacementSize = (insn->hasAdSize ? `2` : `4`);
530	insn->immediateSize = (insn->hasOpSize ? `2` : `4`);
531	} else if (insn->mode == MODE_64BIT) {
532	insn->displacementSize = `4`;
533	if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
534	insn->registerSize = `8`;
535	insn->addressSize = (insn->hasAdSize ? `4` : `8`);
536	insn->immediateSize = `4`;
537	insn->hasOpSize = false;
538	} else {
539	insn->registerSize = (insn->hasOpSize ? `2` : `4`);
540	insn->addressSize = (insn->hasAdSize ? `4` : `8`);
541	insn->immediateSize = (insn->hasOpSize ? `2` : `4`);
542	}
543	}
544
545	return `0`;
546	}
547
548	// Consumes the SIB byte to determine addressing information.
549	static int readSIB(struct InternalInstruction *insn) {
550	SIBBase sibBaseBase = SIB_BASE_NONE;
551	uint8_t index, base;
552
553	LLVM_DEBUG(dbgs() << "readSIB()");
554	switch (insn->addressSize) {
555	case `2`:
556	default:
557	llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
558	case `4`:
559	insn->sibIndexBase = SIB_INDEX_EAX;
560	sibBaseBase = SIB_BASE_EAX;
561	break;
562	case `8`:
563	insn->sibIndexBase = SIB_INDEX_RAX;
564	sibBaseBase = SIB_BASE_RAX;
565	break;
566	}
567
568	if (consume(insn, ptr&: insn->sib))
569	return -`1`;
570
571	index = indexFromSIB(insn->sib) \| (xFromREX(insn->rexPrefix) << `3`) \|
572	(x2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
573
574	if (index == `0x4`) {
575	insn->sibIndex = SIB_INDEX_NONE;
576	} else {
577	insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
578	}
579
580	insn->sibScale = `1` << scaleFromSIB(insn->sib);
581
582	base = baseFromSIB(insn->sib) \| (bFromREX(insn->rexPrefix) << `3`) \|
583	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
584
585	switch (base) {
586	case `0x5`:
587	case `0xd`:
588	switch (modFromModRM(insn->modRM)) {
589	case `0x0`:
590	insn->eaDisplacement = EA_DISP_32;
591	insn->sibBase = SIB_BASE_NONE;
592	break;
593	case `0x1`:
594	insn->eaDisplacement = EA_DISP_8;
595	insn->sibBase = (SIBBase)(sibBaseBase + base);
596	break;
597	case `0x2`:
598	insn->eaDisplacement = EA_DISP_32;
599	insn->sibBase = (SIBBase)(sibBaseBase + base);
600	break;
601	default:
602	llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
603	}
604	break;
605	default:
606	insn->sibBase = (SIBBase)(sibBaseBase + base);
607	break;
608	}
609
610	return `0`;
611	}
612
613	static int readDisplacement(struct InternalInstruction *insn) {
614	int8_t d8;
615	int16_t d16;
616	int32_t d32;
617	LLVM_DEBUG(dbgs() << "readDisplacement()");
618
619	insn->displacementOffset = insn->readerCursor - insn->startLocation;
620	switch (insn->eaDisplacement) {
621	case EA_DISP_NONE:
622	break;
623	case EA_DISP_8:
624	if (consume(insn, ptr&: d8))
625	return -`1`;
626	insn->displacement = d8;
627	break;
628	case EA_DISP_16:
629	if (consume(insn, ptr&: d16))
630	return -`1`;
631	insn->displacement = d16;
632	break;
633	case EA_DISP_32:
634	if (consume(insn, ptr&: d32))
635	return -`1`;
636	insn->displacement = d32;
637	break;
638	}
639
640	return `0`;
641	}
642
643	// Consumes all addressing information (ModR/M byte, SIB byte, and displacement.
644	static int readModRM(struct InternalInstruction *insn) {
645	uint8_t mod, rm, reg;
646	LLVM_DEBUG(dbgs() << "readModRM()");
647
648	if (insn->consumedModRM)
649	return `0`;
650
651	if (consume(insn, ptr&: insn->modRM))
652	return -`1`;
653	insn->consumedModRM = true;
654
655	mod = modFromModRM(insn->modRM);
656	rm = rmFromModRM(insn->modRM);
657	reg = regFromModRM(insn->modRM);
658
659	// This goes by insn->registerSize to pick the correct register, which messes
660	// up if we're using (say) XMM or 8-bit register operands. That gets fixed in
661	// fixupReg().
662	switch (insn->registerSize) {
663	case `2`:
664	insn->regBase = MODRM_REG_AX;
665	insn->eaRegBase = EA_REG_AX;
666	break;
667	case `4`:
668	insn->regBase = MODRM_REG_EAX;
669	insn->eaRegBase = EA_REG_EAX;
670	break;
671	case `8`:
672	insn->regBase = MODRM_REG_RAX;
673	insn->eaRegBase = EA_REG_RAX;
674	break;
675	}
676
677	reg \|= (rFromREX(insn->rexPrefix) << `3`) \|
678	(r2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
679	rm \|= (bFromREX(insn->rexPrefix) << `3`) \|
680	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`);
681
682	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
683	reg \|= r2FromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`;
684
685	insn->reg = (Reg)(insn->regBase + reg);
686
687	switch (insn->addressSize) {
688	case `2`: {
689	EABase eaBaseBase = EA_BASE_BX_SI;
690
691	switch (mod) {
692	case `0x0`:
693	if (rm == `0x6`) {
694	insn->eaBase = EA_BASE_NONE;
695	insn->eaDisplacement = EA_DISP_16;
696	if (readDisplacement(insn))
697	return -`1`;
698	} else {
699	insn->eaBase = (EABase)(eaBaseBase + rm);
700	insn->eaDisplacement = EA_DISP_NONE;
701	}
702	break;
703	case `0x1`:
704	insn->eaBase = (EABase)(eaBaseBase + rm);
705	insn->eaDisplacement = EA_DISP_8;
706	insn->displacementSize = `1`;
707	if (readDisplacement(insn))
708	return -`1`;
709	break;
710	case `0x2`:
711	insn->eaBase = (EABase)(eaBaseBase + rm);
712	insn->eaDisplacement = EA_DISP_16;
713	if (readDisplacement(insn))
714	return -`1`;
715	break;
716	case `0x3`:
717	insn->eaBase = (EABase)(insn->eaRegBase + rm);
718	if (readDisplacement(insn))
719	return -`1`;
720	break;
721	}
722	break;
723	}
724	case `4`:
725	case `8`: {
726	EABase eaBaseBase = (insn->addressSize == `4` ? EA_BASE_EAX : EA_BASE_RAX);
727
728	switch (mod) {
729	case `0x0`:
730	insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
731	// In determining whether RIP-relative mode is used (rm=5),
732	// or whether a SIB byte is present (rm=4),
733	// the extension bits (REX.b and EVEX.x) are ignored.
734	switch (rm & `7`) {
735	case `0x4`: // SIB byte is present
736	insn->eaBase = (insn->addressSize == `4` ? EA_BASE_sib : EA_BASE_sib64);
737	if (readSIB(insn) \|\| readDisplacement(insn))
738	return -`1`;
739	break;
740	case `0x5`: // RIP-relative
741	insn->eaBase = EA_BASE_NONE;
742	insn->eaDisplacement = EA_DISP_32;
743	if (readDisplacement(insn))
744	return -`1`;
745	break;
746	default:
747	insn->eaBase = (EABase)(eaBaseBase + rm);
748	break;
749	}
750	break;
751	case `0x1`:
752	insn->displacementSize = `1`;
753	[[fallthrough]];
754	case `0x2`:
755	insn->eaDisplacement = (mod == `0x1` ? EA_DISP_8 : EA_DISP_32);
756	switch (rm & `7`) {
757	case `0x4`: // SIB byte is present
758	insn->eaBase = EA_BASE_sib;
759	if (readSIB(insn) \|\| readDisplacement(insn))
760	return -`1`;
761	break;
762	default:
763	insn->eaBase = (EABase)(eaBaseBase + rm);
764	if (readDisplacement(insn))
765	return -`1`;
766	break;
767	}
768	break;
769	case `0x3`:
770	insn->eaDisplacement = EA_DISP_NONE;
771	insn->eaBase = (EABase)(insn->eaRegBase + rm);
772	break;
773	}
774	break;
775	}
776	} // switch (insn->addressSize)
777
778	return `0`;
779	}
780
// Defines a static function `name` that maps a register index to the
// enumerator appropriate for an operand type.
//
// @param name   - Name of the generated function.
// @param base   - Expression yielding the base enumerator used for TYPE_Rv.
// @param prefix - Enumerator prefix (e.g. MODRM_REG or EA_REG) to which
//                 register names such as _AL, _XMM0 or _K0 are appended.
//
// The generated function sets *valid to 1, or to 0 when the index is out of
// range for the register class or the operand type is not a register type.
// No comments are placed inside the macro body because a `//` comment on a
// continued line would swallow the lines that follow it.
#define GENERIC_FIXUP_FUNC(name, base, prefix)                                 \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      if (insn->rexPrefix && index >= 4 && index <= 7)                         \
        return prefix##_SPL + (index - 4);                                     \
      else                                                                     \
        return prefix##_AL + index;                                            \
    case TYPE_R16:                                                             \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      return prefix##_RAX + index;                                             \
    case TYPE_ZMM:                                                             \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_YMM:                                                             \
      return prefix##_YMM0 + index;                                            \
    case TYPE_XMM:                                                             \
      return prefix##_XMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0 + index;                                            \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5)                                                     \
        *valid = 0;                                                            \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_CR0 + index;                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    }                                                                          \
  }
844
845	// Consult an operand type to determine the meaning of the reg or R/M field. If
846	// the operand is an XMM operand, for example, an operand would be XMM0 instead
847	// of AX, which readModRM() would otherwise misinterpret it as.
848	//
849	// @param insn - The instruction containing the operand.
850	// @param type - The operand type.
851	// @param index - The existing value of the field as reported by readModRM().
852	// @param valid - The address of a uint8_t. The target is set to 1 if the
853	// field is valid for the register class; 0 if not.
854	// @return - The proper value.
855	GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
856	GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
857
858	// Consult an operand specifier to determine which of the fixupValue functions*
859	// to use in correcting readModRM()'ss interpretation.
860	//
861	// @param insn - See fixupValue().*
862	// @param op - The operand specifier.
863	// @return - 0 if fixup was successful; -1 if the register returned was
864	// invalid for its class.
865	static int fixupReg(struct InternalInstruction *insn,
866	const struct OperandSpecifier *op) {
867	uint8_t valid;
868	LLVM_DEBUG(dbgs() << "fixupReg()");
869
870	switch ((OperandEncoding)op->encoding) {
871	default:
872	debug("Expected a REG or R/M encoding in fixupReg");
873	return -`1`;
874	case ENCODING_VVVV:
875	insn->vvvv =
876	(Reg)fixupRegValue(insn, type: (OperandType)op->type, index: insn->vvvv, valid: &valid);
877	if (!valid)
878	return -`1`;
879	break;
880	case ENCODING_REG:
881	insn->reg = (Reg)fixupRegValue(insn, type: (OperandType)op->type,
882	index: insn->reg - insn->regBase, valid: &valid);
883	if (!valid)
884	return -`1`;
885	break;
886	CASE_ENCODING_RM:
887	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
888	modFromModRM(insn->modRM) == `3`) {
889	// EVEX_X can extend the register id to 32 for a non-GPR register that is
890	// encoded in RM.
891	// mode : MODE_64_BIT
892	// Only 8 vector registers are available in 32 bit mode
893	// mod : 3
894	// RM encodes a register
895	switch (op->type) {
896	case TYPE_Rv:
897	case TYPE_R8:
898	case TYPE_R16:
899	case TYPE_R32:
900	case TYPE_R64:
901	break;
902	default:
903	insn->eaBase =
904	(EABase)(insn->eaBase +
905	(xFromEVEX2of4(insn->vectorExtensionPrefix[`1`]) << `4`));
906	break;
907	}
908	}
909	[[fallthrough]];
910	case ENCODING_SIB:
911	if (insn->eaBase >= insn->eaRegBase) {
912	insn->eaBase = (EABase)fixupRMValue(
913	insn, type: (OperandType)op->type, index: insn->eaBase - insn->eaRegBase, valid: &valid);
914	if (!valid)
915	return -`1`;
916	}
917	break;
918	}
919
920	return `0`;
921	}
922
923	// Read the opcode (except the ModR/M byte in the case of extended or escape
924	// opcodes).
925	static bool readOpcode(struct InternalInstruction *insn) {
926	uint8_t current;
927	LLVM_DEBUG(dbgs() << "readOpcode()");
928
929	insn->opcodeType = ONEBYTE;
930	if (insn->vectorExtensionType == TYPE_EVEX) {
931	switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[`1`])) {
932	default:
933	LLVM_DEBUG(
934	dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
935	mmmFromEVEX2of4(insn->vectorExtensionPrefix[`1`])));
936	return true;
937	case VEX_LOB_0F:
938	insn->opcodeType = TWOBYTE;
939	return consume(insn, ptr&: insn->opcode);
940	case VEX_LOB_0F38:
941	insn->opcodeType = THREEBYTE_38;
942	return consume(insn, ptr&: insn->opcode);
943	case VEX_LOB_0F3A:
944	insn->opcodeType = THREEBYTE_3A;
945	return consume(insn, ptr&: insn->opcode);
946	case VEX_LOB_MAP4:
947	insn->opcodeType = MAP4;
948	return consume(insn, ptr&: insn->opcode);
949	case VEX_LOB_MAP5:
950	insn->opcodeType = MAP5;
951	return consume(insn, ptr&: insn->opcode);
952	case VEX_LOB_MAP6:
953	insn->opcodeType = MAP6;
954	return consume(insn, ptr&: insn->opcode);
955	case VEX_LOB_MAP7:
956	insn->opcodeType = MAP7;
957	return consume(insn, ptr&: insn->opcode);
958	}
959	} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
960	switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])) {
961	default:
962	LLVM_DEBUG(
963	dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
964	mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])));
965	return true;
966	case VEX_LOB_0F:
967	insn->opcodeType = TWOBYTE;
968	return consume(insn, ptr&: insn->opcode);
969	case VEX_LOB_0F38:
970	insn->opcodeType = THREEBYTE_38;
971	return consume(insn, ptr&: insn->opcode);
972	case VEX_LOB_0F3A:
973	insn->opcodeType = THREEBYTE_3A;
974	return consume(insn, ptr&: insn->opcode);
975	case VEX_LOB_MAP5:
976	insn->opcodeType = MAP5;
977	return consume(insn, ptr&: insn->opcode);
978	case VEX_LOB_MAP6:
979	insn->opcodeType = MAP6;
980	return consume(insn, ptr&: insn->opcode);
981	case VEX_LOB_MAP7:
982	insn->opcodeType = MAP7;
983	return consume(insn, ptr&: insn->opcode);
984	}
985	} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
986	insn->opcodeType = TWOBYTE;
987	return consume(insn, ptr&: insn->opcode);
988	} else if (insn->vectorExtensionType == TYPE_XOP) {
989	switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[`1`])) {
990	default:
991	LLVM_DEBUG(
992	dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
993	mmmmmFromVEX2of3(insn->vectorExtensionPrefix[`1`])));
994	return true;
995	case XOP_MAP_SELECT_8:
996	insn->opcodeType = XOP8_MAP;
997	return consume(insn, ptr&: insn->opcode);
998	case XOP_MAP_SELECT_9:
999	insn->opcodeType = XOP9_MAP;
1000	return consume(insn, ptr&: insn->opcode);
1001	case XOP_MAP_SELECT_A:
1002	insn->opcodeType = XOPA_MAP;
1003	return consume(insn, ptr&: insn->opcode);
1004	}
1005	} else if (mFromREX2(insn->rex2ExtensionPrefix[`1`])) {
1006	// m bit indicates opcode map 1
1007	insn->opcodeType = TWOBYTE;
1008	return consume(insn, ptr&: insn->opcode);
1009	}
1010
1011	if (consume(insn, ptr&: current))
1012	return true;
1013
1014	if (current == `0x0f`) {
1015	LLVM_DEBUG(
1016	dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
1017	if (consume(insn, ptr&: current))
1018	return true;
1019
1020	if (current == `0x38`) {
1021	LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1022	current));
1023	if (consume(insn, ptr&: current))
1024	return true;
1025
1026	insn->opcodeType = THREEBYTE_38;
1027	} else if (current == `0x3a`) {
1028	LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1029	current));
1030	if (consume(insn, ptr&: current))
1031	return true;
1032
1033	insn->opcodeType = THREEBYTE_3A;
1034	} else if (current == `0x0f`) {
1035	LLVM_DEBUG(
1036	dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
1037
1038	// Consume operands before the opcode to comply with the 3DNow encoding
1039	if (readModRM(insn))
1040	return true;
1041
1042	if (consume(insn, ptr&: current))
1043	return true;
1044
1045	insn->opcodeType = THREEDNOW_MAP;
1046	} else {
1047	LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
1048	insn->opcodeType = TWOBYTE;
1049	}
1050	} else if (insn->mandatoryPrefix)
1051	// The opcode with mandatory prefix must start with opcode escape.
1052	// If not it's legacy repeat prefix
1053	insn->mandatoryPrefix = `0`;
1054
1055	// At this point we have consumed the full opcode.
1056	// Anything we consume from here on must be unconsumed.
1057	insn->opcode = current;
1058
1059	return false;
1060	}
1061
// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
//
// The two names must have the same length and match character by character,
// except that at any position the following substitutions are accepted:
//   'Q' or 'L' in orig  ->  'W' in equiv
//   '6' or '3' in orig  ->  '1' in equiv   (first digit of 64/32 -> 16)
//   '4' or '2' in orig  ->  '6' in equiv   (second digit of 64/32 -> 16)
//
// @param orig  - NUL-terminated name of the 32-bit or 64-bit instruction.
// @param equiv - NUL-terminated name of the candidate 16-bit instruction.
// @return      - true if equiv matches orig under the rules above.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  for (int i = 0;; i++) {
    // Both strings ended together: every position matched.
    if (orig[i] == '\0' && equiv[i] == '\0')
      return true;
    // Exactly one string ended: the lengths differ.
    if (orig[i] == '\0' || equiv[i] == '\0')
      return false;
    if (orig[i] != equiv[i]) {
      if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
        continue;
      if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
        continue;
      if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
        continue;
      return false;
    }
  }
}
1080
// Determine whether this instruction is a 64-bit instruction.
//
// The check is purely textual: the name is treated as 64-bit if it contains
// the substring "64" anywhere.
//
// @param name - NUL-terminated instruction name.
// @return     - true if "64" occurs in name; false otherwise.
static bool is64Bit(const char *name) {
  for (int i = 0;; ++i) {
    if (name[i] == '\0')
      return false;
    // name[i] is not the terminator here, so reading name[i + 1] stays within
    // the string (at worst it reads the terminator).
    if (name[i] == '6' && name[i + 1] == '4')
      return true;
  }
}
1090
1091	// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1092	// for extended and escape opcodes, and using a supplied attribute mask.
1093	static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1094	struct InternalInstruction *insn,
1095	uint16_t attrMask) {
1096	auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1097	const ContextDecision *decision;
1098	switch (insn->opcodeType) {
1099	case ONEBYTE:
1100	decision = &ONEBYTE_SYM;
1101	break;
1102	case TWOBYTE:
1103	decision = &TWOBYTE_SYM;
1104	break;
1105	case THREEBYTE_38:
1106	decision = &THREEBYTE38_SYM;
1107	break;
1108	case THREEBYTE_3A:
1109	decision = &THREEBYTE3A_SYM;
1110	break;
1111	case XOP8_MAP:
1112	decision = &XOP8_MAP_SYM;
1113	break;
1114	case XOP9_MAP:
1115	decision = &XOP9_MAP_SYM;
1116	break;
1117	case XOPA_MAP:
1118	decision = &XOPA_MAP_SYM;
1119	break;
1120	case THREEDNOW_MAP:
1121	decision = &THREEDNOW_MAP_SYM;
1122	break;
1123	case MAP4:
1124	decision = &MAP4_SYM;
1125	break;
1126	case MAP5:
1127	decision = &MAP5_SYM;
1128	break;
1129	case MAP6:
1130	decision = &MAP6_SYM;
1131	break;
1132	case MAP7:
1133	decision = &MAP7_SYM;
1134	break;
1135	}
1136
1137	if (decision->opcodeDecisions[insnCtx]
1138	.modRMDecisions[insn->opcode]
1139	.modrm_type != MODRM_ONEENTRY) {
1140	if (readModRM(insn))
1141	return -`1`;
1142	*instructionID =
1143	decode(type: insn->opcodeType, insnContext: insnCtx, opcode: insn->opcode, modRM: insn->modRM);
1144	} else {
1145	*instructionID = decode(type: insn->opcodeType, insnContext: insnCtx, opcode: insn->opcode, modRM: `0`);
1146	}
1147
1148	return `0`;
1149	}
1150
1151	static bool isCCMPOrCTEST(InternalInstruction *insn) {
1152	if (insn->opcodeType != MAP4)
1153	return false;
1154	if (insn->opcode == `0x83` && regFromModRM(insn->modRM) == `7`)
1155	return true;
1156	switch (insn->opcode & `0xfe`) {
1157	default:
1158	return false;
1159	case `0x38`:
1160	case `0x3a`:
1161	case `0x84`:
1162	return true;
1163	case `0x80`:
1164	return regFromModRM(insn->modRM) == `7`;
1165	case `0xf6`:
1166	return regFromModRM(insn->modRM) == `0`;
1167	}
1168	}
1169
1170	static bool isNF(InternalInstruction *insn) {
1171	if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1172	return false;
1173	if (insn->opcodeType == MAP4)
1174	return true;
1175	// Below NF instructions are not in map4.
1176	if (insn->opcodeType == THREEBYTE_38 &&
1177	ppFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) == VEX_PREFIX_NONE) {
1178	switch (insn->opcode) {
1179	case `0xf2`: // ANDN
1180	case `0xf3`: // BLSI, BLSR, BLSMSK
1181	case `0xf5`: // BZHI
1182	case `0xf7`: // BEXTR
1183	return true;
1184	default:
1185	break;
1186	}
1187	}
1188	return false;
1189	}
1190
1191	// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1192	// for extended and escape opcodes. Determines the attributes and context for
1193	// the instruction before doing so.
1194	static int getInstructionID(struct InternalInstruction *insn,
1195	const MCInstrInfo *mii) {
1196	uint16_t attrMask;
1197	uint16_t instructionID;
1198
1199	LLVM_DEBUG(dbgs() << "getID()");
1200
1201	attrMask = ATTR_NONE;
1202
1203	if (insn->mode == MODE_64BIT)
1204	attrMask \|= ATTR_64BIT;
1205
1206	if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1207	attrMask \|= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1208
1209	if (insn->vectorExtensionType == TYPE_EVEX) {
1210	switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[`2`])) {
1211	case VEX_PREFIX_66:
1212	attrMask \|= ATTR_OPSIZE;
1213	break;
1214	case VEX_PREFIX_F3:
1215	attrMask \|= ATTR_XS;
1216	break;
1217	case VEX_PREFIX_F2:
1218	attrMask \|= ATTR_XD;
1219	break;
1220	}
1221
1222	if (zFromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1223	attrMask \|= ATTR_EVEXKZ;
1224	if (isNF(insn) && !readModRM(insn) &&
1225	!isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
1226	attrMask \|= ATTR_EVEXNF;
1227	// aaa is not used a opmask in MAP4
1228	else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[`3`]) &&
1229	(insn->opcodeType != MAP4))
1230	attrMask \|= ATTR_EVEXK;
1231	if (bFromEVEX4of4(insn->vectorExtensionPrefix[`3`])) {
1232	attrMask \|= ATTR_EVEXB;
1233	if (uFromEVEX3of4(insn->vectorExtensionPrefix[`2`]) && !readModRM(insn) &&
1234	modFromModRM(insn->modRM) == `3`)
1235	attrMask \|= ATTR_EVEXU;
1236	}
1237	if (lFromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1238	attrMask \|= ATTR_VEXL;
1239	if (l2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1240	attrMask \|= ATTR_EVEXL2;
1241	} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1242	switch (ppFromVEX3of3(insn->vectorExtensionPrefix[`2`])) {
1243	case VEX_PREFIX_66:
1244	attrMask \|= ATTR_OPSIZE;
1245	break;
1246	case VEX_PREFIX_F3:
1247	attrMask \|= ATTR_XS;
1248	break;
1249	case VEX_PREFIX_F2:
1250	attrMask \|= ATTR_XD;
1251	break;
1252	}
1253
1254	if (lFromVEX3of3(insn->vectorExtensionPrefix[`2`]))
1255	attrMask \|= ATTR_VEXL;
1256	} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1257	switch (ppFromVEX2of2(insn->vectorExtensionPrefix[`1`])) {
1258	case VEX_PREFIX_66:
1259	attrMask \|= ATTR_OPSIZE;
1260	if (insn->hasAdSize)
1261	attrMask \|= ATTR_ADSIZE;
1262	break;
1263	case VEX_PREFIX_F3:
1264	attrMask \|= ATTR_XS;
1265	break;
1266	case VEX_PREFIX_F2:
1267	attrMask \|= ATTR_XD;
1268	break;
1269	}
1270
1271	if (lFromVEX2of2(insn->vectorExtensionPrefix[`1`]))
1272	attrMask \|= ATTR_VEXL;
1273	} else if (insn->vectorExtensionType == TYPE_XOP) {
1274	switch (ppFromXOP3of3(insn->vectorExtensionPrefix[`2`])) {
1275	case VEX_PREFIX_66:
1276	attrMask \|= ATTR_OPSIZE;
1277	break;
1278	case VEX_PREFIX_F3:
1279	attrMask \|= ATTR_XS;
1280	break;
1281	case VEX_PREFIX_F2:
1282	attrMask \|= ATTR_XD;
1283	break;
1284	}
1285
1286	if (lFromXOP3of3(insn->vectorExtensionPrefix[`2`]))
1287	attrMask \|= ATTR_VEXL;
1288	} else {
1289	return -`1`;
1290	}
1291	} else if (!insn->mandatoryPrefix) {
1292	// If we don't have mandatory prefix we should use legacy prefixes here
1293	if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1294	attrMask \|= ATTR_OPSIZE;
1295	if (insn->hasAdSize)
1296	attrMask \|= ATTR_ADSIZE;
1297	if (insn->opcodeType == ONEBYTE) {
1298	if (insn->repeatPrefix == `0xf3` && (insn->opcode == `0x90`))
1299	// Special support for PAUSE
1300	attrMask \|= ATTR_XS;
1301	} else {
1302	if (insn->repeatPrefix == `0xf2`)
1303	attrMask \|= ATTR_XD;
1304	else if (insn->repeatPrefix == `0xf3`)
1305	attrMask \|= ATTR_XS;
1306	}
1307	} else {
1308	switch (insn->mandatoryPrefix) {
1309	case `0xf2`:
1310	attrMask \|= ATTR_XD;
1311	break;
1312	case `0xf3`:
1313	attrMask \|= ATTR_XS;
1314	break;
1315	case `0x66`:
1316	if (insn->mode != MODE_16BIT)
1317	attrMask \|= ATTR_OPSIZE;
1318	if (insn->hasAdSize)
1319	attrMask \|= ATTR_ADSIZE;
1320	break;
1321	case `0x67`:
1322	attrMask \|= ATTR_ADSIZE;
1323	break;
1324	}
1325	}
1326
1327	if (insn->rexPrefix & `0x08`) {
1328	attrMask \|= ATTR_REXW;
1329	attrMask &= ~ATTR_ADSIZE;
1330	}
1331
1332	// Absolute jump and pushp/popp need special handling
1333	if (insn->rex2ExtensionPrefix[`0`] == `0xd5` && insn->opcodeType == ONEBYTE &&
1334	(insn->opcode == `0xA1` \|\| (insn->opcode & `0xf0`) == `0x50`))
1335	attrMask \|= ATTR_REX2;
1336
1337	if (insn->mode == MODE_16BIT) {
1338	// JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1339	// of the AdSize prefix is inverted w.r.t. 32-bit mode.
1340	if (insn->opcodeType == ONEBYTE && insn->opcode == `0xE3`)
1341	attrMask ^= ATTR_ADSIZE;
1342	// If we're in 16-bit mode and this is one of the relative jumps and opsize
1343	// prefix isn't present, we need to force the opsize attribute since the
1344	// prefix is inverted relative to 32-bit mode.
1345	if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1346	(insn->opcode == `0xE8` \|\| insn->opcode == `0xE9`))
1347	attrMask \|= ATTR_OPSIZE;
1348
1349	if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1350	insn->opcode >= `0x80` && insn->opcode <= `0x8F`)
1351	attrMask \|= ATTR_OPSIZE;
1352	}
1353
1354
1355	if (getInstructionIDWithAttrMask(instructionID: &instructionID, insn, attrMask))
1356	return -`1`;
1357
1358	// The following clauses compensate for limitations of the tables.
1359
1360	if (insn->mode != MODE_64BIT &&
1361	insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1362	// The tables can't distinquish between cases where the W-bit is used to
1363	// select register size and cases where its a required part of the opcode.
1364	if ((insn->vectorExtensionType == TYPE_EVEX &&
1365	wFromEVEX3of4(insn->vectorExtensionPrefix[`2`])) \|\|
1366	(insn->vectorExtensionType == TYPE_VEX_3B &&
1367	wFromVEX3of3(insn->vectorExtensionPrefix[`2`])) \|\|
1368	(insn->vectorExtensionType == TYPE_XOP &&
1369	wFromXOP3of3(insn->vectorExtensionPrefix[`2`]))) {
1370
1371	uint16_t instructionIDWithREXW;
1372	if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithREXW, insn,
1373	attrMask: attrMask \| ATTR_REXW)) {
1374	insn->instructionID = instructionID;
1375	insn->spec = &INSTRUCTIONS_SYM[instructionID];
1376	return `0`;
1377	}
1378
1379	auto SpecName = mii->getName(Opcode: instructionIDWithREXW);
1380	// If not a 64-bit instruction. Switch the opcode.
1381	if (!is64Bit(name: SpecName.data())) {
1382	insn->instructionID = instructionIDWithREXW;
1383	insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1384	return `0`;
1385	}
1386	}
1387	}
1388
1389	// Absolute moves, umonitor, and movdir64b need special handling.
1390	// -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1391	// inverted w.r.t.
1392	// -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1393	// any position.
1394	if ((insn->opcodeType == ONEBYTE && ((insn->opcode & `0xFC`) == `0xA0`)) \|\|
1395	(insn->opcodeType == TWOBYTE && (insn->opcode == `0xAE`)) \|\|
1396	(insn->opcodeType == THREEBYTE_38 && insn->opcode == `0xF8`) \|\|
1397	(insn->opcodeType == MAP4 && insn->opcode == `0xF8`)) {
1398	// Make sure we observed the prefixes in any position.
1399	if (insn->hasAdSize)
1400	attrMask \|= ATTR_ADSIZE;
1401	if (insn->hasOpSize)
1402	attrMask \|= ATTR_OPSIZE;
1403
1404	// In 16-bit, invert the attributes.
1405	if (insn->mode == MODE_16BIT) {
1406	attrMask ^= ATTR_ADSIZE;
1407
1408	// The OpSize attribute is only valid with the absolute moves.
1409	if (insn->opcodeType == ONEBYTE && ((insn->opcode & `0xFC`) == `0xA0`))
1410	attrMask ^= ATTR_OPSIZE;
1411	}
1412
1413	if (getInstructionIDWithAttrMask(instructionID: &instructionID, insn, attrMask))
1414	return -`1`;
1415
1416	insn->instructionID = instructionID;
1417	insn->spec = &INSTRUCTIONS_SYM[instructionID];
1418	return `0`;
1419	}
1420
1421	if ((insn->mode == MODE_16BIT \|\| insn->hasOpSize) &&
1422	!(attrMask & ATTR_OPSIZE)) {
1423	// The instruction tables make no distinction between instructions that
1424	// allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1425	// particular spot (i.e., many MMX operations). In general we're
1426	// conservative, but in the specific case where OpSize is present but not in
1427	// the right place we check if there's a 16-bit operation.
1428	const struct InstructionSpecifier *spec;
1429	uint16_t instructionIDWithOpsize;
1430	llvm::StringRef specName, specWithOpSizeName;
1431
1432	spec = &INSTRUCTIONS_SYM[instructionID];
1433
1434	if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithOpsize, insn,
1435	attrMask: attrMask \| ATTR_OPSIZE)) {
1436	// ModRM required with OpSize but not present. Give up and return the
1437	// version without OpSize set.
1438	insn->instructionID = instructionID;
1439	insn->spec = spec;
1440	return `0`;
1441	}
1442
1443	specName = mii->getName(Opcode: instructionID);
1444	specWithOpSizeName = mii->getName(Opcode: instructionIDWithOpsize);
1445
1446	if (is16BitEquivalent(orig: specName.data(), equiv: specWithOpSizeName.data()) &&
1447	(insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1448	insn->instructionID = instructionIDWithOpsize;
1449	insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1450	} else {
1451	insn->instructionID = instructionID;
1452	insn->spec = spec;
1453	}
1454	return `0`;
1455	}
1456
1457	if (insn->opcodeType == ONEBYTE && insn->opcode == `0x90` &&
1458	insn->rexPrefix & `0x01`) {
1459	// NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1460	// as XCHG %r8, %eax.
1461	const struct InstructionSpecifier *spec;
1462	uint16_t instructionIDWithNewOpcode;
1463	const struct InstructionSpecifier *specWithNewOpcode;
1464
1465	spec = &INSTRUCTIONS_SYM[instructionID];
1466
1467	// Borrow opcode from one of the other XCHGar opcodes
1468	insn->opcode = `0x91`;
1469
1470	if (getInstructionIDWithAttrMask(instructionID: &instructionIDWithNewOpcode, insn,
1471	attrMask)) {
1472	insn->opcode = `0x90`;
1473
1474	insn->instructionID = instructionID;
1475	insn->spec = spec;
1476	return `0`;
1477	}
1478
1479	specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1480
1481	// Change back
1482	insn->opcode = `0x90`;
1483
1484	insn->instructionID = instructionIDWithNewOpcode;
1485	insn->spec = specWithNewOpcode;
1486
1487	return `0`;
1488	}
1489
1490	insn->instructionID = instructionID;
1491	insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1492
1493	return `0`;
1494	}
1495
1496	// Read an operand from the opcode field of an instruction and interprets it
1497	// appropriately given the operand width. Handles AddRegFrm instructions.
1498	//
1499	// @param insn - the instruction whose opcode field is to be read.
1500	// @param size - The width (in bytes) of the register being specified.
1501	// 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1502	// RAX.
1503	// @return - 0 on success; nonzero otherwise.
1504	static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1505	LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1506
1507	if (size == `0`)
1508	size = insn->registerSize;
1509
1510	auto setOpcodeRegister = [&](unsigned base) {
1511	insn->opcodeRegister =
1512	(Reg)(base + ((bFromREX(insn->rexPrefix) << `3`) \|
1513	(b2FromREX2(insn->rex2ExtensionPrefix[`1`]) << `4`) \|
1514	(insn->opcode & `7`)));
1515	};
1516
1517	switch (size) {
1518	case `1`:
1519	setOpcodeRegister (MODRM_REG_AL);
1520	if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + `0x4` &&
1521	insn->opcodeRegister < MODRM_REG_AL + `0x8`) {
1522	insn->opcodeRegister =
1523	(Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - `4`));
1524	}
1525
1526	break;
1527	case `2`:
1528	setOpcodeRegister (MODRM_REG_AX);
1529	break;
1530	case `4`:
1531	setOpcodeRegister (MODRM_REG_EAX);
1532	break;
1533	case `8`:
1534	setOpcodeRegister (MODRM_REG_RAX);
1535	break;
1536	}
1537
1538	return `0`;
1539	}
1540
1541	// Consume an immediate operand from an instruction, given the desired operand
1542	// size.
1543	//
1544	// @param insn - The instruction whose operand is to be read.
1545	// @param size - The width (in bytes) of the operand.
1546	// @return - 0 if the immediate was successfully consumed; nonzero
1547	// otherwise.
1548	static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1549	uint8_t imm8;
1550	uint16_t imm16;
1551	uint32_t imm32;
1552	uint64_t imm64;
1553
1554	LLVM_DEBUG(dbgs() << "readImmediate()");
1555
1556	assert(insn->numImmediatesConsumed < `2` && "Already consumed two immediates");
1557
1558	insn->immediateSize = size;
1559	insn->immediateOffset = insn->readerCursor - insn->startLocation;
1560
1561	switch (size) {
1562	case `1`:
1563	if (consume(insn, ptr&: imm8))
1564	return -`1`;
1565	insn->immediates[insn->numImmediatesConsumed] = imm8;
1566	break;
1567	case `2`:
1568	if (consume(insn, ptr&: imm16))
1569	return -`1`;
1570	insn->immediates[insn->numImmediatesConsumed] = imm16;
1571	break;
1572	case `4`:
1573	if (consume(insn, ptr&: imm32))
1574	return -`1`;
1575	insn->immediates[insn->numImmediatesConsumed] = imm32;
1576	break;
1577	case `8`:
1578	if (consume(insn, ptr&: imm64))
1579	return -`1`;
1580	insn->immediates[insn->numImmediatesConsumed] = imm64;
1581	break;
1582	default:
1583	llvm_unreachable("invalid size");
1584	}
1585
1586	insn->numImmediatesConsumed++;
1587
1588	return `0`;
1589	}
1590
1591	// Consume vvvv from an instruction if it has a VEX prefix.
1592	static int readVVVV(struct InternalInstruction *insn) {
1593	LLVM_DEBUG(dbgs() << "readVVVV()");
1594
1595	int vvvv;
1596	if (insn->vectorExtensionType == TYPE_EVEX)
1597	vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]) << `4` \|
1598	vvvvFromEVEX3of4(insn->vectorExtensionPrefix[`2`]));
1599	else if (insn->vectorExtensionType == TYPE_VEX_3B)
1600	vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[`2`]);
1601	else if (insn->vectorExtensionType == TYPE_VEX_2B)
1602	vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[`1`]);
1603	else if (insn->vectorExtensionType == TYPE_XOP)
1604	vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[`2`]);
1605	else
1606	return -`1`;
1607
1608	if (insn->mode != MODE_64BIT)
1609	vvvv &= `0xf`; // Can only clear bit 4. Bit 3 must be cleared later.
1610
1611	insn->vvvv = static_cast<Reg>(vvvv);
1612	return `0`;
1613	}
1614
1615	// Read an mask register from the opcode field of an instruction.
1616	//
1617	// @param insn - The instruction whose opcode field is to be read.
1618	// @return - 0 on success; nonzero otherwise.
1619	static int readMaskRegister(struct InternalInstruction *insn) {
1620	LLVM_DEBUG(dbgs() << "readMaskRegister()");
1621
1622	if (insn->vectorExtensionType != TYPE_EVEX)
1623	return -`1`;
1624
1625	insn->writemask =
1626	static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[`3`]));
1627	return `0`;
1628	}
1629
1630	// Consults the specifier for an instruction and consumes all
1631	// operands for that instruction, interpreting them as it goes.
1632	static int readOperands(struct InternalInstruction *insn) {
1633	int hasVVVV, needVVVV;
1634	int sawRegImm = `0`;
1635
1636	LLVM_DEBUG(dbgs() << "readOperands()");
1637
1638	// If non-zero vvvv specified, make sure one of the operands uses it.
1639	hasVVVV = !readVVVV(insn);
1640	needVVVV = hasVVVV && (insn->vvvv != `0`);
1641
1642	for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1643	switch (Op.encoding) {
1644	case ENCODING_NONE:
1645	case ENCODING_SI:
1646	case ENCODING_DI:
1647	break;
1648	CASE_ENCODING_VSIB:
1649	// VSIB can use the V2 bit so check only the other bits.
1650	if (needVVVV)
1651	needVVVV = hasVVVV & ((insn->vvvv & `0xf`) != `0`);
1652	if (readModRM(insn))
1653	return -`1`;
1654
1655	// Reject if SIB wasn't used.
1656	if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1657	return -`1`;
1658
1659	// If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1660	if (insn->sibIndex == SIB_INDEX_NONE)
1661	insn->sibIndex = (SIBIndex)(insn->sibIndexBase + `4`);
1662
1663	// If EVEX.v2 is set this is one of the 16-31 registers.
1664	if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1665	v2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]))
1666	insn->sibIndex = (SIBIndex)(insn->sibIndex + `16`);
1667
1668	// Adjust the index register to the correct size.
1669	switch ((OperandType)Op.type) {
1670	default:
1671	debug("Unhandled VSIB index type");
1672	return -`1`;
1673	case TYPE_MVSIBX:
1674	insn->sibIndex =
1675	(SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1676	break;
1677	case TYPE_MVSIBY:
1678	insn->sibIndex =
1679	(SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1680	break;
1681	case TYPE_MVSIBZ:
1682	insn->sibIndex =
1683	(SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1684	break;
1685	}
1686
1687	// Apply the AVX512 compressed displacement scaling factor.
1688	if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1689	insn->displacement *= `1` << (Op.encoding - ENCODING_VSIB);
1690	break;
1691	case ENCODING_SIB:
1692	// Reject if SIB wasn't used.
1693	if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1694	return -`1`;
1695	if (readModRM(insn))
1696	return -`1`;
1697	if (fixupReg(insn, op: &Op))
1698	return -`1`;
1699	break;
1700	case ENCODING_REG:
1701	CASE_ENCODING_RM:
1702	if (readModRM(insn))
1703	return -`1`;
1704	if (fixupReg(insn, op: &Op))
1705	return -`1`;
1706	// Apply the AVX512 compressed displacement scaling factor.
1707	if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1708	insn->displacement *= `1` << (Op.encoding - ENCODING_RM);
1709	break;
1710	case ENCODING_IB:
1711	if (sawRegImm) {
1712	// Saw a register immediate so don't read again and instead split the
1713	// previous immediate. FIXME: This is a hack.
1714	insn->immediates[insn->numImmediatesConsumed] =
1715	insn->immediates[insn->numImmediatesConsumed - `1`] & `0xf`;
1716	++insn->numImmediatesConsumed;
1717	break;
1718	}
1719	if (readImmediate(insn, size: `1`))
1720	return -`1`;
1721	if (Op.type == TYPE_XMM \|\| Op.type == TYPE_YMM)
1722	sawRegImm = `1`;
1723	break;
1724	case ENCODING_IW:
1725	if (readImmediate(insn, size: `2`))
1726	return -`1`;
1727	break;
1728	case ENCODING_ID:
1729	if (readImmediate(insn, size: `4`))
1730	return -`1`;
1731	break;
1732	case ENCODING_IO:
1733	if (readImmediate(insn, size: `8`))
1734	return -`1`;
1735	break;
1736	case ENCODING_Iv:
1737	if (readImmediate(insn, size: insn->immediateSize))
1738	return -`1`;
1739	break;
1740	case ENCODING_Ia:
1741	if (readImmediate(insn, size: insn->addressSize))
1742	return -`1`;
1743	break;
1744	case ENCODING_IRC:
1745	insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[`3`]) << `1`) \|
1746	lFromEVEX4of4(insn->vectorExtensionPrefix[`3`]);
1747	break;
1748	case ENCODING_RB:
1749	if (readOpcodeRegister(insn, size: `1`))
1750	return -`1`;
1751	break;
1752	case ENCODING_RW:
1753	if (readOpcodeRegister(insn, size: `2`))
1754	return -`1`;
1755	break;
1756	case ENCODING_RD:
1757	if (readOpcodeRegister(insn, size: `4`))
1758	return -`1`;
1759	break;
1760	case ENCODING_RO:
1761	if (readOpcodeRegister(insn, size: `8`))
1762	return -`1`;
1763	break;
1764	case ENCODING_Rv:
1765	if (readOpcodeRegister(insn, size: `0`))
1766	return -`1`;
1767	break;
1768	case ENCODING_CF:
1769	insn->immediates[`1`] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[`2`]);
1770	needVVVV = false; // oszc shares the same bits with VVVV
1771	break;
1772	case ENCODING_CC:
1773	if (isCCMPOrCTEST(insn))
1774	insn->immediates[`2`] = scFromEVEX4of4(insn->vectorExtensionPrefix[`3`]);
1775	else
1776	insn->immediates[`1`] = insn->opcode & `0xf`;
1777	break;
1778	case ENCODING_FP:
1779	break;
1780	case ENCODING_VVVV:
1781	needVVVV = `0`; // Mark that we have found a VVVV operand.
1782	if (!hasVVVV)
1783	return -`1`;
1784	if (insn->mode != MODE_64BIT)
1785	insn->vvvv = static_cast<Reg>(insn->vvvv & `0x7`);
1786	if (fixupReg(insn, op: &Op))
1787	return -`1`;
1788	break;
1789	case ENCODING_WRITEMASK:
1790	if (readMaskRegister(insn))
1791	return -`1`;
1792	break;
1793	case ENCODING_DUP:
1794	break;
1795	default:
1796	LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1797	return -`1`;
1798	}
1799	}
1800
1801	// If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1802	if (needVVVV)
1803	return -`1`;
1804
1805	return `0`;
1806	}
1807
namespace llvm {

// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
namespace X86 {
enum {
  BX_SI = 500,
  BX_DI = 501,
  BP_SI = 502,
  BP_DI = 503,
  sib = 504,
  sib64 = 505
};
} // namespace X86

} // namespace llvm
1825
// Forward declaration so that getInstruction() can call the translator before
// its definition. The caller treats a false return value as success.
static bool translateInstruction(MCInst &target,
                                 InternalInstruction &source,
                                 const MCDisassembler *Dis);
1829
1830	namespace {
1831
1832	/// Generic disassembler for all X86 platforms. All each platform class should
1833	/// have to do is subclass the constructor, and provide a different
1834	/// disassemblerMode value.
1835	class X86GenericDisassembler : public MCDisassembler {
1836	std::unique_ptr<const MCInstrInfo> MII;
1837	public:
1838	X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1839	std::unique_ptr<const MCInstrInfo> MII);
1840	public:
1841	DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1842	ArrayRef<uint8_t> Bytes, uint64_t Address,
1843	raw_ostream &cStream) const override;
1844
1845	private:
1846	DisassemblerMode fMode;
1847	};
1848
1849	} // namespace
1850
1851	X86GenericDisassembler::X86GenericDisassembler(
1852	const MCSubtargetInfo &STI,
1853	MCContext &Ctx,
1854	std::unique_ptr<const MCInstrInfo> MII)
1855	: MCDisassembler (STI, Ctx), MII (std::move(MII)) {
1856	const FeatureBitset &FB = STI.getFeatureBits();
1857	if (FB [X86::Is16Bit]) {
1858	fMode = MODE_16BIT;
1859	return;
1860	} else if (FB [X86::Is32Bit]) {
1861	fMode = MODE_32BIT;
1862	return;
1863	} else if (FB [X86::Is64Bit]) {
1864	fMode = MODE_64BIT;
1865	return;
1866	}
1867
1868	llvm_unreachable("Invalid CPU mode");
1869	}
1870
1871	MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1872	MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1873	raw_ostream &CStream) const {
1874	CommentStream = &CStream;
1875
1876	InternalInstruction Insn;
1877	memset(s: &Insn, c: `0`, n: sizeof(InternalInstruction));
1878	Insn.bytes = Bytes;
1879	Insn.startLocation = Address;
1880	Insn.readerCursor = Address;
1881	Insn.mode = fMode;
1882
1883	if (Bytes.empty() \|\| readPrefixes(insn: &Insn) \|\| readOpcode(insn: &Insn) \|\|
1884	getInstructionID(insn: &Insn, mii: MII.get()) \|\| Insn.instructionID == `0` \|\|
1885	readOperands(insn: &Insn)) {
1886	Size = Insn.readerCursor - Address;
1887	return Fail;
1888	}
1889
1890	Insn.operands = x86OperandSets[Insn.spec->operands];
1891	Insn.length = Insn.readerCursor - Insn.startLocation;
1892	Size = Insn.length;
1893	if (Size > `15`)
1894	LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1895
1896	bool Ret = translateInstruction(target&: Instr, source&: Insn, Dis: this);
1897	if (!Ret) {
1898	unsigned Flags = X86::IP_NO_PREFIX;
1899	if (Insn.hasAdSize)
1900	Flags \|= X86::IP_HAS_AD_SIZE;
1901	if (!Insn.mandatoryPrefix) {
1902	if (Insn.hasOpSize)
1903	Flags \|= X86::IP_HAS_OP_SIZE;
1904	if (Insn.repeatPrefix == `0xf2`)
1905	Flags \|= X86::IP_HAS_REPEAT_NE;
1906	else if (Insn.repeatPrefix == `0xf3` &&
1907	// It should not be 'pause' f3 90
1908	Insn.opcode != `0x90`)
1909	Flags \|= X86::IP_HAS_REPEAT;
1910	if (Insn.hasLockPrefix)
1911	Flags \|= X86::IP_HAS_LOCK;
1912	}
1913	Instr.setFlags(Flags);
1914	}
1915	return (!Ret) ? Success : Fail;
1916	}
1917
1918	//
1919	// Private code that translates from struct InternalInstructions to MCInsts.
1920	//
1921
1922	/// translateRegister - Translates an internal register to the appropriate LLVM
1923	/// register, and appends it as an operand to an MCInst.
1924	///
1925	/// @param mcInst - The MCInst to append to.
1926	/// @param reg - The Reg to append.
1927	static void translateRegister(MCInst &mcInst, Reg reg) {
1928	#define ENTRY(x) X86::x,
1929	static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1930	#undef ENTRY
1931
1932	MCPhysReg llvmRegnum = llvmRegnums[reg];
1933	mcInst.addOperand(Op: MCOperand::createReg(Reg: llvmRegnum));
1934	}
1935
1936	static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1937	`0`, // SEG_OVERRIDE_NONE
1938	X86::CS,
1939	X86::SS,
1940	X86::DS,
1941	X86::ES,
1942	X86::FS,
1943	X86::GS
1944	};
1945
1946	/// translateSrcIndex - Appends a source index operand to an MCInst.
1947	///
1948	/// @param mcInst - The MCInst to append to.
1949	/// @param insn - The internal instruction.
1950	static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1951	unsigned baseRegNo;
1952
1953	if (insn.mode == MODE_64BIT)
1954	baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1955	else if (insn.mode == MODE_32BIT)
1956	baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1957	else {
1958	assert(insn.mode == MODE_16BIT);
1959	baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1960	}
1961	MCOperand baseReg = MCOperand::createReg(Reg: baseRegNo);
1962	mcInst.addOperand(Op: baseReg);
1963
1964	MCOperand segmentReg;
1965	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
1966	mcInst.addOperand(Op: segmentReg);
1967	return false;
1968	}
1969
1970	/// translateDstIndex - Appends a destination index operand to an MCInst.
1971	///
1972	/// @param mcInst - The MCInst to append to.
1973	/// @param insn - The internal instruction.
1974
1975	static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1976	unsigned baseRegNo;
1977
1978	if (insn.mode == MODE_64BIT)
1979	baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1980	else if (insn.mode == MODE_32BIT)
1981	baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1982	else {
1983	assert(insn.mode == MODE_16BIT);
1984	baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1985	}
1986	MCOperand baseReg = MCOperand::createReg(Reg: baseRegNo);
1987	mcInst.addOperand(Op: baseReg);
1988	return false;
1989	}
1990
1991	/// translateImmediate - Appends an immediate operand to an MCInst.
1992	///
1993	/// @param mcInst - The MCInst to append to.
1994	/// @param immediate - The immediate value to append.
1995	/// @param operand - The operand, as stored in the descriptor table.
1996	/// @param insn - The internal instruction.
1997	static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1998	const OperandSpecifier &operand,
1999	InternalInstruction &insn,
2000	const MCDisassembler *Dis) {
2001	// Sign-extend the immediate if necessary.
2002
2003	OperandType type = (OperandType)operand.type;
2004
2005	bool isBranch = false;
2006	uint64_t pcrel = `0`;
2007	if (type == TYPE_REL) {
2008	isBranch = true;
2009	pcrel = insn.startLocation + insn.length;
2010	switch (operand.encoding) {
2011	default:
2012	break;
2013	case ENCODING_Iv:
2014	switch (insn.displacementSize) {
2015	default:
2016	break;
2017	case `1`:
2018	if(immediate & `0x80`)
2019	immediate \|= ~(`0xffull`);
2020	break;
2021	case `2`:
2022	if(immediate & `0x8000`)
2023	immediate \|= ~(`0xffffull`);
2024	break;
2025	case `4`:
2026	if(immediate & `0x80000000`)
2027	immediate \|= ~(`0xffffffffull`);
2028	break;
2029	case `8`:
2030	break;
2031	}
2032	break;
2033	case ENCODING_IB:
2034	if(immediate & `0x80`)
2035	immediate \|= ~(`0xffull`);
2036	break;
2037	case ENCODING_IW:
2038	if(immediate & `0x8000`)
2039	immediate \|= ~(`0xffffull`);
2040	break;
2041	case ENCODING_ID:
2042	if(immediate & `0x80000000`)
2043	immediate \|= ~(`0xffffffffull`);
2044	break;
2045	}
2046	}
2047	// By default sign-extend all X86 immediates based on their encoding.
2048	else if (type == TYPE_IMM) {
2049	switch (operand.encoding) {
2050	default:
2051	break;
2052	case ENCODING_IB:
2053	if(immediate & `0x80`)
2054	immediate \|= ~(`0xffull`);
2055	break;
2056	case ENCODING_IW:
2057	if(immediate & `0x8000`)
2058	immediate \|= ~(`0xffffull`);
2059	break;
2060	case ENCODING_ID:
2061	if(immediate & `0x80000000`)
2062	immediate \|= ~(`0xffffffffull`);
2063	break;
2064	case ENCODING_IO:
2065	break;
2066	}
2067	}
2068
2069	switch (type) {
2070	case TYPE_XMM:
2071	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::XMM0 + (immediate >> `4`)));
2072	return;
2073	case TYPE_YMM:
2074	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::YMM0 + (immediate >> `4`)));
2075	return;
2076	case TYPE_ZMM:
2077	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::ZMM0 + (immediate >> `4`)));
2078	return;
2079	default:
2080	// operand is 64 bits wide. Do nothing.
2081	break;
2082	}
2083
2084	if (!Dis->tryAddingSymbolicOperand(
2085	Inst&: mcInst, Value: immediate + pcrel, Address: insn.startLocation, IsBranch: isBranch,
2086	Offset: insn.immediateOffset, OpSize: insn.immediateSize, InstSize: insn.length))
2087	mcInst.addOperand(Op: MCOperand::createImm(Val: immediate));
2088
2089	if (type == TYPE_MOFFS) {
2090	MCOperand segmentReg;
2091	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
2092	mcInst.addOperand(Op: segmentReg);
2093	}
2094	}
2095
2096	/// translateRMRegister - Translates a register stored in the R/M field of the
2097	/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
2098	/// @param mcInst - The MCInst to append to.
2099	/// @param insn - The internal instruction to extract the R/M field
2100	/// from.
2101	/// @return - 0 on success; -1 otherwise
2102	static bool translateRMRegister(MCInst &mcInst,
2103	InternalInstruction &insn) {
2104	if (insn.eaBase == EA_BASE_sib \|\| insn.eaBase == EA_BASE_sib64) {
2105	debug("A R/M register operand may not have a SIB byte");
2106	return true;
2107	}
2108
2109	switch (insn.eaBase) {
2110	default:
2111	debug("Unexpected EA base register");
2112	return true;
2113	case EA_BASE_NONE:
2114	debug("EA_BASE_NONE for ModR/M base");
2115	return true;
2116	#define ENTRY(x) case EA_BASE_##x:
2117	ALL_EA_BASES
2118	#undef ENTRY
2119	debug("A R/M register operand may not have a base; "
2120	"the operand must be a register.");
2121	return true;
2122	#define ENTRY(x) \
2123	case EA_REG_##x: \
2124	mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2125	ALL_REGS
2126	#undef ENTRY
2127	}
2128
2129	return false;
2130	}
2131
2132	/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2133	/// fields of an internal instruction (and possibly its SIB byte) to a memory
2134	/// operand in LLVM's format, and appends it to an MCInst.
2135	///
2136	/// @param mcInst - The MCInst to append to.
2137	/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2138	/// from.
2139	/// @param ForceSIB - The instruction must use SIB.
2140	/// @return - 0 on success; nonzero otherwise
2141	static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2142	const MCDisassembler *Dis,
2143	bool ForceSIB = false) {
2144	// Addresses in an MCInst are represented as five operands:
2145	// 1. basereg (register) The R/M base, or (if there is a SIB) the
2146	// SIB base
2147	// 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2148	// scale amount
2149	// 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2150	// the index (which is multiplied by the
2151	// scale amount)
2152	// 4. displacement (immediate) 0, or the displacement if there is one
2153	// 5. segmentreg (register) x86_registerNONE for now, but could be set
2154	// if we have segment overrides
2155
2156	MCOperand baseReg;
2157	MCOperand scaleAmount;
2158	MCOperand indexReg;
2159	MCOperand displacement;
2160	MCOperand segmentReg;
2161	uint64_t pcrel = `0`;
2162
2163	if (insn.eaBase == EA_BASE_sib \|\| insn.eaBase == EA_BASE_sib64) {
2164	if (insn.sibBase != SIB_BASE_NONE) {
2165	switch (insn.sibBase) {
2166	default:
2167	debug("Unexpected sibBase");
2168	return true;
2169	#define ENTRY(x) \
2170	case SIB_BASE_##x: \
2171	baseReg = MCOperand::createReg(X86::x); break;
2172	ALL_SIB_BASES
2173	#undef ENTRY
2174	}
2175	} else {
2176	baseReg = MCOperand::createReg(Reg: X86::NoRegister);
2177	}
2178
2179	if (insn.sibIndex != SIB_INDEX_NONE) {
2180	switch (insn.sibIndex) {
2181	default:
2182	debug("Unexpected sibIndex");
2183	return true;
2184	#define ENTRY(x) \
2185	case SIB_INDEX_##x: \
2186	indexReg = MCOperand::createReg(X86::x); break;
2187	EA_BASES_32BIT
2188	EA_BASES_64BIT
2189	REGS_XMM
2190	REGS_YMM
2191	REGS_ZMM
2192	#undef ENTRY
2193	}
2194	} else {
2195	// Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2196	// but no index is used and modrm alone should have been enough.
2197	// -No base register in 32-bit mode. In 64-bit mode this is used to
2198	// avoid rip-relative addressing.
2199	// -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2200	// base always requires a SIB byte.
2201	// -A scale other than 1 is used.
2202	if (!ForceSIB &&
2203	(insn.sibScale != `1` \|\|
2204	(insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) \|\|
2205	(insn.sibBase != SIB_BASE_NONE &&
2206	insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2207	insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2208	indexReg = MCOperand::createReg(Reg: insn.addressSize == `4` ? X86::EIZ :
2209	X86::RIZ);
2210	} else
2211	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2212	}
2213
2214	scaleAmount = MCOperand::createImm(Val: insn.sibScale);
2215	} else {
2216	switch (insn.eaBase) {
2217	case EA_BASE_NONE:
2218	if (insn.eaDisplacement == EA_DISP_NONE) {
2219	debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2220	return true;
2221	}
2222	if (insn.mode == MODE_64BIT){
2223	pcrel = insn.startLocation + insn.length;
2224	Dis->tryAddingPcLoadReferenceComment(Value: insn.displacement + pcrel,
2225	Address: insn.startLocation +
2226	insn.displacementOffset);
2227	// Section 2.2.1.6
2228	baseReg = MCOperand::createReg(Reg: insn.addressSize == `4` ? X86::EIP :
2229	X86::RIP);
2230	}
2231	else
2232	baseReg = MCOperand::createReg(Reg: X86::NoRegister);
2233
2234	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2235	break;
2236	case EA_BASE_BX_SI:
2237	baseReg = MCOperand::createReg(Reg: X86::BX);
2238	indexReg = MCOperand::createReg(Reg: X86::SI);
2239	break;
2240	case EA_BASE_BX_DI:
2241	baseReg = MCOperand::createReg(Reg: X86::BX);
2242	indexReg = MCOperand::createReg(Reg: X86::DI);
2243	break;
2244	case EA_BASE_BP_SI:
2245	baseReg = MCOperand::createReg(Reg: X86::BP);
2246	indexReg = MCOperand::createReg(Reg: X86::SI);
2247	break;
2248	case EA_BASE_BP_DI:
2249	baseReg = MCOperand::createReg(Reg: X86::BP);
2250	indexReg = MCOperand::createReg(Reg: X86::DI);
2251	break;
2252	default:
2253	indexReg = MCOperand::createReg(Reg: X86::NoRegister);
2254	switch (insn.eaBase) {
2255	default:
2256	debug("Unexpected eaBase");
2257	return true;
2258	// Here, we will use the fill-ins defined above. However,
2259	// BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2260	// sib and sib64 were handled in the top-level if, so they're only
2261	// placeholders to keep the compiler happy.
2262	#define ENTRY(x) \
2263	case EA_BASE_##x: \
2264	baseReg = MCOperand::createReg(X86::x); break;
2265	ALL_EA_BASES
2266	#undef ENTRY
2267	#define ENTRY(x) case EA_REG_##x:
2268	ALL_REGS
2269	#undef ENTRY
2270	debug("A R/M memory operand may not be a register; "
2271	"the base field must be a base.");
2272	return true;
2273	}
2274	}
2275
2276	scaleAmount = MCOperand::createImm(Val: `1`);
2277	}
2278
2279	displacement = MCOperand::createImm(Val: insn.displacement);
2280
2281	segmentReg = MCOperand::createReg(Reg: segmentRegnums[insn.segmentOverride]);
2282
2283	mcInst.addOperand(Op: baseReg);
2284	mcInst.addOperand(Op: scaleAmount);
2285	mcInst.addOperand(Op: indexReg);
2286
2287	const uint8_t dispSize =
2288	(insn.eaDisplacement == EA_DISP_NONE) ? `0` : insn.displacementSize;
2289
2290	if (!Dis->tryAddingSymbolicOperand(
2291	Inst&: mcInst, Value: insn.displacement + pcrel, Address: insn.startLocation, IsBranch: false,
2292	Offset: insn.displacementOffset, OpSize: dispSize, InstSize: insn.length))
2293	mcInst.addOperand(Op: displacement);
2294	mcInst.addOperand(Op: segmentReg);
2295	return false;
2296	}
2297
2298	/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2299	/// byte of an instruction to LLVM form, and appends it to an MCInst.
2300	///
2301	/// @param mcInst - The MCInst to append to.
2302	/// @param operand - The operand, as stored in the descriptor table.
2303	/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2304	/// from.
2305	/// @return - 0 on success; nonzero otherwise
2306	static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2307	InternalInstruction &insn, const MCDisassembler *Dis) {
2308	switch (operand.type) {
2309	default:
2310	debug("Unexpected type for a R/M operand");
2311	return true;
2312	case TYPE_R8:
2313	case TYPE_R16:
2314	case TYPE_R32:
2315	case TYPE_R64:
2316	case TYPE_Rv:
2317	case TYPE_MM64:
2318	case TYPE_XMM:
2319	case TYPE_YMM:
2320	case TYPE_ZMM:
2321	case TYPE_TMM:
2322	case TYPE_VK_PAIR:
2323	case TYPE_VK:
2324	case TYPE_DEBUGREG:
2325	case TYPE_CONTROLREG:
2326	case TYPE_BNDR:
2327	return translateRMRegister(mcInst, insn);
2328	case TYPE_M:
2329	case TYPE_MVSIBX:
2330	case TYPE_MVSIBY:
2331	case TYPE_MVSIBZ:
2332	return translateRMMemory(mcInst, insn, Dis);
2333	case TYPE_MSIB:
2334	return translateRMMemory(mcInst, insn, Dis, ForceSIB: true);
2335	}
2336	}
2337
2338	/// translateFPRegister - Translates a stack position on the FPU stack to its
2339	/// LLVM form, and appends it to an MCInst.
2340	///
2341	/// @param mcInst - The MCInst to append to.
2342	/// @param stackPos - The stack position to translate.
2343	static void translateFPRegister(MCInst &mcInst,
2344	uint8_t stackPos) {
2345	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::ST0 + stackPos));
2346	}
2347
2348	/// translateMaskRegister - Translates a 3-bit mask register number to
2349	/// LLVM form, and appends it to an MCInst.
2350	///
2351	/// @param mcInst - The MCInst to append to.
2352	/// @param maskRegNum - Number of mask register from 0 to 7.
2353	/// @return - false on success; true otherwise.
2354	static bool translateMaskRegister(MCInst &mcInst,
2355	uint8_t maskRegNum) {
2356	if (maskRegNum >= `8`) {
2357	debug("Invalid mask register number");
2358	return true;
2359	}
2360
2361	mcInst.addOperand(Op: MCOperand::createReg(Reg: X86::K0 + maskRegNum));
2362	return false;
2363	}
2364
2365	/// translateOperand - Translates an operand stored in an internal instruction
2366	/// to LLVM's format and appends it to an MCInst.
2367	///
2368	/// @param mcInst - The MCInst to append to.
2369	/// @param operand - The operand, as stored in the descriptor table.
2370	/// @param insn - The internal instruction.
2371	/// @return - false on success; true otherwise.
2372	static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2373	InternalInstruction &insn,
2374	const MCDisassembler *Dis) {
2375	switch (operand.encoding) {
2376	default:
2377	debug("Unhandled operand encoding during translation");
2378	return true;
2379	case ENCODING_REG:
2380	translateRegister(mcInst, reg: insn.reg);
2381	return false;
2382	case ENCODING_WRITEMASK:
2383	return translateMaskRegister(mcInst, maskRegNum: insn.writemask);
2384	case ENCODING_SIB:
2385	CASE_ENCODING_RM:
2386	CASE_ENCODING_VSIB:
2387	return translateRM(mcInst, operand, insn, Dis);
2388	case ENCODING_IB:
2389	case ENCODING_IW:
2390	case ENCODING_ID:
2391	case ENCODING_IO:
2392	case ENCODING_Iv:
2393	case ENCODING_Ia:
2394	translateImmediate(mcInst,
2395	immediate: insn.immediates[insn.numImmediatesTranslated++],
2396	operand,
2397	insn,
2398	Dis);
2399	return false;
2400	case ENCODING_IRC:
2401	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.RC));
2402	return false;
2403	case ENCODING_SI:
2404	return translateSrcIndex(mcInst, insn);
2405	case ENCODING_DI:
2406	return translateDstIndex(mcInst, insn);
2407	case ENCODING_RB:
2408	case ENCODING_RW:
2409	case ENCODING_RD:
2410	case ENCODING_RO:
2411	case ENCODING_Rv:
2412	translateRegister(mcInst, reg: insn.opcodeRegister);
2413	return false;
2414	case ENCODING_CF:
2415	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`1`]));
2416	return false;
2417	case ENCODING_CC:
2418	if (isCCMPOrCTEST(insn: &insn))
2419	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`2`]));
2420	else
2421	mcInst.addOperand(Op: MCOperand::createImm(Val: insn.immediates[`1`]));
2422	return false;
2423	case ENCODING_FP:
2424	translateFPRegister(mcInst, stackPos: insn.modRM & `7`);
2425	return false;
2426	case ENCODING_VVVV:
2427	translateRegister(mcInst, reg: insn.vvvv);
2428	return false;
2429	case ENCODING_DUP:
2430	return translateOperand(mcInst, operand: insn.operands [operand.type - TYPE_DUP0],
2431	insn, Dis);
2432	}
2433	}
2434
2435	/// translateInstruction - Translates an internal instruction and all its
2436	/// operands to an MCInst.
2437	///
2438	/// @param mcInst - The MCInst to populate with the instruction's data.
2439	/// @param insn - The internal instruction.
2440	/// @return - false on success; true otherwise.
2441	static bool translateInstruction(MCInst &mcInst,
2442	InternalInstruction &insn,
2443	const MCDisassembler *Dis) {
2444	if (!insn.spec) {
2445	debug("Instruction has no specification");
2446	return true;
2447	}
2448
2449	mcInst.clear();
2450	mcInst.setOpcode(insn.instructionID);
2451	// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2452	// prefix bytes should be disassembled as xrelease and xacquire then set the
2453	// opcode to those instead of the rep and repne opcodes.
2454	if (insn.xAcquireRelease) {
2455	if(mcInst.getOpcode() == X86::REP_PREFIX)
2456	mcInst.setOpcode(X86::XRELEASE_PREFIX);
2457	else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2458	mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2459	}
2460
2461	insn.numImmediatesTranslated = `0`;
2462
2463	for (const auto &Op : insn.operands) {
2464	if (Op.encoding != ENCODING_NONE) {
2465	if (translateOperand(mcInst, operand: Op, insn, Dis)) {
2466	return true;
2467	}
2468	}
2469	}
2470
2471	return false;
2472	}
2473
2474	static MCDisassembler createX86Disassembler(const* Target &T,
2475	const MCSubtargetInfo &STI,
2476	MCContext &Ctx) {
2477	std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2478	return new X86GenericDisassembler (STI, Ctx, std::move(MII));
2479	}
2480
2481	extern "C" LLVM_C_ABI void LLVMInitializeX86Disassembler() {
2482	// Register the disassembler.
2483	TargetRegistry::RegisterMCDisassembler(T&: getTheX86_32Target(),
2484	Fn: createX86Disassembler);
2485	TargetRegistry::RegisterMCDisassembler(T&: getTheX86_64Target(),
2486	Fn: createX86Disassembler);
2487	}
2488

Browse the source code of llvm_projects/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp