CodeEmitterGen.cpp source code [llvm_projects/llvm/utils/TableGen/CodeEmitterGen.cpp]

1	//===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// CodeEmitterGen uses the descriptions of instructions and their fields to
10	// construct an automated code emitter: a function called
11	// getBinaryCodeForInstr() that, given a MCInst, returns the value of the
12	// instruction - either as an uint64_t or as an APInt, depending on the
13	// maximum bit width of all Inst definitions.
14	//
15	// In addition, it generates another function called getOperandBitOffset()
16	// that, given a MCInst and an operand index, returns the minimum of indices of
17	// all bits that carry some portion of the respective operand. When the target's
18	// encodeInstruction() stores the instruction in a little-endian byte order, the
19	// returned value is the offset of the start of the operand in the encoded
20	// instruction. Other targets might need to adjust the returned value according
21	// to their encodeInstruction() implementation.
22	//
23	//===----------------------------------------------------------------------===//
24
25	#include "Common/CodeGenHwModes.h"
26	#include "Common/CodeGenInstruction.h"
27	#include "Common/CodeGenTarget.h"
28	#include "Common/InfoByHwMode.h"
29	#include "Common/VarLenCodeEmitterGen.h"
30	#include "llvm/ADT/APInt.h"
31	#include "llvm/ADT/ArrayRef.h"
32	#include "llvm/ADT/StringExtras.h"
33	#include "llvm/Support/Casting.h"
34	#include "llvm/Support/Format.h"
35	#include "llvm/Support/FormatVariadic.h"
36	#include "llvm/Support/raw_ostream.h"
37	#include "llvm/TableGen/CodeGenHelpers.h"
38	#include "llvm/TableGen/Error.h"
39	#include "llvm/TableGen/Record.h"
40	#include "llvm/TableGen/TableGenBackend.h"
41	#include <cstdint>
42	#include <map>
43	#include <set>
44	#include <string>
45	#include <utility>
46	#include <vector>
47
48	using namespace llvm;
49
50	namespace {
51
52	// A map of uniqued case statements. The key is the body of the case statement
53	// and the value is a list of cases which share the same body.
54	using CaseMapT = std::map<std::string, std::vector<unsigned>>;
55
56	class CodeEmitterGen {
57	const RecordKeeper &RK;
58	CodeGenTarget Target;
59	const CodeGenHwModes &CGH;
60
61	public:
62	explicit CodeEmitterGen(const RecordKeeper &RK);
63
64	void run(raw_ostream &O);
65
66	private:
67	int getVariableBit(const std::string &VarName, const BitsInit BI, int* Bit);
68	std::pair<std::string, std::string> getInstructionCases(const Record *R);
69	void addInstructionCasesForEncoding(const Record *R,
70	const Record *EncodingDef,
71	std::string &Case,
72	std::string &BitOffsetCase);
73	bool addCodeToMergeInOperand(const Record R, const* BitsInit *BI,
74	const std::string &VarName, std::string &Case,
75	std::string &BitOffsetCase);
76
77	void emitInstructionBaseValues(
78	raw_ostream &O, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
79	unsigned HwMode = DefaultMode);
80	unsigned BitWidth = `0u`;
81	bool UseAPInt = false;
82	};
83
84	} // end anonymous namespace
85
86	// If the VarBitInit at position 'bit' matches the specified variable then
87	// return the variable bit position. Otherwise return -1.
88	int CodeEmitterGen::getVariableBit(const std::string &VarName,
89	const BitsInit BI, int* Bit) {
90	if (const VarBitInit *VBI = dyn_cast<VarBitInit>(Val: BI->getBit(Bit))) {
91	if (const VarInit *VI = dyn_cast<VarInit>(Val: VBI->getBitVar()))
92	if (VI->getName() == VarName)
93	return VBI->getBitNum();
94	} else if (const VarInit *VI = dyn_cast<VarInit>(Val: BI->getBit(Bit))) {
95	if (VI->getName() == VarName)
96	return `0`;
97	}
98
99	return -`1`;
100	}
101
102	// Returns true if it succeeds, false if an error.
103	bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R,
104	const BitsInit *BI,
105	const std::string &VarName,
106	std::string &Case,
107	std::string &BitOffsetCase) {
108	const CodeGenInstruction &CGI = Target.getInstruction(InstRec: R);
109
110	// Determine if VarName actually contributes to the Inst encoding.
111	int Bit = BI->getNumBits() - `1`;
112
113	// Scan for a bit that this contributed to.
114	for (; Bit >= `0`;) {
115	if (getVariableBit(VarName, BI, Bit) != -`1`)
116	break;
117
118	--Bit;
119	}
120
121	// If we found no bits, ignore this value, otherwise emit the call to get the
122	// operand encoding.
123	if (Bit < `0`)
124	return true;
125
126	// If the operand matches by name, reference according to that
127	// operand number. Non-matching operands are assumed to be in
128	// order.
129	unsigned OpIdx;
130	if (auto SubOp = CGI.Operands.findSubOperandAlias(Name: VarName)) {
131	OpIdx = CGI.Operands [SubOp ->first].MIOperandNo + SubOp ->second;
132	} else if (auto MayBeOpIdx = CGI.Operands.findOperandNamed(Name: VarName)) {
133	// Get the machine operand number for the indicated operand.
134	OpIdx = CGI.Operands [*MayBeOpIdx].MIOperandNo;
135	} else {
136	PrintError(Rec: R, Msg: Twine ("No operand named ") + VarName + " in record " +
137	R->getName());
138	return false;
139	}
140
141	std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(Op: OpIdx);
142	StringRef EncoderMethodName =
143	CGI.Operands [SO.first].EncoderMethodNames [SO.second];
144
145	raw_string_ostream OS(Case);
146	indent Indent(`6`);
147
148	OS << Indent << "// op: " << VarName << `'\n'`;
149
150	if (UseAPInt)
151	OS << Indent << "op.clearAllBits();\n";
152
153	if (!EncoderMethodName.empty()) {
154	if (UseAPInt)
155	OS << Indent << EncoderMethodName << "(MI, " << OpIdx
156	<< ", op, Fixups, STI);\n";
157	else
158	OS << Indent << "op = " << EncoderMethodName << "(MI, " << OpIdx
159	<< ", Fixups, STI);\n";
160	} else {
161	if (UseAPInt)
162	OS << Indent << "getMachineOpValue(MI, MI.getOperand(" << OpIdx
163	<< "), op, Fixups, STI);\n";
164	else
165	OS << Indent << "op = getMachineOpValue(MI, MI.getOperand(" << OpIdx
166	<< "), Fixups, STI);\n";
167	}
168
169	unsigned BitOffset = -`1`;
170	for (; Bit >= `0`;) {
171	int VarBit = getVariableBit(VarName, BI, Bit);
172
173	// If this bit isn't from a variable, skip it.
174	if (VarBit == -`1`) {
175	--Bit;
176	continue;
177	}
178
179	// Figure out the consecutive range of bits covered by this operand, in
180	// order to generate better encoding code.
181	int BeginInstBit = Bit;
182	int BeginVarBit = VarBit;
183	int N = `1`;
184	for (--Bit; Bit >= `0`;) {
185	VarBit = getVariableBit(VarName, BI, Bit);
186	if (VarBit == -`1` \|\| VarBit != (BeginVarBit - N))
187	break;
188	++N;
189	--Bit;
190	}
191
192	unsigned LoBit = BeginVarBit - N + `1`;
193	unsigned LoInstBit = BeginInstBit - N + `1`;
194	BitOffset = LoInstBit;
195	if (UseAPInt) {
196	if (N > `64`)
197	OS << Indent << "Value.insertBits(op.extractBits(" << N << ", " << LoBit
198	<< "), " << LoInstBit << ");\n";
199	else
200	OS << Indent << "Value.insertBits(op.extractBitsAsZExtValue(" << N
201	<< ", " << LoBit << "), " << LoInstBit << ", " << N << ");\n";
202	} else {
203	uint64_t OpMask = maskTrailingOnes<uint64_t>(N) << LoBit;
204	OS << Indent << "Value \|= (op & " << format_hex(N: OpMask, Width: `0`) << `')'`;
205	int OpShift = BeginInstBit - BeginVarBit;
206	if (OpShift > `0`)
207	OS << " << " << OpShift;
208	else if (OpShift < `0`)
209	OS << " >> " << -OpShift;
210	OS << ";\n";
211	}
212	}
213
214	if (BitOffset != (unsigned)-`1`) {
215	BitOffsetCase += " case " + utostr(X: OpIdx) + ":\n";
216	BitOffsetCase += " // op: " + VarName + "\n";
217	BitOffsetCase += " return " + utostr(X: BitOffset) + ";\n";
218	}
219
220	return true;
221	}
222
223	static void emitCaseMap(raw_ostream &O, const CaseMapT &CaseMap,
224	function_ref<void(raw_ostream &, unsigned)> PrintCase) {
225	for (const auto &[CaseBody, Cases] : CaseMap) {
226	ListSeparator LS("\n");
227	for (unsigned Case : Cases) {
228	O << LS << " case ";
229	PrintCase (O, Case);
230	O << ":";
231	}
232	O << " {\n";
233	O << CaseBody;
234	O << " break;\n"
235	<< " }\n";
236	}
237	}
238
239	std::pair<std::string, std::string>
240	CodeEmitterGen::getInstructionCases(const Record *R) {
241	std::string Case, BitOffsetCase;
242
243	auto Append = [&](const std::string &S) {
244	Case += S;
245	BitOffsetCase += S;
246	};
247
248	if (const Record *RV = R->getValueAsOptionalDef(FieldName: "EncodingInfos")) {
249	EncodingInfoByHwMode EBM(RV, CGH);
250
251	// Invoke the interface to obtain the HwMode ID controlling the
252	// EncodingInfo for the current subtarget. This interface will
253	// mask off irrelevant HwMode IDs.
254	Append (" unsigned HwMode = "
255	"STI.getHwMode(MCSubtargetInfo::HwMode_EncodingInfo);\n");
256	Case += " switch (HwMode) {\n";
257	Case += " default: llvm_unreachable(\"Unknown hardware mode!\"); "
258	"break;\n";
259	for (auto &[ModeId, Encoding] : EBM) {
260	if (ModeId == DefaultMode) {
261	Case +=
262	" case " + itostr(X: DefaultMode) + ": InstBitsByHw = InstBits";
263	} else {
264	Case += " case " + itostr(X: ModeId) + ": InstBitsByHw = InstBits_" +
265	CGH.getMode(Id: ModeId).Name.str();
266	}
267	Case += "; break;\n";
268	}
269	Case += " };\n";
270
271	// We need to remodify the 'Inst' value from the table we found above.
272	if (UseAPInt) {
273	int NumWords = APInt::getNumWords(BitWidth);
274	Case += " Inst = APInt(" + itostr(X: BitWidth);
275	Case += ", ArrayRef(InstBitsByHw + TableIndex * " + itostr(X: NumWords) +
276	", " + itostr(X: NumWords);
277	Case += "));\n";
278	Case += " Value = Inst;\n";
279	} else {
280	Case += " Value = InstBitsByHw[TableIndex];\n";
281	}
282
283	Append (" switch (HwMode) {\n");
284	Append (" default: llvm_unreachable(\"Unhandled HwMode\");\n");
285
286	// Attempt to unique the per-hw-mode encoding case statements. This helps
287	// reduce the code size if 2 or more hw-modes share the same encoding for
288	// the fields of the instruction.
289	CaseMapT CaseMap, BitOffsetCaseMap;
290	std::string ModeCase, ModeBitOffsetCase;
291
292	auto PrintHWMode = [](raw_ostream &O, unsigned Mode) { O << Mode; };
293
294	for (auto &[ModeId, Encoding] : EBM) {
295	ModeCase.clear();
296	ModeBitOffsetCase.clear();
297	addInstructionCasesForEncoding(R, EncodingDef: Encoding, Case&: ModeCase, BitOffsetCase&: ModeBitOffsetCase);
298	CaseMap [ModeCase].push_back(x: ModeId);
299	BitOffsetCaseMap [ModeBitOffsetCase].push_back(x: ModeId);
300	}
301
302	raw_string_ostream CaseOS(Case);
303	raw_string_ostream BitOffsetCaseOS(BitOffsetCase);
304	emitCaseMap(O&: CaseOS, CaseMap, PrintCase: PrintHWMode);
305	emitCaseMap(O&: BitOffsetCaseOS, CaseMap: BitOffsetCaseMap, PrintCase: PrintHWMode);
306
307	Append (" }\n");
308	return {std::move(Case), std::move(BitOffsetCase)};
309	}
310	addInstructionCasesForEncoding(R, EncodingDef: R, Case, BitOffsetCase);
311	return {std::move(Case), std::move(BitOffsetCase)};
312	}
313
314	void CodeEmitterGen::addInstructionCasesForEncoding(
315	const Record R, const* Record *EncodingDef, std::string &Case,
316	std::string &BitOffsetCase) {
317	const BitsInit *BI = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
318
319	// Loop over all of the fields in the instruction, determining which are the
320	// operands to the instruction.
321	bool Success = true;
322	size_t OrigBitOffsetCaseSize = BitOffsetCase.size();
323	BitOffsetCase += " switch (OpNum) {\n";
324	size_t BitOffsetCaseSizeBeforeLoop = BitOffsetCase.size();
325	for (const RecordVal &RV : EncodingDef->getValues()) {
326	// Ignore fixed fields in the record, we're looking for values like:
327	// bits<5> RST = { ?, ?, ?, ?, ? };
328	if (RV.isNonconcreteOK() \|\| RV.getValue()->isComplete())
329	continue;
330
331	Success &=
332	addCodeToMergeInOperand(R, BI, VarName: RV.getName().str(), Case, BitOffsetCase);
333	}
334	// Avoid empty switches.
335	if (BitOffsetCase.size() == BitOffsetCaseSizeBeforeLoop)
336	BitOffsetCase.resize(n: OrigBitOffsetCaseSize);
337	else
338	BitOffsetCase += " }\n";
339
340	if (!Success) {
341	// Dump the record, so we can see what's going on...
342	std::string E;
343	raw_string_ostream S(E);
344	S << "Dumping record for previous error:\n";
345	S << *R;
346	PrintNote(Msg: E);
347	}
348
349	StringRef PostEmitter = R->getValueAsString(FieldName: "PostEncoderMethod");
350	if (!PostEmitter.empty()) {
351	Case += " Value = ";
352	Case += PostEmitter;
353	Case += "(MI, Value";
354	Case += ", STI";
355	Case += ");\n";
356	}
357	}
358
359	static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
360	for (unsigned I = `0`; I < Bits.getNumWords(); ++I)
361	OS << ((I > `0`) ? ", " : "") << "UINT64_C(" << Bits.getRawData()[I] << ")";
362	}
363
364	void CodeEmitterGen::emitInstructionBaseValues(
365	raw_ostream &O, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
366	unsigned HwMode) {
367	if (HwMode == DefaultMode)
368	O << " static const uint64_t InstBits[] = {\n";
369	else
370	O << " static const uint64_t InstBits_" << CGH.getModeName(Id: HwMode)
371	<< "[] = {\n";
372
373	for (const CodeGenInstruction *CGI : NumberedInstructions) {
374	const Record *R = CGI->TheDef;
375	const Record *EncodingDef = R;
376	if (const Record *RV = R->getValueAsOptionalDef(FieldName: "EncodingInfos")) {
377	EncodingInfoByHwMode EBM(RV, CGH);
378	if (EBM.hasMode(M: HwMode)) {
379	EncodingDef = EBM.get(Mode: HwMode);
380	} else {
381	// If the HwMode does not match, then Encoding '0'
382	// should be generated.
383	APInt Value(BitWidth, `0`);
384	O << " ";
385	emitInstBits(OS&: O, Bits: Value);
386	O << "," << `'\t'` << "// " << R->getName() << "\n";
387	continue;
388	}
389	}
390	const BitsInit *BI = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
391
392	// Start by filling in fixed values.
393	APInt Value(BitWidth, `0`);
394	for (unsigned I = `0`, E = BI->getNumBits(); I != E; ++I) {
395	if (const auto *B = dyn_cast<BitInit>(Val: BI->getBit(Bit: I)); B && B->getValue())
396	Value.setBit(I);
397	}
398	O << " ";
399	emitInstBits(OS&: O, Bits: Value);
400	O << "," << `'\t'` << "// " << R->getName() << "\n";
401	}
402	O << " };\n";
403	}
404
405	CodeEmitterGen::CodeEmitterGen(const RecordKeeper &RK)
406	: RK(RK), Target (RK), CGH(Target.getHwModes()) {
407	// For little-endian instruction bit encodings, reverse the bit order.
408	Target.reverseBitsForLittleEndianEncoding();
409	}
410
411	void CodeEmitterGen::run(raw_ostream &O) {
412	emitSourceFileHeader(Desc: "Machine Code Emitter", OS&: O);
413
414	ArrayRef<const CodeGenInstruction *> EncodedInstructions =
415	Target.getTargetNonPseudoInstructions();
416
417	if (Target.hasVariableLengthEncodings()) {
418	emitVarLenCodeEmitter(R: RK, OS&: O);
419	return;
420	}
421	// The set of HwModes used by instruction encodings.
422	std::set<unsigned> HwModes;
423	BitWidth = `0`;
424	for (const CodeGenInstruction *CGI : EncodedInstructions) {
425	const Record *R = CGI->TheDef;
426	if (const Record *RV = R->getValueAsOptionalDef(FieldName: "EncodingInfos")) {
427	EncodingInfoByHwMode EBM(RV, CGH);
428	for (const auto &[Key, Value] : EBM) {
429	const BitsInit *BI = Value->getValueAsBitsInit(FieldName: "Inst");
430	BitWidth = std::max(a: BitWidth, b: BI->getNumBits());
431	HwModes.insert(x: Key);
432	}
433	continue;
434	}
435	const BitsInit *BI = R->getValueAsBitsInit(FieldName: "Inst");
436	BitWidth = std::max(a: BitWidth, b: BI->getNumBits());
437	}
438	UseAPInt = BitWidth > `64`;
439
440	// Emit function declaration
441	if (UseAPInt) {
442	O << "void " << Target.getName()
443	<< "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
444	<< " SmallVectorImpl<MCFixup> &Fixups,\n"
445	<< " APInt &Inst,\n"
446	<< " APInt &Scratch,\n"
447	<< " const MCSubtargetInfo &STI) const {\n";
448	} else {
449	O << "uint64_t " << Target.getName();
450	O << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
451	<< " SmallVectorImpl<MCFixup> &Fixups,\n"
452	<< " const MCSubtargetInfo &STI) const {\n";
453	}
454
455	// Emit instruction base values
456	emitInstructionBaseValues(O, NumberedInstructions: EncodedInstructions, HwMode: DefaultMode);
457	if (!HwModes.empty()) {
458	// Emit table for instrs whose encodings are controlled by HwModes.
459	for (unsigned HwMode : HwModes) {
460	if (HwMode == DefaultMode)
461	continue;
462	emitInstructionBaseValues(O, NumberedInstructions: EncodedInstructions, HwMode);
463	}
464
465	// This pointer will be assigned to the HwMode table later.
466	O << " const uint64_t *InstBitsByHw;\n";
467	}
468
469	// Map to accumulate all the cases.
470	CaseMapT CaseMap, BitOffsetCaseMap;
471
472	// Construct all cases statement for each opcode
473	for (auto [Index, CGI] : enumerate(First&: EncodedInstructions)) {
474	const Record *R = CGI->TheDef;
475	auto [Case, BitOffsetCase] = getInstructionCases(R);
476
477	CaseMap [Case].push_back(x: Index);
478	BitOffsetCaseMap [BitOffsetCase].push_back(x: Index);
479	}
480
481	auto PrintInstName = [&](raw_ostream &OS, unsigned Index) {
482	const CodeGenInstruction *CGI = EncodedInstructions [Index];
483	const Record *R = CGI->TheDef;
484	OS << R->getValueAsString(FieldName: "Namespace") << "::" << R->getName();
485	};
486
487	unsigned FirstSupportedOpcode = EncodedInstructions.front()->EnumVal;
488	O << " constexpr unsigned FirstSupportedOpcode = " << FirstSupportedOpcode
489	<< ";\n";
490	O << R"(
491	const unsigned opcode = MI.getOpcode();
492	if (opcode < FirstSupportedOpcode)
493	reportUnsupportedInst(MI);
494	unsigned TableIndex = opcode - FirstSupportedOpcode;
495	)";
496
497	// Emit initial function code
498	if (UseAPInt) {
499	int NumWords = APInt::getNumWords(BitWidth);
500	O << " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
501	<< " Scratch = Scratch.zext(" << BitWidth << ");\n"
502	<< " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + TableIndex * "
503	<< NumWords << ", " << NumWords << "));\n"
504	<< " APInt &Value = Inst;\n"
505	<< " APInt &op = Scratch;\n"
506	<< " switch (opcode) {\n";
507	} else {
508	O << " uint64_t Value = InstBits[TableIndex];\n"
509	<< " uint64_t op = 0;\n"
510	<< " (void)op; // suppress warning\n"
511	<< " switch (opcode) {\n";
512	}
513
514	// Emit each case statement
515	emitCaseMap(O, CaseMap, PrintCase: PrintInstName);
516
517	// Default case: unhandled opcode.
518	O << " default:\n"
519	<< " reportUnsupportedInst(MI);\n"
520	<< " }\n";
521	if (UseAPInt)
522	O << " Inst = Value;\n";
523	else
524	O << " return Value;\n";
525	O << "}\n\n";
526
527	IfDefEmitter IfDef(O, "GET_OPERAND_BIT_OFFSET");
528	O << "uint32_t " << Target.getName()
529	<< "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"
530	<< " unsigned OpNum,\n"
531	<< " const MCSubtargetInfo &STI) const {\n"
532	<< " switch (MI.getOpcode()) {\n";
533	emitCaseMap(O, CaseMap: BitOffsetCaseMap, PrintCase: PrintInstName);
534	O << " default:\n"
535	<< " reportUnsupportedInst(MI);\n"
536	<< " }\n"
537	<< " reportUnsupportedOperand(MI, OpNum);\n"
538	<< "}\n";
539	}
540
541	static TableGen::Emitter::OptClass<CodeEmitterGen>
542	X("gen-emitter", "Generate machine code emitter");
543

Browse the source code of llvm_projects/llvm/utils/TableGen/CodeEmitterGen.cpp