//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im, unsigned cl)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true), CallLatency(cl) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
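  // For example (hypothetical model): if a divider unit is declared with
  // "let Super = P0" and a write consumes that divider for 3 cycles, then
  // SuperResources[mask(P0)] accumulates those 3 cycles; they are later
  // excluded when subtracting P0's contribution from groups that contain it.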
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

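  // An instruction must issue immediately (i.e. it cannot sit in a scheduler
  // buffer) if every consumed resource is in-order (buffer size of at most 1)
  // and at least one of them is unbuffered (buffer size 0, a dispatch hazard).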
  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
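  // For example (hypothetical masks): a single unit 0b0001 sorts before the
  // two-unit group 0b1011 (group bit 0b1000 plus units 0b0011), which in turn
  // sorts before any group with more units.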
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= llvm::bit_floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= llvm::bit_floor(NormalizedMask);
      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ReleaseAtCycles = [2, 2, 3];
  //  }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
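  // In the example above, the unit count of HWPort01 is bumped once for each
  // of HWPort0 and HWPort1 by the subtraction loop. Once that count exceeds
  // the number of units in the group, the group is marked as reserved for the
  // remaining cycles.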
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
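  // For example, if this write consumed a sub-resource whose "Super" is P0,
  // then any buffered resource whose mask fully contains P0's mask (such as a
  // group P01) also has its buffer marked as used.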
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI,
                              unsigned CallLatency) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency.
    ID.MaxLatency = CallLatency;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume the MaxLatency set for
  // calls.
  ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>     (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means, register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register or constant register
  // operands. The first NumExplicitDefs register operands are expected to be
  // register definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }
    if (MRI.isConstant(Op.getReg())) {
      CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    if (MRI.isConstant(Read.RegisterID))
      continue;
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

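// Operands are hashed by kind; register operands additionally contribute the
// register number. Note that immediate values do not contribute to the hash,
// so two instructions that only differ in an immediate produce the same hash.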
hash_code hashMCOperand(const MCOperand &MCO) {
  hash_code TypeHash = hash_combine(MCO.isReg(), MCO.isImm(), MCO.isSFPImm(),
                                    MCO.isDFPImm(), MCO.isExpr(), MCO.isInst());
  if (MCO.isReg())
    return hash_combine(TypeHash, MCO.getReg());

  return TypeHash;
}

hash_code hashMCInst(const MCInst &MCI) {
  hash_code InstructionHash = hash_combine(MCI.getOpcode(), MCI.getFlags());
  for (unsigned I = 0; I < MCI.getNumOperands(); ++I) {
    InstructionHash =
        hash_combine(InstructionHash, hashMCOperand(MCI.getOperand(I)));
  }
  return InstructionHash;
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<unsigned> InstrBuilder::getVariantSchedClassID(const MCInst &MCI,
                                                        unsigned SchedClassID) {
  const MCSchedModel &SM = STI.getSchedModel();
  unsigned CPUID = SM.getProcessorID();
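  // A variant scheduling class may resolve to another variant; keep resolving
  // until a non-variant class is reached. A resolved class ID of zero means
  // that resolution failed, and an error is reported below.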
  while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
    SchedClassID =
        STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

  if (!SchedClassID) {
    return make_error<InstructionError<MCInst>>(
        "unable to resolve scheduling class for write variant.", MCI);
  }

  return SchedClassID;
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    Expected<unsigned> VariantSchedClassIDOrErr =
        getVariantSchedClassID(MCI, SchedClassID);
    if (!VariantSchedClassIDOrErr) {
      return VariantSchedClassIDOrErr.takeError();
    }

    SchedClassID = *VariantSchedClassIDOrErr;
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence", MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of " << CallLatency << "cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
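  // Descriptors of non-variant, non-variadic opcodes only depend on the pair
  // (opcode, scheduling class), so they can be cached and recycled. Variant or
  // variadic instructions depend on their specific operands, so they are keyed
  // by a hash of the whole MCInst instead.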
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  assert(
      !VariantDescriptors.contains(VDKey) &&
      "Expected VariantDescriptors to not already have a value for this key.");
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using SchedClassID from Instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  Expected<unsigned> VariantSchedClassIDOrErr =
      getVariantSchedClassID(MCI, SchedClassID);
  if (!VariantSchedClassIDOrErr) {
    return VariantSchedClassIDOrErr.takeError();
  }

  SchedClassID = *VariantSchedClassIDOrErr;

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  if (VariantDescriptors.contains(VDKey))
    return *VariantDescriptors[VDKey];

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
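  // A zero idiom (for example, a register XOR'd with itself on X86) always
  // produces zero regardless of its inputs. A dependency breaking instruction
  // does not have to wait on some (or all) of its input registers; `Mask`
  // describes which input operands are independent.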
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
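  // For example, on X86-64 a write to a 32-bit GPR such as EAX implicitly
  // zeroes the upper half of the parent 64-bit register (RAX).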
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg or a
    // write to a constant register.
    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm