//===------------------------- LSUnit.h --------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// A Load/Store unit class that models load/store queues and that implements
/// a simple weak memory consistency model.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_MCA_HARDWAREUNITS_LSUNIT_H
#define LLVM_MCA_HARDWAREUNITS_LSUNIT_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
#include "llvm/MCA/Instruction.h"
#include "llvm/Support/Compiler.h"

namespace llvm {
namespace mca {

/// Abstract base interface for LS (load/store) units in llvm-mca.
class LLVM_ABI LSUnitBase : public HardwareUnit {
  /// Load queue size.
  ///
  /// A value of zero for this field means that the load queue is unbounded.
  /// Processor models can declare the size of a load queue via tablegen (see
  /// the definition of tablegen class LoadQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned LQSize;

  /// Store queue size.
  ///
  /// A value of zero for this field means that the store queue is unbounded.
  /// Processor models can declare the size of a store queue via tablegen (see
  /// the definition of tablegen class StoreQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned SQSize;

  unsigned UsedLQEntries;
  unsigned UsedSQEntries;

  /// True if loads don't alias with stores.
  ///
  /// By default, the LS unit assumes that loads and stores don't alias with
  /// each other. If this field is set to false, then loads are always assumed
  /// to alias with stores.
  const bool NoAlias;

public:
  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
             unsigned StoreQueueSize, bool AssumeNoAlias);

  ~LSUnitBase() override;

  /// Returns the total number of entries in the load queue.
  unsigned getLoadQueueSize() const { return LQSize; }

  /// Returns the total number of entries in the store queue.
  unsigned getStoreQueueSize() const { return SQSize; }

  unsigned getUsedLQEntries() const { return UsedLQEntries; }
  unsigned getUsedSQEntries() const { return UsedSQEntries; }
  void acquireLQSlot() { ++UsedLQEntries; }
  void acquireSQSlot() { ++UsedSQEntries; }
  void releaseLQSlot() { --UsedLQEntries; }
  void releaseSQSlot() { --UsedSQEntries; }

  bool assumeNoAlias() const { return NoAlias; }

  enum Status {
    LSU_AVAILABLE = 0,
    LSU_LQUEUE_FULL, // Load Queue unavailable
    LSU_SQUEUE_FULL  // Store Queue unavailable
  };

  /// This method checks the availability of the load/store buffers.
  ///
  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR. By default, LSU_AVAILABLE is returned if IR
  /// is not a memory operation.
  virtual Status isAvailable(const InstRef &IR) const = 0;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// with a LSUnitBase::Status value of LSU_AVAILABLE.
  /// Returns the GroupID associated with this instruction. That value will be
  /// used to set the LSUTokenID field in class Instruction.
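  ///
  /// A minimal usage sketch (the surrounding dispatch logic is hypothetical;
  /// only isAvailable() and dispatch() belong to this interface, and
  /// Instruction::setLSUTokenID() is assumed to be the setter for that field):
  ///
  /// \code
  ///   if (LSU.isAvailable(IR) == LSUnitBase::LSU_AVAILABLE) {
  ///     unsigned TokenID = LSU.dispatch(IR);
  ///     IR.getInstruction()->setLSUTokenID(TokenID);
  ///   }
  /// \endcode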
  virtual unsigned dispatch(const InstRef &IR) = 0;

  bool isSQEmpty() const { return !UsedSQEntries; }
  bool isLQEmpty() const { return !UsedLQEntries; }
  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }

  /// Check if a previously dispatched instruction IR is now ready for
  /// execution.
  virtual bool isReady(const InstRef &IR) const = 0;

  /// Check if instruction IR only depends on memory instructions that are
  /// currently executing.
  virtual bool isPending(const InstRef &IR) const = 0;

  /// Check if instruction IR is still waiting on memory operations, and the
  /// wait time is still unknown.
  virtual bool isWaiting(const InstRef &IR) const = 0;

  virtual bool hasDependentUsers(const InstRef &IR) const = 0;

  virtual const CriticalDependency getCriticalPredecessor(unsigned GroupId) = 0;

  virtual void onInstructionExecuted(const InstRef &IR) = 0;

  // Loads are tracked by the LDQ (load queue) from dispatch until completion.
  // Stores are tracked by the STQ (store queue) from dispatch until commitment.
  // By default we conservatively assume that the LDQ receives a load at
  // dispatch. Loads leave the LDQ at retirement stage.
  virtual void onInstructionRetired(const InstRef &IR) = 0;

  virtual void onInstructionIssued(const InstRef &IR) = 0;

  virtual void cycleEvent() = 0;

#ifndef NDEBUG
  virtual void dump() const = 0;
#endif
};

/// Default Load/Store Unit (LS Unit) for simulated processors.
///
/// Each load (or store) consumes one entry in the load (or store) queue.
///
/// Rules are:
/// 1) A younger load is allowed to pass an older load only if there are no
///    stores nor barriers in between the two loads.
/// 2) A younger store is not allowed to pass an older store.
/// 3) A younger store is not allowed to pass an older load.
/// 4) A younger load is allowed to pass an older store only if the load does
///    not alias with the store.
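///
/// For example, consider the following hypothetical sequence of memory
/// operations (plain C++ used purely for illustration):
///
/// \code
///   int X = *A; // (1) older load
///   *B = Y;     // (2) store
///   int Z = *C; // (3) younger load
/// \endcode
///
/// Rule 3 prevents (2) from passing (1). Rule 4 decides whether (3) may pass
/// (2): under the default no-alias assumption it may; otherwise it has to
/// wait until (2) has executed.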
///
/// This class optimistically assumes that loads don't alias store operations.
/// Under this assumption, younger loads are always allowed to pass older
/// stores (this only affects rule 4).
/// Essentially, this class doesn't perform any sort of alias analysis to
/// identify aliasing loads and stores.
///
/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
/// set to `false` by the constructor of LSUnit.
///
/// Note that this class doesn't know about the existence of different memory
/// types for memory operations (example: write-through, write-combining, etc.).
/// Derived classes are responsible for implementing that extra knowledge, and
/// provide different sets of rules for loads and stores by overriding method
/// `isReady()`.
/// To emulate a write-combining memory type, rule 2 must be relaxed in a
/// derived class to enable the reordering of non-aliasing store operations.
///
/// No assumptions are made by this class on the size of the store buffer. This
/// class doesn't know how to identify cases where store-to-load forwarding may
/// occur.
///
/// LSUnit doesn't attempt to predict whether a load or store hits or misses
/// the L1 cache. To be more specific, LSUnit doesn't know anything about
/// cache hierarchy and memory types.
/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
/// scheduling model provides an "optimistic" load-to-use latency (which usually
/// matches the load-to-use latency for when there is a hit in the L1D).
/// Derived classes may expand this knowledge.
///
/// Class MCInstrDesc in LLVM doesn't know about serializing operations or
/// memory-barrier-like instructions.
/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
/// serializes loads without forcing a flush of the load queue.
/// Similarly, instructions that both `mayStore` and have `unmodeled side
/// effects` are treated like store barriers. A full memory barrier is a
/// 'mayLoad' and 'mayStore' instruction with unmodeled side effects. This is
/// obviously inaccurate, but this is the best that we can do at the moment.
///
/// Each load/store barrier consumes one entry in the load/store queue. A
/// load/store barrier enforces ordering of loads/stores:
/// - A younger load cannot pass a load barrier.
/// - A younger store cannot pass a store barrier.
///
/// A younger load has to wait for the memory load barrier to execute.
/// A load/store barrier is "executed" when it becomes the oldest entry in
/// the load/store queue(s). That also means that all the older loads/stores
/// have already been executed.
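///
/// For example, given the hypothetical sequence "load A; load-barrier;
/// load B", 'load B' cannot start executing until the barrier has become the
/// oldest entry in the load queue, which in turn requires 'load A' to have
/// already executed.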
class LLVM_ABI LSUnit : public LSUnitBase {

  // This class doesn't know about the latency of a load instruction. So, it
  // conservatively/pessimistically assumes that the latency of a load opcode
  // matches the instruction latency.
  //
  // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
  // and load/store conflicts, the latency of a load is determined by the depth
  // of the load pipeline. So, we could use field `LoadLatency` in the
  // MCSchedModel to model that latency.
  // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
  // L1D, and it usually already accounts for any extra latency due to data
  // forwarding.
  // When doing throughput analysis, `LoadLatency` is likely to be a better
  // predictor of load latency than instruction latency. This is particularly
  // true when simulating code with temporal/spatial locality of memory
  // accesses.
  // Using `LoadLatency` (instead of the instruction latency) is also expected
  // to improve the load queue allocation for long latency instructions with
  // folded memory operands (See PR39829).
  //
  // FIXME: On some processors, load/store operations are split into multiple
  // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
  // not 256-bit data types. So, a 256-bit load is effectively split into two
  // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
  // simplicity, this class optimistically assumes that a load instruction only
  // consumes one entry in the LoadQueue. Similarly, store instructions only
  // consume a single entry in the StoreQueue.
  // In the future, we should reassess the quality of this design, and consider
  // alternative approaches that let instructions specify the number of
  // load/store queue entries which they consume at dispatch stage (See
  // PR39830).
  //
  // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a store barrier. It forces older stores to be
  // executed before newer stores are issued.
  //
  // An instruction that both 'mayLoad' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a load barrier. It forces older loads to execute
  // before newer loads are issued.

protected:
  /// A node of a memory dependency graph. A MemoryGroup describes a set of
  /// instructions with the same memory dependencies.
  ///
  /// By construction, instructions of a MemoryGroup don't depend on each other.
  /// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
  /// A MemoryGroup identifier is then stored as a "token" in field
  /// Instruction::LSUTokenID of each dispatched instruction. That token is
  /// used internally by the LSUnit to track memory dependencies.
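  ///
  /// For example (a hedged sketch of the intended flow): two back-to-back
  /// loads with no intervening store may be mapped to the same MemoryGroup.
  /// Both instructions then carry the same LSUTokenID, and both become ready
  /// once all of the group's predecessors (if any) have fully executed.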
  class MemoryGroup {
    unsigned NumPredecessors = 0;
    unsigned NumExecutingPredecessors = 0;
    unsigned NumExecutedPredecessors = 0;

    unsigned NumInstructions = 0;
    unsigned NumExecuting = 0;
    unsigned NumExecuted = 0;
    // Successors that are in an order dependency with this group.
    SmallVector<MemoryGroup *, 4> OrderSucc;
    // Successors that are in a data dependency with this group.
    SmallVector<MemoryGroup *, 4> DataSucc;

    CriticalDependency CriticalPredecessor;
    InstRef CriticalMemoryInstruction;

    MemoryGroup(const MemoryGroup &) = delete;
    MemoryGroup &operator=(const MemoryGroup &) = delete;

  public:
    MemoryGroup() = default;
    MemoryGroup(MemoryGroup &&) = default;

    size_t getNumSuccessors() const {
      return OrderSucc.size() + DataSucc.size();
    }
    unsigned getNumPredecessors() const { return NumPredecessors; }
    unsigned getNumExecutingPredecessors() const {
      return NumExecutingPredecessors;
    }
    unsigned getNumExecutedPredecessors() const {
      return NumExecutedPredecessors;
    }
    unsigned getNumInstructions() const { return NumInstructions; }
    unsigned getNumExecuting() const { return NumExecuting; }
    unsigned getNumExecuted() const { return NumExecuted; }

    const InstRef &getCriticalMemoryInstruction() const {
      return CriticalMemoryInstruction;
    }
    const CriticalDependency &getCriticalPredecessor() const {
      return CriticalPredecessor;
    }

    void addSuccessor(MemoryGroup *Group, bool IsDataDependent) {
      // Do not need to add a dependency if there is no data
      // dependency and all instructions from this group have been
      // issued already.
      if (!IsDataDependent && isExecuting())
        return;

      Group->NumPredecessors++;
      assert(!isExecuted() && "Should have been removed!");
      if (isExecuting())
        Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent);

      if (IsDataDependent)
        DataSucc.emplace_back(Group);
      else
        OrderSucc.emplace_back(Group);
    }
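
    // A note on the state predicates below (a summary inferred from the
    // counters above, not a separate contract): a group starts out "waiting"
    // on its predecessors, becomes "pending" once every predecessor is at
    // least executing, and "ready" once all predecessors have fully executed.
    // Whether the group itself is "executing" or "executed" depends on the
    // progress of its own instructions.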
    bool isWaiting() const {
      return NumPredecessors >
             (NumExecutingPredecessors + NumExecutedPredecessors);
    }
    bool isPending() const {
      return NumExecutingPredecessors &&
             ((NumExecutedPredecessors + NumExecutingPredecessors) ==
              NumPredecessors);
    }
    bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
    bool isExecuting() const {
      return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
    }
    bool isExecuted() const { return NumInstructions == NumExecuted; }

    void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) {
      assert(!isReady() && "Unexpected group-start event!");
      NumExecutingPredecessors++;

      if (!ShouldUpdateCriticalDep)
        return;

      unsigned Cycles = IR.getInstruction()->getCyclesLeft();
      if (CriticalPredecessor.Cycles < Cycles) {
        CriticalPredecessor.IID = IR.getSourceIndex();
        CriticalPredecessor.Cycles = Cycles;
      }
    }

    void onGroupExecuted() {
      assert(!isReady() && "Inconsistent state found!");
      NumExecutingPredecessors--;
      NumExecutedPredecessors++;
    }

    void onInstructionIssued(const InstRef &IR) {
      assert(!isExecuting() && "Invalid internal state!");
      ++NumExecuting;

      // Update the CriticalMemoryInstruction.
      const Instruction &IS = *IR.getInstruction();
      if ((bool)CriticalMemoryInstruction) {
        const Instruction &OtherIS =
            *CriticalMemoryInstruction.getInstruction();
        if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
          CriticalMemoryInstruction = IR;
      } else {
        CriticalMemoryInstruction = IR;
      }

      if (!isExecuting())
        return;

      // Notify successors that this group started execution.
      for (MemoryGroup *MG : OrderSucc) {
        MG->onGroupIssued(CriticalMemoryInstruction, false);
        // Release the order dependency with this group.
        MG->onGroupExecuted();
      }

      for (MemoryGroup *MG : DataSucc)
        MG->onGroupIssued(CriticalMemoryInstruction, true);
    }

    void onInstructionExecuted(const InstRef &IR) {
      assert(isReady() && !isExecuted() && "Invalid internal state!");
      --NumExecuting;
      ++NumExecuted;

      if (CriticalMemoryInstruction &&
          CriticalMemoryInstruction.getSourceIndex() == IR.getSourceIndex()) {
        CriticalMemoryInstruction.invalidate();
      }

      if (!isExecuted())
        return;

      // Notify data dependent successors that this group has finished
      // execution.
      for (MemoryGroup *MG : DataSucc)
        MG->onGroupExecuted();
    }

    void addInstruction() {
      assert(!getNumSuccessors() && "Cannot add instructions to this group!");
      ++NumInstructions;
    }

    void cycleEvent() {
      if (isWaiting() && CriticalPredecessor.Cycles)
        CriticalPredecessor.Cycles--;
    }
  };

  /// Used to map group identifiers to MemoryGroups.
  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
  unsigned NextGroupID = 1;

  unsigned CurrentLoadGroupID;
  unsigned CurrentLoadBarrierGroupID;
  unsigned CurrentStoreGroupID;
  unsigned CurrentStoreBarrierGroupID;

public:
  LSUnit(const MCSchedModel &SM)
      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
        CurrentStoreBarrierGroupID(0) {}
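
  // Example construction (hypothetical queue sizes; in practice these values
  // normally come from the processor's scheduling model):
  //
  //   LSUnit LSU(SM, /* LQ */ 72, /* SQ */ 56, /* AssumeNoAlias */ true);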

  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR.
  Status isAvailable(const InstRef &IR) const override;

  bool isReady(const InstRef &IR) const override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isReady();
  }

  bool isPending(const InstRef &IR) const override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isPending();
  }

  bool isWaiting(const InstRef &IR) const override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isWaiting();
  }

  bool hasDependentUsers(const InstRef &IR) const override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return !Group.isExecuted() && Group.getNumSuccessors();
  }

  const CriticalDependency getCriticalPredecessor(unsigned GroupId) override {
    const MemoryGroup &Group = getGroup(GroupId);
    return Group.getCriticalPredecessor();
  }

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// returning LSU_AVAILABLE.
  ///
  /// By default, rules are:
  /// 1. A store may not pass a previous store.
  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
  /// 3. A load may pass a previous load.
  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
  /// 5. A load has to wait until an older load barrier is fully executed.
  /// 6. A store has to wait until an older store barrier is fully executed.
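  ///
  /// For example, in the hypothetical sequence "load A; store B; load C",
  /// rule 4 forces 'store B' to wait for 'load A', while rule 2 decides
  /// whether 'load C' must also wait for 'store B' (it must, unless flag
  /// 'NoAlias' is set).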
  unsigned dispatch(const InstRef &IR) override;

  void onInstructionIssued(const InstRef &IR) override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    Groups[GroupID]->onInstructionIssued(IR);
  }

  void onInstructionRetired(const InstRef &IR) override;

  void onInstructionExecuted(const InstRef &IR) override;

  void cycleEvent() override;

#ifndef NDEBUG
  void dump() const override;
#endif

private:
  bool isValidGroupID(unsigned Index) const {
    return Index && Groups.contains(Index);
  }

  const MemoryGroup &getGroup(unsigned Index) const {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  MemoryGroup &getGroup(unsigned Index) {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  unsigned createMemoryGroup() {
    Groups.insert(std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
    return NextGroupID++;
  }
};

} // namespace mca
} // namespace llvm

#endif // LLVM_MCA_HARDWAREUNITS_LSUNIT_H