1 | //===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // A scheduler for processor resource units and processor resource groups. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/MCA/HardwareUnits/Scheduler.h" |
14 | #include "llvm/Support/Debug.h" |
15 | #include "llvm/Support/raw_ostream.h" |
16 | |
17 | namespace llvm { |
18 | namespace mca { |
19 | |
20 | #define DEBUG_TYPE "llvm-mca" |
21 | |
22 | void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) { |
23 | // Ensure we have a valid (non-null) strategy object. |
24 | Strategy = S ? std::move(S) : std::make_unique<DefaultSchedulerStrategy>(); |
25 | } |
26 | |
27 | // Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. |
28 | SchedulerStrategy::~SchedulerStrategy() = default; |
29 | DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; |
30 | |
31 | #ifndef NDEBUG |
32 | void Scheduler::dump() const { |
33 | dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; |
34 | dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; |
35 | dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; |
36 | Resources->dump(); |
37 | } |
38 | #endif |
39 | |
40 | Scheduler::Status Scheduler::isAvailable(const InstRef &IR) { |
41 | ResourceStateEvent RSE = |
42 | Resources->canBeDispatched(ConsumedBuffers: IR.getInstruction()->getUsedBuffers()); |
43 | HadTokenStall = RSE != RS_BUFFER_AVAILABLE; |
44 | |
45 | switch (RSE) { |
46 | case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: |
47 | return Scheduler::SC_BUFFERS_FULL; |
48 | case ResourceStateEvent::RS_RESERVED: |
49 | return Scheduler::SC_DISPATCH_GROUP_STALL; |
50 | case ResourceStateEvent::RS_BUFFER_AVAILABLE: |
51 | break; |
52 | } |
53 | |
54 | // Give lower priority to LSUnit stall events. |
55 | LSUnit::Status LSS = LSU.isAvailable(IR); |
56 | HadTokenStall = LSS != LSUnit::LSU_AVAILABLE; |
57 | |
58 | switch (LSS) { |
59 | case LSUnit::LSU_LQUEUE_FULL: |
60 | return Scheduler::SC_LOAD_QUEUE_FULL; |
61 | case LSUnit::LSU_SQUEUE_FULL: |
62 | return Scheduler::SC_STORE_QUEUE_FULL; |
63 | case LSUnit::LSU_AVAILABLE: |
64 | return Scheduler::SC_AVAILABLE; |
65 | } |
66 | |
67 | llvm_unreachable("Don't know how to process this LSU state result!" ); |
68 | } |
69 | |
70 | void Scheduler::issueInstructionImpl( |
71 | InstRef &IR, |
72 | SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &UsedResources) { |
73 | Instruction *IS = IR.getInstruction(); |
74 | const InstrDesc &D = IS->getDesc(); |
75 | |
76 | // Issue the instruction and collect all the consumed resources |
77 | // into a vector. That vector is then used to notify the listener. |
78 | Resources->issueInstruction(Desc: D, Pipes&: UsedResources); |
79 | |
80 | // Notify the instruction that it started executing. |
81 | // This updates the internal state of each write. |
82 | IS->execute(IID: IR.getSourceIndex()); |
83 | |
84 | IS->computeCriticalRegDep(); |
85 | |
86 | if (IS->isMemOp()) { |
87 | LSU.onInstructionIssued(IR); |
88 | const MemoryGroup &Group = LSU.getGroup(Index: IS->getLSUTokenID()); |
89 | IS->setCriticalMemDep(Group.getCriticalPredecessor()); |
90 | } |
91 | |
92 | if (IS->isExecuting()) |
93 | IssuedSet.emplace_back(args&: IR); |
94 | else if (IS->isExecuted()) |
95 | LSU.onInstructionExecuted(IR); |
96 | } |
97 | |
98 | // Release the buffered resources and issue the instruction. |
99 | void Scheduler::issueInstruction( |
100 | InstRef &IR, |
101 | SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &UsedResources, |
102 | SmallVectorImpl<InstRef> &PendingInstructions, |
103 | SmallVectorImpl<InstRef> &ReadyInstructions) { |
104 | const Instruction &Inst = *IR.getInstruction(); |
105 | bool HasDependentUsers = Inst.hasDependentUsers(); |
106 | HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR); |
107 | |
108 | Resources->releaseBuffers(ConsumedBuffers: Inst.getUsedBuffers()); |
109 | issueInstructionImpl(IR, UsedResources); |
110 | // Instructions that have been issued during this cycle might have unblocked |
111 | // other dependent instructions. Dependent instructions may be issued during |
112 | // this same cycle if operands have ReadAdvance entries. Promote those |
113 | // instructions to the ReadySet and notify the caller that those are ready. |
114 | if (HasDependentUsers) |
115 | if (promoteToPendingSet(Pending&: PendingInstructions)) |
116 | promoteToReadySet(Ready&: ReadyInstructions); |
117 | } |
118 | |
119 | bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) { |
120 | // Scan the set of waiting instructions and promote them to the |
121 | // ready set if operands are all ready. |
122 | unsigned PromotedElements = 0; |
123 | for (auto I = PendingSet.begin(), E = PendingSet.end(); I != E;) { |
124 | InstRef &IR = *I; |
125 | if (!IR) |
126 | break; |
127 | |
128 | // Check if there are unsolved register dependencies. |
129 | Instruction &IS = *IR.getInstruction(); |
130 | if (!IS.isReady() && !IS.updatePending()) { |
131 | ++I; |
132 | continue; |
133 | } |
134 | // Check if there are unsolved memory dependencies. |
135 | if (IS.isMemOp() && !LSU.isReady(IR)) { |
136 | ++I; |
137 | continue; |
138 | } |
139 | |
140 | LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR |
141 | << " promoted to the READY set.\n" ); |
142 | |
143 | Ready.emplace_back(Args&: IR); |
144 | ReadySet.emplace_back(args&: IR); |
145 | |
146 | IR.invalidate(); |
147 | ++PromotedElements; |
148 | std::iter_swap(a: I, b: E - PromotedElements); |
149 | } |
150 | |
151 | PendingSet.resize(new_size: PendingSet.size() - PromotedElements); |
152 | return PromotedElements; |
153 | } |
154 | |
155 | bool Scheduler::promoteToPendingSet(SmallVectorImpl<InstRef> &Pending) { |
156 | // Scan the set of waiting instructions and promote them to the |
157 | // pending set if operands are all ready. |
158 | unsigned RemovedElements = 0; |
159 | for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { |
160 | InstRef &IR = *I; |
161 | if (!IR) |
162 | break; |
163 | |
164 | // Check if this instruction is now ready. In case, force |
165 | // a transition in state using method 'updateDispatched()'. |
166 | Instruction &IS = *IR.getInstruction(); |
167 | if (IS.isDispatched() && !IS.updateDispatched()) { |
168 | ++I; |
169 | continue; |
170 | } |
171 | |
172 | if (IS.isMemOp() && LSU.isWaiting(IR)) { |
173 | ++I; |
174 | continue; |
175 | } |
176 | |
177 | LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR |
178 | << " promoted to the PENDING set.\n" ); |
179 | |
180 | Pending.emplace_back(Args&: IR); |
181 | PendingSet.emplace_back(args&: IR); |
182 | |
183 | IR.invalidate(); |
184 | ++RemovedElements; |
185 | std::iter_swap(a: I, b: E - RemovedElements); |
186 | } |
187 | |
188 | WaitSet.resize(new_size: WaitSet.size() - RemovedElements); |
189 | return RemovedElements; |
190 | } |
191 | |
192 | InstRef Scheduler::select() { |
193 | unsigned QueueIndex = ReadySet.size(); |
194 | for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { |
195 | InstRef &IR = ReadySet[I]; |
196 | if (QueueIndex == ReadySet.size() || |
197 | Strategy->compare(Lhs: IR, Rhs: ReadySet[QueueIndex])) { |
198 | Instruction &IS = *IR.getInstruction(); |
199 | uint64_t BusyResourceMask = Resources->checkAvailability(Desc: IS.getDesc()); |
200 | if (BusyResourceMask) |
201 | IS.setCriticalResourceMask(BusyResourceMask); |
202 | BusyResourceUnits |= BusyResourceMask; |
203 | if (!BusyResourceMask) |
204 | QueueIndex = I; |
205 | } |
206 | } |
207 | |
208 | if (QueueIndex == ReadySet.size()) |
209 | return InstRef(); |
210 | |
211 | // We found an instruction to issue. |
212 | InstRef IR = ReadySet[QueueIndex]; |
213 | std::swap(a&: ReadySet[QueueIndex], b&: ReadySet[ReadySet.size() - 1]); |
214 | ReadySet.pop_back(); |
215 | return IR; |
216 | } |
217 | |
218 | void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) { |
219 | unsigned RemovedElements = 0; |
220 | for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { |
221 | InstRef &IR = *I; |
222 | if (!IR) |
223 | break; |
224 | Instruction &IS = *IR.getInstruction(); |
225 | if (!IS.isExecuted()) { |
226 | LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR |
227 | << " is still executing.\n" ); |
228 | ++I; |
229 | continue; |
230 | } |
231 | |
232 | // Instruction IR has completed execution. |
233 | LSU.onInstructionExecuted(IR); |
234 | Executed.emplace_back(Args&: IR); |
235 | ++RemovedElements; |
236 | IR.invalidate(); |
237 | std::iter_swap(a: I, b: E - RemovedElements); |
238 | } |
239 | |
240 | IssuedSet.resize(new_size: IssuedSet.size() - RemovedElements); |
241 | } |
242 | |
243 | uint64_t Scheduler::analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts) { |
244 | llvm::append_range(C&: Insts, R&: ReadySet); |
245 | return BusyResourceUnits; |
246 | } |
247 | |
248 | void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps, |
249 | SmallVectorImpl<InstRef> &MemDeps) { |
250 | const auto EndIt = PendingSet.end() - NumDispatchedToThePendingSet; |
251 | for (const InstRef &IR : make_range(x: PendingSet.begin(), y: EndIt)) { |
252 | const Instruction &IS = *IR.getInstruction(); |
253 | if (Resources->checkAvailability(Desc: IS.getDesc())) |
254 | continue; |
255 | |
256 | if (IS.isMemOp() && LSU.isPending(IR)) |
257 | MemDeps.emplace_back(Args: IR); |
258 | |
259 | if (IS.isPending()) |
260 | RegDeps.emplace_back(Args: IR); |
261 | } |
262 | } |
263 | |
264 | void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed, |
265 | SmallVectorImpl<InstRef> &Executed, |
266 | SmallVectorImpl<InstRef> &Pending, |
267 | SmallVectorImpl<InstRef> &Ready) { |
268 | LSU.cycleEvent(); |
269 | |
270 | // Release consumed resources. |
271 | Resources->cycleEvent(ResourcesFreed&: Freed); |
272 | |
273 | for (InstRef &IR : IssuedSet) |
274 | IR.getInstruction()->cycleEvent(); |
275 | updateIssuedSet(Executed); |
276 | |
277 | for (InstRef &IR : PendingSet) |
278 | IR.getInstruction()->cycleEvent(); |
279 | |
280 | for (InstRef &IR : WaitSet) |
281 | IR.getInstruction()->cycleEvent(); |
282 | |
283 | promoteToPendingSet(Pending); |
284 | promoteToReadySet(Ready); |
285 | |
286 | NumDispatchedToThePendingSet = 0; |
287 | BusyResourceUnits = 0; |
288 | } |
289 | |
290 | bool Scheduler::mustIssueImmediately(const InstRef &IR) const { |
291 | const InstrDesc &Desc = IR.getInstruction()->getDesc(); |
292 | if (Desc.isZeroLatency()) |
293 | return true; |
294 | // Instructions that use an in-order dispatch/issue processor resource must be |
295 | // issued immediately to the pipeline(s). Any other in-order buffered |
296 | // resources (i.e. BufferSize=1) is consumed. |
297 | return Desc.MustIssueImmediately; |
298 | } |
299 | |
300 | bool Scheduler::dispatch(InstRef &IR) { |
301 | Instruction &IS = *IR.getInstruction(); |
302 | Resources->reserveBuffers(ConsumedBuffers: IS.getUsedBuffers()); |
303 | |
304 | // If necessary, reserve queue entries in the load-store unit (LSU). |
305 | if (IS.isMemOp()) |
306 | IS.setLSUTokenID(LSU.dispatch(IR)); |
307 | |
308 | if (IS.isDispatched() || (IS.isMemOp() && LSU.isWaiting(IR))) { |
309 | LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n" ); |
310 | WaitSet.push_back(x: IR); |
311 | return false; |
312 | } |
313 | |
314 | if (IS.isPending() || (IS.isMemOp() && LSU.isPending(IR))) { |
315 | LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR |
316 | << " to the PendingSet\n" ); |
317 | PendingSet.push_back(x: IR); |
318 | ++NumDispatchedToThePendingSet; |
319 | return false; |
320 | } |
321 | |
322 | assert(IS.isReady() && (!IS.isMemOp() || LSU.isReady(IR)) && |
323 | "Unexpected internal state found!" ); |
324 | // Don't add a zero-latency instruction to the Ready queue. |
325 | // A zero-latency instruction doesn't consume any scheduler resources. That is |
326 | // because it doesn't need to be executed, and it is often removed at register |
327 | // renaming stage. For example, register-register moves are often optimized at |
328 | // register renaming stage by simply updating register aliases. On some |
329 | // targets, zero-idiom instructions (for example: a xor that clears the value |
330 | // of a register) are treated specially, and are often eliminated at register |
331 | // renaming stage. |
332 | if (!mustIssueImmediately(IR)) { |
333 | LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n" ); |
334 | ReadySet.push_back(x: IR); |
335 | } |
336 | |
337 | return true; |
338 | } |
339 | |
340 | } // namespace mca |
341 | } // namespace llvm |
342 | |