1 | //===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // A scheduler for processor resource units and processor resource groups. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/MCA/HardwareUnits/Scheduler.h" |
14 | #include "llvm/Support/Debug.h" |
15 | #include "llvm/Support/raw_ostream.h" |
16 | |
17 | namespace llvm { |
18 | namespace mca { |
19 | |
20 | #define DEBUG_TYPE "llvm-mca" |
21 | |
22 | void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) { |
23 | // Ensure we have a valid (non-null) strategy object. |
24 | Strategy = S ? std::move(S) : std::make_unique<DefaultSchedulerStrategy>(); |
25 | } |
26 | |
27 | // Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. |
28 | SchedulerStrategy::~SchedulerStrategy() = default; |
29 | DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; |
30 | |
31 | #ifndef NDEBUG |
32 | void Scheduler::dump() const { |
33 | dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; |
34 | dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; |
35 | dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; |
36 | Resources->dump(); |
37 | } |
38 | #endif |
39 | |
40 | Scheduler::Status Scheduler::isAvailable(const InstRef &IR) { |
41 | ResourceStateEvent RSE = |
42 | Resources->canBeDispatched(ConsumedBuffers: IR.getInstruction()->getUsedBuffers()); |
43 | HadTokenStall = RSE != RS_BUFFER_AVAILABLE; |
44 | |
45 | switch (RSE) { |
46 | case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: |
47 | return Scheduler::SC_BUFFERS_FULL; |
48 | case ResourceStateEvent::RS_RESERVED: |
49 | return Scheduler::SC_DISPATCH_GROUP_STALL; |
50 | case ResourceStateEvent::RS_BUFFER_AVAILABLE: |
51 | break; |
52 | } |
53 | |
54 | // Give lower priority to LSUnit stall events. |
55 | LSUnit::Status LSS = LSU.isAvailable(IR); |
56 | HadTokenStall = LSS != LSUnit::LSU_AVAILABLE; |
57 | |
58 | switch (LSS) { |
59 | case LSUnit::LSU_LQUEUE_FULL: |
60 | return Scheduler::SC_LOAD_QUEUE_FULL; |
61 | case LSUnit::LSU_SQUEUE_FULL: |
62 | return Scheduler::SC_STORE_QUEUE_FULL; |
63 | case LSUnit::LSU_AVAILABLE: |
64 | return Scheduler::SC_AVAILABLE; |
65 | } |
66 | |
67 | llvm_unreachable("Don't know how to process this LSU state result!" ); |
68 | } |
69 | |
70 | void Scheduler::issueInstructionImpl( |
71 | InstRef &IR, |
72 | SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &UsedResources) { |
73 | Instruction *IS = IR.getInstruction(); |
74 | const InstrDesc &D = IS->getDesc(); |
75 | |
76 | // Issue the instruction and collect all the consumed resources |
77 | // into a vector. That vector is then used to notify the listener. |
78 | Resources->issueInstruction(Desc: D, Pipes&: UsedResources); |
79 | |
80 | // Notify the instruction that it started executing. |
81 | // This updates the internal state of each write. |
82 | IS->execute(IID: IR.getSourceIndex()); |
83 | |
84 | IS->computeCriticalRegDep(); |
85 | |
86 | if (IS->isMemOp()) { |
87 | LSU.onInstructionIssued(IR); |
88 | const CriticalDependency &MemDep = |
89 | LSU.getCriticalPredecessor(GroupId: IS->getLSUTokenID()); |
90 | IS->setCriticalMemDep(MemDep); |
91 | } |
92 | |
93 | if (IS->isExecuting()) |
94 | IssuedSet.emplace_back(args&: IR); |
95 | else if (IS->isExecuted()) |
96 | LSU.onInstructionExecuted(IR); |
97 | } |
98 | |
99 | // Release the buffered resources and issue the instruction. |
100 | void Scheduler::issueInstruction( |
101 | InstRef &IR, |
102 | SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &UsedResources, |
103 | SmallVectorImpl<InstRef> &PendingInstructions, |
104 | SmallVectorImpl<InstRef> &ReadyInstructions) { |
105 | const Instruction &Inst = *IR.getInstruction(); |
106 | bool HasDependentUsers = Inst.hasDependentUsers(); |
107 | HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR); |
108 | |
109 | Resources->releaseBuffers(ConsumedBuffers: Inst.getUsedBuffers()); |
110 | issueInstructionImpl(IR, UsedResources); |
111 | // Instructions that have been issued during this cycle might have unblocked |
112 | // other dependent instructions. Dependent instructions may be issued during |
113 | // this same cycle if operands have ReadAdvance entries. Promote those |
114 | // instructions to the ReadySet and notify the caller that those are ready. |
115 | if (HasDependentUsers) |
116 | if (promoteToPendingSet(Pending&: PendingInstructions)) |
117 | promoteToReadySet(Ready&: ReadyInstructions); |
118 | } |
119 | |
120 | bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) { |
121 | // Scan the set of waiting instructions and promote them to the |
122 | // ready set if operands are all ready. |
123 | unsigned PromotedElements = 0; |
124 | for (auto I = PendingSet.begin(), E = PendingSet.end(); I != E;) { |
125 | InstRef &IR = *I; |
126 | if (!IR) |
127 | break; |
128 | |
129 | // Check if there are unsolved register dependencies. |
130 | Instruction &IS = *IR.getInstruction(); |
131 | if (!IS.isReady() && !IS.updatePending()) { |
132 | ++I; |
133 | continue; |
134 | } |
135 | // Check if there are unsolved memory dependencies. |
136 | if (IS.isMemOp() && !LSU.isReady(IR)) { |
137 | ++I; |
138 | continue; |
139 | } |
140 | |
141 | LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR |
142 | << " promoted to the READY set.\n" ); |
143 | |
144 | Ready.emplace_back(Args&: IR); |
145 | ReadySet.emplace_back(args&: IR); |
146 | |
147 | IR.invalidate(); |
148 | ++PromotedElements; |
149 | std::iter_swap(a: I, b: E - PromotedElements); |
150 | } |
151 | |
152 | PendingSet.resize(new_size: PendingSet.size() - PromotedElements); |
153 | return PromotedElements; |
154 | } |
155 | |
156 | bool Scheduler::promoteToPendingSet(SmallVectorImpl<InstRef> &Pending) { |
157 | // Scan the set of waiting instructions and promote them to the |
158 | // pending set if operands are all ready. |
159 | unsigned RemovedElements = 0; |
160 | for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { |
161 | InstRef &IR = *I; |
162 | if (!IR) |
163 | break; |
164 | |
165 | // Check if this instruction is now ready. In case, force |
166 | // a transition in state using method 'updateDispatched()'. |
167 | Instruction &IS = *IR.getInstruction(); |
168 | if (IS.isDispatched() && !IS.updateDispatched()) { |
169 | ++I; |
170 | continue; |
171 | } |
172 | |
173 | if (IS.isMemOp() && LSU.isWaiting(IR)) { |
174 | ++I; |
175 | continue; |
176 | } |
177 | |
178 | LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR |
179 | << " promoted to the PENDING set.\n" ); |
180 | |
181 | Pending.emplace_back(Args&: IR); |
182 | PendingSet.emplace_back(args&: IR); |
183 | |
184 | IR.invalidate(); |
185 | ++RemovedElements; |
186 | std::iter_swap(a: I, b: E - RemovedElements); |
187 | } |
188 | |
189 | WaitSet.resize(new_size: WaitSet.size() - RemovedElements); |
190 | return RemovedElements; |
191 | } |
192 | |
193 | InstRef Scheduler::select() { |
194 | unsigned QueueIndex = ReadySet.size(); |
195 | for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { |
196 | InstRef &IR = ReadySet[I]; |
197 | if (QueueIndex == ReadySet.size() || |
198 | Strategy->compare(Lhs: IR, Rhs: ReadySet[QueueIndex])) { |
199 | Instruction &IS = *IR.getInstruction(); |
200 | uint64_t BusyResourceMask = Resources->checkAvailability(Desc: IS.getDesc()); |
201 | if (BusyResourceMask) |
202 | IS.setCriticalResourceMask(BusyResourceMask); |
203 | BusyResourceUnits |= BusyResourceMask; |
204 | if (!BusyResourceMask) |
205 | QueueIndex = I; |
206 | } |
207 | } |
208 | |
209 | if (QueueIndex == ReadySet.size()) |
210 | return InstRef(); |
211 | |
212 | // We found an instruction to issue. |
213 | InstRef IR = ReadySet[QueueIndex]; |
214 | std::swap(a&: ReadySet[QueueIndex], b&: ReadySet[ReadySet.size() - 1]); |
215 | ReadySet.pop_back(); |
216 | return IR; |
217 | } |
218 | |
219 | void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) { |
220 | unsigned RemovedElements = 0; |
221 | for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { |
222 | InstRef &IR = *I; |
223 | if (!IR) |
224 | break; |
225 | Instruction &IS = *IR.getInstruction(); |
226 | if (!IS.isExecuted()) { |
227 | LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR |
228 | << " is still executing.\n" ); |
229 | ++I; |
230 | continue; |
231 | } |
232 | |
233 | // Instruction IR has completed execution. |
234 | LSU.onInstructionExecuted(IR); |
235 | Executed.emplace_back(Args&: IR); |
236 | ++RemovedElements; |
237 | IR.invalidate(); |
238 | std::iter_swap(a: I, b: E - RemovedElements); |
239 | } |
240 | |
241 | IssuedSet.resize(new_size: IssuedSet.size() - RemovedElements); |
242 | } |
243 | |
244 | uint64_t Scheduler::analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts) { |
245 | llvm::append_range(C&: Insts, R&: ReadySet); |
246 | return BusyResourceUnits; |
247 | } |
248 | |
249 | void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps, |
250 | SmallVectorImpl<InstRef> &MemDeps) { |
251 | const auto EndIt = PendingSet.end() - NumDispatchedToThePendingSet; |
252 | for (const InstRef &IR : make_range(x: PendingSet.begin(), y: EndIt)) { |
253 | const Instruction &IS = *IR.getInstruction(); |
254 | if (Resources->checkAvailability(Desc: IS.getDesc())) |
255 | continue; |
256 | |
257 | if (IS.isMemOp() && LSU.isPending(IR)) |
258 | MemDeps.emplace_back(Args: IR); |
259 | |
260 | if (IS.isPending()) |
261 | RegDeps.emplace_back(Args: IR); |
262 | } |
263 | } |
264 | |
265 | void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed, |
266 | SmallVectorImpl<InstRef> &Executed, |
267 | SmallVectorImpl<InstRef> &Pending, |
268 | SmallVectorImpl<InstRef> &Ready) { |
269 | LSU.cycleEvent(); |
270 | |
271 | // Release consumed resources. |
272 | Resources->cycleEvent(ResourcesFreed&: Freed); |
273 | |
274 | for (InstRef &IR : IssuedSet) |
275 | IR.getInstruction()->cycleEvent(); |
276 | updateIssuedSet(Executed); |
277 | |
278 | for (InstRef &IR : PendingSet) |
279 | IR.getInstruction()->cycleEvent(); |
280 | |
281 | for (InstRef &IR : WaitSet) |
282 | IR.getInstruction()->cycleEvent(); |
283 | |
284 | promoteToPendingSet(Pending); |
285 | promoteToReadySet(Ready); |
286 | |
287 | NumDispatchedToThePendingSet = 0; |
288 | BusyResourceUnits = 0; |
289 | } |
290 | |
291 | bool Scheduler::mustIssueImmediately(const InstRef &IR) const { |
292 | const InstrDesc &Desc = IR.getInstruction()->getDesc(); |
293 | if (Desc.isZeroLatency()) |
294 | return true; |
295 | // Instructions that use an in-order dispatch/issue processor resource must be |
296 | // issued immediately to the pipeline(s). Any other in-order buffered |
297 | // resources (i.e. BufferSize=1) is consumed. |
298 | return Desc.MustIssueImmediately; |
299 | } |
300 | |
301 | bool Scheduler::dispatch(InstRef &IR) { |
302 | Instruction &IS = *IR.getInstruction(); |
303 | Resources->reserveBuffers(ConsumedBuffers: IS.getUsedBuffers()); |
304 | |
305 | // If necessary, reserve queue entries in the load-store unit (LSU). |
306 | if (IS.isMemOp()) |
307 | IS.setLSUTokenID(LSU.dispatch(IR)); |
308 | |
309 | if (IS.isDispatched() || (IS.isMemOp() && LSU.isWaiting(IR))) { |
310 | LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n" ); |
311 | WaitSet.push_back(x: IR); |
312 | return false; |
313 | } |
314 | |
315 | if (IS.isPending() || (IS.isMemOp() && LSU.isPending(IR))) { |
316 | LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR |
317 | << " to the PendingSet\n" ); |
318 | PendingSet.push_back(x: IR); |
319 | ++NumDispatchedToThePendingSet; |
320 | return false; |
321 | } |
322 | |
323 | assert(IS.isReady() && (!IS.isMemOp() || LSU.isReady(IR)) && |
324 | "Unexpected internal state found!" ); |
325 | // Don't add a zero-latency instruction to the Ready queue. |
326 | // A zero-latency instruction doesn't consume any scheduler resources. That is |
327 | // because it doesn't need to be executed, and it is often removed at register |
328 | // renaming stage. For example, register-register moves are often optimized at |
329 | // register renaming stage by simply updating register aliases. On some |
330 | // targets, zero-idiom instructions (for example: a xor that clears the value |
331 | // of a register) are treated specially, and are often eliminated at register |
332 | // renaming stage. |
333 | if (!mustIssueImmediately(IR)) { |
334 | LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n" ); |
335 | ReadySet.push_back(x: IR); |
336 | } |
337 | |
338 | return true; |
339 | } |
340 | |
341 | } // namespace mca |
342 | } // namespace llvm |
343 | |