1 | //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements a fast scheduler. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "InstrEmitter.h" |
14 | #include "SDNodeDbgValue.h" |
15 | #include "ScheduleDAGSDNodes.h" |
16 | #include "llvm/ADT/SmallSet.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/CodeGen/SchedulerRegistry.h" |
19 | #include "llvm/CodeGen/SelectionDAGISel.h" |
20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
21 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
22 | #include "llvm/IR/InlineAsm.h" |
23 | #include "llvm/Support/Debug.h" |
24 | #include "llvm/Support/ErrorHandling.h" |
25 | #include "llvm/Support/raw_ostream.h" |
26 | using namespace llvm; |
27 | |
28 | #define DEBUG_TYPE "pre-RA-sched" |
29 | |
30 | STATISTIC(NumUnfolds, "Number of nodes unfolded" ); |
31 | STATISTIC(NumDups, "Number of duplicated nodes" ); |
32 | STATISTIC(NumPRCopies, "Number of physical copies" ); |
33 | |
34 | static RegisterScheduler |
35 | fastDAGScheduler("fast" , "Fast suboptimal list scheduling" , |
36 | createFastDAGScheduler); |
37 | static RegisterScheduler |
38 | linearizeDAGScheduler("linearize" , "Linearize DAG, no scheduling" , |
39 | createDAGLinearizer); |
40 | |
41 | |
42 | namespace { |
43 | /// FastPriorityQueue - A degenerate priority queue that considers |
44 | /// all nodes to have the same priority. |
45 | /// |
46 | struct FastPriorityQueue { |
47 | SmallVector<SUnit *, 16> Queue; |
48 | |
49 | bool empty() const { return Queue.empty(); } |
50 | |
51 | void push(SUnit *U) { |
52 | Queue.push_back(Elt: U); |
53 | } |
54 | |
55 | SUnit *pop() { |
56 | if (empty()) return nullptr; |
57 | return Queue.pop_back_val(); |
58 | } |
59 | }; |
60 | |
61 | //===----------------------------------------------------------------------===// |
62 | /// ScheduleDAGFast - The actual "fast" list scheduler implementation. |
63 | /// |
64 | class ScheduleDAGFast : public ScheduleDAGSDNodes { |
65 | private: |
66 | /// AvailableQueue - The priority queue to use for the available SUnits. |
67 | FastPriorityQueue AvailableQueue; |
68 | |
69 | /// LiveRegDefs - A set of physical registers and their definition |
70 | /// that are "live". These nodes must be scheduled before any other nodes that |
71 | /// modifies the registers can be scheduled. |
72 | unsigned NumLiveRegs = 0u; |
73 | std::vector<SUnit*> LiveRegDefs; |
74 | std::vector<unsigned> LiveRegCycles; |
75 | |
76 | public: |
77 | ScheduleDAGFast(MachineFunction &mf) |
78 | : ScheduleDAGSDNodes(mf) {} |
79 | |
80 | void Schedule() override; |
81 | |
82 | /// AddPred - adds a predecessor edge to SUnit SU. |
83 | /// This returns true if this is a new predecessor. |
84 | void AddPred(SUnit *SU, const SDep &D) { |
85 | SU->addPred(D); |
86 | } |
87 | |
88 | /// RemovePred - removes a predecessor edge from SUnit SU. |
89 | /// This returns true if an edge was removed. |
90 | void RemovePred(SUnit *SU, const SDep &D) { |
91 | SU->removePred(D); |
92 | } |
93 | |
94 | private: |
95 | void ReleasePred(SUnit *SU, SDep *PredEdge); |
96 | void ReleasePredecessors(SUnit *SU, unsigned CurCycle); |
97 | void ScheduleNodeBottomUp(SUnit*, unsigned); |
98 | SUnit *CopyAndMoveSuccessors(SUnit*); |
99 | void InsertCopiesAndMoveSuccs(SUnit*, unsigned, |
100 | const TargetRegisterClass*, |
101 | const TargetRegisterClass*, |
102 | SmallVectorImpl<SUnit*>&); |
103 | bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); |
104 | void ListScheduleBottomUp(); |
105 | |
106 | /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. |
107 | bool forceUnitLatencies() const override { return true; } |
108 | }; |
109 | } // end anonymous namespace |
110 | |
111 | |
112 | /// Schedule - Schedule the DAG using list scheduling. |
113 | void ScheduleDAGFast::Schedule() { |
114 | LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n" ); |
115 | |
116 | NumLiveRegs = 0; |
117 | LiveRegDefs.resize(new_size: TRI->getNumRegs(), x: nullptr); |
118 | LiveRegCycles.resize(new_size: TRI->getNumRegs(), x: 0); |
119 | |
120 | // Build the scheduling graph. |
121 | BuildSchedGraph(AA: nullptr); |
122 | |
123 | LLVM_DEBUG(dump()); |
124 | |
125 | // Execute the actual scheduling loop. |
126 | ListScheduleBottomUp(); |
127 | } |
128 | |
129 | //===----------------------------------------------------------------------===// |
130 | // Bottom-Up Scheduling |
131 | //===----------------------------------------------------------------------===// |
132 | |
133 | /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to |
134 | /// the AvailableQueue if the count reaches zero. Also update its cycle bound. |
135 | void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { |
136 | SUnit *PredSU = PredEdge->getSUnit(); |
137 | |
138 | #ifndef NDEBUG |
139 | if (PredSU->NumSuccsLeft == 0) { |
140 | dbgs() << "*** Scheduling failed! ***\n" ; |
141 | dumpNode(*PredSU); |
142 | dbgs() << " has been released too many times!\n" ; |
143 | llvm_unreachable(nullptr); |
144 | } |
145 | #endif |
146 | --PredSU->NumSuccsLeft; |
147 | |
148 | // If all the node's successors are scheduled, this node is ready |
149 | // to be scheduled. Ignore the special EntrySU node. |
150 | if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { |
151 | PredSU->isAvailable = true; |
152 | AvailableQueue.push(U: PredSU); |
153 | } |
154 | } |
155 | |
156 | void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { |
157 | // Bottom up: release predecessors |
158 | for (SDep &Pred : SU->Preds) { |
159 | ReleasePred(SU, PredEdge: &Pred); |
160 | if (Pred.isAssignedRegDep()) { |
161 | // This is a physical register dependency and it's impossible or |
162 | // expensive to copy the register. Make sure nothing that can |
163 | // clobber the register is scheduled between the predecessor and |
164 | // this node. |
165 | if (!LiveRegDefs[Pred.getReg()]) { |
166 | ++NumLiveRegs; |
167 | LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); |
168 | LiveRegCycles[Pred.getReg()] = CurCycle; |
169 | } |
170 | } |
171 | } |
172 | } |
173 | |
174 | /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending |
175 | /// count of its predecessors. If a predecessor pending count is zero, add it to |
176 | /// the Available queue. |
177 | void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { |
178 | LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: " ); |
179 | LLVM_DEBUG(dumpNode(*SU)); |
180 | |
181 | assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!" ); |
182 | SU->setHeightToAtLeast(CurCycle); |
183 | Sequence.push_back(x: SU); |
184 | |
185 | ReleasePredecessors(SU, CurCycle); |
186 | |
187 | // Release all the implicit physical register defs that are live. |
188 | for (SDep &Succ : SU->Succs) { |
189 | if (Succ.isAssignedRegDep()) { |
190 | if (LiveRegCycles[Succ.getReg()] == Succ.getSUnit()->getHeight()) { |
191 | assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!" ); |
192 | assert(LiveRegDefs[Succ.getReg()] == SU && |
193 | "Physical register dependency violated?" ); |
194 | --NumLiveRegs; |
195 | LiveRegDefs[Succ.getReg()] = nullptr; |
196 | LiveRegCycles[Succ.getReg()] = 0; |
197 | } |
198 | } |
199 | } |
200 | |
201 | SU->isScheduled = true; |
202 | } |
203 | |
204 | /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled |
205 | /// successors to the newly created node. |
206 | SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { |
207 | if (SU->getNode()->getGluedNode()) |
208 | return nullptr; |
209 | |
210 | SDNode *N = SU->getNode(); |
211 | if (!N) |
212 | return nullptr; |
213 | |
214 | SUnit *NewSU; |
215 | bool TryUnfold = false; |
216 | for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { |
217 | MVT VT = N->getSimpleValueType(ResNo: i); |
218 | if (VT == MVT::Glue) |
219 | return nullptr; |
220 | else if (VT == MVT::Other) |
221 | TryUnfold = true; |
222 | } |
223 | for (const SDValue &Op : N->op_values()) { |
224 | MVT VT = Op.getNode()->getSimpleValueType(ResNo: Op.getResNo()); |
225 | if (VT == MVT::Glue) |
226 | return nullptr; |
227 | } |
228 | |
229 | if (TryUnfold) { |
230 | SmallVector<SDNode*, 2> NewNodes; |
231 | if (!TII->unfoldMemoryOperand(DAG&: *DAG, N, NewNodes)) |
232 | return nullptr; |
233 | |
234 | LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n" ); |
235 | assert(NewNodes.size() == 2 && "Expected a load folding node!" ); |
236 | |
237 | N = NewNodes[1]; |
238 | SDNode *LoadNode = NewNodes[0]; |
239 | unsigned NumVals = N->getNumValues(); |
240 | unsigned OldNumVals = SU->getNode()->getNumValues(); |
241 | for (unsigned i = 0; i != NumVals; ++i) |
242 | DAG->ReplaceAllUsesOfValueWith(From: SDValue(SU->getNode(), i), To: SDValue(N, i)); |
243 | DAG->ReplaceAllUsesOfValueWith(From: SDValue(SU->getNode(), OldNumVals-1), |
244 | To: SDValue(LoadNode, 1)); |
245 | |
246 | SUnit *NewSU = newSUnit(N); |
247 | assert(N->getNodeId() == -1 && "Node already inserted!" ); |
248 | N->setNodeId(NewSU->NodeNum); |
249 | |
250 | const MCInstrDesc &MCID = TII->get(Opcode: N->getMachineOpcode()); |
251 | for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { |
252 | if (MCID.getOperandConstraint(OpNum: i, Constraint: MCOI::TIED_TO) != -1) { |
253 | NewSU->isTwoAddress = true; |
254 | break; |
255 | } |
256 | } |
257 | if (MCID.isCommutable()) |
258 | NewSU->isCommutable = true; |
259 | |
260 | // LoadNode may already exist. This can happen when there is another |
261 | // load from the same location and producing the same type of value |
262 | // but it has different alignment or volatileness. |
263 | bool isNewLoad = true; |
264 | SUnit *LoadSU; |
265 | if (LoadNode->getNodeId() != -1) { |
266 | LoadSU = &SUnits[LoadNode->getNodeId()]; |
267 | isNewLoad = false; |
268 | } else { |
269 | LoadSU = newSUnit(N: LoadNode); |
270 | LoadNode->setNodeId(LoadSU->NodeNum); |
271 | } |
272 | |
273 | SDep ChainPred; |
274 | SmallVector<SDep, 4> ChainSuccs; |
275 | SmallVector<SDep, 4> LoadPreds; |
276 | SmallVector<SDep, 4> NodePreds; |
277 | SmallVector<SDep, 4> NodeSuccs; |
278 | for (SDep &Pred : SU->Preds) { |
279 | if (Pred.isCtrl()) |
280 | ChainPred = Pred; |
281 | else if (Pred.getSUnit()->getNode() && |
282 | Pred.getSUnit()->getNode()->isOperandOf(N: LoadNode)) |
283 | LoadPreds.push_back(Elt: Pred); |
284 | else |
285 | NodePreds.push_back(Elt: Pred); |
286 | } |
287 | for (SDep &Succ : SU->Succs) { |
288 | if (Succ.isCtrl()) |
289 | ChainSuccs.push_back(Elt: Succ); |
290 | else |
291 | NodeSuccs.push_back(Elt: Succ); |
292 | } |
293 | |
294 | if (ChainPred.getSUnit()) { |
295 | RemovePred(SU, D: ChainPred); |
296 | if (isNewLoad) |
297 | AddPred(SU: LoadSU, D: ChainPred); |
298 | } |
299 | for (const SDep &Pred : LoadPreds) { |
300 | RemovePred(SU, D: Pred); |
301 | if (isNewLoad) { |
302 | AddPred(SU: LoadSU, D: Pred); |
303 | } |
304 | } |
305 | for (const SDep &Pred : NodePreds) { |
306 | RemovePred(SU, D: Pred); |
307 | AddPred(SU: NewSU, D: Pred); |
308 | } |
309 | for (SDep D : NodeSuccs) { |
310 | SUnit *SuccDep = D.getSUnit(); |
311 | D.setSUnit(SU); |
312 | RemovePred(SU: SuccDep, D); |
313 | D.setSUnit(NewSU); |
314 | AddPred(SU: SuccDep, D); |
315 | } |
316 | for (SDep D : ChainSuccs) { |
317 | SUnit *SuccDep = D.getSUnit(); |
318 | D.setSUnit(SU); |
319 | RemovePred(SU: SuccDep, D); |
320 | if (isNewLoad) { |
321 | D.setSUnit(LoadSU); |
322 | AddPred(SU: SuccDep, D); |
323 | } |
324 | } |
325 | if (isNewLoad) { |
326 | SDep D(LoadSU, SDep::Barrier); |
327 | D.setLatency(LoadSU->Latency); |
328 | AddPred(SU: NewSU, D); |
329 | } |
330 | |
331 | ++NumUnfolds; |
332 | |
333 | if (NewSU->NumSuccsLeft == 0) { |
334 | NewSU->isAvailable = true; |
335 | return NewSU; |
336 | } |
337 | SU = NewSU; |
338 | } |
339 | |
340 | LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n" ); |
341 | NewSU = Clone(Old: SU); |
342 | |
343 | // New SUnit has the exact same predecessors. |
344 | for (SDep &Pred : SU->Preds) |
345 | if (!Pred.isArtificial()) |
346 | AddPred(SU: NewSU, D: Pred); |
347 | |
348 | // Only copy scheduled successors. Cut them from old node's successor |
349 | // list and move them over. |
350 | SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; |
351 | for (SDep &Succ : SU->Succs) { |
352 | if (Succ.isArtificial()) |
353 | continue; |
354 | SUnit *SuccSU = Succ.getSUnit(); |
355 | if (SuccSU->isScheduled) { |
356 | SDep D = Succ; |
357 | D.setSUnit(NewSU); |
358 | AddPred(SU: SuccSU, D); |
359 | D.setSUnit(SU); |
360 | DelDeps.push_back(Elt: std::make_pair(x&: SuccSU, y&: D)); |
361 | } |
362 | } |
363 | for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) |
364 | RemovePred(SU: DelDeps[i].first, D: DelDeps[i].second); |
365 | |
366 | ++NumDups; |
367 | return NewSU; |
368 | } |
369 | |
370 | /// InsertCopiesAndMoveSuccs - Insert register copies and move all |
371 | /// scheduled successors of the given SUnit to the last copy. |
372 | void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, |
373 | const TargetRegisterClass *DestRC, |
374 | const TargetRegisterClass *SrcRC, |
375 | SmallVectorImpl<SUnit*> &Copies) { |
376 | SUnit *CopyFromSU = newSUnit(N: static_cast<SDNode *>(nullptr)); |
377 | CopyFromSU->CopySrcRC = SrcRC; |
378 | CopyFromSU->CopyDstRC = DestRC; |
379 | |
380 | SUnit *CopyToSU = newSUnit(N: static_cast<SDNode *>(nullptr)); |
381 | CopyToSU->CopySrcRC = DestRC; |
382 | CopyToSU->CopyDstRC = SrcRC; |
383 | |
384 | // Only copy scheduled successors. Cut them from old node's successor |
385 | // list and move them over. |
386 | SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; |
387 | for (SDep &Succ : SU->Succs) { |
388 | if (Succ.isArtificial()) |
389 | continue; |
390 | SUnit *SuccSU = Succ.getSUnit(); |
391 | if (SuccSU->isScheduled) { |
392 | SDep D = Succ; |
393 | D.setSUnit(CopyToSU); |
394 | AddPred(SU: SuccSU, D); |
395 | DelDeps.push_back(Elt: std::make_pair(x&: SuccSU, y&: Succ)); |
396 | } |
397 | } |
398 | for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { |
399 | RemovePred(SU: DelDeps[i].first, D: DelDeps[i].second); |
400 | } |
401 | SDep FromDep(SU, SDep::Data, Reg); |
402 | FromDep.setLatency(SU->Latency); |
403 | AddPred(SU: CopyFromSU, D: FromDep); |
404 | SDep ToDep(CopyFromSU, SDep::Data, 0); |
405 | ToDep.setLatency(CopyFromSU->Latency); |
406 | AddPred(SU: CopyToSU, D: ToDep); |
407 | |
408 | Copies.push_back(Elt: CopyFromSU); |
409 | Copies.push_back(Elt: CopyToSU); |
410 | |
411 | ++NumPRCopies; |
412 | } |
413 | |
414 | /// getPhysicalRegisterVT - Returns the ValueType of the physical register |
415 | /// definition of the specified node. |
416 | /// FIXME: Move to SelectionDAG? |
417 | static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, |
418 | const TargetInstrInfo *TII) { |
419 | unsigned NumRes; |
420 | if (N->getOpcode() == ISD::CopyFromReg) { |
421 | // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. |
422 | NumRes = 1; |
423 | } else { |
424 | const MCInstrDesc &MCID = TII->get(Opcode: N->getMachineOpcode()); |
425 | assert(!MCID.implicit_defs().empty() && |
426 | "Physical reg def must be in implicit def list!" ); |
427 | NumRes = MCID.getNumDefs(); |
428 | for (MCPhysReg ImpDef : MCID.implicit_defs()) { |
429 | if (Reg == ImpDef) |
430 | break; |
431 | ++NumRes; |
432 | } |
433 | } |
434 | return N->getSimpleValueType(ResNo: NumRes); |
435 | } |
436 | |
437 | /// CheckForLiveRegDef - Return true and update live register vector if the |
438 | /// specified register def of the specified SUnit clobbers any "live" registers. |
439 | static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, |
440 | std::vector<SUnit *> &LiveRegDefs, |
441 | SmallSet<unsigned, 4> &RegAdded, |
442 | SmallVectorImpl<unsigned> &LRegs, |
443 | const TargetRegisterInfo *TRI, |
444 | const SDNode *Node = nullptr) { |
445 | bool Added = false; |
446 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { |
447 | // Check if Ref is live. |
448 | if (!LiveRegDefs[*AI]) |
449 | continue; |
450 | |
451 | // Allow multiple uses of the same def. |
452 | if (LiveRegDefs[*AI] == SU) |
453 | continue; |
454 | |
455 | // Allow multiple uses of same def |
456 | if (Node && LiveRegDefs[*AI]->getNode() == Node) |
457 | continue; |
458 | |
459 | // Add Reg to the set of interfering live regs. |
460 | if (RegAdded.insert(V: *AI).second) { |
461 | LRegs.push_back(Elt: *AI); |
462 | Added = true; |
463 | } |
464 | } |
465 | return Added; |
466 | } |
467 | |
468 | /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay |
469 | /// scheduling of the given node to satisfy live physical register dependencies. |
470 | /// If the specific node is the last one that's available to schedule, do |
471 | /// whatever is necessary (i.e. backtracking or cloning) to make it possible. |
472 | bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, |
473 | SmallVectorImpl<unsigned> &LRegs){ |
474 | if (NumLiveRegs == 0) |
475 | return false; |
476 | |
477 | SmallSet<unsigned, 4> RegAdded; |
478 | // If this node would clobber any "live" register, then it's not ready. |
479 | for (SDep &Pred : SU->Preds) { |
480 | if (Pred.isAssignedRegDep()) { |
481 | CheckForLiveRegDef(SU: Pred.getSUnit(), Reg: Pred.getReg(), LiveRegDefs, |
482 | RegAdded, LRegs, TRI); |
483 | } |
484 | } |
485 | |
486 | for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { |
487 | if (Node->getOpcode() == ISD::INLINEASM || |
488 | Node->getOpcode() == ISD::INLINEASM_BR) { |
489 | // Inline asm can clobber physical defs. |
490 | unsigned NumOps = Node->getNumOperands(); |
491 | if (Node->getOperand(Num: NumOps-1).getValueType() == MVT::Glue) |
492 | --NumOps; // Ignore the glue operand. |
493 | |
494 | for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { |
495 | unsigned Flags = Node->getConstantOperandVal(Num: i); |
496 | const InlineAsm::Flag F(Flags); |
497 | unsigned NumVals = F.getNumOperandRegisters(); |
498 | |
499 | ++i; // Skip the ID value. |
500 | if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || |
501 | F.isClobberKind()) { |
502 | // Check for def of register or earlyclobber register. |
503 | for (; NumVals; --NumVals, ++i) { |
504 | unsigned Reg = cast<RegisterSDNode>(Val: Node->getOperand(Num: i))->getReg(); |
505 | if (Register::isPhysicalRegister(Reg)) |
506 | CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); |
507 | } |
508 | } else |
509 | i += NumVals; |
510 | } |
511 | continue; |
512 | } |
513 | |
514 | if (Node->getOpcode() == ISD::CopyToReg) { |
515 | Register Reg = cast<RegisterSDNode>(Val: Node->getOperand(Num: 1))->getReg(); |
516 | if (Reg.isPhysical()) { |
517 | SDNode *SrcNode = Node->getOperand(Num: 2).getNode(); |
518 | CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, Node: SrcNode); |
519 | } |
520 | } |
521 | |
522 | if (!Node->isMachineOpcode()) |
523 | continue; |
524 | const MCInstrDesc &MCID = TII->get(Opcode: Node->getMachineOpcode()); |
525 | for (MCPhysReg Reg : MCID.implicit_defs()) |
526 | CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); |
527 | } |
528 | return !LRegs.empty(); |
529 | } |
530 | |
531 | |
532 | /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up |
533 | /// schedulers. |
534 | void ScheduleDAGFast::ListScheduleBottomUp() { |
535 | unsigned CurCycle = 0; |
536 | |
537 | // Release any predecessors of the special Exit node. |
538 | ReleasePredecessors(SU: &ExitSU, CurCycle); |
539 | |
540 | // Add root to Available queue. |
541 | if (!SUnits.empty()) { |
542 | SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; |
543 | assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!" ); |
544 | RootSU->isAvailable = true; |
545 | AvailableQueue.push(U: RootSU); |
546 | } |
547 | |
548 | // While Available queue is not empty, grab the node with the highest |
549 | // priority. If it is not ready put it back. Schedule the node. |
550 | SmallVector<SUnit*, 4> NotReady; |
551 | DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; |
552 | Sequence.reserve(n: SUnits.size()); |
553 | while (!AvailableQueue.empty()) { |
554 | bool Delayed = false; |
555 | LRegsMap.clear(); |
556 | SUnit *CurSU = AvailableQueue.pop(); |
557 | while (CurSU) { |
558 | SmallVector<unsigned, 4> LRegs; |
559 | if (!DelayForLiveRegsBottomUp(SU: CurSU, LRegs)) |
560 | break; |
561 | Delayed = true; |
562 | LRegsMap.insert(KV: std::make_pair(x&: CurSU, y&: LRegs)); |
563 | |
564 | CurSU->isPending = true; // This SU is not in AvailableQueue right now. |
565 | NotReady.push_back(Elt: CurSU); |
566 | CurSU = AvailableQueue.pop(); |
567 | } |
568 | |
569 | // All candidates are delayed due to live physical reg dependencies. |
570 | // Try code duplication or inserting cross class copies |
571 | // to resolve it. |
572 | if (Delayed && !CurSU) { |
573 | if (!CurSU) { |
574 | // Try duplicating the nodes that produces these |
575 | // "expensive to copy" values to break the dependency. In case even |
576 | // that doesn't work, insert cross class copies. |
577 | SUnit *TrySU = NotReady[0]; |
578 | SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; |
579 | assert(LRegs.size() == 1 && "Can't handle this yet!" ); |
580 | unsigned Reg = LRegs[0]; |
581 | SUnit *LRDef = LiveRegDefs[Reg]; |
582 | MVT VT = getPhysicalRegisterVT(N: LRDef->getNode(), Reg, TII); |
583 | const TargetRegisterClass *RC = |
584 | TRI->getMinimalPhysRegClass(Reg, VT); |
585 | const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); |
586 | |
587 | // If cross copy register class is the same as RC, then it must be |
588 | // possible copy the value directly. Do not try duplicate the def. |
589 | // If cross copy register class is not the same as RC, then it's |
590 | // possible to copy the value but it require cross register class copies |
591 | // and it is expensive. |
592 | // If cross copy register class is null, then it's not possible to copy |
593 | // the value at all. |
594 | SUnit *NewDef = nullptr; |
595 | if (DestRC != RC) { |
596 | NewDef = CopyAndMoveSuccessors(SU: LRDef); |
597 | if (!DestRC && !NewDef) |
598 | report_fatal_error(reason: "Can't handle live physical " |
599 | "register dependency!" ); |
600 | } |
601 | if (!NewDef) { |
602 | // Issue copies, these can be expensive cross register class copies. |
603 | SmallVector<SUnit*, 2> Copies; |
604 | InsertCopiesAndMoveSuccs(SU: LRDef, Reg, DestRC, SrcRC: RC, Copies); |
605 | LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum |
606 | << " to SU #" << Copies.front()->NodeNum << "\n" ); |
607 | AddPred(SU: TrySU, D: SDep(Copies.front(), SDep::Artificial)); |
608 | NewDef = Copies.back(); |
609 | } |
610 | |
611 | LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum |
612 | << " to SU #" << TrySU->NodeNum << "\n" ); |
613 | LiveRegDefs[Reg] = NewDef; |
614 | AddPred(SU: NewDef, D: SDep(TrySU, SDep::Artificial)); |
615 | TrySU->isAvailable = false; |
616 | CurSU = NewDef; |
617 | } |
618 | |
619 | if (!CurSU) { |
620 | llvm_unreachable("Unable to resolve live physical register dependencies!" ); |
621 | } |
622 | } |
623 | |
624 | // Add the nodes that aren't ready back onto the available list. |
625 | for (SUnit *SU : NotReady) { |
626 | SU->isPending = false; |
627 | // May no longer be available due to backtracking. |
628 | if (SU->isAvailable) |
629 | AvailableQueue.push(U: SU); |
630 | } |
631 | NotReady.clear(); |
632 | |
633 | if (CurSU) |
634 | ScheduleNodeBottomUp(SU: CurSU, CurCycle); |
635 | ++CurCycle; |
636 | } |
637 | |
638 | // Reverse the order since it is bottom up. |
639 | std::reverse(first: Sequence.begin(), last: Sequence.end()); |
640 | |
641 | #ifndef NDEBUG |
642 | VerifyScheduledSequence(/*isBottomUp=*/true); |
643 | #endif |
644 | } |
645 | |
646 | |
647 | namespace { |
648 | //===----------------------------------------------------------------------===// |
649 | // ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the |
650 | // DAG in topological order. |
651 | // IMPORTANT: this may not work for targets with phyreg dependency. |
652 | // |
653 | class ScheduleDAGLinearize : public ScheduleDAGSDNodes { |
654 | public: |
655 | ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} |
656 | |
657 | void Schedule() override; |
658 | |
659 | MachineBasicBlock * |
660 | EmitSchedule(MachineBasicBlock::iterator &InsertPos) override; |
661 | |
662 | private: |
663 | std::vector<SDNode*> Sequence; |
664 | DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user |
665 | |
666 | void ScheduleNode(SDNode *N); |
667 | }; |
668 | } // end anonymous namespace |
669 | |
670 | void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { |
671 | if (N->getNodeId() != 0) |
672 | llvm_unreachable(nullptr); |
673 | |
674 | if (!N->isMachineOpcode() && |
675 | (N->getOpcode() == ISD::EntryToken || isPassiveNode(Node: N))) |
676 | // These nodes do not need to be translated into MIs. |
677 | return; |
678 | |
679 | LLVM_DEBUG(dbgs() << "\n*** Scheduling: " ); |
680 | LLVM_DEBUG(N->dump(DAG)); |
681 | Sequence.push_back(x: N); |
682 | |
683 | unsigned NumOps = N->getNumOperands(); |
684 | if (unsigned NumLeft = NumOps) { |
685 | SDNode *GluedOpN = nullptr; |
686 | do { |
687 | const SDValue &Op = N->getOperand(Num: NumLeft-1); |
688 | SDNode *OpN = Op.getNode(); |
689 | |
690 | if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { |
691 | // Schedule glue operand right above N. |
692 | GluedOpN = OpN; |
693 | assert(OpN->getNodeId() != 0 && "Glue operand not ready?" ); |
694 | OpN->setNodeId(0); |
695 | ScheduleNode(N: OpN); |
696 | continue; |
697 | } |
698 | |
699 | if (OpN == GluedOpN) |
700 | // Glue operand is already scheduled. |
701 | continue; |
702 | |
703 | DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(Val: OpN); |
704 | if (DI != GluedMap.end() && DI->second != N) |
705 | // Users of glues are counted against the glued users. |
706 | OpN = DI->second; |
707 | |
708 | unsigned Degree = OpN->getNodeId(); |
709 | assert(Degree > 0 && "Predecessor over-released!" ); |
710 | OpN->setNodeId(--Degree); |
711 | if (Degree == 0) |
712 | ScheduleNode(N: OpN); |
713 | } while (--NumLeft); |
714 | } |
715 | } |
716 | |
717 | /// findGluedUser - Find the representative use of a glue value by walking |
718 | /// the use chain. |
719 | static SDNode *findGluedUser(SDNode *N) { |
720 | while (SDNode *Glued = N->getGluedUser()) |
721 | N = Glued; |
722 | return N; |
723 | } |
724 | |
725 | void ScheduleDAGLinearize::Schedule() { |
726 | LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n" ); |
727 | |
728 | SmallVector<SDNode*, 8> Glues; |
729 | unsigned DAGSize = 0; |
730 | for (SDNode &Node : DAG->allnodes()) { |
731 | SDNode *N = &Node; |
732 | |
733 | // Use node id to record degree. |
734 | unsigned Degree = N->use_size(); |
735 | N->setNodeId(Degree); |
736 | unsigned NumVals = N->getNumValues(); |
737 | if (NumVals && N->getValueType(ResNo: NumVals-1) == MVT::Glue && |
738 | N->hasAnyUseOfValue(Value: NumVals-1)) { |
739 | SDNode *User = findGluedUser(N); |
740 | if (User) { |
741 | Glues.push_back(Elt: N); |
742 | GluedMap.insert(KV: std::make_pair(x&: N, y&: User)); |
743 | } |
744 | } |
745 | |
746 | if (N->isMachineOpcode() || |
747 | (N->getOpcode() != ISD::EntryToken && !isPassiveNode(Node: N))) |
748 | ++DAGSize; |
749 | } |
750 | |
751 | for (SDNode *Glue : Glues) { |
752 | SDNode *GUser = GluedMap[Glue]; |
753 | unsigned Degree = Glue->getNodeId(); |
754 | unsigned UDegree = GUser->getNodeId(); |
755 | |
756 | // Glue user must be scheduled together with the glue operand. So other |
757 | // users of the glue operand must be treated as its users. |
758 | SDNode *ImmGUser = Glue->getGluedUser(); |
759 | for (const SDNode *U : Glue->uses()) |
760 | if (U == ImmGUser) |
761 | --Degree; |
762 | GUser->setNodeId(UDegree + Degree); |
763 | Glue->setNodeId(1); |
764 | } |
765 | |
766 | Sequence.reserve(n: DAGSize); |
767 | ScheduleNode(N: DAG->getRoot().getNode()); |
768 | } |
769 | |
770 | MachineBasicBlock* |
771 | ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { |
772 | InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); |
773 | DenseMap<SDValue, Register> VRBaseMap; |
774 | |
775 | LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n" ; }); |
776 | |
777 | unsigned NumNodes = Sequence.size(); |
778 | MachineBasicBlock *BB = Emitter.getBlock(); |
779 | for (unsigned i = 0; i != NumNodes; ++i) { |
780 | SDNode *N = Sequence[NumNodes-i-1]; |
781 | LLVM_DEBUG(N->dump(DAG)); |
782 | Emitter.EmitNode(Node: N, IsClone: false, IsCloned: false, VRBaseMap); |
783 | |
784 | // Emit any debug values associated with the node. |
785 | if (N->getHasDebugValue()) { |
786 | MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); |
787 | for (auto *DV : DAG->GetDbgValues(SD: N)) { |
788 | if (!DV->isEmitted()) |
789 | if (auto *DbgMI = Emitter.EmitDbgValue(SD: DV, VRBaseMap)) |
790 | BB->insert(I: InsertPos, MI: DbgMI); |
791 | } |
792 | } |
793 | } |
794 | |
795 | LLVM_DEBUG(dbgs() << '\n'); |
796 | |
797 | InsertPos = Emitter.getInsertPos(); |
798 | return Emitter.getBlock(); |
799 | } |
800 | |
801 | //===----------------------------------------------------------------------===// |
802 | // Public Constructor Functions |
803 | //===----------------------------------------------------------------------===// |
804 | |
805 | llvm::ScheduleDAGSDNodes *llvm::createFastDAGScheduler(SelectionDAGISel *IS, |
806 | CodeGenOptLevel) { |
807 | return new ScheduleDAGFast(*IS->MF); |
808 | } |
809 | |
810 | llvm::ScheduleDAGSDNodes *llvm::createDAGLinearizer(SelectionDAGISel *IS, |
811 | CodeGenOptLevel) { |
812 | return new ScheduleDAGLinearize(*IS->MF); |
813 | } |
814 | |