1 | //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements a fast scheduler. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "InstrEmitter.h" |
14 | #include "SDNodeDbgValue.h" |
15 | #include "ScheduleDAGSDNodes.h" |
16 | #include "llvm/ADT/SmallSet.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/CodeGen/SchedulerRegistry.h" |
19 | #include "llvm/CodeGen/SelectionDAGISel.h" |
20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
21 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
22 | #include "llvm/IR/InlineAsm.h" |
23 | #include "llvm/Support/Debug.h" |
24 | #include "llvm/Support/ErrorHandling.h" |
25 | #include "llvm/Support/raw_ostream.h" |
26 | using namespace llvm; |
27 | |
28 | #define DEBUG_TYPE "pre-RA-sched" |
29 | |
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical copies");

static RegisterScheduler
  fastDAGScheduler("fast", "Fast suboptimal list scheduling",
                   createFastDAGScheduler);
static RegisterScheduler
  linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
39 | createDAGLinearizer); |
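
// Both schedulers are picked through the pre-RA scheduler registry, e.g.
// "llc -pre-RA-sched=fast" or "llc -pre-RA-sched=linearize".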
40 | |
41 | |
42 | namespace { |
/// FastPriorityQueue - A degenerate priority queue that considers
/// all nodes to have the same priority; pop simply returns the most
/// recently pushed node (LIFO order).
///
46 | struct FastPriorityQueue { |
47 | SmallVector<SUnit *, 16> Queue; |
48 | |
49 | bool empty() const { return Queue.empty(); } |
50 | |
51 | void push(SUnit *U) { |
    Queue.push_back(U);
53 | } |
54 | |
55 | SUnit *pop() { |
56 | if (empty()) return nullptr; |
57 | return Queue.pop_back_val(); |
58 | } |
59 | }; |
60 | |
61 | //===----------------------------------------------------------------------===// |
62 | /// ScheduleDAGFast - The actual "fast" list scheduler implementation. |
63 | /// |
64 | class ScheduleDAGFast : public ScheduleDAGSDNodes { |
65 | private: |
66 | /// AvailableQueue - The priority queue to use for the available SUnits. |
67 | FastPriorityQueue AvailableQueue; |
68 | |
  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other node
  /// that modifies the registers can be scheduled.
72 | unsigned NumLiveRegs = 0u; |
73 | std::vector<SUnit*> LiveRegDefs; |
74 | std::vector<unsigned> LiveRegCycles; |
75 | |
76 | public: |
77 | ScheduleDAGFast(MachineFunction &mf) |
78 | : ScheduleDAGSDNodes(mf) {} |
79 | |
80 | void Schedule() override; |
81 | |
82 | /// AddPred - adds a predecessor edge to SUnit SU. |
83 | void AddPred(SUnit *SU, const SDep &D) { SU->addPred(D); } |
84 | |
85 | /// RemovePred - removes a predecessor edge from SUnit SU. |
86 | void RemovePred(SUnit *SU, const SDep &D) { SU->removePred(D); } |
87 | |
88 | private: |
89 | void ReleasePred(SUnit *SU, SDep *PredEdge); |
90 | void ReleasePredecessors(SUnit *SU, unsigned CurCycle); |
91 | void ScheduleNodeBottomUp(SUnit*, unsigned); |
92 | SUnit *CopyAndMoveSuccessors(SUnit*); |
93 | void InsertCopiesAndMoveSuccs(SUnit*, unsigned, |
94 | const TargetRegisterClass*, |
95 | const TargetRegisterClass*, |
96 | SmallVectorImpl<SUnit*>&); |
97 | bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); |
98 | void ListScheduleBottomUp(); |
99 | |
100 | /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. |
101 | bool forceUnitLatencies() const override { return true; } |
102 | }; |
103 | } // end anonymous namespace |
104 | |
105 | |
106 | /// Schedule - Schedule the DAG using list scheduling. |
107 | void ScheduleDAGFast::Schedule() { |
  LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);
113 | |
114 | // Build the scheduling graph. |
115 | BuildSchedGraph(); |
116 | |
117 | LLVM_DEBUG(dump()); |
118 | |
119 | // Execute the actual scheduling loop. |
120 | ListScheduleBottomUp(); |
121 | } |
122 | |
123 | //===----------------------------------------------------------------------===// |
124 | // Bottom-Up Scheduling |
125 | //===----------------------------------------------------------------------===// |
126 | |
127 | /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to |
128 | /// the AvailableQueue if the count reaches zero. Also update its cycle bound. |
129 | void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { |
130 | SUnit *PredSU = PredEdge->getSUnit(); |
131 | |
132 | #ifndef NDEBUG |
133 | if (PredSU->NumSuccsLeft == 0) { |
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
137 | llvm_unreachable(nullptr); |
138 | } |
139 | #endif |
140 | --PredSU->NumSuccsLeft; |
141 | |
142 | // If all the node's successors are scheduled, this node is ready |
143 | // to be scheduled. Ignore the special EntrySU node. |
144 | if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { |
145 | PredSU->isAvailable = true; |
    AvailableQueue.push(PredSU);
147 | } |
148 | } |
149 | |
150 | void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { |
151 | // Bottom up: release predecessors |
152 | for (SDep &Pred : SU->Preds) { |
    ReleasePred(SU, &Pred);
154 | if (Pred.isAssignedRegDep()) { |
155 | // This is a physical register dependency and it's impossible or |
156 | // expensive to copy the register. Make sure nothing that can |
157 | // clobber the register is scheduled between the predecessor and |
158 | // this node. |
159 | if (!LiveRegDefs[Pred.getReg()]) { |
160 | ++NumLiveRegs; |
161 | LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); |
162 | LiveRegCycles[Pred.getReg()] = CurCycle; |
163 | } |
164 | } |
165 | } |
166 | } |
167 | |
168 | /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending |
169 | /// count of its predecessors. If a predecessor pending count is zero, add it to |
170 | /// the Available queue. |
171 | void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { |
  LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);
178 | |
179 | ReleasePredecessors(SU, CurCycle); |
180 | |
181 | // Release all the implicit physical register defs that are live. |
182 | for (SDep &Succ : SU->Succs) { |
183 | if (Succ.isAssignedRegDep()) { |
184 | if (LiveRegCycles[Succ.getReg()] == Succ.getSUnit()->getHeight()) { |
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[Succ.getReg()] == SU &&
               "Physical register dependency violated?");
188 | --NumLiveRegs; |
189 | LiveRegDefs[Succ.getReg()] = nullptr; |
190 | LiveRegCycles[Succ.getReg()] = 0; |
191 | } |
192 | } |
193 | } |
194 | |
195 | SU->isScheduled = true; |
196 | } |
197 | |
198 | /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled |
199 | /// successors to the newly created node. |
200 | SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { |
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  if (N->getGluedNode())
    return nullptr;
207 | |
208 | SUnit *NewSU; |
209 | bool TryUnfold = false; |
210 | for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { |
    MVT VT = N->getSimpleValueType(i);
212 | if (VT == MVT::Glue) |
213 | return nullptr; |
214 | else if (VT == MVT::Other) |
215 | TryUnfold = true; |
216 | } |
217 | for (const SDValue &Op : N->op_values()) { |
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
219 | if (VT == MVT::Glue) |
220 | return nullptr; |
221 | } |
222 | |
223 | if (TryUnfold) { |
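    // For example, with x86-style opcodes (an illustrative sketch), a load
    // folded into an operation:
    //   t1 = ADD32rm a, <mem>
    // can be unfolded into a separate load feeding the operation:
    //   t0 = MOV32rm <mem>
    //   t1 = ADD32rr a, t0
    // which frees the load to be scheduled independently of its user.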
224 | SmallVector<SDNode*, 2> NewNodes; |
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return nullptr;

    LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");
230 | |
231 | N = NewNodes[1]; |
232 | SDNode *LoadNode = NewNodes[0]; |
233 | unsigned NumVals = N->getNumValues(); |
234 | unsigned OldNumVals = SU->getNode()->getNumValues(); |
235 | for (unsigned i = 0; i != NumVals; ++i) |
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    SUnit *NewSU = newSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
247 | NewSU->isTwoAddress = true; |
248 | break; |
249 | } |
250 | } |
251 | if (MCID.isCommutable()) |
252 | NewSU->isCommutable = true; |
253 | |
    // LoadNode may already exist. This can happen when there is another
    // load from the same location producing the same type of value but
    // with different alignment or volatility.
257 | bool isNewLoad = true; |
258 | SUnit *LoadSU; |
259 | if (LoadNode->getNodeId() != -1) { |
260 | LoadSU = &SUnits[LoadNode->getNodeId()]; |
261 | isNewLoad = false; |
262 | } else { |
      LoadSU = newSUnit(LoadNode);
264 | LoadNode->setNodeId(LoadSU->NodeNum); |
265 | } |
266 | |
267 | SDep ChainPred; |
268 | SmallVector<SDep, 4> ChainSuccs; |
269 | SmallVector<SDep, 4> LoadPreds; |
270 | SmallVector<SDep, 4> NodePreds; |
271 | SmallVector<SDep, 4> NodeSuccs; |
272 | for (SDep &Pred : SU->Preds) { |
273 | if (Pred.isCtrl()) |
274 | ChainPred = Pred; |
275 | else if (Pred.getSUnit()->getNode() && |
               Pred.getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(Pred);
      else
        NodePreds.push_back(Pred);
    }
    for (SDep &Succ : SU->Succs) {
      if (Succ.isCtrl())
        ChainSuccs.push_back(Succ);
      else
        NodeSuccs.push_back(Succ);
286 | } |
287 | |
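    // Rewire the edges: the chain predecessor and the load's own operands
    // move to the (possibly pre-existing) LoadSU, value predecessors and
    // successors move to NewSU, and chain successors follow the load.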
288 | if (ChainPred.getSUnit()) { |
      RemovePred(SU, ChainPred);
      if (isNewLoad)
        AddPred(LoadSU, ChainPred);
    }
    for (const SDep &Pred : LoadPreds) {
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (const SDep &Pred : NodePreds) {
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (SDep D : NodeSuccs) {
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (SDep D : ChainSuccs) {
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    if (isNewLoad) {
      SDep D(LoadSU, SDep::Barrier);
      D.setLatency(LoadSU->Latency);
      AddPred(NewSU, D);
323 | } |
324 | |
325 | ++NumUnfolds; |
326 | |
327 | if (NewSU->NumSuccsLeft == 0) { |
328 | NewSU->isAvailable = true; |
329 | return NewSU; |
330 | } |
331 | SU = NewSU; |
332 | } |
333 | |
  LLVM_DEBUG(dbgs() << "Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPred(NewSU, Pred);
341 | |
342 | // Only copy scheduled successors. Cut them from old node's successor |
343 | // list and move them over. |
344 | SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; |
345 | for (SDep &Succ : SU->Succs) { |
346 | if (Succ.isArtificial()) |
347 | continue; |
348 | SUnit *SuccSU = Succ.getSUnit(); |
349 | if (SuccSU->isScheduled) { |
350 | SDep D = Succ; |
351 | D.setSUnit(NewSU); |
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (const auto &[Del, Dep] : DelDeps)
    RemovePred(Del, Dep);
359 | |
360 | ++NumDups; |
361 | return NewSU; |
362 | } |
363 | |
364 | /// InsertCopiesAndMoveSuccs - Insert register copies and move all |
365 | /// scheduled successors of the given SUnit to the last copy. |
366 | void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, |
367 | const TargetRegisterClass *DestRC, |
368 | const TargetRegisterClass *SrcRC, |
369 | SmallVectorImpl<SUnit*> &Copies) { |
  SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr));
371 | CopyFromSU->CopySrcRC = SrcRC; |
372 | CopyFromSU->CopyDstRC = DestRC; |
373 | |
  SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr));
375 | CopyToSU->CopySrcRC = DestRC; |
376 | CopyToSU->CopyDstRC = SrcRC; |
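
  // The net effect is a chain SU -> CopyFromSU -> CopyToSU: the value is
  // copied out of SrcRC into DestRC and then back into SrcRC, and the
  // scheduled successors are rewired below to read it from CopyToSU.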
377 | |
378 | // Only copy scheduled successors. Cut them from old node's successor |
379 | // list and move them over. |
380 | SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; |
381 | for (SDep &Succ : SU->Succs) { |
382 | if (Succ.isArtificial()) |
383 | continue; |
384 | SUnit *SuccSU = Succ.getSUnit(); |
385 | if (SuccSU->isScheduled) { |
386 | SDep D = Succ; |
387 | D.setSUnit(CopyToSU); |
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, Succ));
    }
  }
  for (const auto &[Del, Dep] : DelDeps)
    RemovePred(Del, Dep);
  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPred(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPred(CopyToSU, ToDep);

  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);
403 | |
404 | ++NumPRCopies; |
405 | } |
406 | |
407 | /// getPhysicalRegisterVT - Returns the ValueType of the physical register |
408 | /// definition of the specified node. |
409 | /// FIXME: Move to SelectionDAG? |
410 | static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, |
411 | const TargetInstrInfo *TII) { |
412 | unsigned NumRes; |
413 | if (N->getOpcode() == ISD::CopyFromReg) { |
    // CopyFromReg has results "chain, Val, glue", so the copied value is
    // result #1.
415 | NumRes = 1; |
416 | } else { |
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    assert(!MCID.implicit_defs().empty() &&
           "Physical reg def must be in implicit def list!");
420 | NumRes = MCID.getNumDefs(); |
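    // Implicit defs are numbered after the explicit defs, so the result
    // number for Reg is NumDefs plus Reg's position in the implicit def
    // list.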
421 | for (MCPhysReg ImpDef : MCID.implicit_defs()) { |
422 | if (Reg == ImpDef) |
423 | break; |
424 | ++NumRes; |
425 | } |
426 | } |
  return N->getSimpleValueType(NumRes);
428 | } |
429 | |
430 | /// CheckForLiveRegDef - Return true and update live register vector if the |
431 | /// specified register def of the specified SUnit clobbers any "live" registers. |
432 | static bool CheckForLiveRegDef(SUnit *SU, MCRegister Reg, |
433 | std::vector<SUnit *> &LiveRegDefs, |
434 | SmallSet<unsigned, 4> &RegAdded, |
435 | SmallVectorImpl<unsigned> &LRegs, |
436 | const TargetRegisterInfo *TRI, |
437 | const SDNode *Node = nullptr) { |
438 | bool Added = false; |
439 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { |
    // Check if this alias of Reg is live.
441 | if (!LiveRegDefs[*AI]) |
442 | continue; |
443 | |
444 | // Allow multiple uses of the same def. |
445 | if (LiveRegDefs[*AI] == SU) |
446 | continue; |
447 | |
    // Likewise if the live def is produced by Node itself.
449 | if (Node && LiveRegDefs[*AI]->getNode() == Node) |
450 | continue; |
451 | |
452 | // Add Reg to the set of interfering live regs. |
    if (RegAdded.insert(*AI).second) {
      LRegs.push_back(*AI);
455 | Added = true; |
456 | } |
457 | } |
458 | return Added; |
459 | } |
460 | |
461 | /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay |
462 | /// scheduling of the given node to satisfy live physical register dependencies. |
463 | /// If the specific node is the last one that's available to schedule, do |
464 | /// whatever is necessary (i.e. backtracking or cloning) to make it possible. |
465 | bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, |
                                               SmallVectorImpl<unsigned> &LRegs) {
467 | if (NumLiveRegs == 0) |
468 | return false; |
469 | |
470 | SmallSet<unsigned, 4> RegAdded; |
471 | // If this node would clobber any "live" register, then it's not ready. |
472 | for (SDep &Pred : SU->Preds) { |
473 | if (Pred.isAssignedRegDep()) { |
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs, RegAdded,
                         LRegs, TRI);
476 | } |
477 | } |
478 | |
479 | for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { |
480 | if (Node->getOpcode() == ISD::INLINEASM || |
481 | Node->getOpcode() == ISD::INLINEASM_BR) { |
482 | // Inline asm can clobber physical defs. |
483 | unsigned NumOps = Node->getNumOperands(); |
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
485 | --NumOps; // Ignore the glue operand. |
486 | |
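      // Inline asm operands come in groups: a flag word encoding the kind
      // and the number of registers, followed by that many register
      // operands. Walk the groups and check any physical register defs,
      // earlyclobbers, and clobbers.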
487 | for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { |
        unsigned Flags = Node->getConstantOperandVal(i);
489 | const InlineAsm::Flag F(Flags); |
490 | unsigned NumVals = F.getNumOperandRegisters(); |
491 | |
492 | ++i; // Skip the ID value. |
493 | if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || |
494 | F.isClobberKind()) { |
495 | // Check for def of register or earlyclobber register. |
496 | for (; NumVals; --NumVals, ++i) { |
          Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
498 | if (Reg.isPhysical()) |
499 | CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); |
500 | } |
501 | } else |
502 | i += NumVals; |
503 | } |
504 | continue; |
505 | } |
506 | |
507 | if (Node->getOpcode() == ISD::CopyToReg) { |
      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
509 | if (Reg.isPhysical()) { |
        SDNode *SrcNode = Node->getOperand(2).getNode();
        CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
512 | } |
513 | } |
514 | |
515 | if (!Node->isMachineOpcode()) |
516 | continue; |
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
518 | for (MCPhysReg Reg : MCID.implicit_defs()) |
519 | CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); |
520 | } |
521 | return !LRegs.empty(); |
522 | } |
523 | |
524 | |
525 | /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up |
526 | /// schedulers. |
527 | void ScheduleDAGFast::ListScheduleBottomUp() { |
528 | unsigned CurCycle = 0; |
529 | |
530 | // Release any predecessors of the special Exit node. |
  ReleasePredecessors(&ExitSU, CurCycle);
532 | |
533 | // Add root to Available queue. |
534 | if (!SUnits.empty()) { |
535 | SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; |
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue.push(RootSU);
539 | } |
540 | |
541 | // While Available queue is not empty, grab the node with the highest |
542 | // priority. If it is not ready put it back. Schedule the node. |
543 | SmallVector<SUnit*, 4> NotReady; |
  DenseMap<SUnit *, SmallVector<unsigned, 4>> LRegsMap;
  Sequence.reserve(SUnits.size());
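  // SUnits that get popped but are blocked by a live physical register are
  // parked in NotReady; LRegsMap records which registers blocked each one so
  // the interference can be resolved below.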
546 | while (!AvailableQueue.empty()) { |
547 | bool Delayed = false; |
548 | LRegsMap.clear(); |
549 | SUnit *CurSU = AvailableQueue.pop(); |
550 | while (CurSU) { |
551 | SmallVector<unsigned, 4> LRegs; |
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
559 | CurSU = AvailableQueue.pop(); |
560 | } |
561 | |
562 | // All candidates are delayed due to live physical reg dependencies. |
563 | // Try code duplication or inserting cross class copies |
564 | // to resolve it. |
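    // A typical case: each remaining candidate defines a physical register
    // (say, a flags register) that is still live on behalf of an already
    // scheduled user, so scheduling any of them would clobber the live value.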
565 | if (Delayed && !CurSU) { |
566 | if (!CurSU) { |
        // Try duplicating the nodes that produce these
568 | // "expensive to copy" values to break the dependency. In case even |
569 | // that doesn't work, insert cross class copies. |
570 | SUnit *TrySU = NotReady[0]; |
571 | SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; |
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
576 | const TargetRegisterClass *RC = |
577 | TRI->getMinimalPhysRegClass(Reg, VT); |
578 | const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); |
579 | |
        // If the cross copy register class is the same as RC, then it must
        // be possible to copy the value directly. Do not try to duplicate
        // the def.
        // If the cross copy register class is not the same as RC, then it's
        // possible to copy the value but it requires cross register class
        // copies, which are expensive.
        // If the cross copy register class is null, then it's not possible
        // to copy the value at all.
587 | SUnit *NewDef = nullptr; |
588 | if (DestRC != RC) { |
          NewDef = CopyAndMoveSuccessors(LRDef);
          if (!DestRC && !NewDef)
            report_fatal_error("Can't handle live physical "
                               "register dependency!");
593 | } |
594 | if (!NewDef) { |
595 | // Issue copies, these can be expensive cross register class copies. |
596 | SmallVector<SUnit*, 2> Copies; |
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          LLVM_DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
                            << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
601 | NewDef = Copies.back(); |
602 | } |
603 | |
        LLVM_DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
                          << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Artificial));
608 | TrySU->isAvailable = false; |
609 | CurSU = NewDef; |
610 | } |
611 | |
612 | if (!CurSU) { |
        llvm_unreachable(
            "Unable to resolve live physical register dependencies!");
614 | } |
615 | } |
616 | |
617 | // Add the nodes that aren't ready back onto the available list. |
618 | for (SUnit *SU : NotReady) { |
619 | SU->isPending = false; |
620 | // May no longer be available due to backtracking. |
621 | if (SU->isAvailable) |
        AvailableQueue.push(SU);
623 | } |
624 | NotReady.clear(); |
625 | |
626 | if (CurSU) |
      ScheduleNodeBottomUp(CurSU, CurCycle);
628 | ++CurCycle; |
629 | } |
630 | |
631 | // Reverse the order since it is bottom up. |
  std::reverse(Sequence.begin(), Sequence.end());
633 | |
634 | #ifndef NDEBUG |
635 | VerifyScheduledSequence(/*isBottomUp=*/true); |
636 | #endif |
637 | } |
638 | |
639 | |
640 | namespace { |
641 | //===----------------------------------------------------------------------===// |
// ScheduleDAGLinearize - A "no scheduling" scheduler that simply linearizes
// the DAG in topological order.
// IMPORTANT: this may not work for targets with physreg dependencies.
645 | // |
646 | class ScheduleDAGLinearize : public ScheduleDAGSDNodes { |
647 | public: |
648 | ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} |
649 | |
650 | void Schedule() override; |
651 | |
652 | MachineBasicBlock * |
653 | EmitSchedule(MachineBasicBlock::iterator &InsertPos) override; |
654 | |
655 | private: |
656 | std::vector<SDNode*> Sequence; |
657 | DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user |
658 | |
659 | void ScheduleNode(SDNode *N); |
660 | }; |
661 | } // end anonymous namespace |
662 | |
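/// ScheduleNode - Append N to the sequence and release its operands. Each
/// node's NodeId holds the number of its unscheduled users; an operand whose
/// count drops to zero is scheduled recursively, and a glue operand is
/// scheduled immediately above its glued user.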
663 | void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { |
664 | if (N->getNodeId() != 0) |
665 | llvm_unreachable(nullptr); |
666 | |
667 | if (!N->isMachineOpcode() && |
      (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
669 | // These nodes do not need to be translated into MIs. |
670 | return; |
671 | |
  LLVM_DEBUG(dbgs() << "\n*** Scheduling: ");
  LLVM_DEBUG(N->dump(DAG));
  Sequence.push_back(N);
675 | |
676 | unsigned NumOps = N->getNumOperands(); |
677 | if (unsigned NumLeft = NumOps) { |
678 | SDNode *GluedOpN = nullptr; |
679 | do { |
      const SDValue &Op = N->getOperand(NumLeft-1);
681 | SDNode *OpN = Op.getNode(); |
682 | |
683 | if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { |
684 | // Schedule glue operand right above N. |
685 | GluedOpN = OpN; |
        assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
        OpN->setNodeId(0);
        ScheduleNode(OpN);
689 | continue; |
690 | } |
691 | |
692 | if (OpN == GluedOpN) |
693 | // Glue operand is already scheduled. |
694 | continue; |
695 | |
      DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
697 | if (DI != GluedMap.end() && DI->second != N) |
698 | // Users of glues are counted against the glued users. |
699 | OpN = DI->second; |
700 | |
701 | unsigned Degree = OpN->getNodeId(); |
      assert(Degree > 0 && "Predecessor over-released!");
      OpN->setNodeId(--Degree);
      if (Degree == 0)
        ScheduleNode(OpN);
706 | } while (--NumLeft); |
707 | } |
708 | } |
709 | |
710 | /// findGluedUser - Find the representative use of a glue value by walking |
711 | /// the use chain. |
712 | static SDNode *findGluedUser(SDNode *N) { |
713 | while (SDNode *Glued = N->getGluedUser()) |
714 | N = Glued; |
715 | return N; |
716 | } |
717 | |
718 | void ScheduleDAGLinearize::Schedule() { |
  LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");
720 | |
721 | SmallVector<SDNode*, 8> Glues; |
722 | unsigned DAGSize = 0; |
723 | for (SDNode &Node : DAG->allnodes()) { |
724 | SDNode *N = &Node; |
725 | |
726 | // Use node id to record degree. |
727 | unsigned Degree = N->use_size(); |
728 | N->setNodeId(Degree); |
729 | unsigned NumVals = N->getNumValues(); |
    if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
        N->hasAnyUseOfValue(NumVals-1)) {
732 | SDNode *User = findGluedUser(N); |
733 | if (User) { |
        Glues.push_back(N);
        GluedMap.insert(std::make_pair(N, User));
736 | } |
737 | } |
738 | |
739 | if (N->isMachineOpcode() || |
        (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
741 | ++DAGSize; |
742 | } |
743 | |
744 | for (SDNode *Glue : Glues) { |
745 | SDNode *GUser = GluedMap[Glue]; |
746 | unsigned Degree = Glue->getNodeId(); |
747 | unsigned UDegree = GUser->getNodeId(); |
748 | |
749 | // Glue user must be scheduled together with the glue operand. So other |
750 | // users of the glue operand must be treated as its users. |
751 | SDNode *ImmGUser = Glue->getGluedUser(); |
752 | for (const SDNode *U : Glue->users()) |
753 | if (U == ImmGUser) |
754 | --Degree; |
755 | GUser->setNodeId(UDegree + Degree); |
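    // Pin the glue node's count to 1 so it is never released by the normal
    // degree decrements in ScheduleNode; it is instead scheduled explicitly,
    // immediately above its glued user.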
756 | Glue->setNodeId(1); |
757 | } |
758 | |
  Sequence.reserve(DAGSize);
  ScheduleNode(DAG->getRoot().getNode());
761 | } |
762 | |
763 | MachineBasicBlock* |
764 | ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { |
765 | InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); |
766 | InstrEmitter::VRBaseMapType VRBaseMap; |
767 | |
  LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
769 | |
770 | unsigned NumNodes = Sequence.size(); |
771 | MachineBasicBlock *BB = Emitter.getBlock(); |
772 | for (unsigned i = 0; i != NumNodes; ++i) { |
773 | SDNode *N = Sequence[NumNodes-i-1]; |
774 | LLVM_DEBUG(N->dump(DAG)); |
    Emitter.EmitNode(N, false, false, VRBaseMap);
776 | |
777 | // Emit any debug values associated with the node. |
778 | if (N->getHasDebugValue()) { |
779 | MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); |
      for (auto *DV : DAG->GetDbgValues(N)) {
        if (!DV->isEmitted())
          if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
            BB->insert(InsertPos, DbgMI);
784 | } |
785 | } |
786 | } |
787 | |
788 | LLVM_DEBUG(dbgs() << '\n'); |
789 | |
790 | InsertPos = Emitter.getInsertPos(); |
791 | return Emitter.getBlock(); |
792 | } |
793 | |
794 | //===----------------------------------------------------------------------===// |
795 | // Public Constructor Functions |
796 | //===----------------------------------------------------------------------===// |
797 | |
798 | llvm::ScheduleDAGSDNodes *llvm::createFastDAGScheduler(SelectionDAGISel *IS, |
799 | CodeGenOptLevel) { |
800 | return new ScheduleDAGFast(*IS->MF); |
801 | } |
802 | |
803 | llvm::ScheduleDAGSDNodes *llvm::createDAGLinearizer(SelectionDAGISel *IS, |
804 | CodeGenOptLevel) { |
805 | return new ScheduleDAGLinearize(*IS->MF); |
806 | } |
807 | |