//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGSDNodes class, which is the base class used
// by the SelectionDAG-based scheduling implementation classes.
//
//===----------------------------------------------------------------------===//

#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(LoadsClustered, "Number of loads clustered together");

// This allows the latency-based scheduler to notice high latency instructions
// without a target itinerary. The choice of number here has more to do with
// balancing scheduler heuristics than with the actual machine latency.
static cl::opt<int> HighLatencyCycles(
    "sched-high-latency-cycles", cl::Hidden, cl::init(10),
    cl::desc("Roughly estimate the number of cycles that 'long latency' "
             "instructions take for targets with no itinerary"));

ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
    : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {}

/// Run - perform scheduling.
///
void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
  BB = bb;
  DAG = dag;

  // Clear the scheduler's SUnit DAG.
  ScheduleDAG::clearDAG();
  Sequence.clear();

  // Invoke the target's selection of scheduler.
  Schedule();
}

/// newSUnit - Creates a new SUnit and returns a pointer to it.
///
SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
#ifndef NDEBUG
  const SUnit *Addr = nullptr;
  if (!SUnits.empty())
    Addr = &SUnits[0];
#endif
  SUnits.emplace_back(N, (unsigned)SUnits.size());
  assert((Addr == nullptr || Addr == &SUnits[0]) &&
         "SUnits std::vector reallocated on the fly!");
  SUnits.back().OrigNode = &SUnits.back();
  SUnit *SU = &SUnits.back();
  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
  if (!N ||
      (N->isMachineOpcode() &&
       N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
    SU->SchedulingPref = Sched::None;
  else
    SU->SchedulingPref = TLI.getSchedulingPreference(N);
  return SU;
}

SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
  SUnit *SU = newSUnit(Old->getNode());
  SU->OrigNode = Old->OrigNode;
  SU->Latency = Old->Latency;
  SU->isVRegCycle = Old->isVRegCycle;
  SU->isCall = Old->isCall;
  SU->isCallOp = Old->isCallOp;
  SU->isTwoAddress = Old->isTwoAddress;
  SU->isCommutable = Old->isCommutable;
  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
  SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
  SU->isScheduleHigh = Old->isScheduleHigh;
  SU->isScheduleLow = Old->isScheduleLow;
  SU->SchedulingPref = Old->SchedulingPref;
  Old->isCloned = true;
  return SU;
}
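
// Note: Clone() is exercised by schedulers that duplicate nodes, e.g. the
// register-reduction list scheduler when it copies a node to work around
// interference on a physical register. This cloning is also why
// BuildSchedUnits() below reserves the SUnits vector at twice the node
// count: SUnit pointers must stay stable, so reallocation is not allowed.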

/// CheckForPhysRegDependency - Check if the dependency between def and use of
/// a specified operand is a physical register dependency. If so, returns the
/// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                      const TargetRegisterInfo *TRI,
                                      const TargetInstrInfo *TII,
                                      const TargetLowering &TLI,
                                      unsigned &PhysReg, int &Cost) {
  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
    return;

  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
  if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost))
    return;

  if (Register::isVirtualRegister(Reg))
    return;

  unsigned ResNo = User->getOperand(2).getResNo();
  if (Def->getOpcode() == ISD::CopyFromReg &&
      cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
    PhysReg = Reg;
  } else if (Def->isMachineOpcode()) {
    const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
    if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg))
      PhysReg = Reg;
  }

  if (PhysReg != 0) {
    const TargetRegisterClass *RC =
        TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
    Cost = RC->getCopyCost();
  }
}
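
// An illustrative shape for the pattern matched above, using CopyToReg's
// (chain, reg, value) operand order so that Op == 2 is the copied value
// (schematic nodes, not from any particular target):
//
//   t1: i32,ch = CopyFromReg t0, Register:i32 $r0
//   t2: ch = CopyToReg t0, Register:i32 $r0, t1
//
// The def (t1) already lives in the physical register the user copies into,
// so PhysReg is set and Cost comes from the minimal register class that
// contains it.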

// Helper for AddGlue to clone node operands.
static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
                                SDValue ExtraOper = SDValue()) {
  SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
  if (ExtraOper.getNode())
    Ops.push_back(ExtraOper);

  SDVTList VTList = DAG->getVTList(VTs);
  MachineSDNode *MN = dyn_cast<MachineSDNode>(N);

  // Store memory references.
  SmallVector<MachineMemOperand *, 2> MMOs;
  if (MN)
    MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());

  DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);

  // Reset the memory references.
  if (MN)
    DAG->setNodeMemRefs(MN, MMOs);
}

static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
  SDNode *GlueDestNode = Glue.getNode();

  // Don't add glue from a node to itself.
  if (GlueDestNode == N) return false;

  // Don't add a glue operand to something that already uses glue.
  if (GlueDestNode &&
      N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
    return false;
  }
  // Don't add glue to something that already has a glue value.
  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;

  SmallVector<EVT, 4> VTs(N->values());
  if (AddGlue)
    VTs.push_back(MVT::Glue);

  CloneNodeWithValues(N, DAG, VTs, Glue);

  return true;
}

// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
// node even though simply shrinking the value list is sufficient.
static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
  assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
          !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
         "expected an unused glue value");

  CloneNodeWithValues(N, DAG,
                      ArrayRef(N->value_begin(), N->getNumValues() - 1));
}
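
// A note on the glue mechanics used by the helpers above: an MVT::Glue value
// must be the last result of its producer and the last operand of its single
// consumer, and BuildSchedUnits() places glued producer/consumer nodes into
// one SUnit so they are emitted back to back. Gluing node B onto node A's
// glue result therefore pins B immediately after A in the final schedule.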

/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
  SDValue Chain;
  unsigned NumOps = Node->getNumOperands();
  if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
    Chain = Node->getOperand(NumOps-1);
  if (!Chain)
    return;

  // Skip any load instruction that has a tied input. There may be an additional
  // dependency requiring a different order than by increasing offsets, and the
  // added glue may introduce a cycle.
  auto hasTiedInput = [this](const SDNode *N) {
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
      if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
        return true;
    }

    return false;
  };

  // Look for other loads of the same chain. Find loads that are loading from
  // the same base pointer and different offsets.
  SmallPtrSet<SDNode*, 16> Visited;
  SmallVector<int64_t, 4> Offsets;
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  bool Cluster = false;
  SDNode *Base = Node;

  if (hasTiedInput(Base))
    return;

  // This algorithm requires a reasonably low use count before finding a match
  // to avoid uselessly blowing up compile time in large blocks.
  unsigned UseCount = 0;
  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
       I != E && UseCount < 100; ++I, ++UseCount) {
    if (I.getUse().getResNo() != Chain.getResNo())
      continue;

    SDNode *User = *I;
    if (User == Node || !Visited.insert(User).second)
      continue;
    int64_t Offset1, Offset2;
    if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
        Offset1 == Offset2 ||
        hasTiedInput(User)) {
      // FIXME: Should be ok if the addresses are identical. But earlier
      // optimizations really should have eliminated one of the loads.
      continue;
    }
    if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
      Offsets.push_back(Offset1);
    O2SMap.insert(std::make_pair(Offset2, User));
    Offsets.push_back(Offset2);
    if (Offset2 < Offset1)
      Base = User;
    Cluster = true;
    // Reset UseCount to allow more matches.
    UseCount = 0;
  }

  if (!Cluster)
    return;

  // Sort them in increasing order.
  llvm::sort(Offsets);

  // Check if the loads are close enough.
  SmallVector<SDNode*, 4> Loads;
  unsigned NumLoads = 0;
  int64_t BaseOff = Offsets[0];
  SDNode *BaseLoad = O2SMap[BaseOff];
  Loads.push_back(BaseLoad);
  for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
    int64_t Offset = Offsets[i];
    SDNode *Load = O2SMap[Offset];
    if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
                                      NumLoads))
      break; // Stop right here. Ignore loads that are further away.
    Loads.push_back(Load);
    ++NumLoads;
  }

  if (NumLoads == 0)
    return;

  // Cluster loads by adding MVT::Glue outputs and inputs. This also
  // ensures they are scheduled in order of increasing addresses.
  SDNode *Lead = Loads[0];
  SDValue InGlue;
  if (AddGlue(Lead, InGlue, true, DAG))
    InGlue = SDValue(Lead, Lead->getNumValues() - 1);
  for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
    bool OutGlue = I < E - 1;
    SDNode *Load = Loads[I];

    // If AddGlue fails, we could leave an unused glue value. This should not
    // cause any problems: the else-branch below removes a glue output left
    // dangling on the final iteration.
    if (AddGlue(Load, InGlue, OutGlue, DAG)) {
      if (OutGlue)
        InGlue = SDValue(Load, Load->getNumValues() - 1);

      ++LoadsClustered;
    }
    else if (!OutGlue && InGlue.getNode())
      RemoveUnusedGlue(InGlue.getNode(), DAG);
  }
}
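
// A concrete sketch of the clustering above: three loads from one base
// pointer at offsets 0, 8, and 16 are rewritten roughly as
//
//   ld0: v,ch,glue = LOAD [base+0]           ; glue output added
//   ld1: v,ch,glue = LOAD [base+8], ld0:2    ; glued after ld0
//   ld2: v,ch      = LOAD [base+16], ld1:2   ; glued after ld1, no out-glue
//
// forcing the scheduler to emit them adjacently in increasing-address order.
// The node shapes are illustrative; the result numbering depends on the
// target load's actual value list.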

/// ClusterNodes - Cluster certain nodes which should be scheduled together.
///
void ScheduleDAGSDNodes::ClusterNodes() {
  for (SDNode &NI : DAG->allnodes()) {
    SDNode *Node = &NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    if (MCID.mayLoad())
      // Cluster loads from "near" addresses into combined SUnits.
      ClusterNeighboringLoads(Node);
  }
}

void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SDNode &NI : DAG->allnodes()) {
    NI.setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating. This
  // ensures that reallocation of the vector won't happen, so SUnit*'s won't
  // get invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);

  // Add all nodes in depth first order.
  SmallVector<SDNode*, 64> Worklist;
  SmallPtrSet<SDNode*, 32> Visited;
  Worklist.push_back(DAG->getRoot().getNode());
  Visited.insert(DAG->getRoot().getNode());

  SmallVector<SUnit*, 8> CallSUnits;
  while (!Worklist.empty()) {
    SDNode *NI = Worklist.pop_back_val();

    // Add all operands to the worklist unless they've already been added.
    for (const SDValue &Op : NI->op_values())
      if (Visited.insert(Op.getNode()).second)
        Worklist.push_back(Op.getNode());

    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
      continue;

    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;

    SUnit *NodeSUnit = newSUnit(NI);

    // See if anything is glued to this node, if so, add them to glued
    // nodes. Nodes can have at most one glue input and one glue output. Glue
    // is required to be the last operand and result of a node.

    // Scan up to find glued preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
        NodeSUnit->isCall = true;
    }

    // Scan down to find any glued succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
      SDValue GlueVal(N, N->getNumValues()-1);

      // There are either zero or one users of the Glue result.
      bool HasGlueUse = false;
      for (SDNode *U : N->uses())
        if (GlueVal.isOperandOf(U)) {
          HasGlueUse = true;
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = U;
          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
            NodeSUnit->isCall = true;
          break;
        }
      if (!HasGlueUse) break;
    }

    if (NodeSUnit->isCall)
      CallSUnits.push_back(NodeSUnit);

    // Schedule zero-latency TokenFactor below any nodes that may increase the
    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
    // have false stalls.
    if (NI->getOpcode() == ISD::TokenFactor)
      NodeSUnit->isScheduleLow = true;

    // If there are glue operands involved, N is now the bottom-most node
    // of the sequence of nodes that are glued together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
    InitNumRegDefsLeft(NodeSUnit);

    // Assign the Latency field of NodeSUnit using target-provided information.
    computeLatency(NodeSUnit);
  }

  // Find all call operands.
  while (!CallSUnits.empty()) {
    SUnit *SU = CallSUnits.pop_back_val();
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->getOpcode() != ISD::CopyToReg)
        continue;
      SDNode *SrcN = SUNode->getOperand(2).getNode();
      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
      SrcSU->isCallOp = true;
    }
  }
}
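
// For intuition, a typical glued group discovered by the two scans above is
// a call sequence, sketched here with target-neutral names:
//
//   t1: ch,glue = callseq_start ...
//   t2: ch,glue = CopyToReg t1, $arg0, x, t1:1
//   t3: ch,glue = <target call> ..., t2:1
//   t4: ch      = callseq_end t3, ..., t3:1
//
// All of these land in one SUnit whose representative node is the
// bottom-most of the glued run, and the SUnit is marked isCall when a call
// machine opcode appears anywhere in the run.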

void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = forceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (SUnit &SU : SUnits) {
    SDNode *MainNode = SU.getNode();

    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const MCInstrDesc &MCID = TII->get(Opc);
      for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
        if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
          SU.isTwoAddress = true;
          break;
        }
      }
      if (MCID.isCommutable())
        SU.isCommutable = true;
    }

    // Find all predecessors and successors of the group.
    for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) {
      if (N->isMachineOpcode() &&
          !TII->get(N->getMachineOpcode()).implicit_defs().empty()) {
        SU.hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU.hasPhysRegDefs = true;
      }

      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        unsigned DefIdx = N->getOperand(i).getResNo();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == &SU)
          continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        const TargetLowering &TLI = DAG->getTargetLoweringInfo();
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
        // emits a copy from the physical register to a virtual register unless
        // it requires a cross class copy (cost < 0). That means we are only
        // treating "expensive to copy" register dependency as physical register
        // dependency. This may change in the future though.
        if (Cost >= 0 && !StressSched)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
        // Special-case TokenFactor chains as zero-latency.
        if (isChain && OpN->getOpcode() == ISD::TokenFactor)
          OpLatency = 0;

        SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
                           : SDep(OpSU, SDep::Data, PhysReg);
        Dep.setLatency(OpLatency);
        if (!isChain && !UnitLatencies) {
          computeOperandLatency(OpN, N, i, Dep);
          ST.adjustSchedDependency(OpSU, DefIdx, &SU, i, Dep, nullptr);
        }

        if (!SU.addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
          // Multiple register uses are combined in the same SUnit. For example,
          // we could have a set of glued nodes with all their defs consumed by
          // another set of glued nodes. Register pressure tracking sees this as
          // a single use, so to keep pressure balanced we reduce the defs.
          //
          // We can't tell (without more book-keeping) if this results from
          // glued nodes or duplicate operands. As long as we don't reduce
          // NumRegDefsLeft to zero, we handle the common cases well.
          --OpSU->NumRegDefsLeft;
        }
      }
    }
  }
}
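
// Summary of the edges built above: a value operand becomes an SDep::Data
// edge, carrying PhysReg only for "expensive to copy" physical-register defs
// (or unconditionally under the stress-sched debugging mode); an MVT::Other
// chain operand becomes an SDep::Barrier edge with latency 1, lowered to 0
// when the chain comes straight from a TokenFactor.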

/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are given as input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// glued together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) {
  // Cluster certain nodes which should be scheduled together.
  ClusterNodes();
  // Populate the SUnits array.
  BuildSchedUnits();
  // Compute all the scheduling dependencies between nodes.
  AddSchedEdges();
}

// Initialize NumNodeDefs for the current Node's opcode.
void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
  // Check for phys reg copy.
  if (!Node)
    return;

  if (!Node->isMachineOpcode()) {
    if (Node->getOpcode() == ISD::CopyFromReg)
      NodeNumDefs = 1;
    else
      NodeNumDefs = 0;
    return;
  }
  unsigned POpc = Node->getMachineOpcode();
  if (POpc == TargetOpcode::IMPLICIT_DEF) {
    // No register need be allocated for this.
    NodeNumDefs = 0;
    return;
  }
  if (POpc == TargetOpcode::PATCHPOINT &&
      Node->getValueType(0) == MVT::Other) {
    // PATCHPOINT is defined to have one result, but it might really have none
    // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
    // real definition.
    NodeNumDefs = 0;
    return;
  }
  unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
  // Some instructions define regs that are not represented in the selection DAG
  // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
  NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
  DefIdx = 0;
}

// Construct a RegDefIter for this SUnit and find the first valid value.
ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
                                           const ScheduleDAGSDNodes *SD)
    : SchedDAG(SD), Node(SU->getNode()) {
  InitNodeNumDefs();
  Advance();
}

// Advance to the next valid value defined by the SUnit.
void ScheduleDAGSDNodes::RegDefIter::Advance() {
  for (; Node;) { // Visit all glued nodes.
    for (; DefIdx < NodeNumDefs; ++DefIdx) {
      if (!Node->hasAnyUseOfValue(DefIdx))
        continue;
      ValueType = Node->getSimpleValueType(DefIdx);
      ++DefIdx;
      return; // Found a normal regdef.
    }
    Node = Node->getGluedNode();
    if (!Node) {
      return; // No values left to visit.
    }
    InitNodeNumDefs();
  }
}

void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
  assert(SU->NumRegDefsLeft == 0 && "expect a new node");
  for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
    assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
    ++SU->NumRegDefsLeft;
  }
}

void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
  SDNode *N = SU->getNode();

  // TokenFactor operands are considered zero latency, and some schedulers
  // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
  // whenever node latency is nonzero.
  if (N && N->getOpcode() == ISD::TokenFactor) {
    SU->Latency = 0;
    return;
  }

  // Check to see if the scheduler cares about latencies.
  if (forceUnitLatencies()) {
    SU->Latency = 1;
    return;
  }

  if (!InstrItins || InstrItins->isEmpty()) {
    if (N && N->isMachineOpcode() &&
        TII->isHighLatencyDef(N->getMachineOpcode()))
      SU->Latency = HighLatencyCycles;
    else
      SU->Latency = 1;
    return;
  }

  // Compute the latency for the node. We use the sum of the latencies for
  // all nodes glued together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode())
      SU->Latency += TII->getInstrLatency(InstrItins, N);
}

void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
                                               unsigned OpIdx,
                                               SDep &dep) const {
  // Check to see if the scheduler cares about latencies.
  if (forceUnitLatencies())
    return;

  if (dep.getKind() != SDep::Data)
    return;

  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
  if (Use->isMachineOpcode())
    // Adjust the use operand index by num of defs.
    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
  std::optional<unsigned> Latency =
      TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
  if (Latency > 1U && Use->getOpcode() == ISD::CopyToReg &&
      !BB->succ_empty()) {
    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
    if (Register::isVirtualRegister(Reg))
      // This copy is a liveout value. It is likely coalesced, so reduce the
      // latency so as not to penalize the def.
      // FIXME: need target specific adjustment here?
      Latency = *Latency - 1;
  }
  if (Latency)
    dep.setLatency(*Latency);
}

void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dumpNodeName(SU);
  dbgs() << ": ";

  if (!SU.getNode()) {
    dbgs() << "PHYS REG COPY\n";
    return;
  }

  SU.getNode()->dump(DAG);
  dbgs() << "\n";
  SmallVector<SDNode *, 4> GluedNodes;
  for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
    GluedNodes.push_back(N);
  while (!GluedNodes.empty()) {
    dbgs() << "    ";
    GluedNodes.back()->dump(DAG);
    dbgs() << "\n";
    GluedNodes.pop_back();
  }
#endif
}

void ScheduleDAGSDNodes::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (EntrySU.getNode() != nullptr)
    dumpNodeAll(EntrySU);
  for (const SUnit &SU : SUnits)
    dumpNodeAll(SU);
  if (ExitSU.getNode() != nullptr)
    dumpNodeAll(ExitSU);
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
  for (const SUnit *SU : Sequence) {
    if (SU)
      dumpNode(*SU);
    else
      dbgs() << "**** NOOP ****\n";
  }
}
#endif

#ifndef NDEBUG
/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
/// their state is consistent with the nodes listed in Sequence.
///
void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
  unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
  unsigned Noops = llvm::count(Sequence, nullptr);
  assert(Sequence.size() - Noops == ScheduledNodes &&
         "The number of nodes scheduled doesn't match the expected number!");
}
#endif // NDEBUG

/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
static void
ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
                   SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
                   DenseMap<SDValue, Register> &VRBaseMap, unsigned Order) {
  if (!N->getHasDebugValue())
    return;

  /// Returns true if \p DV has any VReg operand locations which don't exist in
  /// VRBaseMap.
  auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) {
    for (const SDDbgOperand &L : DV->getLocationOps()) {
      if (L.getKind() == SDDbgOperand::SDNODE &&
          VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)
        return true;
    }
    return false;
  };

  // Opportunistically insert immediate dbg_value uses, i.e. those with the same
  // source order number as N.
  MachineBasicBlock *BB = Emitter.getBlock();
  MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
  for (auto *DV : DAG->GetDbgValues(N)) {
    if (DV->isEmitted())
      continue;
    unsigned DVOrder = DV->getOrder();
    if (Order != 0 && DVOrder != Order)
      continue;
    // If DV has any VReg location operands which haven't been mapped then
    // either that node is no longer available or we just haven't visited the
    // node yet. In the former case we should emit an undef dbg_value, but we
    // can do it later. And for the latter we'll want to wait until all
    // dependent nodes have been visited.
    if (!DV->isInvalidated() && HasUnknownVReg(DV))
      continue;
    MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
    if (!DbgMI)
      continue;
    Orders.push_back({DVOrder, DbgMI});
    BB->insert(InsertPos, DbgMI);
  }
}

// ProcessSourceNode - Process nodes with source order numbers. These are added
// to a vector which EmitSchedule uses to determine how to insert dbg_value
// instructions in the right order.
static void
ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
                  DenseMap<SDValue, Register> &VRBaseMap,
                  SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
                  SmallSet<Register, 8> &Seen, MachineInstr *NewInsn) {
  unsigned Order = N->getIROrder();
  if (!Order || Seen.count(Order)) {
    // Process any valid SDDbgValues even if node does not have any order
    // assigned.
    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
    return;
  }

  // If a new instruction was generated for this Order number, record it.
  // Otherwise, leave this order number unseen: we will either find later
  // instructions for it, or leave it unseen if there were no instructions at
  // all.
  if (NewInsn) {
    Seen.insert(Order);
    Orders.push_back({Order, NewInsn});
  }

  // Even if no instruction was generated, a Value may have become defined via
  // earlier nodes. Try to process them now.
  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
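
// The Order bookkeeping above drives DBG_VALUE placement in EmitSchedule():
// each IR order number is paired with the first MachineInstr emitted for it,
// and a dbg_value with order k is later inserted (roughly) just before the
// first recorded instruction whose order exceeds k, or at the start of the
// block when no recorded instruction precedes it.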

void ScheduleDAGSDNodes::
EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
                MachineBasicBlock::iterator InsertPos) {
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue; // ignore chain preds
    if (Pred.getSUnit()->CopyDstRC) {
      // Copy to physical register.
      DenseMap<SUnit *, Register>::iterator VRI =
          VRBaseMap.find(Pred.getSUnit());
      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
      // Find the destination physical register.
      Register Reg;
      for (const SDep &Succ : SU->Succs) {
        if (Succ.isCtrl())
          continue; // ignore chain succs
        if (Succ.getReg()) {
          Reg = Succ.getReg();
          break;
        }
      }
      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
          .addReg(VRI->second);
    } else {
      // Copy from physical register.
      assert(Pred.getReg() && "Unknown physical register!");
      Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
      (void)isNew; // Silence compiler warning.
      assert(isNew && "Node emitted out of order - early");
      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
          .addReg(Pred.getReg());
    }
    break;
  }
}

/// EmitSchedule - Emit the machine code in scheduled order. Return the new
/// InsertPos and MachineBasicBlock that contains this insertion
/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
/// not necessarily refer to the returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
  InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
  DenseMap<SDValue, Register> VRBaseMap;
  DenseMap<SUnit*, Register> CopyVRBaseMap;
  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
  SmallSet<Register, 8> Seen;
  bool HasDbg = DAG->hasDebugValues();

  // Emit a node, and determine where its first instruction is for debuginfo.
  // Zero, one, or multiple instructions can be created when emitting a node.
  auto EmitNode =
      [&](SDNode *Node, bool IsClone, bool IsCloned,
          DenseMap<SDValue, Register> &VRBaseMap) -> MachineInstr * {
    // Fetch the instruction prior to this one, or end() if nonexistent.
    auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
      if (I == BB->begin())
        return BB->end();
      else
        return std::prev(Emitter.getInsertPos());
    };

    MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
    Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
    MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());

    // If the iterator did not change, no instructions were inserted.
    if (Before == After)
      return nullptr;

    MachineInstr *MI;
    if (Before == BB->end()) {
      // There were no prior instructions; the new ones must start at the
      // beginning of the block.
      MI = &Emitter.getBlock()->instr_front();
    } else {
      // Return first instruction after the pre-existing instructions.
      MI = &*std::next(Before);
    }

    if (MI->isCandidateForCallSiteEntry() &&
        DAG->getTarget().Options.EmitCallSiteInfo) {
      MF.addCallSiteInfo(MI, DAG->getCallSiteInfo(Node));
    }

    if (DAG->getNoMergeSiteInfo(Node)) {
      MI->setFlag(MachineInstr::MIFlag::NoMerge);
    }

    if (MDNode *MD = DAG->getPCSections(Node))
      MI->setPCSections(MF, MD);

    // Set MMRAs on _all_ added instructions.
    if (MDNode *MMRA = DAG->getMMRAMetadata(Node)) {
      for (MachineBasicBlock::iterator It = MI->getIterator(),
                                       End = std::next(After);
           It != End; ++It)
        It->setMMRAMetadata(MF, MMRA);
    }

    return MI;
  };

  // If this is the first BB, emit byval parameter dbg_value's.
  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
    for (; PDI != PDE; ++PDI) {
      MachineInstr *DbgMI = Emitter.EmitDbgValue(*PDI, VRBaseMap);
      if (DbgMI) {
        BB->insert(InsertPos, DbgMI);
        // We re-emit the dbg_value closer to its use, too, after instructions
        // are emitted to the BB.
        (*PDI)->clearIsEmitted();
      }
    }
  }

  for (SUnit *SU : Sequence) {
    if (!SU) {
      // Null SUnit* is a noop.
      TII->insertNoop(*Emitter.getBlock(), InsertPos);
      continue;
    }

    // For pre-regalloc scheduling, create instructions corresponding to the
    // SDNode and any glued SDNodes and append them to the block.
    if (!SU->getNode()) {
      // Emit a copy.
      EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
      continue;
    }

    SmallVector<SDNode *, 4> GluedNodes;
    for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
      GluedNodes.push_back(N);
    while (!GluedNodes.empty()) {
      SDNode *N = GluedNodes.back();
      auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
      // Remember the source order of the inserted instruction.
      if (HasDbg)
        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);

      if (MDNode *MD = DAG->getHeapAllocSite(N))
        if (NewInsn && NewInsn->isCall())
          NewInsn->setHeapAllocMarker(MF, MD);

      GluedNodes.pop_back();
    }
    auto NewInsn =
        EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
    // Remember the source order of the inserted instruction.
    if (HasDbg)
      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
                        NewInsn);

    if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
      if (NewInsn && NewInsn->isCall())
        NewInsn->setHeapAllocMarker(MF, MD);
    }
  }

  // Insert all the dbg_values which have not already been inserted in source
  // order sequence.
  if (HasDbg) {
    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();

    // Sort the source order instructions and use the order to insert debug
    // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
    // regardless of the host's implementation of std::sort.
    llvm::stable_sort(Orders, less_first());
    std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
                     [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
                       return LHS->getOrder() < RHS->getOrder();
                     });

    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
    // Now emit the rest according to source order.
    unsigned LastOrder = 0;
    for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
      unsigned Order = Orders[i].first;
      MachineInstr *MI = Orders[i].second;
      // Insert all SDDbgValue's whose order(s) are before "Order".
      assert(MI);
      for (; DI != DE; ++DI) {
        if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
          break;
        if ((*DI)->isEmitted())
          continue;

        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
        if (DbgMI) {
          if (!LastOrder)
            // Insert to start of the BB (after PHIs).
            BB->insert(BBBegin, DbgMI);
          else {
            // Insert at the instruction, which may be in a different
            // block, if the block was split by a custom inserter.
            MachineBasicBlock::iterator Pos = MI;
            MI->getParent()->insert(Pos, DbgMI);
          }
        }
      }
      LastOrder = Order;
    }
    // Add trailing DbgValue's before the terminator. FIXME: May want to add
    // some of them before one or more conditional branches?
    SmallVector<MachineInstr*, 8> DbgMIs;
    for (; DI != DE; ++DI) {
      if ((*DI)->isEmitted())
        continue;
      assert((*DI)->getOrder() >= LastOrder &&
             "emitting DBG_VALUE out of order");
      if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
        DbgMIs.push_back(DbgMI);
    }

    MachineBasicBlock *InsertBB = Emitter.getBlock();
    MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
    InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());

    SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
    SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
    // Now emit the rest according to source order.
    LastOrder = 0;
    for (const auto &InstrOrder : Orders) {
      unsigned Order = InstrOrder.first;
      MachineInstr *MI = InstrOrder.second;
      if (!MI)
        continue;

      // Insert all SDDbgLabel's whose order(s) are before "Order".
      for (; DLI != DLE &&
             (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order;
           ++DLI) {
        MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
        if (DbgMI) {
          if (!LastOrder)
            // Insert to start of the BB (after PHIs).
            BB->insert(BBBegin, DbgMI);
          else {
            // Insert at the instruction, which may be in a different
            // block, if the block was split by a custom inserter.
            MachineBasicBlock::iterator Pos = MI;
            MI->getParent()->insert(Pos, DbgMI);
          }
        }
      }
      if (DLI == DLE)
        break;

      LastOrder = Order;
    }
  }

  InsertPos = Emitter.getInsertPos();
  // In some cases, DBG_VALUEs might be inserted after the first terminator,
  // which results in an invalid MBB. If that happens, move the DBG_VALUEs
  // before the first terminator.
  MachineBasicBlock *InsertBB = Emitter.getBlock();
  auto FirstTerm = InsertBB->getFirstTerminator();
  if (FirstTerm != InsertBB->end()) {
    assert(!FirstTerm->isDebugValue() &&
           "first terminator cannot be a debug value");
    for (MachineInstr &MI : make_early_inc_range(
             make_range(std::next(FirstTerm), InsertBB->end()))) {
      // Only scan up to insertion point.
      if (&MI == InsertPos)
        break;

      if (!MI.isDebugValue())
        continue;

      // The DBG_VALUE was referencing a value produced by a terminator. By
      // moving the DBG_VALUE, the referenced value also needs invalidating.
      MI.getOperand(0).ChangeToRegister(0, false);
      MI.moveBefore(&*FirstTerm);
    }
  }
  return InsertBB;
}

/// Return the basic block label.
std::string ScheduleDAGSDNodes::getDAGName() const {
  return "sunit-dag." + BB->getFullName();
}