1 | //===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "PPCMachineScheduler.h" |
10 | #include "MCTargetDesc/PPCMCTargetDesc.h" |
11 | |
12 | using namespace llvm; |
13 | |
static cl::opt<bool>
    DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
                             cl::desc("Disable scheduling addi instruction before "
                                      "load for ppc"),
                             cl::Hidden);
static cl::opt<bool>
    EnableAddiHeuristic("ppc-postra-bias-addi",
                        cl::desc("Enable scheduling addi instruction as early "
                                 "as possible post ra"),
                        cl::Hidden, cl::init(true));
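// Both options are hidden cl::opt flags, so any tool that parses LLVM
// command-line options can toggle them; an illustrative invocation:
//   llc -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sched-addi-load \
//     foo.ll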
23 | |
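// True if the candidate's instruction is an ADDI or its 64-bit form ADDI8.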
24 | static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) { |
25 | return Cand.SU->getInstr()->getOpcode() == PPC::ADDI || |
26 | Cand.SU->getInstr()->getOpcode() == PPC::ADDI8; |
27 | } |
28 | |
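// Within a single boundary, bias an ADDI candidate to be picked ahead of a
// load candidate. With a top-down zone this prefers, e.g. (hypothetical,
// independent instructions):
//   addi 3, 3, 8
//   lwz 5, 0(4)
// in that order. Returns true, with TryCand.Reason set, when the heuristic
// decides the comparison.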
29 | bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand, |
30 | SchedCandidate &TryCand, |
31 | SchedBoundary &Zone) const { |
32 | if (DisableAddiLoadHeuristic) |
33 | return false; |
34 | |
35 | SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand; |
36 | SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand; |
  if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) {
38 | TryCand.Reason = Stall; |
39 | return true; |
40 | } |
  if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) {
42 | TryCand.Reason = NoCand; |
43 | return true; |
44 | } |
45 | |
46 | return false; |
47 | } |
48 | |
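// Pre-RA candidate comparison: run the GenericScheduler heuristics first,
// then fall back to the PPC ADDI-before-load bias. Returns true if TryCand
// is preferred over Cand; TryCand.Reason records the deciding heuristic.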
49 | bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, |
50 | SchedCandidate &TryCand, |
51 | SchedBoundary *Zone) const { |
52 | // From GenericScheduler::tryCandidate |
53 | |
54 | // Initialize the candidate if needed. |
55 | if (!Cand.isValid()) { |
56 | TryCand.Reason = NodeOrder; |
57 | return true; |
58 | } |
59 | |
  // Bias PhysReg defs and copies toward their uses and defs, respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
63 | return TryCand.Reason != NoCand; |
64 | |
65 | // Avoid exceeding the target's limit. |
66 | if (DAG->isTrackingPressure() && |
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
69 | return TryCand.Reason != NoCand; |
70 | |
71 | // Avoid increasing the max critical pressure in the scheduled region. |
72 | if (DAG->isTrackingPressure() && |
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
75 | return TryCand.Reason != NoCand; |
76 | |
  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundaries. Some properties are simply incomparable; in
  // many other instances we should only override the other boundary if
  // something is a clearly better pick on one boundary. Skip heuristics that
  // are more "tie-breaking" in nature.
82 | bool SameBoundary = Zone != nullptr; |
83 | if (SameBoundary) { |
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
89 | return TryCand.Reason != NoCand; |
90 | |
91 | // Prioritize instructions that read unbuffered resources by stall cycles. |
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
94 | return TryCand.Reason != NoCand; |
95 | } |
96 | |
97 | // Keep clustered nodes together to encourage downstream peephole |
98 | // optimizations which may reduce resource requirements. |
99 | // |
100 | // This is a best effort to set things up for a post-RA pass. Optimizations |
101 | // like generating loads of multiple registers should ideally be done within |
102 | // the scheduler pass by combining the loads during DAG postprocessing. |
103 | const SUnit *CandNextClusterSU = |
104 | Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); |
105 | const SUnit *TryCandNextClusterSU = |
106 | TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); |
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
109 | return TryCand.Reason != NoCand; |
110 | |
111 | if (SameBoundary) { |
112 | // Weak edges are for clustering and other constraints. |
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
115 | return TryCand.Reason != NoCand; |
116 | } |
117 | |
118 | // Avoid increasing the max pressure of the entire region. |
119 | if (DAG->isTrackingPressure() && |
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
122 | return TryCand.Reason != NoCand; |
123 | |
124 | if (SameBoundary) { |
125 | // Avoid critical resource consumption and balance the schedule. |
126 | TryCand.initResourceDelta(DAG, SchedModel); |
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
129 | return TryCand.Reason != NoCand; |
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
133 | return TryCand.Reason != NoCand; |
134 | |
135 | // Avoid serializing long latency dependence chains. |
136 | // For acyclic path limited loops, latency was already checked above. |
137 | if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && |
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
139 | return TryCand.Reason != NoCand; |
140 | |
141 | // Fall through to original instruction order. |
142 | if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || |
143 | (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { |
144 | TryCand.Reason = NodeOrder; |
145 | } |
146 | } |
147 | |
148 | // GenericScheduler::tryCandidate end |
149 | |
  // Apply the PowerPC-specific heuristic only when TryCand isn't selected,
  // or is selected merely by node order.
152 | if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) |
153 | return true; |
154 | |
  // There are benefits to scheduling the ADDI before the load to hide the
  // load latency, as RA may create a true dependency between the load and
  // the ADDI.
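  // Hypothetical post-RA outcome this tries to avoid:
  //   lwz 3, 0(3)
  //   addi 4, 3, 8    <-- stalls for the full load latency
  // Emitting the ADDI ahead of the load pre-RA hides that latency.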
157 | if (SameBoundary) { |
    if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
159 | return TryCand.Reason != NoCand; |
160 | } |
161 | |
162 | return TryCand.Reason != NoCand; |
163 | } |
164 | |
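// Post-RA: prefer an ADDI over a non-ADDI candidate so the ADDI issues as
// early as possible. The bias is one-directional: a non-ADDI TryCand is
// never rejected here.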
165 | bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, |
166 | SchedCandidate &TryCand) const { |
167 | if (!EnableAddiHeuristic) |
168 | return false; |
169 | |
  if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) {
171 | TryCand.Reason = Stall; |
172 | return true; |
173 | } |
174 | return false; |
175 | } |
176 | |
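// Post-RA candidate comparison: mirror PostGenericScheduler::tryCandidate
// (top boundary only), then fall back to the PPC ADDI bias. Returns true if
// TryCand is preferred over Cand.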
177 | bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, |
178 | SchedCandidate &TryCand) { |
179 | // From PostGenericScheduler::tryCandidate |
180 | |
181 | // Initialize the candidate if needed. |
182 | if (!Cand.isValid()) { |
183 | TryCand.Reason = NodeOrder; |
184 | return true; |
185 | } |
186 | |
187 | // Prioritize instructions that read unbuffered resources by stall cycles. |
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
190 | return TryCand.Reason != NoCand; |
191 | |
192 | // Keep clustered nodes together. |
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
195 | return TryCand.Reason != NoCand; |
196 | |
197 | // Avoid critical resource consumption and balance the schedule. |
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
200 | return TryCand.Reason != NoCand; |
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources, TryCand, Cand,
                 ResourceDemand))
204 | return TryCand.Reason != NoCand; |
205 | |
206 | // Avoid serializing long latency dependence chains. |
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
208 | return TryCand.Reason != NoCand; |
209 | } |
210 | |
211 | // Fall through to original instruction order. |
212 | if (TryCand.SU->NodeNum < Cand.SU->NodeNum) |
213 | TryCand.Reason = NodeOrder; |
214 | |
215 | // PostGenericScheduler::tryCandidate end |
216 | |
  // Apply the PowerPC post-RA-specific heuristic only when TryCand isn't
  // selected, or is selected merely by node order.
219 | if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) |
220 | return true; |
221 | |
  // There are benefits to scheduling the ADDI as early as possible post-RA
  // to avoid being stalled by vector instructions that occupy all the
  // hardware units. ADDI is also commonly used to post-increment the loop
  // induction variable, which matters for performance.
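  // Hypothetical loop body: while the vector pipelines are busy with, e.g.,
  //   xvmaddadp 0, 1, 2
  // issuing
  //   addi 5, 5, 16
  // early keeps the induction-variable update off the critical path.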
226 | if (biasAddiCandidate(Cand, TryCand)) |
227 | return TryCand.Reason != NoCand; |
228 | |
229 | return TryCand.Reason != NoCand; |
230 | } |
231 | |
232 | void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) { |
233 | // Custom PPC PostRA specific behavior here. |
234 | PostGenericScheduler::enterMBB(MBB); |
235 | } |
236 | |
237 | void PPCPostRASchedStrategy::leaveMBB() { |
238 | // Custom PPC PostRA specific behavior here. |
239 | PostGenericScheduler::leaveMBB(); |
240 | } |
241 | |
242 | void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) { |
243 | // Custom PPC PostRA specific initialization here. |
244 | PostGenericScheduler::initialize(Dag); |
245 | } |
246 | |
247 | SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) { |
248 | // Custom PPC PostRA specific scheduling here. |
249 | return PostGenericScheduler::pickNode(IsTopNode); |
250 | } |
251 | |
252 | |