//===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCMachineScheduler.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"

using namespace llvm;

static cl::opt<bool>
DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
                         cl::desc("Disable scheduling addi instruction before"
                                  " load for ppc"),
                         cl::Hidden);
static cl::opt<bool>
EnableAddiHeuristic("ppc-postra-bias-addi",
                    cl::desc("Enable scheduling addi instruction as early"
                             " as possible post ra"),
                    cl::Hidden, cl::init(true));

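// Return true if the candidate instruction is a 32-bit (ADDI) or 64-bit
// (ADDI8) add-immediate.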
static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) {
  return Cand.SU->getInstr()->getOpcode() == PPC::ADDI ||
         Cand.SU->getInstr()->getOpcode() == PPC::ADDI8;
}

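// Bias the candidate selection so that an ADDI is scheduled before a load it
// competes with: relative to the zone's scheduling direction, prefer whichever
// ordering issues the ADDI first. Returns true if the heuristic made a
// decision, with TryCand.Reason set to Stall (TryCand wins) or NoCand (Cand
// wins).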
bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
                                                  SchedCandidate &TryCand,
                                                  SchedBoundary &Zone) const {
  if (DisableAddiLoadHeuristic)
    return false;

  SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
  SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
  if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) {
    TryCand.Reason = Stall;
    return true;
  }
  if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) {
    TryCand.Reason = NoCand;
    return true;
  }

  return false;
}

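// Apply PPC-specific candidate comparison on top of the generic pre-RA
// heuristics. Returns true if TryCand is better than Cand, i.e. TryCand.Reason
// is set to something other than NoCand.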
bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                         SchedCandidate &TryCand,
                                         SchedBoundary *Zone) const {
  // From GenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Bias PhysReg defs and copies toward their uses and definitions,
  // respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary. Some properties are simply incomparable; in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
  const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
    }
  }

  // GenericScheduler::tryCandidate end

  // Apply the PowerPC-specific heuristic only when TryCand has not been
  // selected, or has been selected merely by node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There is some benefit to scheduling the ADDI before the load to hide
  // latency, as RA may create a true dependency between the load and the ADDI.
  if (SameBoundary) {
    if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
      return TryCand.Reason != NoCand;
  }

  return TryCand.Reason != NoCand;
}

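// Bias toward scheduling an ADDI ahead of a non-ADDI candidate post-RA.
// Returns true if the bias applied, marking TryCand as the winner via
// TryCand.Reason = Stall.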
bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
                                               SchedCandidate &TryCand) const {
  if (!EnableAddiHeuristic)
    return false;

  if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) {
    TryCand.Reason = Stall;
    return true;
  }
  return false;
}

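// Apply PPC-specific candidate comparison on top of the generic post-RA
// heuristics. Returns true if TryCand is better than Cand, i.e. TryCand.Reason
// is set to something other than NoCand.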
bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand) {
  // From PostGenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return TryCand.Reason != NoCand;

  // Keep clustered nodes together.
  const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
  const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
  if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
                 CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return TryCand.Reason != NoCand;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources, TryCand, Cand,
                 ResourceDemand))
    return TryCand.Reason != NoCand;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top))
    return TryCand.Reason != NoCand;

  // Fall through to original instruction order.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;

  // PostGenericScheduler::tryCandidate end

  // Apply the PowerPC post-RA-specific heuristic only when TryCand has not
  // been selected, or has been selected merely by node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There is some benefit to scheduling the ADDI as early as possible post-RA
  // to avoid it being stalled by vector instructions that occupy all the
  // hardware units. ADDI is also commonly used to post-increment the loop
  // induction variable, which matters for performance.
  if (biasAddiCandidate(Cand, TryCand))
    return TryCand.Reason != NoCand;

  return TryCand.Reason != NoCand;
}

void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::enterMBB(MBB);
}

void PPCPostRASchedStrategy::leaveMBB() {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::leaveMBB();
}

void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
  // Custom PPC PostRA specific initialization here.
  PostGenericScheduler::initialize(Dag);
}

SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
  // Custom PPC PostRA specific scheduling here.
  return PostGenericScheduler::pickNode(IsTopNode);
}