1 | //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements hazard recognizers for scheduling on PowerPC processors. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "PPCHazardRecognizers.h" |
14 | #include "PPCInstrInfo.h" |
15 | #include "PPCSubtarget.h" |
16 | #include "llvm/CodeGen/ScheduleDAG.h" |
17 | #include "llvm/Support/Debug.h" |
18 | #include "llvm/Support/ErrorHandling.h" |
19 | #include "llvm/Support/raw_ostream.h" |
20 | using namespace llvm; |
21 | |
22 | #define DEBUG_TYPE "pre-RA-sched" |
23 | |
24 | bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { |
25 | // FIXME: Move this. |
26 | if (isBCTRAfterSet(SU)) |
27 | return true; |
28 | |
29 | const MCInstrDesc *MCID = DAG->getInstrDesc(SU); |
30 | if (!MCID) |
31 | return false; |
32 | |
33 | if (!MCID->mayLoad()) |
34 | return false; |
35 | |
36 | // SU is a load; for any predecessors in this dispatch group, that are stores, |
37 | // and with which we have an ordering dependency, return true. |
38 | for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { |
39 | const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU: SU->Preds[i].getSUnit()); |
40 | if (!PredMCID || !PredMCID->mayStore()) |
41 | continue; |
42 | |
43 | if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) |
44 | continue; |
45 | |
46 | for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) |
47 | if (SU->Preds[i].getSUnit() == CurGroup[j]) |
48 | return true; |
49 | } |
50 | |
51 | return false; |
52 | } |
53 | |
54 | bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { |
55 | const MCInstrDesc *MCID = DAG->getInstrDesc(SU); |
56 | if (!MCID) |
57 | return false; |
58 | |
59 | if (!MCID->isBranch()) |
60 | return false; |
61 | |
62 | // SU is a branch; for any predecessors in this dispatch group, with which we |
63 | // have a data dependence and set the counter register, return true. |
64 | for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { |
65 | const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU: SU->Preds[i].getSUnit()); |
66 | if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) |
67 | continue; |
68 | |
69 | if (SU->Preds[i].isCtrl()) |
70 | continue; |
71 | |
72 | for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) |
73 | if (SU->Preds[i].getSUnit() == CurGroup[j]) |
74 | return true; |
75 | } |
76 | |
77 | return false; |
78 | } |
79 | |
// FIXME: Drop this forward declaration once it is no longer needed here.
namespace llvm {
namespace PPC {
extern int getNonRecordFormOpcode(uint16_t);
} // namespace PPC
} // namespace llvm
82 | |
83 | // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. |
84 | |
85 | bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, |
86 | unsigned &NSlots) { |
87 | // FIXME: Indirectly, this information is contained in the itinerary, and |
88 | // we should derive it from there instead of separately specifying it |
89 | // here. |
90 | unsigned IIC = MCID->getSchedClass(); |
91 | switch (IIC) { |
92 | default: |
93 | NSlots = 1; |
94 | break; |
95 | case PPC::Sched::IIC_IntDivW: |
96 | case PPC::Sched::IIC_IntDivD: |
97 | case PPC::Sched::IIC_LdStLoadUpd: |
98 | case PPC::Sched::IIC_LdStLDU: |
99 | case PPC::Sched::IIC_LdStLFDU: |
100 | case PPC::Sched::IIC_LdStLFDUX: |
101 | case PPC::Sched::IIC_LdStLHA: |
102 | case PPC::Sched::IIC_LdStLHAU: |
103 | case PPC::Sched::IIC_LdStLWA: |
104 | case PPC::Sched::IIC_LdStSTU: |
105 | case PPC::Sched::IIC_LdStSTFDU: |
106 | NSlots = 2; |
107 | break; |
108 | case PPC::Sched::IIC_LdStLoadUpdX: |
109 | case PPC::Sched::IIC_LdStLDUX: |
110 | case PPC::Sched::IIC_LdStLHAUX: |
111 | case PPC::Sched::IIC_LdStLWARX: |
112 | case PPC::Sched::IIC_LdStLDARX: |
113 | case PPC::Sched::IIC_LdStSTUX: |
114 | case PPC::Sched::IIC_LdStSTDCX: |
115 | case PPC::Sched::IIC_LdStSTWCX: |
116 | case PPC::Sched::IIC_BrMCRX: // mtcr |
117 | // FIXME: Add sync/isync (here and in the itinerary). |
118 | NSlots = 4; |
119 | break; |
120 | } |
121 | |
122 | // FIXME: record-form instructions need a different itinerary class. |
123 | if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) |
124 | NSlots = 2; |
125 | |
126 | switch (IIC) { |
127 | default: |
128 | // All multi-slot instructions must come first. |
129 | return NSlots > 1; |
130 | case PPC::Sched::IIC_BrCR: // cr logicals |
131 | case PPC::Sched::IIC_SprMFCR: |
132 | case PPC::Sched::IIC_SprMFCRF: |
133 | case PPC::Sched::IIC_SprMTSPR: |
134 | return true; |
135 | } |
136 | } |
137 | |
138 | ScheduleHazardRecognizer::HazardType |
139 | PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { |
140 | if (Stalls == 0 && isLoadAfterStore(SU)) |
141 | return NoopHazard; |
142 | |
143 | return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); |
144 | } |
145 | |
146 | bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { |
147 | const MCInstrDesc *MCID = DAG->getInstrDesc(SU); |
148 | unsigned NSlots; |
149 | if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) |
150 | return true; |
151 | |
152 | return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); |
153 | } |
154 | |
155 | unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { |
156 | // We only need to fill out a maximum of 5 slots here: The 6th slot could |
157 | // only be a second branch, and otherwise the next instruction will start a |
158 | // new group. |
159 | if (isLoadAfterStore(SU) && CurSlots < 6) { |
160 | unsigned Directive = |
161 | DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective(); |
162 | // If we're using a special group-terminating nop, then we need only one. |
163 | // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready |
164 | if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || |
165 | Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9) |
166 | return 1; |
167 | |
168 | return 5 - CurSlots; |
169 | } |
170 | |
171 | return ScoreboardHazardRecognizer::PreEmitNoops(SU); |
172 | } |
173 | |
174 | void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { |
175 | const MCInstrDesc *MCID = DAG->getInstrDesc(SU); |
176 | if (MCID) { |
177 | if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { |
178 | CurGroup.clear(); |
179 | CurSlots = CurBranches = 0; |
180 | } else { |
181 | LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: " ); |
182 | LLVM_DEBUG(DAG->dumpNode(*SU)); |
183 | |
184 | unsigned NSlots; |
185 | bool MustBeFirst = mustComeFirst(MCID, NSlots); |
186 | |
187 | // If this instruction must come first, but does not, then it starts a |
188 | // new group. |
189 | if (MustBeFirst && CurSlots) { |
190 | CurSlots = CurBranches = 0; |
191 | CurGroup.clear(); |
192 | } |
193 | |
194 | CurSlots += NSlots; |
195 | CurGroup.push_back(Elt: SU); |
196 | |
197 | if (MCID->isBranch()) |
198 | ++CurBranches; |
199 | } |
200 | } |
201 | |
202 | return ScoreboardHazardRecognizer::EmitInstruction(SU); |
203 | } |
204 | |
205 | void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { |
206 | return ScoreboardHazardRecognizer::AdvanceCycle(); |
207 | } |
208 | |
209 | void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { |
210 | llvm_unreachable("Bottom-up scheduling not supported" ); |
211 | } |
212 | |
213 | void PPCDispatchGroupSBHazardRecognizer::Reset() { |
214 | CurGroup.clear(); |
215 | CurSlots = CurBranches = 0; |
216 | return ScoreboardHazardRecognizer::Reset(); |
217 | } |
218 | |
219 | void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { |
220 | unsigned Directive = |
221 | DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective(); |
222 | // If the group has now filled all of its slots, or if we're using a special |
223 | // group-terminating nop, the group is complete. |
224 | // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready |
225 | if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || |
226 | Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 || |
227 | CurSlots == 6) { |
228 | CurGroup.clear(); |
229 | CurSlots = CurBranches = 0; |
230 | } else { |
231 | CurGroup.push_back(Elt: nullptr); |
232 | ++CurSlots; |
233 | } |
234 | } |
235 | |
236 | //===----------------------------------------------------------------------===// |
237 | // PowerPC 970 Hazard Recognizer |
238 | // |
239 | // This models the dispatch group formation of the PPC970 processor. Dispatch |
240 | // groups are bundles of up to five instructions that can contain various mixes |
241 | // of instructions. The PPC970 can dispatch a peak of 4 non-branch and one |
242 | // branch instruction per-cycle. |
243 | // |
244 | // There are a number of restrictions to dispatch group formation: some |
245 | // instructions can only be issued in the first slot of a dispatch group, & some |
246 | // instructions fill an entire dispatch group. Additionally, only branches can |
247 | // issue in the 5th (last) slot. |
248 | // |
249 | // Finally, there are a number of "structural" hazards on the PPC970. These |
250 | // conditions cause large performance penalties due to misprediction, recovery, |
251 | // and replay logic that has to happen. These cases include setting a CTR and |
252 | // branching through it in the same dispatch group, and storing to an address, |
253 | // then loading from the same address within a dispatch group. To avoid these |
254 | // conditions, we insert no-op instructions when appropriate. |
255 | // |
256 | // FIXME: This is missing some significant cases: |
257 | // 1. Modeling of microcoded instructions. |
258 | // 2. Handling of serialized operations. |
259 | // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". |
260 | // |
261 | |
262 | PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG) |
263 | : DAG(DAG) { |
264 | EndDispatchGroup(); |
265 | } |
266 | |
267 | void PPCHazardRecognizer970::EndDispatchGroup() { |
268 | LLVM_DEBUG(errs() << "=== Start of dispatch group\n" ); |
269 | NumIssued = 0; |
270 | |
271 | // Structural hazard info. |
272 | HasCTRSet = false; |
273 | NumStores = 0; |
274 | } |
275 | |
276 | |
277 | PPCII::PPC970_Unit |
278 | PPCHazardRecognizer970::GetInstrType(unsigned Opcode, |
279 | bool &isFirst, bool &isSingle, |
280 | bool &isCracked, |
281 | bool &isLoad, bool &isStore) { |
282 | const MCInstrDesc &MCID = DAG.TII->get(Opcode); |
283 | |
284 | isLoad = MCID.mayLoad(); |
285 | isStore = MCID.mayStore(); |
286 | |
287 | uint64_t TSFlags = MCID.TSFlags; |
288 | |
289 | isFirst = TSFlags & PPCII::PPC970_First; |
290 | isSingle = TSFlags & PPCII::PPC970_Single; |
291 | isCracked = TSFlags & PPCII::PPC970_Cracked; |
292 | return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); |
293 | } |
294 | |
295 | /// isLoadOfStoredAddress - If we have a load from the previously stored pointer |
296 | /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. |
297 | bool PPCHazardRecognizer970:: |
298 | isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, |
299 | const Value *LoadValue) const { |
300 | for (unsigned i = 0, e = NumStores; i != e; ++i) { |
301 | // Handle exact and commuted addresses. |
302 | if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) |
303 | return true; |
304 | |
305 | // Okay, we don't have an exact match, if this is an indexed offset, see if |
306 | // we have overlap (which happens during fp->int conversion for example). |
307 | if (StoreValue[i] == LoadValue) { |
308 | // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check |
309 | // to see if the load and store actually overlap. |
310 | if (StoreOffset[i] < LoadOffset) { |
311 | if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; |
312 | } else { |
313 | if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; |
314 | } |
315 | } |
316 | } |
317 | return false; |
318 | } |
319 | |
320 | /// getHazardType - We return hazard for any non-branch instruction that would |
321 | /// terminate the dispatch group. We turn NoopHazard for any |
322 | /// instructions that wouldn't terminate the dispatch group that would cause a |
323 | /// pipeline flush. |
324 | ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: |
325 | getHazardType(SUnit *SU, int Stalls) { |
326 | assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead" ); |
327 | |
328 | MachineInstr *MI = SU->getInstr(); |
329 | |
330 | if (MI->isDebugInstr()) |
331 | return NoHazard; |
332 | |
333 | unsigned Opcode = MI->getOpcode(); |
334 | bool isFirst, isSingle, isCracked, isLoad, isStore; |
335 | PPCII::PPC970_Unit InstrType = |
336 | GetInstrType(Opcode, isFirst, isSingle, isCracked, |
337 | isLoad, isStore); |
338 | if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; |
339 | |
340 | // We can only issue a PPC970_First/PPC970_Single instruction (such as |
341 | // crand/mtspr/etc) if this is the first cycle of the dispatch group. |
342 | if (NumIssued != 0 && (isFirst || isSingle)) |
343 | return Hazard; |
344 | |
345 | // If this instruction is cracked into two ops by the decoder, we know that |
346 | // it is not a branch and that it cannot issue if 3 other instructions are |
347 | // already in the dispatch group. |
348 | if (isCracked && NumIssued > 2) |
349 | return Hazard; |
350 | |
351 | switch (InstrType) { |
352 | default: llvm_unreachable("Unknown instruction type!" ); |
353 | case PPCII::PPC970_FXU: |
354 | case PPCII::PPC970_LSU: |
355 | case PPCII::PPC970_FPU: |
356 | case PPCII::PPC970_VALU: |
357 | case PPCII::PPC970_VPERM: |
358 | // We can only issue a branch as the last instruction in a group. |
359 | if (NumIssued == 4) return Hazard; |
360 | break; |
361 | case PPCII::PPC970_CRU: |
362 | // We can only issue a CR instruction in the first two slots. |
363 | if (NumIssued >= 2) return Hazard; |
364 | break; |
365 | case PPCII::PPC970_BRU: |
366 | break; |
367 | } |
368 | |
369 | // Do not allow MTCTR and BCTRL to be in the same dispatch group. |
370 | if (HasCTRSet && Opcode == PPC::BCTRL) |
371 | return NoopHazard; |
372 | |
373 | // If this is a load following a store, make sure it's not to the same or |
374 | // overlapping address. |
375 | if (isLoad && NumStores && !MI->memoperands_empty()) { |
376 | MachineMemOperand *MO = *MI->memoperands_begin(); |
377 | if (MO->getSize().hasValue() && |
378 | isLoadOfStoredAddress(LoadSize: MO->getSize().getValue(), LoadOffset: MO->getOffset(), |
379 | LoadValue: MO->getValue())) |
380 | return NoopHazard; |
381 | } |
382 | |
383 | return NoHazard; |
384 | } |
385 | |
386 | void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { |
387 | MachineInstr *MI = SU->getInstr(); |
388 | |
389 | if (MI->isDebugInstr()) |
390 | return; |
391 | |
392 | unsigned Opcode = MI->getOpcode(); |
393 | bool isFirst, isSingle, isCracked, isLoad, isStore; |
394 | PPCII::PPC970_Unit InstrType = |
395 | GetInstrType(Opcode, isFirst, isSingle, isCracked, |
396 | isLoad, isStore); |
397 | if (InstrType == PPCII::PPC970_Pseudo) return; |
398 | |
399 | // Update structural hazard information. |
400 | if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; |
401 | |
402 | // Track the address stored to. |
403 | if (isStore && NumStores < 4 && !MI->memoperands_empty() && |
404 | (*MI->memoperands_begin())->getSize().hasValue()) { |
405 | MachineMemOperand *MO = *MI->memoperands_begin(); |
406 | StoreSize[NumStores] = MO->getSize().getValue(); |
407 | StoreOffset[NumStores] = MO->getOffset(); |
408 | StoreValue[NumStores] = MO->getValue(); |
409 | ++NumStores; |
410 | } |
411 | |
412 | if (InstrType == PPCII::PPC970_BRU || isSingle) |
413 | NumIssued = 4; // Terminate a d-group. |
414 | ++NumIssued; |
415 | |
416 | // If this instruction is cracked into two ops by the decoder, remember that |
417 | // we issued two pieces. |
418 | if (isCracked) |
419 | ++NumIssued; |
420 | |
421 | if (NumIssued == 5) |
422 | EndDispatchGroup(); |
423 | } |
424 | |
425 | void PPCHazardRecognizer970::AdvanceCycle() { |
426 | assert(NumIssued < 5 && "Illegal dispatch group!" ); |
427 | ++NumIssued; |
428 | if (NumIssued == 5) |
429 | EndDispatchGroup(); |
430 | } |
431 | |
432 | void PPCHazardRecognizer970::Reset() { |
433 | EndDispatchGroup(); |
434 | } |
435 | |
436 | |