1//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements hazard recognizers for scheduling on PowerPC processors.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCHazardRecognizers.h"
14#include "PPCInstrInfo.h"
15#include "PPCSubtarget.h"
16#include "llvm/CodeGen/ScheduleDAG.h"
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/raw_ostream.h"
20using namespace llvm;
21
22#define DEBUG_TYPE "pre-RA-sched"
23
24bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
25 // FIXME: Move this.
26 if (isBCTRAfterSet(SU))
27 return true;
28
29 const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
30 if (!MCID)
31 return false;
32
33 if (!MCID->mayLoad())
34 return false;
35
36 // SU is a load; for any predecessors in this dispatch group, that are stores,
37 // and with which we have an ordering dependency, return true.
38 for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
39 const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU: SU->Preds[i].getSUnit());
40 if (!PredMCID || !PredMCID->mayStore())
41 continue;
42
43 if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
44 continue;
45
46 for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
47 if (SU->Preds[i].getSUnit() == CurGroup[j])
48 return true;
49 }
50
51 return false;
52}
53
54bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
55 const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
56 if (!MCID)
57 return false;
58
59 if (!MCID->isBranch())
60 return false;
61
62 // SU is a branch; for any predecessors in this dispatch group, with which we
63 // have a data dependence and set the counter register, return true.
64 for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
65 const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU: SU->Preds[i].getSUnit());
66 if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
67 continue;
68
69 if (SU->Preds[i].isCtrl())
70 continue;
71
72 for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
73 if (SU->Preds[i].getSUnit() == CurGroup[j])
74 return true;
75 }
76
77 return false;
78}
79
// FIXME: Remove this when we don't need this:
// Forward declaration of a function defined outside this file. mustComeFirst()
// below treats any result other than -1 as "this opcode is a record-form
// instruction".
namespace llvm {
namespace PPC {
int32_t getNonRecordFormOpcode(uint32_t Opcode);
} // namespace PPC
} // namespace llvm
86
87// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
88
89bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
90 unsigned &NSlots) {
91 // FIXME: Indirectly, this information is contained in the itinerary, and
92 // we should derive it from there instead of separately specifying it
93 // here.
94 unsigned IIC = MCID->getSchedClass();
95 switch (IIC) {
96 default:
97 NSlots = 1;
98 break;
99 case PPC::Sched::IIC_IntDivW:
100 case PPC::Sched::IIC_IntDivD:
101 case PPC::Sched::IIC_LdStLoadUpd:
102 case PPC::Sched::IIC_LdStLDU:
103 case PPC::Sched::IIC_LdStLFDU:
104 case PPC::Sched::IIC_LdStLFDUX:
105 case PPC::Sched::IIC_LdStLHA:
106 case PPC::Sched::IIC_LdStLHAU:
107 case PPC::Sched::IIC_LdStLWA:
108 case PPC::Sched::IIC_LdStSTU:
109 case PPC::Sched::IIC_LdStSTFDU:
110 NSlots = 2;
111 break;
112 case PPC::Sched::IIC_LdStLoadUpdX:
113 case PPC::Sched::IIC_LdStLDUX:
114 case PPC::Sched::IIC_LdStLHAUX:
115 case PPC::Sched::IIC_LdStLWARX:
116 case PPC::Sched::IIC_LdStLDARX:
117 case PPC::Sched::IIC_LdStSTUX:
118 case PPC::Sched::IIC_LdStSTDCX:
119 case PPC::Sched::IIC_LdStSTWCX:
120 case PPC::Sched::IIC_BrMCRX: // mtcr
121 // FIXME: Add sync/isync (here and in the itinerary).
122 NSlots = 4;
123 break;
124 }
125
126 // FIXME: record-form instructions need a different itinerary class.
127 if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
128 NSlots = 2;
129
130 switch (IIC) {
131 default:
132 // All multi-slot instructions must come first.
133 return NSlots > 1;
134 case PPC::Sched::IIC_BrCR: // cr logicals
135 case PPC::Sched::IIC_SprMFCR:
136 case PPC::Sched::IIC_SprMFCRF:
137 case PPC::Sched::IIC_SprMTSPR:
138 return true;
139 }
140}
141
142ScheduleHazardRecognizer::HazardType
143PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
144 if (Stalls == 0 && isLoadAfterStore(SU))
145 return NoopHazard;
146
147 return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
148}
149
150bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
151 const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
152 unsigned NSlots;
153 if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
154 return true;
155
156 return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
157}
158
159unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
160 // We only need to fill out a maximum of 5 slots here: The 6th slot could
161 // only be a second branch, and otherwise the next instruction will start a
162 // new group.
163 if (isLoadAfterStore(SU) && CurSlots < 6) {
164 unsigned Directive =
165 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
166 // If we're using a special group-terminating nop, then we need only one.
167 // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
168 if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
169 Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9)
170 return 1;
171
172 return 5 - CurSlots;
173 }
174
175 return ScoreboardHazardRecognizer::PreEmitNoops(SU);
176}
177
178void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
179 const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
180 if (MCID) {
181 if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
182 CurGroup.clear();
183 CurSlots = CurBranches = 0;
184 } else {
185 LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: ");
186 LLVM_DEBUG(DAG->dumpNode(*SU));
187
188 unsigned NSlots;
189 bool MustBeFirst = mustComeFirst(MCID, NSlots);
190
191 // If this instruction must come first, but does not, then it starts a
192 // new group.
193 if (MustBeFirst && CurSlots) {
194 CurSlots = CurBranches = 0;
195 CurGroup.clear();
196 }
197
198 CurSlots += NSlots;
199 CurGroup.push_back(Elt: SU);
200
201 if (MCID->isBranch())
202 ++CurBranches;
203 }
204 }
205
206 return ScoreboardHazardRecognizer::EmitInstruction(SU);
207}
208
209void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
210 return ScoreboardHazardRecognizer::AdvanceCycle();
211}
212
213void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
214 llvm_unreachable("Bottom-up scheduling not supported");
215}
216
217void PPCDispatchGroupSBHazardRecognizer::Reset() {
218 CurGroup.clear();
219 CurSlots = CurBranches = 0;
220 return ScoreboardHazardRecognizer::Reset();
221}
222
223void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
224 unsigned Directive =
225 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
226 // If the group has now filled all of its slots, or if we're using a special
227 // group-terminating nop, the group is complete.
228 // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
229 if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
230 Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 ||
231 CurSlots == 6) {
232 CurGroup.clear();
233 CurSlots = CurBranches = 0;
234 } else {
235 CurGroup.push_back(Elt: nullptr);
236 ++CurSlots;
237 }
238}
239
240//===----------------------------------------------------------------------===//
241// PowerPC 970 Hazard Recognizer
242//
243// This models the dispatch group formation of the PPC970 processor. Dispatch
244// groups are bundles of up to five instructions that can contain various mixes
245// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
246// branch instruction per-cycle.
247//
248// There are a number of restrictions to dispatch group formation: some
249// instructions can only be issued in the first slot of a dispatch group, & some
250// instructions fill an entire dispatch group. Additionally, only branches can
251// issue in the 5th (last) slot.
252//
253// Finally, there are a number of "structural" hazards on the PPC970. These
254// conditions cause large performance penalties due to misprediction, recovery,
255// and replay logic that has to happen. These cases include setting a CTR and
256// branching through it in the same dispatch group, and storing to an address,
257// then loading from the same address within a dispatch group. To avoid these
258// conditions, we insert no-op instructions when appropriate.
259//
260// FIXME: This is missing some significant cases:
261// 1. Modeling of microcoded instructions.
262// 2. Handling of serialized operations.
263// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
264//
265
266PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
267 : DAG(DAG) {
268 EndDispatchGroup();
269}
270
271void PPCHazardRecognizer970::EndDispatchGroup() {
272 LLVM_DEBUG(errs() << "=== Start of dispatch group\n");
273 NumIssued = 0;
274
275 // Structural hazard info.
276 HasCTRSet = false;
277 NumStores = 0;
278}
279
280
281PPCII::PPC970_Unit
282PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
283 bool &isFirst, bool &isSingle,
284 bool &isCracked,
285 bool &isLoad, bool &isStore) {
286 const MCInstrDesc &MCID = DAG.TII->get(Opcode);
287
288 isLoad = MCID.mayLoad();
289 isStore = MCID.mayStore();
290
291 uint64_t TSFlags = MCID.TSFlags;
292
293 isFirst = TSFlags & PPCII::PPC970_First;
294 isSingle = TSFlags & PPCII::PPC970_Single;
295 isCracked = TSFlags & PPCII::PPC970_Cracked;
296 return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
297}
298
299/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
300/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
301bool PPCHazardRecognizer970::
302isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
303 const Value *LoadValue) const {
304 for (unsigned i = 0, e = NumStores; i != e; ++i) {
305 // Handle exact and commuted addresses.
306 if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
307 return true;
308
309 // Okay, we don't have an exact match, if this is an indexed offset, see if
310 // we have overlap (which happens during fp->int conversion for example).
311 if (StoreValue[i] == LoadValue) {
312 // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
313 // to see if the load and store actually overlap.
314 if (StoreOffset[i] < LoadOffset) {
315 if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
316 } else {
317 if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
318 }
319 }
320 }
321 return false;
322}
323
324/// getHazardType - We return hazard for any non-branch instruction that would
325/// terminate the dispatch group. We turn NoopHazard for any
326/// instructions that wouldn't terminate the dispatch group that would cause a
327/// pipeline flush.
328ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
329getHazardType(SUnit *SU, int Stalls) {
330 assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
331
332 MachineInstr *MI = SU->getInstr();
333
334 if (MI->isDebugInstr())
335 return NoHazard;
336
337 unsigned Opcode = MI->getOpcode();
338 bool isFirst, isSingle, isCracked, isLoad, isStore;
339 PPCII::PPC970_Unit InstrType =
340 GetInstrType(Opcode, isFirst, isSingle, isCracked,
341 isLoad, isStore);
342 if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
343
344 // We can only issue a PPC970_First/PPC970_Single instruction (such as
345 // crand/mtspr/etc) if this is the first cycle of the dispatch group.
346 if (NumIssued != 0 && (isFirst || isSingle))
347 return Hazard;
348
349 // If this instruction is cracked into two ops by the decoder, we know that
350 // it is not a branch and that it cannot issue if 3 other instructions are
351 // already in the dispatch group.
352 if (isCracked && NumIssued > 2)
353 return Hazard;
354
355 switch (InstrType) {
356 default: llvm_unreachable("Unknown instruction type!");
357 case PPCII::PPC970_FXU:
358 case PPCII::PPC970_LSU:
359 case PPCII::PPC970_FPU:
360 case PPCII::PPC970_VALU:
361 case PPCII::PPC970_VPERM:
362 // We can only issue a branch as the last instruction in a group.
363 if (NumIssued == 4) return Hazard;
364 break;
365 case PPCII::PPC970_CRU:
366 // We can only issue a CR instruction in the first two slots.
367 if (NumIssued >= 2) return Hazard;
368 break;
369 case PPCII::PPC970_BRU:
370 break;
371 }
372
373 // Do not allow MTCTR and BCTRL to be in the same dispatch group.
374 if (HasCTRSet && Opcode == PPC::BCTRL)
375 return NoopHazard;
376
377 // If this is a load following a store, make sure it's not to the same or
378 // overlapping address.
379 if (isLoad && NumStores && !MI->memoperands_empty()) {
380 MachineMemOperand *MO = *MI->memoperands_begin();
381 if (MO->getSize().hasValue() &&
382 isLoadOfStoredAddress(LoadSize: MO->getSize().getValue(), LoadOffset: MO->getOffset(),
383 LoadValue: MO->getValue()))
384 return NoopHazard;
385 }
386
387 return NoHazard;
388}
389
390void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
391 MachineInstr *MI = SU->getInstr();
392
393 if (MI->isDebugInstr())
394 return;
395
396 unsigned Opcode = MI->getOpcode();
397 bool isFirst, isSingle, isCracked, isLoad, isStore;
398 PPCII::PPC970_Unit InstrType =
399 GetInstrType(Opcode, isFirst, isSingle, isCracked,
400 isLoad, isStore);
401 if (InstrType == PPCII::PPC970_Pseudo) return;
402
403 // Update structural hazard information.
404 if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
405
406 // Track the address stored to.
407 if (isStore && NumStores < 4 && !MI->memoperands_empty() &&
408 (*MI->memoperands_begin())->getSize().hasValue()) {
409 MachineMemOperand *MO = *MI->memoperands_begin();
410 StoreSize[NumStores] = MO->getSize().getValue();
411 StoreOffset[NumStores] = MO->getOffset();
412 StoreValue[NumStores] = MO->getValue();
413 ++NumStores;
414 }
415
416 if (InstrType == PPCII::PPC970_BRU || isSingle)
417 NumIssued = 4; // Terminate a d-group.
418 ++NumIssued;
419
420 // If this instruction is cracked into two ops by the decoder, remember that
421 // we issued two pieces.
422 if (isCracked)
423 ++NumIssued;
424
425 if (NumIssued == 5)
426 EndDispatchGroup();
427}
428
429void PPCHazardRecognizer970::AdvanceCycle() {
430 assert(NumIssued < 5 && "Illegal dispatch group!");
431 ++NumIssued;
432 if (NumIssued == 5)
433 EndDispatchGroup();
434}
435
436void PPCHazardRecognizer970::Reset() {
437 EndDispatchGroup();
438}
439
440