| 1 | //===- GCNIterativeScheduler.cpp ------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file implements the class GCNIterativeScheduler. |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "GCNIterativeScheduler.h" |
| 15 | #include "AMDGPUIGroupLP.h" |
| 16 | #include "GCNSchedStrategy.h" |
| 17 | #include "SIMachineFunctionInfo.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | #define DEBUG_TYPE "machine-scheduler" |
| 22 | |
namespace llvm {

// Builds a top-down schedule minimizing register pressure; defined elsewhere
// in the AMDGPU backend (used by the min-reg strategies below).
std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots,
                                              const ScheduleDAG &DAG);

// Builds a bottom-up ILP-oriented schedule; defined elsewhere in the AMDGPU
// backend (used by scheduleILP()).
std::vector<const SUnit *> makeGCNILPScheduler(ArrayRef<const SUnit *> BotRoots,
                                               const ScheduleDAG &DAG);
} // namespace llvm
| 31 | |
// Shim accessors for the different order containers: scheduleRegion() and
// getSchedulePressure() accept schedules stored as ranges of MachineInstr*,
// const SUnit*, or SUnit; these overloads normalize each element to the
// underlying MachineInstr*.
static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
  return MI;
}
static inline MachineInstr *getMachineInstr(const SUnit *SU) {
  return SU->getInstr();
}
static inline MachineInstr *getMachineInstr(const SUnit &SU) {
  return SU.getInstr();
}
| 42 | |
| 43 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Dump the region [Begin, End): block header, then up to MaxInstNum
// instructions prefixed with their SlotIndex (when LIS is available). If the
// region is longer, the middle is elided with "..." and the last instruction
// is still printed. The boundary instruction at End is printed after "----"
// when the region does not reach the end of the block.
LLVM_DUMP_METHOD
static void printRegion(raw_ostream &OS,
                        MachineBasicBlock::iterator Begin,
                        MachineBasicBlock::iterator End,
                        const LiveIntervals *LIS,
                        unsigned MaxInstNum =
                          std::numeric_limits<unsigned>::max()) {
  auto *BB = Begin->getParent();
  OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' '
     << BB->getName() << ":\n" ;
  auto I = Begin;
  // Always print at least one instruction, even if the caller passed 0.
  MaxInstNum = std::max(MaxInstNum, 1u);
  for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
    if (!I->isDebugInstr() && LIS)
      OS << LIS->getInstructionIndex(*I);
    OS << '\t' << *I;
  }
  if (I != End) { // truncated: elide the middle, show the last instruction
    OS << "\t...\n" ;
    I = std::prev(End);
    if (!I->isDebugInstr() && LIS)
      OS << LIS->getInstructionIndex(*I);
    OS << '\t' << *I;
  }
  if (End != BB->end()) { // print boundary inst if present
    OS << "----\n" ;
    if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
    OS << *End;
  }
}
| 74 | |
// Dump the register pressure of the live-in set at Begin ("LIn RP") and of
// the live-out set after the region's bottom instruction ("LOt RP").
// Requires a valid LIS pointer (dereferenced unconditionally).
LLVM_DUMP_METHOD
static void printLivenessInfo(raw_ostream &OS,
                              MachineBasicBlock::iterator Begin,
                              MachineBasicBlock::iterator End,
                              const LiveIntervals *LIS) {
  auto *const BB = Begin->getParent();
  const auto &MRI = BB->getParent()->getRegInfo();

  const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
  OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns));

  // If End is the block end, the bottom instruction is the one before it;
  // otherwise End itself is the (excluded) boundary instruction.
  const auto BottomMI = End == BB->end() ? std::prev(End) : End;
  const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
  OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts));
}
| 90 | |
// Dump every recorded region: a one-instruction preview, its live-in/out
// pressure and the recorded max pressure.
LLVM_DUMP_METHOD
void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
  const auto &ST = MF.getSubtarget<GCNSubtarget>();
  for (auto *const R : Regions) {
    OS << "Region to schedule " ;
    printRegion(OS, R->Begin, R->End, LIS, 1);
    printLivenessInfo(OS, R->Begin, R->End, LIS);
    OS << "Max RP: " << print(R->MaxPressure, &ST);
  }
}
| 101 | |
// Dump region R after scheduling together with its pressure change
// (R->MaxPressure before vs. RP after).
LLVM_DUMP_METHOD
void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
                                             const Region *R,
                                             const GCNRegPressure &RP) const {
  OS << "\nAfter scheduling " ;
  printRegion(OS, R->Begin, R->End, LIS);
  printSchedRP(OS, R->MaxPressure, RP);
  OS << '\n';
}
| 111 | |
// Dump a before/after register-pressure pair for a scheduling attempt.
LLVM_DUMP_METHOD
void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
                                         const GCNRegPressure &Before,
                                         const GCNRegPressure &After) const {
  const auto &ST = MF.getSubtarget<GCNSubtarget>();
  OS << "RP before: " << print(Before, &ST)
     << "RP after: " << print(After, &ST);
}
| 120 | #endif |
| 121 | |
| 122 | void GCNIterativeScheduler::swapIGLPMutations(const Region &R, bool IsReentry) { |
| 123 | bool HasIGLPInstrs = false; |
| 124 | const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(TII); |
| 125 | for (MachineBasicBlock::iterator I = R.Begin; I != R.End; I++) { |
| 126 | if (SII->isIGLPMutationOnly(Opcode: I->getOpcode())) { |
| 127 | HasIGLPInstrs = true; |
| 128 | break; |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | if (HasIGLPInstrs) { |
| 133 | SavedMutations.clear(); |
| 134 | SavedMutations.swap(x&: Mutations); |
| 135 | auto SchedPhase = IsReentry ? AMDGPU::SchedulingPhase::PreRAReentry |
| 136 | : AMDGPU::SchedulingPhase::Initial; |
| 137 | |
| 138 | addMutation(Mutation: createIGroupLPDAGMutation(Phase: SchedPhase)); |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | // DAG builder helper |
| 143 | class GCNIterativeScheduler::BuildDAG { |
| 144 | GCNIterativeScheduler &Sch; |
| 145 | SmallVector<SUnit *, 8> TopRoots; |
| 146 | |
| 147 | SmallVector<SUnit*, 8> BotRoots; |
| 148 | public: |
| 149 | BuildDAG(const Region &R, GCNIterativeScheduler &_Sch, bool IsReentry = false) |
| 150 | : Sch(_Sch) { |
| 151 | auto *BB = R.Begin->getParent(); |
| 152 | Sch.BaseClass::startBlock(bb: BB); |
| 153 | Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs); |
| 154 | Sch.swapIGLPMutations(R, IsReentry); |
| 155 | Sch.buildSchedGraph(AA: Sch.AA, RPTracker: nullptr, PDiffs: nullptr, LIS: nullptr, |
| 156 | /*TrackLaneMask*/TrackLaneMasks: true); |
| 157 | Sch.postProcessDAG(); |
| 158 | Sch.Topo.InitDAGTopologicalSorting(); |
| 159 | Sch.findRootsAndBiasEdges(TopRoots, BotRoots); |
| 160 | } |
| 161 | |
| 162 | ~BuildDAG() { |
| 163 | Sch.BaseClass::exitRegion(); |
| 164 | Sch.BaseClass::finishBlock(); |
| 165 | } |
| 166 | |
| 167 | ArrayRef<const SUnit *> getTopRoots() const { |
| 168 | return TopRoots; |
| 169 | } |
| 170 | ArrayRef<SUnit*> getBottomRoots() const { |
| 171 | return BotRoots; |
| 172 | } |
| 173 | }; |
| 174 | |
| 175 | class GCNIterativeScheduler::OverrideLegacyStrategy { |
| 176 | GCNIterativeScheduler &Sch; |
| 177 | Region &Rgn; |
| 178 | std::unique_ptr<MachineSchedStrategy> SaveSchedImpl; |
| 179 | GCNRegPressure SaveMaxRP; |
| 180 | |
| 181 | public: |
| 182 | OverrideLegacyStrategy(Region &R, |
| 183 | MachineSchedStrategy &OverrideStrategy, |
| 184 | GCNIterativeScheduler &_Sch) |
| 185 | : Sch(_Sch) |
| 186 | , Rgn(R) |
| 187 | , SaveSchedImpl(std::move(_Sch.SchedImpl)) |
| 188 | , SaveMaxRP(R.MaxPressure) { |
| 189 | Sch.SchedImpl.reset(p: &OverrideStrategy); |
| 190 | auto *BB = R.Begin->getParent(); |
| 191 | Sch.BaseClass::startBlock(bb: BB); |
| 192 | Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs); |
| 193 | } |
| 194 | |
| 195 | ~OverrideLegacyStrategy() { |
| 196 | Sch.BaseClass::exitRegion(); |
| 197 | Sch.BaseClass::finishBlock(); |
| 198 | Sch.SchedImpl.release(); |
| 199 | Sch.SchedImpl = std::move(SaveSchedImpl); |
| 200 | } |
| 201 | |
| 202 | void schedule() { |
| 203 | assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); |
| 204 | LLVM_DEBUG(dbgs() << "\nScheduling " ; |
| 205 | printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2)); |
| 206 | Sch.BaseClass::schedule(); |
| 207 | |
| 208 | // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore |
| 209 | Sch.RegionEnd = Rgn.End; |
| 210 | //assert(Rgn.End == Sch.RegionEnd); |
| 211 | Rgn.Begin = Sch.RegionBegin; |
| 212 | Rgn.MaxPressure.clear(); |
| 213 | } |
| 214 | |
| 215 | void restoreOrder() { |
| 216 | assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); |
| 217 | // DAG SUnits are stored using original region's order |
| 218 | // so just use SUnits as the restoring schedule |
| 219 | Sch.scheduleRegion(R&: Rgn, Schedule&: Sch.SUnits, MaxRP: SaveMaxRP); |
| 220 | } |
| 221 | }; |
| 222 | |
| 223 | namespace { |
| 224 | |
// just a stub to make base class happy
// (ScheduleDAGMILive requires a MachineSchedStrategy; all real scheduling is
// driven explicitly by GCNIterativeScheduler, so every hook is a no-op.)
class SchedStrategyStub : public MachineSchedStrategy {
public:
  bool shouldTrackPressure() const override { return false; }
  bool shouldTrackLaneMasks() const override { return false; }
  void initialize(ScheduleDAGMI *DAG) override {}
  SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
  void schedNode(SUnit *SU, bool IsTopNode) override {}
  void releaseTopNode(SUnit *SU) override {}
  void releaseBottomNode(SUnit *SU) override {}
};
| 236 | |
| 237 | } // end anonymous namespace |
| 238 | |
// Construct with a stub strategy (real scheduling happens in
// finalizeSchedule()); the upward pressure tracker is bound to the
// LiveIntervals set up by the base class.
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
                                             StrategyKind S)
  : BaseClass(C, std::make_unique<SchedStrategyStub>())
  , Context(C)
  , Strategy(S)
  , UPTracker(*LIS) {
}
| 246 | |
| 247 | // returns max pressure for a region |
| 248 | GCNRegPressure |
| 249 | GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin, |
| 250 | MachineBasicBlock::iterator End) |
| 251 | const { |
| 252 | // For the purpose of pressure tracking bottom inst of the region should |
| 253 | // be also processed. End is either BB end, BB terminator inst or sched |
| 254 | // boundary inst. |
| 255 | auto const BBEnd = Begin->getParent()->end(); |
| 256 | auto const BottomMI = End == BBEnd ? std::prev(x: End) : End; |
| 257 | |
| 258 | // scheduleRegions walks bottom to top, so its likely we just get next |
| 259 | // instruction to track |
| 260 | auto AfterBottomMI = std::next(x: BottomMI); |
| 261 | if (AfterBottomMI == BBEnd || |
| 262 | &*AfterBottomMI != UPTracker.getLastTrackedMI()) { |
| 263 | UPTracker.reset(MI: *BottomMI); |
| 264 | } else { |
| 265 | assert(UPTracker.isValid()); |
| 266 | } |
| 267 | |
| 268 | for (auto I = BottomMI; I != Begin; --I) |
| 269 | UPTracker.recede(MI: *I); |
| 270 | |
| 271 | UPTracker.recede(MI: *Begin); |
| 272 | |
| 273 | assert(UPTracker.isValid() || |
| 274 | (dbgs() << "Tracked region " , |
| 275 | printRegion(dbgs(), Begin, End, LIS), false)); |
| 276 | return UPTracker.getMaxPressureAndReset(); |
| 277 | } |
| 278 | |
| 279 | // returns max pressure for a tentative schedule |
| 280 | template <typename Range> GCNRegPressure |
| 281 | GCNIterativeScheduler::getSchedulePressure(const Region &R, |
| 282 | Range &&Schedule) const { |
| 283 | auto const BBEnd = R.Begin->getParent()->end(); |
| 284 | GCNUpwardRPTracker RPTracker(*LIS); |
| 285 | if (R.End != BBEnd) { |
| 286 | // R.End points to the boundary instruction but the |
| 287 | // schedule doesn't include it |
| 288 | RPTracker.reset(MI: *R.End); |
| 289 | RPTracker.recede(MI: *R.End); |
| 290 | } else { |
| 291 | // R.End doesn't point to the boundary instruction |
| 292 | RPTracker.reset(MI: *std::prev(x: BBEnd)); |
| 293 | } |
| 294 | for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) { |
| 295 | RPTracker.recede(MI: *getMachineInstr(*--I)); |
| 296 | } |
| 297 | return RPTracker.getMaxPressureAndReset(); |
| 298 | } |
| 299 | |
| 300 | void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden |
| 301 | MachineBasicBlock::iterator Begin, |
| 302 | MachineBasicBlock::iterator End, |
| 303 | unsigned NumRegionInstrs) { |
| 304 | BaseClass::enterRegion(bb: BB, begin: Begin, end: End, regioninstrs: NumRegionInstrs); |
| 305 | if (NumRegionInstrs > 2) { |
| 306 | Regions.push_back( |
| 307 | x: new (Alloc.Allocate()) |
| 308 | Region { .Begin: Begin, .End: End, .NumRegionInstrs: NumRegionInstrs, |
| 309 | .MaxPressure: getRegionPressure(Begin, End), .BestSchedule: nullptr }); |
| 310 | } |
| 311 | } |
| 312 | |
// Overridden ScheduleDAGMI hook: deliberately schedules nothing here —
// regions were recorded by enterRegion() and are scheduled later in
// finalizeSchedule(). Only emits liveness/pressure debug output.
void GCNIterativeScheduler::schedule() { // overridden
  // do nothing
  LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
             if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
               dbgs() << "Max RP: "
                      << print(Regions.back()->MaxPressure,
                               &MF.getSubtarget<GCNSubtarget>());
             } dbgs()
             << '\n';);
}
| 323 | |
| 324 | void GCNIterativeScheduler::finalizeSchedule() { // overridden |
| 325 | if (Regions.empty()) |
| 326 | return; |
| 327 | switch (Strategy) { |
| 328 | case SCHEDULE_MINREGONLY: scheduleMinReg(); break; |
| 329 | case SCHEDULE_MINREGFORCED: scheduleMinReg(force: true); break; |
| 330 | case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break; |
| 331 | case SCHEDULE_ILP: scheduleILP(TryMaximizeOccupancy: false); break; |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | // Detach schedule from SUnits and interleave it with debug values. |
| 336 | // Returned schedule becomes independent of DAG state. |
| 337 | std::vector<MachineInstr*> |
| 338 | GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const { |
| 339 | std::vector<MachineInstr*> Res; |
| 340 | Res.reserve(n: Schedule.size() * 2); |
| 341 | |
| 342 | if (FirstDbgValue) |
| 343 | Res.push_back(x: FirstDbgValue); |
| 344 | |
| 345 | const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end(); |
| 346 | for (const auto *SU : Schedule) { |
| 347 | Res.push_back(x: SU->getInstr()); |
| 348 | const auto &D = std::find_if(first: DbgB, last: DbgE, pred: [SU](decltype(*DbgB) &P) { |
| 349 | return P.second == SU->getInstr(); |
| 350 | }); |
| 351 | if (D != DbgE) |
| 352 | Res.push_back(x: D->first); |
| 353 | } |
| 354 | return Res; |
| 355 | } |
| 356 | |
| 357 | void GCNIterativeScheduler::setBestSchedule(Region &R, |
| 358 | ScheduleRef Schedule, |
| 359 | const GCNRegPressure &MaxRP) { |
| 360 | R.BestSchedule.reset( |
| 361 | p: new TentativeSchedule{ .Schedule: detachSchedule(Schedule), .MaxPressure: MaxRP }); |
| 362 | } |
| 363 | |
| 364 | void GCNIterativeScheduler::scheduleBest(Region &R) { |
| 365 | assert(R.BestSchedule.get() && "No schedule specified" ); |
| 366 | scheduleRegion(R, Schedule&: R.BestSchedule->Schedule, MaxRP: R.BestSchedule->MaxPressure); |
| 367 | R.BestSchedule.reset(); |
| 368 | } |
| 369 | |
| 370 | // minimal required region scheduler, works for ranges of SUnits*, |
| 371 | // SUnits or MachineIntrs* |
| 372 | template <typename Range> |
| 373 | void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule, |
| 374 | const GCNRegPressure &MaxRP) { |
| 375 | assert(RegionBegin == R.Begin && RegionEnd == R.End); |
| 376 | assert(LIS != nullptr); |
| 377 | #ifndef NDEBUG |
| 378 | const auto SchedMaxRP = getSchedulePressure(R, Schedule); |
| 379 | #endif |
| 380 | auto *BB = R.Begin->getParent(); |
| 381 | auto Top = R.Begin; |
| 382 | for (const auto &I : Schedule) { |
| 383 | auto MI = getMachineInstr(I); |
| 384 | if (MI != &*Top) { |
| 385 | BB->remove(I: MI); |
| 386 | BB->insert(Top, MI); |
| 387 | if (!MI->isDebugInstr()) |
| 388 | LIS->handleMove(MI&: *MI, UpdateFlags: true); |
| 389 | } |
| 390 | if (!MI->isDebugInstr()) { |
| 391 | // Reset read - undef flags and update them later. |
| 392 | for (auto &Op : MI->all_defs()) |
| 393 | Op.setIsUndef(false); |
| 394 | |
| 395 | RegisterOperands RegOpers; |
| 396 | RegOpers.collect(MI: *MI, TRI: *TRI, MRI, /*ShouldTrackLaneMasks*/TrackLaneMasks: true, |
| 397 | /*IgnoreDead*/false); |
| 398 | // Adjust liveness and add missing dead+read-undef flags. |
| 399 | auto SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot(); |
| 400 | RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI); |
| 401 | } |
| 402 | Top = std::next(MI->getIterator()); |
| 403 | } |
| 404 | RegionBegin = getMachineInstr(Schedule.front()); |
| 405 | |
| 406 | // Schedule consisting of MachineInstr* is considered 'detached' |
| 407 | // and already interleaved with debug values |
| 408 | if (!std::is_same_v<decltype(*Schedule.begin()), MachineInstr*>) { |
| 409 | placeDebugValues(); |
| 410 | // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore |
| 411 | // assert(R.End == RegionEnd); |
| 412 | RegionEnd = R.End; |
| 413 | } |
| 414 | |
| 415 | R.Begin = RegionBegin; |
| 416 | R.MaxPressure = MaxRP; |
| 417 | |
| 418 | #ifndef NDEBUG |
| 419 | const auto RegionMaxRP = getRegionPressure(R); |
| 420 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 421 | #endif |
| 422 | assert( |
| 423 | (SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP)) || |
| 424 | (dbgs() << "Max RP mismatch!!!\n" |
| 425 | "RP for schedule (calculated): " |
| 426 | << print(SchedMaxRP, &ST) |
| 427 | << "RP for schedule (reported): " << print(MaxRP, &ST) |
| 428 | << "RP after scheduling: " << print(RegionMaxRP, &ST), |
| 429 | false)); |
| 430 | } |
| 431 | |
| 432 | // Sort recorded regions by pressure - highest at the front |
| 433 | void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) { |
| 434 | llvm::sort(C&: Regions, Comp: [this, TargetOcc](const Region *R1, const Region *R2) { |
| 435 | return R2->MaxPressure.less(MF, O: R1->MaxPressure, MaxOccupancy: TargetOcc); |
| 436 | }); |
| 437 | } |
| 438 | |
| 439 | /////////////////////////////////////////////////////////////////////////////// |
| 440 | // Legacy MaxOccupancy Strategy |
| 441 | |
| 442 | // Tries to increase occupancy applying minreg scheduler for a sequence of |
| 443 | // most demanding regions. Obtained schedules are saved as BestSchedule for a |
| 444 | // region. |
| 445 | // TargetOcc is the best achievable occupancy for a kernel. |
| 446 | // Returns better occupancy on success or current occupancy on fail. |
| 447 | // BestSchedules aren't deleted on fail. |
| 448 | unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { |
| 449 | // TODO: assert Regions are sorted descending by pressure |
| 450 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 451 | const unsigned DynamicVGPRBlockSize = |
| 452 | MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize(); |
| 453 | const auto Occ = |
| 454 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
| 455 | LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc |
| 456 | << ", current = " << Occ << '\n'); |
| 457 | |
| 458 | auto NewOcc = TargetOcc; |
| 459 | for (auto *R : Regions) { |
| 460 | // Always build the DAG to add mutations |
| 461 | BuildDAG DAG(*R, *this); |
| 462 | |
| 463 | if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc) |
| 464 | continue; |
| 465 | |
| 466 | LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3); |
| 467 | printLivenessInfo(dbgs(), R->Begin, R->End, LIS)); |
| 468 | |
| 469 | const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this); |
| 470 | const auto MaxRP = getSchedulePressure(R: *R, Schedule: MinSchedule); |
| 471 | LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n" ; |
| 472 | printSchedRP(dbgs(), R->MaxPressure, MaxRP)); |
| 473 | |
| 474 | NewOcc = std::min(a: NewOcc, b: MaxRP.getOccupancy(ST, DynamicVGPRBlockSize)); |
| 475 | if (NewOcc <= Occ) |
| 476 | break; |
| 477 | |
| 478 | setBestSchedule(R&: *R, Schedule: MinSchedule, MaxRP); |
| 479 | } |
| 480 | LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc |
| 481 | << ", prev occupancy = " << Occ << '\n'); |
| 482 | if (NewOcc > Occ) { |
| 483 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 484 | MFI->increaseOccupancy(MF, Limit: NewOcc); |
| 485 | } |
| 486 | |
| 487 | return std::max(a: NewOcc, b: Occ); |
| 488 | } |
| 489 | |
| 490 | void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( |
| 491 | bool TryMaximizeOccupancy) { |
| 492 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 493 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 494 | auto TgtOcc = MFI->getMinAllowedOccupancy(); |
| 495 | unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize(); |
| 496 | |
| 497 | sortRegionsByPressure(TargetOcc: TgtOcc); |
| 498 | auto Occ = |
| 499 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
| 500 | |
| 501 | bool IsReentry = false; |
| 502 | if (TryMaximizeOccupancy && Occ < TgtOcc) { |
| 503 | Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc); |
| 504 | IsReentry = true; |
| 505 | } |
| 506 | |
| 507 | // This is really weird but for some magic scheduling regions twice |
| 508 | // gives performance improvement |
| 509 | const int NumPasses = Occ < TgtOcc ? 2 : 1; |
| 510 | |
| 511 | TgtOcc = std::min(a: Occ, b: TgtOcc); |
| 512 | LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " |
| 513 | "target occupancy = " |
| 514 | << TgtOcc << '\n'); |
| 515 | GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/true); |
| 516 | unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy()); |
| 517 | |
| 518 | for (int I = 0; I < NumPasses; ++I) { |
| 519 | // running first pass with TargetOccupancy = 0 mimics previous scheduling |
| 520 | // approach and is a performance magic |
| 521 | LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc); |
| 522 | for (auto *R : Regions) { |
| 523 | OverrideLegacyStrategy Ovr(*R, LStrgy, *this); |
| 524 | IsReentry |= I > 0; |
| 525 | swapIGLPMutations(R: *R, IsReentry); |
| 526 | Ovr.schedule(); |
| 527 | const auto RP = getRegionPressure(R: *R); |
| 528 | LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); |
| 529 | |
| 530 | if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) { |
| 531 | LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); |
| 532 | if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy( |
| 533 | ST, DynamicVGPRBlockSize) >= TgtOcc) { |
| 534 | LLVM_DEBUG(dbgs() << ", scheduling minimal register\n" ); |
| 535 | scheduleBest(R&: *R); |
| 536 | } else { |
| 537 | LLVM_DEBUG(dbgs() << ", restoring\n" ); |
| 538 | Ovr.restoreOrder(); |
| 539 | assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= |
| 540 | TgtOcc); |
| 541 | } |
| 542 | } |
| 543 | FinalOccupancy = |
| 544 | std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize)); |
| 545 | } |
| 546 | } |
| 547 | MFI->limitOccupancy(Limit: FinalOccupancy); |
| 548 | } |
| 549 | |
| 550 | /////////////////////////////////////////////////////////////////////////////// |
| 551 | // Minimal Register Strategy |
| 552 | |
| 553 | void GCNIterativeScheduler::scheduleMinReg(bool force) { |
| 554 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 555 | const auto TgtOcc = MFI->getOccupancy(); |
| 556 | sortRegionsByPressure(TargetOcc: TgtOcc); |
| 557 | |
| 558 | auto MaxPressure = Regions.front()->MaxPressure; |
| 559 | for (auto *R : Regions) { |
| 560 | if (!force && R->MaxPressure.less(MF, O: MaxPressure, MaxOccupancy: TgtOcc)) |
| 561 | break; |
| 562 | |
| 563 | BuildDAG DAG(*R, *this); |
| 564 | const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this); |
| 565 | |
| 566 | const auto RP = getSchedulePressure(R: *R, Schedule: MinSchedule); |
| 567 | LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) { |
| 568 | dbgs() << "\nWarning: Pressure becomes worse after minreg!" ; |
| 569 | printSchedRP(dbgs(), R->MaxPressure, RP); |
| 570 | }); |
| 571 | |
| 572 | if (!force && MaxPressure.less(MF, O: RP, MaxOccupancy: TgtOcc)) |
| 573 | break; |
| 574 | |
| 575 | scheduleRegion(R&: *R, Schedule: MinSchedule, MaxRP: RP); |
| 576 | LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); |
| 577 | |
| 578 | MaxPressure = RP; |
| 579 | } |
| 580 | } |
| 581 | |
| 582 | /////////////////////////////////////////////////////////////////////////////// |
| 583 | // ILP scheduler port |
| 584 | |
| 585 | void GCNIterativeScheduler::scheduleILP( |
| 586 | bool TryMaximizeOccupancy) { |
| 587 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 588 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 589 | auto TgtOcc = MFI->getMinAllowedOccupancy(); |
| 590 | unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize(); |
| 591 | |
| 592 | sortRegionsByPressure(TargetOcc: TgtOcc); |
| 593 | auto Occ = |
| 594 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
| 595 | |
| 596 | bool IsReentry = false; |
| 597 | if (TryMaximizeOccupancy && Occ < TgtOcc) { |
| 598 | Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc); |
| 599 | IsReentry = true; |
| 600 | } |
| 601 | |
| 602 | TgtOcc = std::min(a: Occ, b: TgtOcc); |
| 603 | LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " |
| 604 | "target occupancy = " |
| 605 | << TgtOcc << '\n'); |
| 606 | |
| 607 | unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy()); |
| 608 | for (auto *R : Regions) { |
| 609 | BuildDAG DAG(*R, *this, IsReentry); |
| 610 | const auto ILPSchedule = makeGCNILPScheduler(BotRoots: DAG.getBottomRoots(), DAG: *this); |
| 611 | |
| 612 | const auto RP = getSchedulePressure(R: *R, Schedule: ILPSchedule); |
| 613 | LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); |
| 614 | |
| 615 | if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) { |
| 616 | LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); |
| 617 | if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy( |
| 618 | ST, DynamicVGPRBlockSize) >= TgtOcc) { |
| 619 | LLVM_DEBUG(dbgs() << ", scheduling minimal register\n" ); |
| 620 | scheduleBest(R&: *R); |
| 621 | } |
| 622 | } else { |
| 623 | scheduleRegion(R&: *R, Schedule: ILPSchedule, MaxRP: RP); |
| 624 | LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); |
| 625 | FinalOccupancy = |
| 626 | std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize)); |
| 627 | } |
| 628 | } |
| 629 | MFI->limitOccupancy(Limit: FinalOccupancy); |
| 630 | } |
| 631 | |