//===------------------------- LSUnit.h --------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// A Load/Store unit class that models load/store queues and that implements
/// a simple weak memory consistency model.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_MCA_HARDWAREUNITS_LSUNIT_H
#define LLVM_MCA_HARDWAREUNITS_LSUNIT_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
#include "llvm/MCA/Instruction.h"
#include "llvm/Support/Compiler.h"

namespace llvm {
namespace mca {

/// Abstract base interface for LS (load/store) units in llvm-mca.
class LLVM_ABI LSUnitBase : public HardwareUnit {
  /// Load queue size.
  ///
  /// A value of zero for this field means that the load queue is unbounded.
  /// Processor models can declare the size of a load queue via tablegen (see
  /// the definition of tablegen class LoadQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned LQSize;

  /// Store queue size.
  ///
  /// A value of zero for this field means that the store queue is unbounded.
  /// Processor models can declare the size of a store queue via tablegen (see
  /// the definition of tablegen class StoreQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned SQSize;

  unsigned UsedLQEntries;
  unsigned UsedSQEntries;

  /// True if loads don't alias with stores.
  ///
  /// By default, the LS unit assumes that loads and stores don't alias with
  /// each other. If this field is set to false, then loads are always assumed
  /// to alias with stores.
  const bool NoAlias;

public:
  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
             unsigned StoreQueueSize, bool AssumeNoAlias);

  ~LSUnitBase() override;

  /// Returns the total number of entries in the load queue.
  unsigned getLoadQueueSize() const { return LQSize; }

  /// Returns the total number of entries in the store queue.
  unsigned getStoreQueueSize() const { return SQSize; }

  unsigned getUsedLQEntries() const { return UsedLQEntries; }
  unsigned getUsedSQEntries() const { return UsedSQEntries; }
  void acquireLQSlot() { ++UsedLQEntries; }
  void acquireSQSlot() { ++UsedSQEntries; }
  void releaseLQSlot() { --UsedLQEntries; }
  void releaseSQSlot() { --UsedSQEntries; }

  bool assumeNoAlias() const { return NoAlias; }

  enum Status {
    LSU_AVAILABLE = 0,
    LSU_LQUEUE_FULL, // Load Queue unavailable
    LSU_SQUEUE_FULL  // Store Queue unavailable
  };

  /// This method checks the availability of the load/store buffers.
  ///
  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR. By default, LSU_AVAILABLE is returned if IR
  /// is not a memory operation.
  virtual Status isAvailable(const InstRef &IR) const = 0;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// with a LSUnitBase::Status value of LSU_AVAILABLE.
  /// Returns the GroupID associated with this instruction. That value will be
  /// used to set the LSUTokenID field in class Instruction.
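  ///
  /// A minimal usage sketch (the surrounding dispatch logic is hypothetical;
  /// only isAvailable() and dispatch() belong to this interface, and
  /// Instruction::setLSUTokenID() is assumed to be the setter for that field):
  ///
  /// \code
  ///   if (LSU.isAvailable(IR) == LSUnitBase::LSU_AVAILABLE) {
  ///     unsigned TokenID = LSU.dispatch(IR);
  ///     IR.getInstruction()->setLSUTokenID(TokenID);
  ///   }
  /// \endcode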
  virtual unsigned dispatch(const InstRef &IR) = 0;

  bool isSQEmpty() const { return !UsedSQEntries; }
  bool isLQEmpty() const { return !UsedLQEntries; }
  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }

  /// Check if a previously dispatched instruction IR is now ready for
  /// execution.
  virtual bool isReady(const InstRef &IR) const = 0;

  /// Check if instruction IR only depends on memory instructions that are
  /// currently executing.
  virtual bool isPending(const InstRef &IR) const = 0;

  /// Check if instruction IR is still waiting on memory operations, and the
  /// wait time is still unknown.
  virtual bool isWaiting(const InstRef &IR) const = 0;

  virtual bool hasDependentUsers(const InstRef &IR) const = 0;

  virtual const CriticalDependency getCriticalPredecessor(unsigned GroupId) = 0;

  virtual void onInstructionExecuted(const InstRef &IR) = 0;

  // Loads are tracked by the LDQ (load queue) from dispatch until completion.
  // Stores are tracked by the STQ (store queue) from dispatch until commitment.
  // By default we conservatively assume that the LDQ receives a load at
  // dispatch. Loads leave the LDQ at retirement stage.
  virtual void onInstructionRetired(const InstRef &IR) = 0;

  virtual void onInstructionIssued(const InstRef &IR) = 0;

  virtual void cycleEvent() = 0;

#ifndef NDEBUG
  virtual void dump() const = 0;
#endif
};

/// Default Load/Store Unit (LS Unit) for simulated processors.
///
/// Each load (or store) consumes one entry in the load (or store) queue.
///
/// Rules are:
/// 1) A younger load is allowed to pass an older load only if there are no
///    stores nor barriers in between the two loads.
/// 2) A younger store is not allowed to pass an older store.
/// 3) A younger store is not allowed to pass an older load.
/// 4) A younger load is allowed to pass an older store only if the load does
///    not alias with the store.
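///
/// For example, consider the following hypothetical sequence of memory
/// operations (plain C++ used purely for illustration):
///
/// \code
///   int X = *A; // (1) older load
///   *B = Y;     // (2) store
///   int Z = *C; // (3) younger load
/// \endcode
///
/// Rule 3 prevents (2) from passing (1). Rule 4 decides whether (3) may pass
/// (2): under the default no-alias assumption it may; otherwise it has to
/// wait until (2) has executed.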
///
/// This class optimistically assumes that loads don't alias store operations.
/// Under this assumption, younger loads are always allowed to pass older
/// stores (this only affects rule 4).
/// Essentially, this class doesn't perform any sort of alias analysis to
/// identify aliasing loads and stores.
///
/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
/// set to `false` by the constructor of LSUnit.
///
/// Note that this class doesn't know about the existence of different memory
/// types for memory operations (example: write-through, write-combining, etc.).
/// Derived classes are responsible for implementing that extra knowledge, and
/// provide different sets of rules for loads and stores by overriding method
/// `isReady()`.
/// To emulate a write-combining memory type, rule 2 must be relaxed in a
/// derived class to enable the reordering of non-aliasing store operations.
///
/// No assumptions are made by this class on the size of the store buffer. This
/// class doesn't know how to identify cases where store-to-load forwarding may
/// occur.
///
/// LSUnit doesn't attempt to predict whether a load or store hits or misses
/// the L1 cache. To be more specific, LSUnit doesn't know anything about
/// cache hierarchy and memory types.
/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
/// scheduling model provides an "optimistic" load-to-use latency (which usually
/// matches the load-to-use latency for when there is a hit in the L1D).
/// Derived classes may expand this knowledge.
///
/// Class MCInstrDesc in LLVM doesn't know about serializing operations or
/// memory-barrier-like instructions.
/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
/// serializes loads without forcing a flush of the load queue.
/// Similarly, instructions that both `mayStore` and have `unmodeled side
/// effects` are treated like store barriers. A full memory barrier is a
/// 'mayLoad' and 'mayStore' instruction with unmodeled side effects. This is
/// obviously inaccurate, but this is the best that we can do at the moment.
///
/// Each load/store barrier consumes one entry in the load/store queue. A
/// load/store barrier enforces ordering of loads/stores:
/// - A younger load cannot pass a load barrier.
/// - A younger store cannot pass a store barrier.
///
/// A younger load has to wait for the memory load barrier to execute.
/// A load/store barrier is "executed" when it becomes the oldest entry in
/// the load/store queue(s). That also means that all the older loads/stores
/// have already been executed.
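///
/// For example, given the hypothetical sequence "load A; load-barrier;
/// load B", 'load B' cannot start executing until the barrier has become the
/// oldest entry in the load queue, which in turn requires 'load A' to have
/// already executed.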
class LLVM_ABI LSUnit : public LSUnitBase {

  // This class doesn't know about the latency of a load instruction. So, it
  // conservatively/pessimistically assumes that the latency of a load opcode
  // matches the instruction latency.
  //
  // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
  // and load/store conflicts, the latency of a load is determined by the depth
  // of the load pipeline. So, we could use field `LoadLatency` in the
  // MCSchedModel to model that latency.
  // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
  // L1D, and it usually already accounts for any extra latency due to data
  // forwarding.
  // When doing throughput analysis, `LoadLatency` is likely to be a better
  // predictor of load latency than instruction latency. This is particularly
  // true when simulating code with temporal/spatial locality of memory
  // accesses.
  // Using `LoadLatency` (instead of the instruction latency) is also expected
  // to improve the load queue allocation for long latency instructions with
  // folded memory operands (See PR39829).
  //
  // FIXME: On some processors, load/store operations are split into multiple
  // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
  // not 256-bit data types. So, a 256-bit load is effectively split into two
  // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
  // simplicity, this class optimistically assumes that a load instruction only
  // consumes one entry in the LoadQueue. Similarly, store instructions only
  // consume a single entry in the StoreQueue.
  // In the future, we should reassess the quality of this design, and consider
  // alternative approaches that let instructions specify the number of
  // load/store queue entries which they consume at dispatch stage (See
  // PR39830).
  //
  // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a store barrier. It forces older stores to be
  // executed before newer stores are issued.
  //
  // An instruction that both 'mayLoad' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a load barrier. It forces older loads to execute
  // before newer loads are issued.

protected:
  /// A node of a memory dependency graph. A MemoryGroup describes a set of
  /// instructions with the same memory dependencies.
  ///
  /// By construction, instructions of a MemoryGroup don't depend on each other.
  /// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
  /// A MemoryGroup identifier is then stored as a "token" in field
  /// Instruction::LSUTokenID of each dispatched instruction. That token is
  /// used internally by the LSUnit to track memory dependencies.
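  ///
  /// For example (a hedged sketch of the intended flow): two back-to-back
  /// loads with no intervening store may be mapped to the same MemoryGroup.
  /// Both instructions then carry the same LSUTokenID, and both become ready
  /// once all of the group's predecessors (if any) have fully executed.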
  class MemoryGroup {
    unsigned NumPredecessors = 0;
    unsigned NumExecutingPredecessors = 0;
    unsigned NumExecutedPredecessors = 0;

    unsigned NumInstructions = 0;
    unsigned NumExecuting = 0;
    unsigned NumExecuted = 0;
    // Successors that are in an order dependency with this group.
    SmallVector<MemoryGroup *, 4> OrderSucc;
    // Successors that are in a data dependency with this group.
    SmallVector<MemoryGroup *, 4> DataSucc;

    CriticalDependency CriticalPredecessor;
    InstRef CriticalMemoryInstruction;

    MemoryGroup(const MemoryGroup &) = delete;
    MemoryGroup &operator=(const MemoryGroup &) = delete;

  public:
    MemoryGroup() = default;
    MemoryGroup(MemoryGroup &&) = default;

    size_t getNumSuccessors() const {
      return OrderSucc.size() + DataSucc.size();
    }
    unsigned getNumPredecessors() const { return NumPredecessors; }
    unsigned getNumExecutingPredecessors() const {
      return NumExecutingPredecessors;
    }
    unsigned getNumExecutedPredecessors() const {
      return NumExecutedPredecessors;
    }
    unsigned getNumInstructions() const { return NumInstructions; }
    unsigned getNumExecuting() const { return NumExecuting; }
    unsigned getNumExecuted() const { return NumExecuted; }

    const InstRef &getCriticalMemoryInstruction() const {
      return CriticalMemoryInstruction;
    }
    const CriticalDependency &getCriticalPredecessor() const {
      return CriticalPredecessor;
    }

    void addSuccessor(MemoryGroup *Group, bool IsDataDependent) {
      // Do not need to add a dependency if there is no data
      // dependency and all instructions from this group have been
      // issued already.
      if (!IsDataDependent && isExecuting())
        return;

      Group->NumPredecessors++;
      assert(!isExecuted() && "Should have been removed!");
      if (isExecuting())
        Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent);

      if (IsDataDependent)
        DataSucc.emplace_back(Group);
      else
        OrderSucc.emplace_back(Group);
    }
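
    // A note on the state predicates below (a summary inferred from the
    // counters above, not a separate contract): a group starts out "waiting"
    // on its predecessors, becomes "pending" once every predecessor is at
    // least executing, and "ready" once all predecessors have fully executed.
    // Whether the group itself is "executing" or "executed" depends on the
    // progress of its own instructions.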
    bool isWaiting() const {
      return NumPredecessors >
             (NumExecutingPredecessors + NumExecutedPredecessors);
    }
    bool isPending() const {
      return NumExecutingPredecessors &&
             ((NumExecutedPredecessors + NumExecutingPredecessors) ==
              NumPredecessors);
    }
    bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
    bool isExecuting() const {
      return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
    }
    bool isExecuted() const { return NumInstructions == NumExecuted; }

    void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) {
      assert(!isReady() && "Unexpected group-start event!");
      NumExecutingPredecessors++;

      if (!ShouldUpdateCriticalDep)
        return;

      unsigned Cycles = IR.getInstruction()->getCyclesLeft();
      if (CriticalPredecessor.Cycles < Cycles) {
        CriticalPredecessor.IID = IR.getSourceIndex();
        CriticalPredecessor.Cycles = Cycles;
      }
    }

    void onGroupExecuted() {
      assert(!isReady() && "Inconsistent state found!");
      NumExecutingPredecessors--;
      NumExecutedPredecessors++;
    }

    void onInstructionIssued(const InstRef &IR) {
      assert(!isExecuting() && "Invalid internal state!");
      ++NumExecuting;

      // Update the CriticalMemoryInstruction.
      const Instruction &IS = *IR.getInstruction();
      if ((bool)CriticalMemoryInstruction) {
        const Instruction &OtherIS =
            *CriticalMemoryInstruction.getInstruction();
        if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
          CriticalMemoryInstruction = IR;
      } else {
        CriticalMemoryInstruction = IR;
      }

      if (!isExecuting())
        return;

      // Notify successors that this group started execution.
      for (MemoryGroup *MG : OrderSucc) {
        MG->onGroupIssued(CriticalMemoryInstruction, false);
        // Release the order dependency with this group.
        MG->onGroupExecuted();
      }

      for (MemoryGroup *MG : DataSucc)
        MG->onGroupIssued(CriticalMemoryInstruction, true);
    }

    void onInstructionExecuted(const InstRef &IR) {
      assert(isReady() && !isExecuted() && "Invalid internal state!");
      --NumExecuting;
      ++NumExecuted;

      if (CriticalMemoryInstruction &&
          CriticalMemoryInstruction.getSourceIndex() == IR.getSourceIndex()) {
        CriticalMemoryInstruction.invalidate();
      }

      if (!isExecuted())
        return;

      // Notify data dependent successors that this group has finished
      // execution.
      for (MemoryGroup *MG : DataSucc)
        MG->onGroupExecuted();
    }

    void addInstruction() {
      assert(!getNumSuccessors() && "Cannot add instructions to this group!");
      ++NumInstructions;
    }

    void cycleEvent() {
      if (isWaiting() && CriticalPredecessor.Cycles)
        CriticalPredecessor.Cycles--;
    }
  };

  /// Used to map group identifiers to MemoryGroups.
  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
  unsigned NextGroupID = 1;

  unsigned CurrentLoadGroupID;
  unsigned CurrentLoadBarrierGroupID;
  unsigned CurrentStoreGroupID;
  unsigned CurrentStoreBarrierGroupID;

public:
  LSUnit(const MCSchedModel &SM)
      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
        CurrentStoreBarrierGroupID(0) {}
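
  // Example construction (hypothetical queue sizes; in practice these values
  // normally come from the processor's scheduling model):
  //
  //   LSUnit LSU(SM, /* LQ */ 72, /* SQ */ 56, /* AssumeNoAlias */ true);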

  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR.
  Status isAvailable(const InstRef &IR) const override;

  bool isReady(const InstRef &IR) const override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isReady();
  }

  bool isPending(const InstRef &IR) const override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isPending();
  }

  bool isWaiting(const InstRef &IR) const override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isWaiting();
  }

  bool hasDependentUsers(const InstRef &IR) const override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return !Group.isExecuted() && Group.getNumSuccessors();
  }

  const CriticalDependency getCriticalPredecessor(unsigned GroupId) override {
    const MemoryGroup &Group = getGroup(GroupId);
    return Group.getCriticalPredecessor();
  }

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// returning LSU_AVAILABLE.
  ///
  /// By default, rules are:
  /// 1. A store may not pass a previous store.
  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
  /// 3. A load may pass a previous load.
  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
  /// 5. A load has to wait until an older load barrier is fully executed.
  /// 6. A store has to wait until an older store barrier is fully executed.
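  ///
  /// For example, in the hypothetical sequence "load A; store B; load C",
  /// rule 4 forces 'store B' to wait for 'load A', while rule 2 decides
  /// whether 'load C' must also wait for 'store B' (it must, unless flag
  /// 'NoAlias' is set).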
  unsigned dispatch(const InstRef &IR) override;

  void onInstructionIssued(const InstRef &IR) override {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    Groups[GroupID]->onInstructionIssued(IR);
  }

  void onInstructionRetired(const InstRef &IR) override;

  void onInstructionExecuted(const InstRef &IR) override;

  void cycleEvent() override;

#ifndef NDEBUG
  void dump() const override;
#endif

private:
  bool isValidGroupID(unsigned Index) const {
    return Index && Groups.contains(Index);
  }

  const MemoryGroup &getGroup(unsigned Index) const {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  MemoryGroup &getGroup(unsigned Index) {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  unsigned createMemoryGroup() {
    Groups.insert(std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
    return NextGroupID++;
  }
};

} // namespace mca
} // namespace llvm

#endif // LLVM_MCA_HARDWAREUNITS_LSUNIT_H