| 1 | //===-- SIModeRegister.cpp - Mode Register --------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// This pass inserts changes to the Mode register settings as required. |
| 10 | /// Note that currently it only deals with the Double Precision Floating Point |
| 11 | /// rounding mode setting, but is intended to be generic enough to be easily |
| 12 | /// expanded. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | // |
| 16 | #include "AMDGPU.h" |
| 17 | #include "GCNSubtarget.h" |
| 18 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 19 | #include "llvm/ADT/Statistic.h" |
| 20 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 21 | #include <queue> |
| 22 | |
| 23 | #define DEBUG_TYPE "si-mode-register" |
| 24 | |
| 25 | STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted." ); |
| 26 | |
| 27 | using namespace llvm; |
| 28 | |
/// Describes what is known about the Mode register as a (Mask, Mode) pair.
///
/// A set bit in Mask means the corresponding bit of Mode holds a known value.
/// The two-argument constructor clears every Mode bit that is not covered by
/// Mask, so values built through it can be compared with operator==.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value
  unsigned Mask = 0;
  unsigned Mode = 0;

  Status() = default;

  // Mode bits outside NewMask are discarded so the representation stays
  // normalised.
  Status(unsigned NewMask, unsigned NewMode)
      : Mask(NewMask), Mode(NewMode & NewMask) {}

  // Merge two status values: every bit known to S overrides the value held
  // here, bits known only to this value are preserved.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  // Merge an unknown value by using the unknown value's mask to remove bits
  // from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  // Intersect two Status values to produce a mode and mask that is a subset
  // of both values: a bit survives only when it is known on both sides and
  // both sides agree on its value.
  Status intersect(const Status &S) const {
    // (Mode ^ ~S.Mode) has a '1' wherever the two Mode values are equal.
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  // Produce the delta required to change the Mode to the required Mode: the
  // bits S needs that either differ from, or are unknown in, this value.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  // True when every bit S requires is known here and already has the value S
  // requires.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  // True when S shares no known bits with this value, or is compatible with
  // it.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
| 78 | |
| 79 | class BlockData { |
| 80 | public: |
| 81 | // The Status that represents the mode register settings required by the |
| 82 | // FirstInsertionPoint (if any) in this block. Calculated in Phase 1. |
| 83 | Status Require; |
| 84 | |
| 85 | // The Status that represents the net changes to the Mode register made by |
| 86 | // this block, Calculated in Phase 1. |
| 87 | Status Change; |
| 88 | |
| 89 | // The Status that represents the mode register settings on exit from this |
| 90 | // block. Calculated in Phase 2. |
| 91 | Status Exit; |
| 92 | |
| 93 | // The Status that represents the intersection of exit Mode register settings |
| 94 | // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3. |
| 95 | Status Pred; |
| 96 | |
| 97 | // In Phase 1 we record the first instruction that has a mode requirement, |
| 98 | // which is used in Phase 3 if we need to insert a mode change. |
| 99 | MachineInstr *FirstInsertionPoint = nullptr; |
| 100 | |
| 101 | // A flag to indicate whether an Exit value has been set (we can't tell by |
| 102 | // examining the Exit value itself as all values may be valid results). |
| 103 | bool ExitSet = false; |
| 104 | |
| 105 | BlockData() = default; |
| 106 | }; |
| 107 | |
| 108 | namespace { |
| 109 | |
| 110 | class SIModeRegister { |
| 111 | public: |
| 112 | std::vector<std::unique_ptr<BlockData>> BlockInfo; |
| 113 | std::queue<MachineBasicBlock *> Phase2List; |
| 114 | |
| 115 | // The default mode register setting currently only caters for the floating |
| 116 | // point double precision rounding mode. |
| 117 | // We currently assume the default rounding mode is Round to Nearest |
| 118 | // NOTE: this should come from a per function rounding mode setting once such |
| 119 | // a setting exists. |
| 120 | unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST; |
| 121 | Status DefaultStatus = |
| 122 | Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode)); |
| 123 | |
| 124 | bool Changed = false; |
| 125 | |
| 126 | bool run(MachineFunction &MF); |
| 127 | |
| 128 | void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII); |
| 129 | |
| 130 | void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII); |
| 131 | |
| 132 | void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII); |
| 133 | |
| 134 | Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII); |
| 135 | |
| 136 | void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I, |
| 137 | const SIInstrInfo *TII, Status InstrMode); |
| 138 | }; |
| 139 | |
| 140 | class SIModeRegisterLegacy : public MachineFunctionPass { |
| 141 | public: |
| 142 | static char ID; |
| 143 | |
| 144 | SIModeRegisterLegacy() : MachineFunctionPass(ID) {} |
| 145 | |
| 146 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 147 | |
| 148 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 149 | AU.setPreservesCFG(); |
| 150 | MachineFunctionPass::getAnalysisUsage(AU); |
| 151 | } |
| 152 | }; |
| 153 | } // End anonymous namespace. |
| 154 | |
| 155 | INITIALIZE_PASS(SIModeRegisterLegacy, DEBUG_TYPE, |
| 156 | "Insert required mode register values" , false, false) |
| 157 | |
| 158 | char SIModeRegisterLegacy::ID = 0; |
| 159 | |
| 160 | char &llvm::SIModeRegisterID = SIModeRegisterLegacy::ID; |
| 161 | |
| 162 | FunctionPass *llvm::createSIModeRegisterPass() { |
| 163 | return new SIModeRegisterLegacy(); |
| 164 | } |
| 165 | |
| 166 | // Determine the Mode register setting required for this instruction. |
| 167 | // Instructions which don't use the Mode register return a null Status. |
| 168 | // Note this currently only deals with instructions that use the floating point |
| 169 | // double precision setting. |
| 170 | Status SIModeRegister::getInstructionMode(MachineInstr &MI, |
| 171 | const SIInstrInfo *TII) { |
| 172 | unsigned Opcode = MI.getOpcode(); |
| 173 | if (TII->usesFPDPRounding(MI) || |
| 174 | Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO || |
| 175 | Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 || |
| 176 | Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 || |
| 177 | Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) { |
| 178 | switch (Opcode) { |
| 179 | case AMDGPU::V_INTERP_P1LL_F16: |
| 180 | case AMDGPU::V_INTERP_P1LV_F16: |
| 181 | case AMDGPU::V_INTERP_P2_F16: |
| 182 | // f16 interpolation instructions need double precision round to zero |
| 183 | return Status(FP_ROUND_MODE_DP(3), |
| 184 | FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO)); |
| 185 | case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: { |
| 186 | unsigned Mode = MI.getOperand(i: 2).getImm(); |
| 187 | MI.removeOperand(OpNo: 2); |
| 188 | MI.setDesc(TII->get(Opcode: AMDGPU::V_CVT_F16_F32_e32)); |
| 189 | return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode)); |
| 190 | } |
| 191 | case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: { |
| 192 | unsigned Mode = MI.getOperand(i: 2).getImm(); |
| 193 | MI.removeOperand(OpNo: 2); |
| 194 | MI.setDesc(TII->get(Opcode: AMDGPU::V_CVT_F16_F32_fake16_e32)); |
| 195 | return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode)); |
| 196 | } |
| 197 | case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: { |
| 198 | unsigned Mode = MI.getOperand(i: 6).getImm(); |
| 199 | MI.removeOperand(OpNo: 6); |
| 200 | MI.setDesc(TII->get(Opcode: AMDGPU::V_CVT_F16_F32_t16_e64)); |
| 201 | return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode)); |
| 202 | } |
| 203 | case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: { |
| 204 | unsigned Mode = MI.getOperand(i: 2).getImm(); |
| 205 | MI.removeOperand(OpNo: 2); |
| 206 | MI.setDesc(TII->get(Opcode: AMDGPU::V_CVT_F32_F64_e32)); |
| 207 | return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode)); |
| 208 | } |
| 209 | default: |
| 210 | return DefaultStatus; |
| 211 | } |
| 212 | } |
| 213 | return Status(); |
| 214 | } |
| 215 | |
| 216 | // Insert a setreg instruction to update the Mode register. |
| 217 | // It is possible (though unlikely) for an instruction to require a change to |
| 218 | // the value of disjoint parts of the Mode register when we don't know the |
| 219 | // value of the intervening bits. In that case we need to use more than one |
| 220 | // setreg instruction. |
| 221 | void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI, |
| 222 | const SIInstrInfo *TII, Status InstrMode) { |
| 223 | while (InstrMode.Mask) { |
| 224 | unsigned Offset = llvm::countr_zero<unsigned>(Val: InstrMode.Mask); |
| 225 | unsigned Width = llvm::countr_one<unsigned>(Value: InstrMode.Mask >> Offset); |
| 226 | unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1); |
| 227 | using namespace AMDGPU::Hwreg; |
| 228 | BuildMI(BB&: MBB, I: MI, MIMD: nullptr, MCID: TII->get(Opcode: AMDGPU::S_SETREG_IMM32_B32)) |
| 229 | .addImm(Val: Value) |
| 230 | .addImm(Val: HwregEncoding::encode(Values: ID_MODE, Values: Offset, Values: Width)); |
| 231 | ++NumSetregInserted; |
| 232 | Changed = true; |
| 233 | InstrMode.Mask &= ~(((1 << Width) - 1) << Offset); |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | // In Phase 1 we iterate through the instructions of the block and for each |
| 238 | // instruction we get its mode usage. If the instruction uses the Mode register |
| 239 | // we: |
| 240 | // - update the Change status, which tracks the changes to the Mode register |
| 241 | // made by this block |
| 242 | // - if this instruction's requirements are compatible with the current setting |
| 243 | // of the Mode register we merge the modes |
| 244 | // - if it isn't compatible and an InsertionPoint isn't set, then we set the |
| 245 | // InsertionPoint to the current instruction, and we remember the current |
| 246 | // mode |
| 247 | // - if it isn't compatible and InsertionPoint is set we insert a seteg before |
| 248 | // that instruction (unless this instruction forms part of the block's |
| 249 | // entry requirements in which case the insertion is deferred until Phase 3 |
| 250 | // when predecessor exit values are known), and move the insertion point to |
| 251 | // this instruction |
| 252 | // - if this is a setreg instruction we treat it as an incompatible instruction. |
| 253 | // This is sub-optimal but avoids some nasty corner cases, and is expected to |
| 254 | // occur very rarely. |
| 255 | // - on exit we have set the Require, Change, and initial Exit modes. |
| 256 | void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB, |
| 257 | const SIInstrInfo *TII) { |
| 258 | auto NewInfo = std::make_unique<BlockData>(); |
| 259 | MachineInstr *InsertionPoint = nullptr; |
| 260 | // RequirePending is used to indicate whether we are collecting the initial |
| 261 | // requirements for the block, and need to defer the first InsertionPoint to |
| 262 | // Phase 3. It is set to false once we have set FirstInsertionPoint, or when |
| 263 | // we discover an explicit setreg that means this block doesn't have any |
| 264 | // initial requirements. |
| 265 | bool RequirePending = true; |
| 266 | Status IPChange; |
| 267 | for (MachineInstr &MI : MBB) { |
| 268 | Status InstrMode = getInstructionMode(MI, TII); |
| 269 | if (MI.getOpcode() == AMDGPU::S_SETREG_B32 || |
| 270 | MI.getOpcode() == AMDGPU::S_SETREG_B32_mode || |
| 271 | MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || |
| 272 | MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) { |
| 273 | // We preserve any explicit mode register setreg instruction we encounter, |
| 274 | // as we assume it has been inserted by a higher authority (this is |
| 275 | // likely to be a very rare occurrence). |
| 276 | unsigned Dst = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::simm16)->getImm(); |
| 277 | using namespace AMDGPU::Hwreg; |
| 278 | auto [Id, Offset, Width] = HwregEncoding::decode(Encoded: Dst); |
| 279 | if (Id != ID_MODE) |
| 280 | continue; |
| 281 | |
| 282 | unsigned Mask = maskTrailingOnes<unsigned>(N: Width) << Offset; |
| 283 | |
| 284 | // If an InsertionPoint is set we will insert a setreg there. |
| 285 | if (InsertionPoint) { |
| 286 | insertSetreg(MBB, MI: InsertionPoint, TII, InstrMode: IPChange.delta(S: NewInfo->Change)); |
| 287 | InsertionPoint = nullptr; |
| 288 | } |
| 289 | // If this is an immediate then we know the value being set, but if it is |
| 290 | // not an immediate then we treat the modified bits of the mode register |
| 291 | // as unknown. |
| 292 | if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || |
| 293 | MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) { |
| 294 | unsigned Val = TII->getNamedOperand(MI, OperandName: AMDGPU::OpName::imm)->getImm(); |
| 295 | unsigned Mode = (Val << Offset) & Mask; |
| 296 | Status Setreg = Status(Mask, Mode); |
| 297 | // If we haven't already set the initial requirements for the block we |
| 298 | // don't need to as the requirements start from this explicit setreg. |
| 299 | RequirePending = false; |
| 300 | NewInfo->Change = NewInfo->Change.merge(S: Setreg); |
| 301 | } else { |
| 302 | NewInfo->Change = NewInfo->Change.mergeUnknown(newMask: Mask); |
| 303 | } |
| 304 | } else if (!NewInfo->Change.isCompatible(S&: InstrMode)) { |
| 305 | // This instruction uses the Mode register and its requirements aren't |
| 306 | // compatible with the current mode. |
| 307 | if (InsertionPoint) { |
| 308 | // If the required mode change cannot be included in the current |
| 309 | // InsertionPoint changes, we need a setreg and start a new |
| 310 | // InsertionPoint. |
| 311 | if (!IPChange.delta(S: NewInfo->Change).isCombinable(S&: InstrMode)) { |
| 312 | if (RequirePending) { |
| 313 | // This is the first insertionPoint in the block so we will defer |
| 314 | // the insertion of the setreg to Phase 3 where we know whether or |
| 315 | // not it is actually needed. |
| 316 | NewInfo->FirstInsertionPoint = InsertionPoint; |
| 317 | NewInfo->Require = NewInfo->Change; |
| 318 | RequirePending = false; |
| 319 | } else { |
| 320 | insertSetreg(MBB, MI: InsertionPoint, TII, |
| 321 | InstrMode: IPChange.delta(S: NewInfo->Change)); |
| 322 | IPChange = NewInfo->Change; |
| 323 | } |
| 324 | // Set the new InsertionPoint |
| 325 | InsertionPoint = &MI; |
| 326 | } |
| 327 | NewInfo->Change = NewInfo->Change.merge(S: InstrMode); |
| 328 | } else { |
| 329 | // No InsertionPoint is currently set - this is either the first in |
| 330 | // the block or we have previously seen an explicit setreg. |
| 331 | InsertionPoint = &MI; |
| 332 | IPChange = NewInfo->Change; |
| 333 | NewInfo->Change = NewInfo->Change.merge(S: InstrMode); |
| 334 | } |
| 335 | } |
| 336 | } |
| 337 | if (RequirePending) { |
| 338 | // If we haven't yet set the initial requirements for the block we set them |
| 339 | // now. |
| 340 | NewInfo->FirstInsertionPoint = InsertionPoint; |
| 341 | NewInfo->Require = NewInfo->Change; |
| 342 | } else if (InsertionPoint) { |
| 343 | // We need to insert a setreg at the InsertionPoint |
| 344 | insertSetreg(MBB, MI: InsertionPoint, TII, InstrMode: IPChange.delta(S: NewInfo->Change)); |
| 345 | } |
| 346 | NewInfo->Exit = NewInfo->Change; |
| 347 | BlockInfo[MBB.getNumber()] = std::move(NewInfo); |
| 348 | } |
| 349 | |
| 350 | // In Phase 2 we revisit each block and calculate the common Mode register |
| 351 | // value provided by all predecessor blocks. If the Exit value for the block |
| 352 | // is changed, then we add the successor blocks to the worklist so that the |
| 353 | // exit value is propagated. |
| 354 | void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB, |
| 355 | const SIInstrInfo *TII) { |
| 356 | bool RevisitRequired = false; |
| 357 | bool ExitSet = false; |
| 358 | unsigned ThisBlock = MBB.getNumber(); |
| 359 | if (MBB.pred_empty()) { |
| 360 | // There are no predecessors, so use the default starting status. |
| 361 | BlockInfo[ThisBlock]->Pred = DefaultStatus; |
| 362 | ExitSet = true; |
| 363 | } else { |
| 364 | // Build a status that is common to all the predecessors by intersecting |
| 365 | // all the predecessor exit status values. |
| 366 | // Mask bits (which represent the Mode bits with a known value) can only be |
| 367 | // added by explicit SETREG instructions or the initial default value - |
| 368 | // the intersection process may remove Mask bits. |
| 369 | // If we find a predecessor that has not yet had an exit value determined |
| 370 | // (this can happen for example if a block is its own predecessor) we defer |
| 371 | // use of that value as the Mask will be all zero, and we will revisit this |
| 372 | // block again later (unless the only predecessor without an exit value is |
| 373 | // this block). |
| 374 | MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end(); |
| 375 | MachineBasicBlock &PB = *(*P); |
| 376 | unsigned PredBlock = PB.getNumber(); |
| 377 | if ((ThisBlock == PredBlock) && (std::next(x: P) == E)) { |
| 378 | BlockInfo[ThisBlock]->Pred = DefaultStatus; |
| 379 | ExitSet = true; |
| 380 | } else if (BlockInfo[PredBlock]->ExitSet) { |
| 381 | BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; |
| 382 | ExitSet = true; |
| 383 | } else if (PredBlock != ThisBlock) |
| 384 | RevisitRequired = true; |
| 385 | |
| 386 | for (P = std::next(x: P); P != E; P = std::next(x: P)) { |
| 387 | MachineBasicBlock *Pred = *P; |
| 388 | unsigned PredBlock = Pred->getNumber(); |
| 389 | if (BlockInfo[PredBlock]->ExitSet) { |
| 390 | if (BlockInfo[ThisBlock]->ExitSet) { |
| 391 | BlockInfo[ThisBlock]->Pred = |
| 392 | BlockInfo[ThisBlock]->Pred.intersect(S: BlockInfo[PredBlock]->Exit); |
| 393 | } else { |
| 394 | BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; |
| 395 | } |
| 396 | ExitSet = true; |
| 397 | } else if (PredBlock != ThisBlock) |
| 398 | RevisitRequired = true; |
| 399 | } |
| 400 | } |
| 401 | Status TmpStatus = |
| 402 | BlockInfo[ThisBlock]->Pred.merge(S: BlockInfo[ThisBlock]->Change); |
| 403 | if (BlockInfo[ThisBlock]->Exit != TmpStatus) { |
| 404 | BlockInfo[ThisBlock]->Exit = TmpStatus; |
| 405 | // Add the successors to the work list so we can propagate the changed exit |
| 406 | // status. |
| 407 | for (MachineBasicBlock *Succ : MBB.successors()) |
| 408 | Phase2List.push(x: Succ); |
| 409 | } |
| 410 | BlockInfo[ThisBlock]->ExitSet = ExitSet; |
| 411 | if (RevisitRequired) |
| 412 | Phase2List.push(x: &MBB); |
| 413 | } |
| 414 | |
| 415 | // In Phase 3 we revisit each block and if it has an insertion point defined we |
| 416 | // check whether the predecessor mode meets the block's entry requirements. If |
| 417 | // not we insert an appropriate setreg instruction to modify the Mode register. |
| 418 | void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB, |
| 419 | const SIInstrInfo *TII) { |
| 420 | unsigned ThisBlock = MBB.getNumber(); |
| 421 | if (!BlockInfo[ThisBlock]->Pred.isCompatible(S&: BlockInfo[ThisBlock]->Require)) { |
| 422 | Status Delta = |
| 423 | BlockInfo[ThisBlock]->Pred.delta(S: BlockInfo[ThisBlock]->Require); |
| 424 | if (BlockInfo[ThisBlock]->FirstInsertionPoint) |
| 425 | insertSetreg(MBB, MI: BlockInfo[ThisBlock]->FirstInsertionPoint, TII, InstrMode: Delta); |
| 426 | else |
| 427 | insertSetreg(MBB, MI: &MBB.instr_front(), TII, InstrMode: Delta); |
| 428 | } |
| 429 | } |
| 430 | |
| 431 | bool SIModeRegisterLegacy::runOnMachineFunction(MachineFunction &MF) { |
| 432 | return SIModeRegister().run(MF); |
| 433 | } |
| 434 | |
| 435 | PreservedAnalyses SIModeRegisterPass::run(MachineFunction &MF, |
| 436 | MachineFunctionAnalysisManager &AM) { |
| 437 | if (!SIModeRegister().run(MF)) |
| 438 | return PreservedAnalyses::all(); |
| 439 | auto PA = getMachineFunctionPassPreservedAnalyses(); |
| 440 | PA.preserveSet<CFGAnalyses>(); |
| 441 | return PA; |
| 442 | } |
| 443 | |
| 444 | bool SIModeRegister::run(MachineFunction &MF) { |
| 445 | // Constrained FP intrinsics are used to support non-default rounding modes. |
| 446 | // strictfp attribute is required to mark functions with strict FP semantics |
| 447 | // having constrained FP intrinsics. This pass fixes up operations that uses |
| 448 | // a non-default rounding mode for non-strictfp functions. But it should not |
| 449 | // assume or modify any default rounding modes in case of strictfp functions. |
| 450 | const Function &F = MF.getFunction(); |
| 451 | if (F.hasFnAttribute(Kind: llvm::Attribute::StrictFP)) |
| 452 | return Changed; |
| 453 | BlockInfo.resize(new_size: MF.getNumBlockIDs()); |
| 454 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 455 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 456 | |
| 457 | // Processing is performed in a number of phases |
| 458 | |
| 459 | // Phase 1 - determine the initial mode required by each block, and add setreg |
| 460 | // instructions for intra block requirements. |
| 461 | for (MachineBasicBlock &BB : MF) |
| 462 | processBlockPhase1(MBB&: BB, TII); |
| 463 | |
| 464 | // Phase 2 - determine the exit mode from each block. We add all blocks to the |
| 465 | // list here, but will also add any that need to be revisited during Phase 2 |
| 466 | // processing. |
| 467 | for (MachineBasicBlock &BB : MF) |
| 468 | Phase2List.push(x: &BB); |
| 469 | while (!Phase2List.empty()) { |
| 470 | processBlockPhase2(MBB&: *Phase2List.front(), TII); |
| 471 | Phase2List.pop(); |
| 472 | } |
| 473 | |
| 474 | // Phase 3 - add an initial setreg to each block where the required entry mode |
| 475 | // is not satisfied by the exit mode of all its predecessors. |
| 476 | for (MachineBasicBlock &BB : MF) |
| 477 | processBlockPhase3(MBB&: BB, TII); |
| 478 | |
| 479 | BlockInfo.clear(); |
| 480 | |
| 481 | return Changed; |
| 482 | } |
| 483 | |