| 1 | //===--- X86DomainReassignment.cpp - Selectively switch register classes---===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This pass attempts to find instruction chains (closures) in one domain, |
| 10 | // and convert them to equivalent instructions in a different domain, |
| 11 | // if profitable. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "X86.h" |
| 16 | #include "X86InstrInfo.h" |
| 17 | #include "X86Subtarget.h" |
| 18 | #include "llvm/ADT/DenseMap.h" |
| 19 | #include "llvm/ADT/STLExtras.h" |
| 20 | #include "llvm/ADT/SmallVector.h" |
| 21 | #include "llvm/ADT/Statistic.h" |
| 22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 23 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 25 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 26 | #include "llvm/Support/Debug.h" |
| 27 | #include "llvm/Support/Printable.h" |
| 28 | #include <bitset> |
| 29 | |
| 30 | using namespace llvm; |
| 31 | |
| 32 | #define DEBUG_TYPE "x86-domain-reassignment" |
| 33 | |
| 34 | STATISTIC(NumClosuresConverted, "Number of closures converted by the pass" ); |
| 35 | |
| 36 | static cl::opt<bool> DisableX86DomainReassignment( |
| 37 | "disable-x86-domain-reassignment" , cl::Hidden, |
| 38 | cl::desc("X86: Disable Virtual Register Reassignment." ), cl::init(Val: false)); |
| 39 | |
| 40 | namespace { |
| 41 | enum RegDomain { NoDomain = -1, GPRDomain, MaskDomain, OtherDomain, NumDomains }; |
| 42 | |
| 43 | static bool isMask(const TargetRegisterClass *RC, |
| 44 | const TargetRegisterInfo *TRI) { |
| 45 | return X86::VK16RegClass.hasSubClassEq(RC); |
| 46 | } |
| 47 | |
| 48 | static RegDomain getDomain(const TargetRegisterClass *RC, |
| 49 | const TargetRegisterInfo *TRI) { |
| 50 | if (TRI->isGeneralPurposeRegisterClass(RC)) |
| 51 | return GPRDomain; |
| 52 | if (isMask(RC, TRI)) |
| 53 | return MaskDomain; |
| 54 | return OtherDomain; |
| 55 | } |
| 56 | |
| 57 | /// Return a register class equivalent to \p SrcRC, in \p Domain. |
| 58 | static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC, |
| 59 | RegDomain Domain) { |
| 60 | assert(Domain == MaskDomain && "add domain" ); |
| 61 | if (X86::GR8RegClass.hasSubClassEq(RC: SrcRC)) |
| 62 | return &X86::VK8RegClass; |
| 63 | if (X86::GR16RegClass.hasSubClassEq(RC: SrcRC)) |
| 64 | return &X86::VK16RegClass; |
| 65 | if (X86::GR32RegClass.hasSubClassEq(RC: SrcRC)) |
| 66 | return &X86::VK32RegClass; |
| 67 | if (X86::GR64RegClass.hasSubClassEq(RC: SrcRC)) |
| 68 | return &X86::VK64RegClass; |
| 69 | llvm_unreachable("add register class" ); |
| 70 | return nullptr; |
| 71 | } |
| 72 | |
| 73 | /// Abstract Instruction Converter class. |
| 74 | class InstrConverterBase { |
| 75 | protected: |
| 76 | unsigned SrcOpcode; |
| 77 | |
| 78 | public: |
| 79 | InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {} |
| 80 | |
| 81 | virtual ~InstrConverterBase() = default; |
| 82 | |
| 83 | /// \returns true if \p MI is legal to convert. |
| 84 | virtual bool isLegal(const MachineInstr *MI, |
| 85 | const TargetInstrInfo *TII) const { |
| 86 | assert(MI->getOpcode() == SrcOpcode && |
| 87 | "Wrong instruction passed to converter" ); |
| 88 | return true; |
| 89 | } |
| 90 | |
| 91 | /// Applies conversion to \p MI. |
| 92 | /// |
| 93 | /// \returns true if \p MI is no longer need, and can be deleted. |
| 94 | virtual bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
| 95 | MachineRegisterInfo *MRI) const = 0; |
| 96 | |
| 97 | /// \returns the cost increment incurred by converting \p MI. |
| 98 | virtual double (const MachineInstr *MI, |
| 99 | MachineRegisterInfo *MRI) const = 0; |
| 100 | }; |
| 101 | |
| 102 | /// An Instruction Converter which ignores the given instruction. |
| 103 | /// For example, PHI instructions can be safely ignored since only the registers |
| 104 | /// need to change. |
| 105 | class InstrIgnore : public InstrConverterBase { |
| 106 | public: |
| 107 | InstrIgnore(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} |
| 108 | |
| 109 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
| 110 | MachineRegisterInfo *MRI) const override { |
| 111 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
| 112 | return false; |
| 113 | } |
| 114 | |
| 115 | double (const MachineInstr *MI, |
| 116 | MachineRegisterInfo *MRI) const override { |
| 117 | return 0; |
| 118 | } |
| 119 | }; |
| 120 | |
| 121 | /// An Instruction Converter which replaces an instruction with another. |
| 122 | class InstrReplacer : public InstrConverterBase { |
| 123 | public: |
| 124 | /// Opcode of the destination instruction. |
| 125 | unsigned DstOpcode; |
| 126 | |
| 127 | InstrReplacer(unsigned SrcOpcode, unsigned DstOpcode) |
| 128 | : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} |
| 129 | |
| 130 | bool isLegal(const MachineInstr *MI, |
| 131 | const TargetInstrInfo *TII) const override { |
| 132 | if (!InstrConverterBase::isLegal(MI, TII)) |
| 133 | return false; |
| 134 | // It's illegal to replace an instruction that implicitly defines a register |
| 135 | // with an instruction that doesn't, unless that register dead. |
| 136 | for (const auto &MO : MI->implicit_operands()) |
| 137 | if (MO.isReg() && MO.isDef() && !MO.isDead() && |
| 138 | !TII->get(Opcode: DstOpcode).hasImplicitDefOfPhysReg(Reg: MO.getReg())) |
| 139 | return false; |
| 140 | return true; |
| 141 | } |
| 142 | |
| 143 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
| 144 | MachineRegisterInfo *MRI) const override { |
| 145 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
| 146 | MachineInstrBuilder Bld = |
| 147 | BuildMI(BB&: *MI->getParent(), I: MI, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: DstOpcode)); |
| 148 | // Transfer explicit operands from original instruction. Implicit operands |
| 149 | // are handled by BuildMI. |
| 150 | for (auto &Op : MI->explicit_operands()) |
| 151 | Bld.add(MO: Op); |
| 152 | return true; |
| 153 | } |
| 154 | |
| 155 | double (const MachineInstr *MI, |
| 156 | MachineRegisterInfo *MRI) const override { |
| 157 | // Assuming instructions have the same cost. |
| 158 | return 0; |
| 159 | } |
| 160 | }; |
| 161 | |
| 162 | /// An Instruction Converter which replaces an instruction with another, and |
| 163 | /// adds a COPY from the new instruction's destination to the old one's. |
| 164 | class InstrReplacerDstCOPY : public InstrConverterBase { |
| 165 | public: |
| 166 | unsigned DstOpcode; |
| 167 | |
| 168 | InstrReplacerDstCOPY(unsigned SrcOpcode, unsigned DstOpcode) |
| 169 | : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} |
| 170 | |
| 171 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
| 172 | MachineRegisterInfo *MRI) const override { |
| 173 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
| 174 | MachineBasicBlock *MBB = MI->getParent(); |
| 175 | const DebugLoc &DL = MI->getDebugLoc(); |
| 176 | |
| 177 | Register Reg = MRI->createVirtualRegister( |
| 178 | RegClass: TII->getRegClass(MCID: TII->get(Opcode: DstOpcode), OpNum: 0, TRI: MRI->getTargetRegisterInfo(), |
| 179 | MF: *MBB->getParent())); |
| 180 | MachineInstrBuilder Bld = BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: DstOpcode), DestReg: Reg); |
| 181 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) |
| 182 | Bld.add(MO); |
| 183 | |
| 184 | BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY)) |
| 185 | .add(MO: MI->getOperand(i: 0)) |
| 186 | .addReg(RegNo: Reg); |
| 187 | |
| 188 | return true; |
| 189 | } |
| 190 | |
| 191 | double (const MachineInstr *MI, |
| 192 | MachineRegisterInfo *MRI) const override { |
| 193 | // Assuming instructions have the same cost, and that COPY is in the same |
| 194 | // domain so it will be eliminated. |
| 195 | return 0; |
| 196 | } |
| 197 | }; |
| 198 | |
| 199 | /// An Instruction Converter for replacing COPY instructions. |
| 200 | class InstrCOPYReplacer : public InstrReplacer { |
| 201 | public: |
| 202 | RegDomain DstDomain; |
| 203 | |
| 204 | InstrCOPYReplacer(unsigned SrcOpcode, RegDomain DstDomain, unsigned DstOpcode) |
| 205 | : InstrReplacer(SrcOpcode, DstOpcode), DstDomain(DstDomain) {} |
| 206 | |
| 207 | bool isLegal(const MachineInstr *MI, |
| 208 | const TargetInstrInfo *TII) const override { |
| 209 | if (!InstrConverterBase::isLegal(MI, TII)) |
| 210 | return false; |
| 211 | |
| 212 | // Don't allow copies to/flow GR8/GR16 physical registers. |
| 213 | // FIXME: Is there some better way to support this? |
| 214 | Register DstReg = MI->getOperand(i: 0).getReg(); |
| 215 | if (DstReg.isPhysical() && (X86::GR8RegClass.contains(Reg: DstReg) || |
| 216 | X86::GR16RegClass.contains(Reg: DstReg))) |
| 217 | return false; |
| 218 | Register SrcReg = MI->getOperand(i: 1).getReg(); |
| 219 | if (SrcReg.isPhysical() && (X86::GR8RegClass.contains(Reg: SrcReg) || |
| 220 | X86::GR16RegClass.contains(Reg: SrcReg))) |
| 221 | return false; |
| 222 | |
| 223 | return true; |
| 224 | } |
| 225 | |
| 226 | double (const MachineInstr *MI, |
| 227 | MachineRegisterInfo *MRI) const override { |
| 228 | assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY" ); |
| 229 | |
| 230 | for (const auto &MO : MI->operands()) { |
| 231 | // Physical registers will not be converted. Assume that converting the |
| 232 | // COPY to the destination domain will eventually result in a actual |
| 233 | // instruction. |
| 234 | if (MO.getReg().isPhysical()) |
| 235 | return 1; |
| 236 | |
| 237 | RegDomain OpDomain = getDomain(RC: MRI->getRegClass(Reg: MO.getReg()), |
| 238 | TRI: MRI->getTargetRegisterInfo()); |
| 239 | // Converting a cross domain COPY to a same domain COPY should eliminate |
| 240 | // an insturction |
| 241 | if (OpDomain == DstDomain) |
| 242 | return -1; |
| 243 | } |
| 244 | return 0; |
| 245 | } |
| 246 | }; |
| 247 | |
| 248 | /// An Instruction Converter which replaces an instruction with a COPY. |
| 249 | class InstrReplaceWithCopy : public InstrConverterBase { |
| 250 | public: |
| 251 | // Source instruction operand Index, to be used as the COPY source. |
| 252 | unsigned SrcOpIdx; |
| 253 | |
| 254 | InstrReplaceWithCopy(unsigned SrcOpcode, unsigned SrcOpIdx) |
| 255 | : InstrConverterBase(SrcOpcode), SrcOpIdx(SrcOpIdx) {} |
| 256 | |
| 257 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
| 258 | MachineRegisterInfo *MRI) const override { |
| 259 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
| 260 | BuildMI(BB&: *MI->getParent(), I: MI, MIMD: MI->getDebugLoc(), |
| 261 | MCID: TII->get(Opcode: TargetOpcode::COPY)) |
| 262 | .add(MOs: {MI->getOperand(i: 0), MI->getOperand(i: SrcOpIdx)}); |
| 263 | return true; |
| 264 | } |
| 265 | |
| 266 | double (const MachineInstr *MI, |
| 267 | MachineRegisterInfo *MRI) const override { |
| 268 | return 0; |
| 269 | } |
| 270 | }; |
| 271 | |
| 272 | // Key type to be used by the Instruction Converters map. |
| 273 | // A converter is identified by <destination domain, source opcode> |
| 274 | typedef std::pair<int, unsigned> InstrConverterBaseKeyTy; |
| 275 | |
| 276 | typedef DenseMap<InstrConverterBaseKeyTy, std::unique_ptr<InstrConverterBase>> |
| 277 | InstrConverterBaseMap; |
| 278 | |
| 279 | /// A closure is a set of virtual register representing all of the edges in |
| 280 | /// the closure, as well as all of the instructions connected by those edges. |
| 281 | /// |
| 282 | /// A closure may encompass virtual registers in the same register bank that |
| 283 | /// have different widths. For example, it may contain 32-bit GPRs as well as |
| 284 | /// 64-bit GPRs. |
| 285 | /// |
| 286 | /// A closure that computes an address (i.e. defines a virtual register that is |
| 287 | /// used in a memory operand) excludes the instructions that contain memory |
| 288 | /// operands using the address. Such an instruction will be included in a |
| 289 | /// different closure that manipulates the loaded or stored value. |
| 290 | class Closure { |
| 291 | private: |
| 292 | /// Virtual registers in the closure. |
| 293 | DenseSet<Register> Edges; |
| 294 | |
| 295 | /// Instructions in the closure. |
| 296 | SmallVector<MachineInstr *, 8> Instrs; |
| 297 | |
| 298 | /// Domains which this closure can legally be reassigned to. |
| 299 | std::bitset<NumDomains> LegalDstDomains; |
| 300 | |
| 301 | /// An ID to uniquely identify this closure, even when it gets |
| 302 | /// moved around |
| 303 | unsigned ID; |
| 304 | |
| 305 | public: |
| 306 | Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) { |
| 307 | for (RegDomain D : LegalDstDomainList) |
| 308 | LegalDstDomains.set(position: D); |
| 309 | } |
| 310 | |
| 311 | /// Mark this closure as illegal for reassignment to all domains. |
| 312 | void setAllIllegal() { LegalDstDomains.reset(); } |
| 313 | |
| 314 | /// \returns true if this closure has domains which are legal to reassign to. |
| 315 | bool hasLegalDstDomain() const { return LegalDstDomains.any(); } |
| 316 | |
| 317 | /// \returns true if is legal to reassign this closure to domain \p RD. |
| 318 | bool isLegal(RegDomain RD) const { return LegalDstDomains[RD]; } |
| 319 | |
| 320 | /// Mark this closure as illegal for reassignment to domain \p RD. |
| 321 | void setIllegal(RegDomain RD) { LegalDstDomains[RD] = false; } |
| 322 | |
| 323 | bool empty() const { return Edges.empty(); } |
| 324 | |
| 325 | bool insertEdge(Register Reg) { return Edges.insert(V: Reg).second; } |
| 326 | |
| 327 | using const_edge_iterator = DenseSet<Register>::const_iterator; |
| 328 | iterator_range<const_edge_iterator> edges() const { |
| 329 | return iterator_range<const_edge_iterator>(Edges.begin(), Edges.end()); |
| 330 | } |
| 331 | |
| 332 | void addInstruction(MachineInstr *I) { |
| 333 | Instrs.push_back(Elt: I); |
| 334 | } |
| 335 | |
| 336 | ArrayRef<MachineInstr *> instructions() const { |
| 337 | return Instrs; |
| 338 | } |
| 339 | |
| 340 | LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const { |
| 341 | dbgs() << "Registers: " ; |
| 342 | bool First = true; |
| 343 | for (Register Reg : Edges) { |
| 344 | if (!First) |
| 345 | dbgs() << ", " ; |
| 346 | First = false; |
| 347 | dbgs() << printReg(Reg, TRI: MRI->getTargetRegisterInfo(), SubIdx: 0, MRI); |
| 348 | } |
| 349 | dbgs() << "\n" << "Instructions:" ; |
| 350 | for (MachineInstr *MI : Instrs) { |
| 351 | dbgs() << "\n " ; |
| 352 | MI->print(OS&: dbgs()); |
| 353 | } |
| 354 | dbgs() << "\n" ; |
| 355 | } |
| 356 | |
| 357 | unsigned getID() const { |
| 358 | return ID; |
| 359 | } |
| 360 | |
| 361 | }; |
| 362 | |
| 363 | class X86DomainReassignment : public MachineFunctionPass { |
| 364 | const X86Subtarget *STI = nullptr; |
| 365 | MachineRegisterInfo *MRI = nullptr; |
| 366 | const X86InstrInfo *TII = nullptr; |
| 367 | |
| 368 | /// All edges that are included in some closure |
| 369 | DenseMap<Register, unsigned> EnclosedEdges; |
| 370 | |
| 371 | /// All instructions that are included in some closure. |
| 372 | DenseMap<MachineInstr *, unsigned> EnclosedInstrs; |
| 373 | |
| 374 | public: |
| 375 | static char ID; |
| 376 | |
| 377 | X86DomainReassignment() : MachineFunctionPass(ID) { } |
| 378 | |
| 379 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 380 | |
| 381 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 382 | AU.setPreservesCFG(); |
| 383 | MachineFunctionPass::getAnalysisUsage(AU); |
| 384 | } |
| 385 | |
| 386 | StringRef getPassName() const override { |
| 387 | return "X86 Domain Reassignment Pass" ; |
| 388 | } |
| 389 | |
| 390 | private: |
| 391 | /// A map of available Instruction Converters. |
| 392 | InstrConverterBaseMap Converters; |
| 393 | |
| 394 | /// Initialize Converters map. |
| 395 | void initConverters(); |
| 396 | |
| 397 | /// Starting from \Reg, expand the closure as much as possible. |
| 398 | void buildClosure(Closure &, Register Reg); |
| 399 | |
| 400 | /// Enqueue \p Reg to be considered for addition to the closure. |
| 401 | /// Return false if the closure becomes invalid. |
| 402 | bool visitRegister(Closure &, Register Reg, RegDomain &Domain, |
| 403 | SmallVectorImpl<Register> &Worklist); |
| 404 | |
| 405 | /// Reassign the closure to \p Domain. |
| 406 | void reassign(const Closure &C, RegDomain Domain) const; |
| 407 | |
| 408 | /// Add \p MI to the closure. |
| 409 | /// Return false if the closure becomes invalid. |
| 410 | bool encloseInstr(Closure &C, MachineInstr *MI); |
| 411 | |
| 412 | /// /returns true if it is profitable to reassign the closure to \p Domain. |
| 413 | bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const; |
| 414 | |
| 415 | /// Calculate the total cost of reassigning the closure to \p Domain. |
| 416 | double calculateCost(const Closure &C, RegDomain Domain) const; |
| 417 | }; |
| 418 | |
| 419 | char X86DomainReassignment::ID = 0; |
| 420 | |
| 421 | } // End anonymous namespace. |
| 422 | |
| 423 | bool X86DomainReassignment::visitRegister(Closure &C, Register Reg, |
| 424 | RegDomain &Domain, |
| 425 | SmallVectorImpl<Register> &Worklist) { |
| 426 | if (!Reg.isVirtual()) |
| 427 | return true; |
| 428 | |
| 429 | auto I = EnclosedEdges.find(Val: Reg); |
| 430 | if (I != EnclosedEdges.end()) { |
| 431 | if (I->second != C.getID()) { |
| 432 | C.setAllIllegal(); |
| 433 | return false; |
| 434 | } |
| 435 | return true; |
| 436 | } |
| 437 | |
| 438 | if (!MRI->hasOneDef(RegNo: Reg)) |
| 439 | return true; |
| 440 | |
| 441 | RegDomain RD = getDomain(RC: MRI->getRegClass(Reg), TRI: MRI->getTargetRegisterInfo()); |
| 442 | // First edge in closure sets the domain. |
| 443 | if (Domain == NoDomain) |
| 444 | Domain = RD; |
| 445 | |
| 446 | if (Domain != RD) |
| 447 | return true; |
| 448 | |
| 449 | Worklist.push_back(Elt: Reg); |
| 450 | return true; |
| 451 | } |
| 452 | |
| 453 | bool X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { |
| 454 | auto [I, Inserted] = EnclosedInstrs.try_emplace(Key: MI, Args: C.getID()); |
| 455 | if (!Inserted) { |
| 456 | if (I->second != C.getID()) { |
| 457 | // Instruction already belongs to another closure, avoid conflicts between |
| 458 | // closure and mark this closure as illegal. |
| 459 | C.setAllIllegal(); |
| 460 | return false; |
| 461 | } |
| 462 | return true; |
| 463 | } |
| 464 | |
| 465 | C.addInstruction(I: MI); |
| 466 | |
| 467 | // Mark closure as illegal for reassignment to domains, if there is no |
| 468 | // converter for the instruction or if the converter cannot convert the |
| 469 | // instruction. |
| 470 | for (int i = 0; i != NumDomains; ++i) { |
| 471 | if (C.isLegal(RD: (RegDomain)i)) { |
| 472 | auto I = Converters.find(Val: {i, MI->getOpcode()}); |
| 473 | if (I == Converters.end() || !I->second->isLegal(MI, TII)) |
| 474 | C.setIllegal((RegDomain)i); |
| 475 | } |
| 476 | } |
| 477 | return C.hasLegalDstDomain(); |
| 478 | } |
| 479 | |
| 480 | double X86DomainReassignment::calculateCost(const Closure &C, |
| 481 | RegDomain DstDomain) const { |
| 482 | assert(C.isLegal(DstDomain) && "Cannot calculate cost for illegal closure" ); |
| 483 | |
| 484 | double Cost = 0.0; |
| 485 | for (auto *MI : C.instructions()) |
| 486 | Cost += Converters.find(Val: {DstDomain, MI->getOpcode()}) |
| 487 | ->second->getExtraCost(MI, MRI); |
| 488 | return Cost; |
| 489 | } |
| 490 | |
| 491 | bool X86DomainReassignment::isReassignmentProfitable(const Closure &C, |
| 492 | RegDomain Domain) const { |
| 493 | return calculateCost(C, DstDomain: Domain) < 0.0; |
| 494 | } |
| 495 | |
| 496 | void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { |
| 497 | assert(C.isLegal(Domain) && "Cannot convert illegal closure" ); |
| 498 | |
| 499 | // Iterate all instructions in the closure, convert each one using the |
| 500 | // appropriate converter. |
| 501 | SmallVector<MachineInstr *, 8> ToErase; |
| 502 | for (auto *MI : C.instructions()) |
| 503 | if (Converters.find(Val: {Domain, MI->getOpcode()}) |
| 504 | ->second->convertInstr(MI, TII, MRI)) |
| 505 | ToErase.push_back(Elt: MI); |
| 506 | |
| 507 | // Iterate all registers in the closure, replace them with registers in the |
| 508 | // destination domain. |
| 509 | for (Register Reg : C.edges()) { |
| 510 | MRI->setRegClass(Reg, RC: getDstRC(SrcRC: MRI->getRegClass(Reg), Domain)); |
| 511 | for (auto &MO : MRI->use_operands(Reg)) { |
| 512 | if (MO.isReg()) |
| 513 | // Remove all subregister references as they are not valid in the |
| 514 | // destination domain. |
| 515 | MO.setSubReg(0); |
| 516 | } |
| 517 | } |
| 518 | |
| 519 | for (auto *MI : ToErase) |
| 520 | MI->eraseFromParent(); |
| 521 | } |
| 522 | |
| 523 | /// \returns true when \p Reg is used as part of an address calculation in \p |
| 524 | /// MI. |
| 525 | static bool usedAsAddr(const MachineInstr &MI, Register Reg, |
| 526 | const TargetInstrInfo *TII) { |
| 527 | if (!MI.mayLoadOrStore()) |
| 528 | return false; |
| 529 | |
| 530 | const MCInstrDesc &Desc = TII->get(Opcode: MI.getOpcode()); |
| 531 | int MemOpStart = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags); |
| 532 | if (MemOpStart == -1) |
| 533 | return false; |
| 534 | |
| 535 | MemOpStart += X86II::getOperandBias(Desc); |
| 536 | for (unsigned MemOpIdx = MemOpStart, |
| 537 | MemOpEnd = MemOpStart + X86::AddrNumOperands; |
| 538 | MemOpIdx < MemOpEnd; ++MemOpIdx) { |
| 539 | const MachineOperand &Op = MI.getOperand(i: MemOpIdx); |
| 540 | if (Op.isReg() && Op.getReg() == Reg) |
| 541 | return true; |
| 542 | } |
| 543 | return false; |
| 544 | } |
| 545 | |
| 546 | void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { |
| 547 | SmallVector<Register, 4> Worklist; |
| 548 | RegDomain Domain = NoDomain; |
| 549 | visitRegister(C, Reg, Domain, Worklist); |
| 550 | while (!Worklist.empty()) { |
| 551 | Register CurReg = Worklist.pop_back_val(); |
| 552 | |
| 553 | // Register already in this closure. |
| 554 | if (!C.insertEdge(Reg: CurReg)) |
| 555 | continue; |
| 556 | EnclosedEdges[Reg] = C.getID(); |
| 557 | |
| 558 | MachineInstr *DefMI = MRI->getVRegDef(Reg: CurReg); |
| 559 | if (!encloseInstr(C, MI: DefMI)) |
| 560 | return; |
| 561 | |
| 562 | // Add register used by the defining MI to the worklist. |
| 563 | // Do not add registers which are used in address calculation, they will be |
| 564 | // added to a different closure. |
| 565 | int OpEnd = DefMI->getNumOperands(); |
| 566 | const MCInstrDesc &Desc = DefMI->getDesc(); |
| 567 | int MemOp = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags); |
| 568 | if (MemOp != -1) |
| 569 | MemOp += X86II::getOperandBias(Desc); |
| 570 | for (int OpIdx = 0; OpIdx < OpEnd; ++OpIdx) { |
| 571 | if (OpIdx == MemOp) { |
| 572 | // skip address calculation. |
| 573 | OpIdx += (X86::AddrNumOperands - 1); |
| 574 | continue; |
| 575 | } |
| 576 | auto &Op = DefMI->getOperand(i: OpIdx); |
| 577 | if (!Op.isReg() || !Op.isUse()) |
| 578 | continue; |
| 579 | if (!visitRegister(C, Reg: Op.getReg(), Domain, Worklist)) |
| 580 | return; |
| 581 | } |
| 582 | |
| 583 | // Expand closure through register uses. |
| 584 | for (auto &UseMI : MRI->use_nodbg_instructions(Reg: CurReg)) { |
| 585 | // We would like to avoid converting closures which calculare addresses, |
| 586 | // as this should remain in GPRs. |
| 587 | if (usedAsAddr(MI: UseMI, Reg: CurReg, TII)) { |
| 588 | C.setAllIllegal(); |
| 589 | return; |
| 590 | } |
| 591 | if (!encloseInstr(C, MI: &UseMI)) |
| 592 | return; |
| 593 | |
| 594 | for (auto &DefOp : UseMI.defs()) { |
| 595 | if (!DefOp.isReg()) |
| 596 | continue; |
| 597 | |
| 598 | Register DefReg = DefOp.getReg(); |
| 599 | if (!DefReg.isVirtual()) { |
| 600 | C.setAllIllegal(); |
| 601 | return; |
| 602 | } |
| 603 | if (!visitRegister(C, Reg: DefReg, Domain, Worklist)) |
| 604 | return; |
| 605 | } |
| 606 | } |
| 607 | } |
| 608 | } |
| 609 | |
| 610 | void X86DomainReassignment::initConverters() { |
| 611 | Converters[{MaskDomain, TargetOpcode::PHI}] = |
| 612 | std::make_unique<InstrIgnore>(args: TargetOpcode::PHI); |
| 613 | |
| 614 | Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] = |
| 615 | std::make_unique<InstrIgnore>(args: TargetOpcode::IMPLICIT_DEF); |
| 616 | |
| 617 | Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] = |
| 618 | std::make_unique<InstrReplaceWithCopy>(args: TargetOpcode::INSERT_SUBREG, args: 2); |
| 619 | |
| 620 | Converters[{MaskDomain, TargetOpcode::COPY}] = |
| 621 | std::make_unique<InstrCOPYReplacer>(args: TargetOpcode::COPY, args: MaskDomain, |
| 622 | args: TargetOpcode::COPY); |
| 623 | |
| 624 | auto createReplacerDstCOPY = [&](unsigned From, unsigned To) { |
| 625 | Converters[{MaskDomain, From}] = |
| 626 | std::make_unique<InstrReplacerDstCOPY>(args&: From, args&: To); |
| 627 | }; |
| 628 | |
| 629 | #define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC |
| 630 | createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
| 631 | createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
| 632 | |
| 633 | createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
| 634 | createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
| 635 | |
| 636 | if (STI->hasDQI()) { |
| 637 | createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
| 638 | createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
| 639 | createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
| 640 | |
| 641 | createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
| 642 | createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
| 643 | createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
| 644 | } |
| 645 | |
| 646 | auto createReplacer = [&](unsigned From, unsigned To) { |
| 647 | Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(args&: From, args&: To); |
| 648 | }; |
| 649 | |
| 650 | createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
| 651 | createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk)); |
| 652 | createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
| 653 | createReplacer(X86::SHR16ri, X86::KSHIFTRWki); |
| 654 | createReplacer(X86::SHL16ri, X86::KSHIFTLWki); |
| 655 | createReplacer(X86::NOT16r, X86::KNOTWkk); |
| 656 | createReplacer(X86::OR16rr, X86::KORWkk); |
| 657 | createReplacer(X86::AND16rr, X86::KANDWkk); |
| 658 | createReplacer(X86::XOR16rr, X86::KXORWkk); |
| 659 | |
| 660 | bool HasNDD = STI->hasNDD(); |
| 661 | if (HasNDD) { |
| 662 | createReplacer(X86::SHR16ri_ND, X86::KSHIFTRWki); |
| 663 | createReplacer(X86::SHL16ri_ND, X86::KSHIFTLWki); |
| 664 | createReplacer(X86::NOT16r_ND, X86::KNOTWkk); |
| 665 | createReplacer(X86::OR16rr_ND, X86::KORWkk); |
| 666 | createReplacer(X86::AND16rr_ND, X86::KANDWkk); |
| 667 | createReplacer(X86::XOR16rr_ND, X86::KXORWkk); |
| 668 | } |
| 669 | |
| 670 | if (STI->hasBWI()) { |
| 671 | createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm)); |
| 672 | createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm)); |
| 673 | |
| 674 | createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk)); |
| 675 | createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk)); |
| 676 | |
| 677 | createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk)); |
| 678 | createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk)); |
| 679 | |
| 680 | createReplacer(X86::SHR32ri, X86::KSHIFTRDki); |
| 681 | createReplacer(X86::SHR64ri, X86::KSHIFTRQki); |
| 682 | |
| 683 | createReplacer(X86::SHL32ri, X86::KSHIFTLDki); |
| 684 | createReplacer(X86::SHL64ri, X86::KSHIFTLQki); |
| 685 | |
| 686 | createReplacer(X86::ADD32rr, X86::KADDDkk); |
| 687 | createReplacer(X86::ADD64rr, X86::KADDQkk); |
| 688 | |
| 689 | createReplacer(X86::NOT32r, X86::KNOTDkk); |
| 690 | createReplacer(X86::NOT64r, X86::KNOTQkk); |
| 691 | |
| 692 | createReplacer(X86::OR32rr, X86::KORDkk); |
| 693 | createReplacer(X86::OR64rr, X86::KORQkk); |
| 694 | |
| 695 | createReplacer(X86::AND32rr, X86::KANDDkk); |
| 696 | createReplacer(X86::AND64rr, X86::KANDQkk); |
| 697 | |
| 698 | createReplacer(X86::ANDN32rr, X86::KANDNDkk); |
| 699 | createReplacer(X86::ANDN64rr, X86::KANDNQkk); |
| 700 | |
| 701 | createReplacer(X86::XOR32rr, X86::KXORDkk); |
| 702 | createReplacer(X86::XOR64rr, X86::KXORQkk); |
| 703 | |
| 704 | if (HasNDD) { |
| 705 | createReplacer(X86::SHR32ri_ND, X86::KSHIFTRDki); |
| 706 | createReplacer(X86::SHL32ri_ND, X86::KSHIFTLDki); |
| 707 | createReplacer(X86::ADD32rr_ND, X86::KADDDkk); |
| 708 | createReplacer(X86::NOT32r_ND, X86::KNOTDkk); |
| 709 | createReplacer(X86::OR32rr_ND, X86::KORDkk); |
| 710 | createReplacer(X86::AND32rr_ND, X86::KANDDkk); |
| 711 | createReplacer(X86::XOR32rr_ND, X86::KXORDkk); |
| 712 | createReplacer(X86::SHR64ri_ND, X86::KSHIFTRQki); |
| 713 | createReplacer(X86::SHL64ri_ND, X86::KSHIFTLQki); |
| 714 | createReplacer(X86::ADD64rr_ND, X86::KADDQkk); |
| 715 | createReplacer(X86::NOT64r_ND, X86::KNOTQkk); |
| 716 | createReplacer(X86::OR64rr_ND, X86::KORQkk); |
| 717 | createReplacer(X86::AND64rr_ND, X86::KANDQkk); |
| 718 | createReplacer(X86::XOR64rr_ND, X86::KXORQkk); |
| 719 | } |
| 720 | |
| 721 | // TODO: KTEST is not a replacement for TEST due to flag differences. Need |
| 722 | // to prove only Z flag is used. |
| 723 | // createReplacer(X86::TEST32rr, X86::KTESTDkk); |
| 724 | // createReplacer(X86::TEST64rr, X86::KTESTQkk); |
| 725 | } |
| 726 | |
| 727 | if (STI->hasDQI()) { |
| 728 | createReplacer(X86::ADD8rr, X86::KADDBkk); |
| 729 | createReplacer(X86::ADD16rr, X86::KADDWkk); |
| 730 | |
| 731 | createReplacer(X86::AND8rr, X86::KANDBkk); |
| 732 | |
| 733 | createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
| 734 | createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk)); |
| 735 | createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
| 736 | |
| 737 | createReplacer(X86::NOT8r, X86::KNOTBkk); |
| 738 | |
| 739 | createReplacer(X86::OR8rr, X86::KORBkk); |
| 740 | |
| 741 | createReplacer(X86::SHR8ri, X86::KSHIFTRBki); |
| 742 | createReplacer(X86::SHL8ri, X86::KSHIFTLBki); |
| 743 | |
| 744 | // TODO: KTEST is not a replacement for TEST due to flag differences. Need |
| 745 | // to prove only Z flag is used. |
| 746 | // createReplacer(X86::TEST8rr, X86::KTESTBkk); |
| 747 | // createReplacer(X86::TEST16rr, X86::KTESTWkk); |
| 748 | |
| 749 | createReplacer(X86::XOR8rr, X86::KXORBkk); |
| 750 | |
| 751 | if (HasNDD) { |
| 752 | createReplacer(X86::ADD8rr_ND, X86::KADDBkk); |
| 753 | createReplacer(X86::ADD16rr_ND, X86::KADDWkk); |
| 754 | createReplacer(X86::AND8rr_ND, X86::KANDBkk); |
| 755 | createReplacer(X86::NOT8r_ND, X86::KNOTBkk); |
| 756 | createReplacer(X86::OR8rr_ND, X86::KORBkk); |
| 757 | createReplacer(X86::SHR8ri_ND, X86::KSHIFTRBki); |
| 758 | createReplacer(X86::SHL8ri_ND, X86::KSHIFTLBki); |
| 759 | createReplacer(X86::XOR8rr_ND, X86::KXORBkk); |
| 760 | } |
| 761 | } |
| 762 | #undef GET_EGPR_IF_ENABLED |
| 763 | } |
| 764 | |
| 765 | bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { |
| 766 | if (skipFunction(F: MF.getFunction())) |
| 767 | return false; |
| 768 | if (DisableX86DomainReassignment) |
| 769 | return false; |
| 770 | |
| 771 | LLVM_DEBUG( |
| 772 | dbgs() << "***** Machine Function before Domain Reassignment *****\n" ); |
| 773 | LLVM_DEBUG(MF.print(dbgs())); |
| 774 | |
| 775 | STI = &MF.getSubtarget<X86Subtarget>(); |
| 776 | // GPR->K is the only transformation currently supported, bail out early if no |
| 777 | // AVX512. |
| 778 | // TODO: We're also bailing of AVX512BW isn't supported since we use VK32 and |
| 779 | // VK64 for GR32/GR64, but those aren't legal classes on KNL. If the register |
| 780 | // coalescer doesn't clean it up and we generate a spill we will crash. |
| 781 | if (!STI->hasAVX512() || !STI->hasBWI()) |
| 782 | return false; |
| 783 | |
| 784 | MRI = &MF.getRegInfo(); |
| 785 | assert(MRI->isSSA() && "Expected MIR to be in SSA form" ); |
| 786 | |
| 787 | TII = STI->getInstrInfo(); |
| 788 | initConverters(); |
| 789 | bool Changed = false; |
| 790 | |
| 791 | EnclosedEdges.clear(); |
| 792 | EnclosedInstrs.clear(); |
| 793 | |
| 794 | std::vector<Closure> Closures; |
| 795 | |
| 796 | // Go over all virtual registers and calculate a closure. |
| 797 | unsigned ClosureID = 0; |
| 798 | for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { |
| 799 | Register Reg = Register::index2VirtReg(Index: Idx); |
| 800 | |
| 801 | // Skip unused VRegs. |
| 802 | if (MRI->reg_nodbg_empty(RegNo: Reg)) |
| 803 | continue; |
| 804 | |
| 805 | // GPR only current source domain supported. |
| 806 | if (!MRI->getTargetRegisterInfo()->isGeneralPurposeRegisterClass( |
| 807 | RC: MRI->getRegClass(Reg))) |
| 808 | continue; |
| 809 | |
| 810 | // Register already in closure. |
| 811 | if (EnclosedEdges.contains(Val: Reg)) |
| 812 | continue; |
| 813 | |
| 814 | // Calculate closure starting with Reg. |
| 815 | Closure C(ClosureID++, {MaskDomain}); |
| 816 | buildClosure(C, Reg); |
| 817 | |
| 818 | // Collect all closures that can potentially be converted. |
| 819 | if (!C.empty() && C.isLegal(RD: MaskDomain)) |
| 820 | Closures.push_back(x: std::move(C)); |
| 821 | } |
| 822 | |
| 823 | for (Closure &C : Closures) { |
| 824 | LLVM_DEBUG(C.dump(MRI)); |
| 825 | if (isReassignmentProfitable(C, Domain: MaskDomain)) { |
| 826 | reassign(C, Domain: MaskDomain); |
| 827 | ++NumClosuresConverted; |
| 828 | Changed = true; |
| 829 | } |
| 830 | } |
| 831 | |
| 832 | LLVM_DEBUG( |
| 833 | dbgs() << "***** Machine Function after Domain Reassignment *****\n" ); |
| 834 | LLVM_DEBUG(MF.print(dbgs())); |
| 835 | |
| 836 | return Changed; |
| 837 | } |
| 838 | |
| 839 | INITIALIZE_PASS(X86DomainReassignment, "x86-domain-reassignment" , |
| 840 | "X86 Domain Reassignment Pass" , false, false) |
| 841 | |
| 842 | /// Returns an instance of the Domain Reassignment pass. |
| 843 | FunctionPass *llvm::createX86DomainReassignmentPass() { |
| 844 | return new X86DomainReassignment(); |
| 845 | } |
| 846 | |