1 | //===--- X86DomainReassignment.cpp - Selectively switch register classes---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass attempts to find instruction chains (closures) in one domain, |
10 | // and convert them to equivalent instructions in a different domain, |
11 | // if profitable. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "X86.h" |
16 | #include "X86InstrInfo.h" |
17 | #include "X86Subtarget.h" |
18 | #include "llvm/ADT/BitVector.h" |
19 | #include "llvm/ADT/DenseMap.h" |
20 | #include "llvm/ADT/STLExtras.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/CodeGen/MachineFunctionPass.h" |
24 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
26 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
27 | #include "llvm/Support/Debug.h" |
28 | #include "llvm/Support/Printable.h" |
29 | #include <bitset> |
30 | |
31 | using namespace llvm; |
32 | |
33 | #define DEBUG_TYPE "x86-domain-reassignment" |
34 | |
35 | STATISTIC(NumClosuresConverted, "Number of closures converted by the pass" ); |
36 | |
37 | static cl::opt<bool> DisableX86DomainReassignment( |
38 | "disable-x86-domain-reassignment" , cl::Hidden, |
39 | cl::desc("X86: Disable Virtual Register Reassignment." ), cl::init(Val: false)); |
40 | |
41 | namespace { |
42 | enum RegDomain { NoDomain = -1, GPRDomain, MaskDomain, OtherDomain, NumDomains }; |
43 | |
44 | static bool isGPR(const TargetRegisterClass *RC) { |
45 | return X86::GR64RegClass.hasSubClassEq(RC) || |
46 | X86::GR32RegClass.hasSubClassEq(RC) || |
47 | X86::GR16RegClass.hasSubClassEq(RC) || |
48 | X86::GR8RegClass.hasSubClassEq(RC); |
49 | } |
50 | |
51 | static bool isMask(const TargetRegisterClass *RC, |
52 | const TargetRegisterInfo *TRI) { |
53 | return X86::VK16RegClass.hasSubClassEq(RC); |
54 | } |
55 | |
56 | static RegDomain getDomain(const TargetRegisterClass *RC, |
57 | const TargetRegisterInfo *TRI) { |
58 | if (isGPR(RC)) |
59 | return GPRDomain; |
60 | if (isMask(RC, TRI)) |
61 | return MaskDomain; |
62 | return OtherDomain; |
63 | } |
64 | |
65 | /// Return a register class equivalent to \p SrcRC, in \p Domain. |
66 | static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC, |
67 | RegDomain Domain) { |
68 | assert(Domain == MaskDomain && "add domain" ); |
69 | if (X86::GR8RegClass.hasSubClassEq(RC: SrcRC)) |
70 | return &X86::VK8RegClass; |
71 | if (X86::GR16RegClass.hasSubClassEq(RC: SrcRC)) |
72 | return &X86::VK16RegClass; |
73 | if (X86::GR32RegClass.hasSubClassEq(RC: SrcRC)) |
74 | return &X86::VK32RegClass; |
75 | if (X86::GR64RegClass.hasSubClassEq(RC: SrcRC)) |
76 | return &X86::VK64RegClass; |
77 | llvm_unreachable("add register class" ); |
78 | return nullptr; |
79 | } |
80 | |
81 | /// Abstract Instruction Converter class. |
82 | class InstrConverterBase { |
83 | protected: |
84 | unsigned SrcOpcode; |
85 | |
86 | public: |
87 | InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {} |
88 | |
89 | virtual ~InstrConverterBase() = default; |
90 | |
91 | /// \returns true if \p MI is legal to convert. |
92 | virtual bool isLegal(const MachineInstr *MI, |
93 | const TargetInstrInfo *TII) const { |
94 | assert(MI->getOpcode() == SrcOpcode && |
95 | "Wrong instruction passed to converter" ); |
96 | return true; |
97 | } |
98 | |
99 | /// Applies conversion to \p MI. |
100 | /// |
101 | /// \returns true if \p MI is no longer need, and can be deleted. |
102 | virtual bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
103 | MachineRegisterInfo *MRI) const = 0; |
104 | |
105 | /// \returns the cost increment incurred by converting \p MI. |
106 | virtual double (const MachineInstr *MI, |
107 | MachineRegisterInfo *MRI) const = 0; |
108 | }; |
109 | |
110 | /// An Instruction Converter which ignores the given instruction. |
111 | /// For example, PHI instructions can be safely ignored since only the registers |
112 | /// need to change. |
113 | class InstrIgnore : public InstrConverterBase { |
114 | public: |
115 | InstrIgnore(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} |
116 | |
117 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
118 | MachineRegisterInfo *MRI) const override { |
119 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
120 | return false; |
121 | } |
122 | |
123 | double (const MachineInstr *MI, |
124 | MachineRegisterInfo *MRI) const override { |
125 | return 0; |
126 | } |
127 | }; |
128 | |
129 | /// An Instruction Converter which replaces an instruction with another. |
130 | class InstrReplacer : public InstrConverterBase { |
131 | public: |
132 | /// Opcode of the destination instruction. |
133 | unsigned DstOpcode; |
134 | |
135 | InstrReplacer(unsigned SrcOpcode, unsigned DstOpcode) |
136 | : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} |
137 | |
138 | bool isLegal(const MachineInstr *MI, |
139 | const TargetInstrInfo *TII) const override { |
140 | if (!InstrConverterBase::isLegal(MI, TII)) |
141 | return false; |
142 | // It's illegal to replace an instruction that implicitly defines a register |
143 | // with an instruction that doesn't, unless that register dead. |
144 | for (const auto &MO : MI->implicit_operands()) |
145 | if (MO.isReg() && MO.isDef() && !MO.isDead() && |
146 | !TII->get(Opcode: DstOpcode).hasImplicitDefOfPhysReg(Reg: MO.getReg())) |
147 | return false; |
148 | return true; |
149 | } |
150 | |
151 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
152 | MachineRegisterInfo *MRI) const override { |
153 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
154 | MachineInstrBuilder Bld = |
155 | BuildMI(BB&: *MI->getParent(), I: MI, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: DstOpcode)); |
156 | // Transfer explicit operands from original instruction. Implicit operands |
157 | // are handled by BuildMI. |
158 | for (auto &Op : MI->explicit_operands()) |
159 | Bld.add(MO: Op); |
160 | return true; |
161 | } |
162 | |
163 | double (const MachineInstr *MI, |
164 | MachineRegisterInfo *MRI) const override { |
165 | // Assuming instructions have the same cost. |
166 | return 0; |
167 | } |
168 | }; |
169 | |
170 | /// An Instruction Converter which replaces an instruction with another, and |
171 | /// adds a COPY from the new instruction's destination to the old one's. |
172 | class InstrReplacerDstCOPY : public InstrConverterBase { |
173 | public: |
174 | unsigned DstOpcode; |
175 | |
176 | InstrReplacerDstCOPY(unsigned SrcOpcode, unsigned DstOpcode) |
177 | : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} |
178 | |
179 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
180 | MachineRegisterInfo *MRI) const override { |
181 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
182 | MachineBasicBlock *MBB = MI->getParent(); |
183 | const DebugLoc &DL = MI->getDebugLoc(); |
184 | |
185 | Register Reg = MRI->createVirtualRegister( |
186 | RegClass: TII->getRegClass(MCID: TII->get(Opcode: DstOpcode), OpNum: 0, TRI: MRI->getTargetRegisterInfo(), |
187 | MF: *MBB->getParent())); |
188 | MachineInstrBuilder Bld = BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: DstOpcode), DestReg: Reg); |
189 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) |
190 | Bld.add(MO); |
191 | |
192 | BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY)) |
193 | .add(MO: MI->getOperand(i: 0)) |
194 | .addReg(RegNo: Reg); |
195 | |
196 | return true; |
197 | } |
198 | |
199 | double (const MachineInstr *MI, |
200 | MachineRegisterInfo *MRI) const override { |
201 | // Assuming instructions have the same cost, and that COPY is in the same |
202 | // domain so it will be eliminated. |
203 | return 0; |
204 | } |
205 | }; |
206 | |
207 | /// An Instruction Converter for replacing COPY instructions. |
208 | class InstrCOPYReplacer : public InstrReplacer { |
209 | public: |
210 | RegDomain DstDomain; |
211 | |
212 | InstrCOPYReplacer(unsigned SrcOpcode, RegDomain DstDomain, unsigned DstOpcode) |
213 | : InstrReplacer(SrcOpcode, DstOpcode), DstDomain(DstDomain) {} |
214 | |
215 | bool isLegal(const MachineInstr *MI, |
216 | const TargetInstrInfo *TII) const override { |
217 | if (!InstrConverterBase::isLegal(MI, TII)) |
218 | return false; |
219 | |
220 | // Don't allow copies to/flow GR8/GR16 physical registers. |
221 | // FIXME: Is there some better way to support this? |
222 | Register DstReg = MI->getOperand(i: 0).getReg(); |
223 | if (DstReg.isPhysical() && (X86::GR8RegClass.contains(Reg: DstReg) || |
224 | X86::GR16RegClass.contains(Reg: DstReg))) |
225 | return false; |
226 | Register SrcReg = MI->getOperand(i: 1).getReg(); |
227 | if (SrcReg.isPhysical() && (X86::GR8RegClass.contains(Reg: SrcReg) || |
228 | X86::GR16RegClass.contains(Reg: SrcReg))) |
229 | return false; |
230 | |
231 | return true; |
232 | } |
233 | |
234 | double (const MachineInstr *MI, |
235 | MachineRegisterInfo *MRI) const override { |
236 | assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY" ); |
237 | |
238 | for (const auto &MO : MI->operands()) { |
239 | // Physical registers will not be converted. Assume that converting the |
240 | // COPY to the destination domain will eventually result in a actual |
241 | // instruction. |
242 | if (MO.getReg().isPhysical()) |
243 | return 1; |
244 | |
245 | RegDomain OpDomain = getDomain(RC: MRI->getRegClass(Reg: MO.getReg()), |
246 | TRI: MRI->getTargetRegisterInfo()); |
247 | // Converting a cross domain COPY to a same domain COPY should eliminate |
248 | // an insturction |
249 | if (OpDomain == DstDomain) |
250 | return -1; |
251 | } |
252 | return 0; |
253 | } |
254 | }; |
255 | |
256 | /// An Instruction Converter which replaces an instruction with a COPY. |
257 | class InstrReplaceWithCopy : public InstrConverterBase { |
258 | public: |
259 | // Source instruction operand Index, to be used as the COPY source. |
260 | unsigned SrcOpIdx; |
261 | |
262 | InstrReplaceWithCopy(unsigned SrcOpcode, unsigned SrcOpIdx) |
263 | : InstrConverterBase(SrcOpcode), SrcOpIdx(SrcOpIdx) {} |
264 | |
265 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
266 | MachineRegisterInfo *MRI) const override { |
267 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
268 | BuildMI(BB&: *MI->getParent(), I: MI, MIMD: MI->getDebugLoc(), |
269 | MCID: TII->get(Opcode: TargetOpcode::COPY)) |
270 | .add(MOs: {MI->getOperand(i: 0), MI->getOperand(i: SrcOpIdx)}); |
271 | return true; |
272 | } |
273 | |
274 | double (const MachineInstr *MI, |
275 | MachineRegisterInfo *MRI) const override { |
276 | return 0; |
277 | } |
278 | }; |
279 | |
280 | // Key type to be used by the Instruction Converters map. |
281 | // A converter is identified by <destination domain, source opcode> |
282 | typedef std::pair<int, unsigned> InstrConverterBaseKeyTy; |
283 | |
284 | typedef DenseMap<InstrConverterBaseKeyTy, std::unique_ptr<InstrConverterBase>> |
285 | InstrConverterBaseMap; |
286 | |
287 | /// A closure is a set of virtual register representing all of the edges in |
288 | /// the closure, as well as all of the instructions connected by those edges. |
289 | /// |
290 | /// A closure may encompass virtual registers in the same register bank that |
291 | /// have different widths. For example, it may contain 32-bit GPRs as well as |
292 | /// 64-bit GPRs. |
293 | /// |
294 | /// A closure that computes an address (i.e. defines a virtual register that is |
295 | /// used in a memory operand) excludes the instructions that contain memory |
296 | /// operands using the address. Such an instruction will be included in a |
297 | /// different closure that manipulates the loaded or stored value. |
298 | class Closure { |
299 | private: |
300 | /// Virtual registers in the closure. |
301 | DenseSet<Register> Edges; |
302 | |
303 | /// Instructions in the closure. |
304 | SmallVector<MachineInstr *, 8> Instrs; |
305 | |
306 | /// Domains which this closure can legally be reassigned to. |
307 | std::bitset<NumDomains> LegalDstDomains; |
308 | |
309 | /// An ID to uniquely identify this closure, even when it gets |
310 | /// moved around |
311 | unsigned ID; |
312 | |
313 | public: |
314 | Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) { |
315 | for (RegDomain D : LegalDstDomainList) |
316 | LegalDstDomains.set(position: D); |
317 | } |
318 | |
319 | /// Mark this closure as illegal for reassignment to all domains. |
320 | void setAllIllegal() { LegalDstDomains.reset(); } |
321 | |
322 | /// \returns true if this closure has domains which are legal to reassign to. |
323 | bool hasLegalDstDomain() const { return LegalDstDomains.any(); } |
324 | |
325 | /// \returns true if is legal to reassign this closure to domain \p RD. |
326 | bool isLegal(RegDomain RD) const { return LegalDstDomains[RD]; } |
327 | |
328 | /// Mark this closure as illegal for reassignment to domain \p RD. |
329 | void setIllegal(RegDomain RD) { LegalDstDomains[RD] = false; } |
330 | |
331 | bool empty() const { return Edges.empty(); } |
332 | |
333 | bool insertEdge(Register Reg) { return Edges.insert(V: Reg).second; } |
334 | |
335 | using const_edge_iterator = DenseSet<Register>::const_iterator; |
336 | iterator_range<const_edge_iterator> edges() const { |
337 | return iterator_range<const_edge_iterator>(Edges.begin(), Edges.end()); |
338 | } |
339 | |
340 | void addInstruction(MachineInstr *I) { |
341 | Instrs.push_back(Elt: I); |
342 | } |
343 | |
344 | ArrayRef<MachineInstr *> instructions() const { |
345 | return Instrs; |
346 | } |
347 | |
348 | LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const { |
349 | dbgs() << "Registers: " ; |
350 | bool First = true; |
351 | for (Register Reg : Edges) { |
352 | if (!First) |
353 | dbgs() << ", " ; |
354 | First = false; |
355 | dbgs() << printReg(Reg, TRI: MRI->getTargetRegisterInfo(), SubIdx: 0, MRI); |
356 | } |
357 | dbgs() << "\n" << "Instructions:" ; |
358 | for (MachineInstr *MI : Instrs) { |
359 | dbgs() << "\n " ; |
360 | MI->print(OS&: dbgs()); |
361 | } |
362 | dbgs() << "\n" ; |
363 | } |
364 | |
365 | unsigned getID() const { |
366 | return ID; |
367 | } |
368 | |
369 | }; |
370 | |
371 | class X86DomainReassignment : public MachineFunctionPass { |
372 | const X86Subtarget *STI = nullptr; |
373 | MachineRegisterInfo *MRI = nullptr; |
374 | const X86InstrInfo *TII = nullptr; |
375 | |
376 | /// All edges that are included in some closure |
377 | BitVector EnclosedEdges{8, false}; |
378 | |
379 | /// All instructions that are included in some closure. |
380 | DenseMap<MachineInstr *, unsigned> EnclosedInstrs; |
381 | |
382 | public: |
383 | static char ID; |
384 | |
385 | X86DomainReassignment() : MachineFunctionPass(ID) { } |
386 | |
387 | bool runOnMachineFunction(MachineFunction &MF) override; |
388 | |
389 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
390 | AU.setPreservesCFG(); |
391 | MachineFunctionPass::getAnalysisUsage(AU); |
392 | } |
393 | |
394 | StringRef getPassName() const override { |
395 | return "X86 Domain Reassignment Pass" ; |
396 | } |
397 | |
398 | private: |
399 | /// A map of available Instruction Converters. |
400 | InstrConverterBaseMap Converters; |
401 | |
402 | /// Initialize Converters map. |
403 | void initConverters(); |
404 | |
405 | /// Starting from \Reg, expand the closure as much as possible. |
406 | void buildClosure(Closure &, Register Reg); |
407 | |
408 | /// Enqueue \p Reg to be considered for addition to the closure. |
409 | void visitRegister(Closure &, Register Reg, RegDomain &Domain, |
410 | SmallVectorImpl<unsigned> &Worklist); |
411 | |
412 | /// Reassign the closure to \p Domain. |
413 | void reassign(const Closure &C, RegDomain Domain) const; |
414 | |
415 | /// Add \p MI to the closure. |
416 | void encloseInstr(Closure &C, MachineInstr *MI); |
417 | |
418 | /// /returns true if it is profitable to reassign the closure to \p Domain. |
419 | bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const; |
420 | |
421 | /// Calculate the total cost of reassigning the closure to \p Domain. |
422 | double calculateCost(const Closure &C, RegDomain Domain) const; |
423 | }; |
424 | |
425 | char X86DomainReassignment::ID = 0; |
426 | |
427 | } // End anonymous namespace. |
428 | |
429 | void X86DomainReassignment::visitRegister(Closure &C, Register Reg, |
430 | RegDomain &Domain, |
431 | SmallVectorImpl<unsigned> &Worklist) { |
432 | if (!Reg.isVirtual()) |
433 | return; |
434 | |
435 | if (EnclosedEdges.test(Idx: Register::virtReg2Index(Reg))) |
436 | return; |
437 | |
438 | if (!MRI->hasOneDef(RegNo: Reg)) |
439 | return; |
440 | |
441 | RegDomain RD = getDomain(RC: MRI->getRegClass(Reg), TRI: MRI->getTargetRegisterInfo()); |
442 | // First edge in closure sets the domain. |
443 | if (Domain == NoDomain) |
444 | Domain = RD; |
445 | |
446 | if (Domain != RD) |
447 | return; |
448 | |
449 | Worklist.push_back(Elt: Reg); |
450 | } |
451 | |
452 | void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { |
453 | auto I = EnclosedInstrs.find(Val: MI); |
454 | if (I != EnclosedInstrs.end()) { |
455 | if (I->second != C.getID()) |
456 | // Instruction already belongs to another closure, avoid conflicts between |
457 | // closure and mark this closure as illegal. |
458 | C.setAllIllegal(); |
459 | return; |
460 | } |
461 | |
462 | EnclosedInstrs[MI] = C.getID(); |
463 | C.addInstruction(I: MI); |
464 | |
465 | // Mark closure as illegal for reassignment to domains, if there is no |
466 | // converter for the instruction or if the converter cannot convert the |
467 | // instruction. |
468 | for (int i = 0; i != NumDomains; ++i) { |
469 | if (C.isLegal(RD: (RegDomain)i)) { |
470 | auto I = Converters.find(Val: {i, MI->getOpcode()}); |
471 | if (I == Converters.end() || !I->second->isLegal(MI, TII)) |
472 | C.setIllegal((RegDomain)i); |
473 | } |
474 | } |
475 | } |
476 | |
477 | double X86DomainReassignment::calculateCost(const Closure &C, |
478 | RegDomain DstDomain) const { |
479 | assert(C.isLegal(DstDomain) && "Cannot calculate cost for illegal closure" ); |
480 | |
481 | double Cost = 0.0; |
482 | for (auto *MI : C.instructions()) |
483 | Cost += Converters.find(Val: {DstDomain, MI->getOpcode()}) |
484 | ->second->getExtraCost(MI, MRI); |
485 | return Cost; |
486 | } |
487 | |
488 | bool X86DomainReassignment::isReassignmentProfitable(const Closure &C, |
489 | RegDomain Domain) const { |
490 | return calculateCost(C, DstDomain: Domain) < 0.0; |
491 | } |
492 | |
493 | void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { |
494 | assert(C.isLegal(Domain) && "Cannot convert illegal closure" ); |
495 | |
496 | // Iterate all instructions in the closure, convert each one using the |
497 | // appropriate converter. |
498 | SmallVector<MachineInstr *, 8> ToErase; |
499 | for (auto *MI : C.instructions()) |
500 | if (Converters.find(Val: {Domain, MI->getOpcode()}) |
501 | ->second->convertInstr(MI, TII, MRI)) |
502 | ToErase.push_back(Elt: MI); |
503 | |
504 | // Iterate all registers in the closure, replace them with registers in the |
505 | // destination domain. |
506 | for (Register Reg : C.edges()) { |
507 | MRI->setRegClass(Reg, RC: getDstRC(SrcRC: MRI->getRegClass(Reg), Domain)); |
508 | for (auto &MO : MRI->use_operands(Reg)) { |
509 | if (MO.isReg()) |
510 | // Remove all subregister references as they are not valid in the |
511 | // destination domain. |
512 | MO.setSubReg(0); |
513 | } |
514 | } |
515 | |
516 | for (auto *MI : ToErase) |
517 | MI->eraseFromParent(); |
518 | } |
519 | |
520 | /// \returns true when \p Reg is used as part of an address calculation in \p |
521 | /// MI. |
522 | static bool usedAsAddr(const MachineInstr &MI, Register Reg, |
523 | const TargetInstrInfo *TII) { |
524 | if (!MI.mayLoadOrStore()) |
525 | return false; |
526 | |
527 | const MCInstrDesc &Desc = TII->get(Opcode: MI.getOpcode()); |
528 | int MemOpStart = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags); |
529 | if (MemOpStart == -1) |
530 | return false; |
531 | |
532 | MemOpStart += X86II::getOperandBias(Desc); |
533 | for (unsigned MemOpIdx = MemOpStart, |
534 | MemOpEnd = MemOpStart + X86::AddrNumOperands; |
535 | MemOpIdx < MemOpEnd; ++MemOpIdx) { |
536 | const MachineOperand &Op = MI.getOperand(i: MemOpIdx); |
537 | if (Op.isReg() && Op.getReg() == Reg) |
538 | return true; |
539 | } |
540 | return false; |
541 | } |
542 | |
543 | void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { |
544 | SmallVector<unsigned, 4> Worklist; |
545 | RegDomain Domain = NoDomain; |
546 | visitRegister(C, Reg, Domain, Worklist); |
547 | while (!Worklist.empty()) { |
548 | unsigned CurReg = Worklist.pop_back_val(); |
549 | |
550 | // Register already in this closure. |
551 | if (!C.insertEdge(Reg: CurReg)) |
552 | continue; |
553 | EnclosedEdges.set(Register::virtReg2Index(Reg)); |
554 | |
555 | MachineInstr *DefMI = MRI->getVRegDef(Reg: CurReg); |
556 | encloseInstr(C, MI: DefMI); |
557 | |
558 | // Add register used by the defining MI to the worklist. |
559 | // Do not add registers which are used in address calculation, they will be |
560 | // added to a different closure. |
561 | int OpEnd = DefMI->getNumOperands(); |
562 | const MCInstrDesc &Desc = DefMI->getDesc(); |
563 | int MemOp = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags); |
564 | if (MemOp != -1) |
565 | MemOp += X86II::getOperandBias(Desc); |
566 | for (int OpIdx = 0; OpIdx < OpEnd; ++OpIdx) { |
567 | if (OpIdx == MemOp) { |
568 | // skip address calculation. |
569 | OpIdx += (X86::AddrNumOperands - 1); |
570 | continue; |
571 | } |
572 | auto &Op = DefMI->getOperand(i: OpIdx); |
573 | if (!Op.isReg() || !Op.isUse()) |
574 | continue; |
575 | visitRegister(C, Reg: Op.getReg(), Domain, Worklist); |
576 | } |
577 | |
578 | // Expand closure through register uses. |
579 | for (auto &UseMI : MRI->use_nodbg_instructions(Reg: CurReg)) { |
580 | // We would like to avoid converting closures which calculare addresses, |
581 | // as this should remain in GPRs. |
582 | if (usedAsAddr(MI: UseMI, Reg: CurReg, TII)) { |
583 | C.setAllIllegal(); |
584 | continue; |
585 | } |
586 | encloseInstr(C, MI: &UseMI); |
587 | |
588 | for (auto &DefOp : UseMI.defs()) { |
589 | if (!DefOp.isReg()) |
590 | continue; |
591 | |
592 | Register DefReg = DefOp.getReg(); |
593 | if (!DefReg.isVirtual()) { |
594 | C.setAllIllegal(); |
595 | continue; |
596 | } |
597 | visitRegister(C, Reg: DefReg, Domain, Worklist); |
598 | } |
599 | } |
600 | } |
601 | } |
602 | |
603 | void X86DomainReassignment::initConverters() { |
604 | Converters[{MaskDomain, TargetOpcode::PHI}] = |
605 | std::make_unique<InstrIgnore>(args: TargetOpcode::PHI); |
606 | |
607 | Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] = |
608 | std::make_unique<InstrIgnore>(args: TargetOpcode::IMPLICIT_DEF); |
609 | |
610 | Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] = |
611 | std::make_unique<InstrReplaceWithCopy>(args: TargetOpcode::INSERT_SUBREG, args: 2); |
612 | |
613 | Converters[{MaskDomain, TargetOpcode::COPY}] = |
614 | std::make_unique<InstrCOPYReplacer>(args: TargetOpcode::COPY, args: MaskDomain, |
615 | args: TargetOpcode::COPY); |
616 | |
617 | auto createReplacerDstCOPY = [&](unsigned From, unsigned To) { |
618 | Converters[{MaskDomain, From}] = |
619 | std::make_unique<InstrReplacerDstCOPY>(args&: From, args&: To); |
620 | }; |
621 | |
622 | #define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC |
623 | createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
624 | createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
625 | |
626 | createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
627 | createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
628 | |
629 | if (STI->hasDQI()) { |
630 | createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
631 | createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
632 | createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
633 | |
634 | createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
635 | createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
636 | createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
637 | } |
638 | |
639 | auto createReplacer = [&](unsigned From, unsigned To) { |
640 | Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(args&: From, args&: To); |
641 | }; |
642 | |
643 | createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
644 | createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk)); |
645 | createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
646 | createReplacer(X86::SHR16ri, X86::KSHIFTRWri); |
647 | createReplacer(X86::SHL16ri, X86::KSHIFTLWri); |
648 | createReplacer(X86::NOT16r, X86::KNOTWrr); |
649 | createReplacer(X86::OR16rr, X86::KORWrr); |
650 | createReplacer(X86::AND16rr, X86::KANDWrr); |
651 | createReplacer(X86::XOR16rr, X86::KXORWrr); |
652 | |
653 | bool HasNDD = STI->hasNDD(); |
654 | if (HasNDD) { |
655 | createReplacer(X86::SHR16ri_ND, X86::KSHIFTRWri); |
656 | createReplacer(X86::SHL16ri_ND, X86::KSHIFTLWri); |
657 | createReplacer(X86::NOT16r_ND, X86::KNOTWrr); |
658 | createReplacer(X86::OR16rr_ND, X86::KORWrr); |
659 | createReplacer(X86::AND16rr_ND, X86::KANDWrr); |
660 | createReplacer(X86::XOR16rr_ND, X86::KXORWrr); |
661 | } |
662 | |
663 | if (STI->hasBWI()) { |
664 | createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm)); |
665 | createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm)); |
666 | |
667 | createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk)); |
668 | createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk)); |
669 | |
670 | createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk)); |
671 | createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk)); |
672 | |
673 | createReplacer(X86::SHR32ri, X86::KSHIFTRDri); |
674 | createReplacer(X86::SHR64ri, X86::KSHIFTRQri); |
675 | |
676 | createReplacer(X86::SHL32ri, X86::KSHIFTLDri); |
677 | createReplacer(X86::SHL64ri, X86::KSHIFTLQri); |
678 | |
679 | createReplacer(X86::ADD32rr, X86::KADDDrr); |
680 | createReplacer(X86::ADD64rr, X86::KADDQrr); |
681 | |
682 | createReplacer(X86::NOT32r, X86::KNOTDrr); |
683 | createReplacer(X86::NOT64r, X86::KNOTQrr); |
684 | |
685 | createReplacer(X86::OR32rr, X86::KORDrr); |
686 | createReplacer(X86::OR64rr, X86::KORQrr); |
687 | |
688 | createReplacer(X86::AND32rr, X86::KANDDrr); |
689 | createReplacer(X86::AND64rr, X86::KANDQrr); |
690 | |
691 | createReplacer(X86::ANDN32rr, X86::KANDNDrr); |
692 | createReplacer(X86::ANDN64rr, X86::KANDNQrr); |
693 | |
694 | createReplacer(X86::XOR32rr, X86::KXORDrr); |
695 | createReplacer(X86::XOR64rr, X86::KXORQrr); |
696 | |
697 | if (HasNDD) { |
698 | createReplacer(X86::SHR32ri_ND, X86::KSHIFTRDri); |
699 | createReplacer(X86::SHL32ri_ND, X86::KSHIFTLDri); |
700 | createReplacer(X86::ADD32rr_ND, X86::KADDDrr); |
701 | createReplacer(X86::NOT32r_ND, X86::KNOTDrr); |
702 | createReplacer(X86::OR32rr_ND, X86::KORDrr); |
703 | createReplacer(X86::AND32rr_ND, X86::KANDDrr); |
704 | createReplacer(X86::XOR32rr_ND, X86::KXORDrr); |
705 | createReplacer(X86::SHR64ri_ND, X86::KSHIFTRQri); |
706 | createReplacer(X86::SHL64ri_ND, X86::KSHIFTLQri); |
707 | createReplacer(X86::ADD64rr_ND, X86::KADDQrr); |
708 | createReplacer(X86::NOT64r_ND, X86::KNOTQrr); |
709 | createReplacer(X86::OR64rr_ND, X86::KORQrr); |
710 | createReplacer(X86::AND64rr_ND, X86::KANDQrr); |
711 | createReplacer(X86::XOR64rr_ND, X86::KXORQrr); |
712 | } |
713 | |
714 | // TODO: KTEST is not a replacement for TEST due to flag differences. Need |
715 | // to prove only Z flag is used. |
716 | // createReplacer(X86::TEST32rr, X86::KTESTDrr); |
717 | // createReplacer(X86::TEST64rr, X86::KTESTQrr); |
718 | } |
719 | |
720 | if (STI->hasDQI()) { |
721 | createReplacer(X86::ADD8rr, X86::KADDBrr); |
722 | createReplacer(X86::ADD16rr, X86::KADDWrr); |
723 | |
724 | createReplacer(X86::AND8rr, X86::KANDBrr); |
725 | |
726 | createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
727 | createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk)); |
728 | createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
729 | |
730 | createReplacer(X86::NOT8r, X86::KNOTBrr); |
731 | |
732 | createReplacer(X86::OR8rr, X86::KORBrr); |
733 | |
734 | createReplacer(X86::SHR8ri, X86::KSHIFTRBri); |
735 | createReplacer(X86::SHL8ri, X86::KSHIFTLBri); |
736 | |
737 | // TODO: KTEST is not a replacement for TEST due to flag differences. Need |
738 | // to prove only Z flag is used. |
739 | // createReplacer(X86::TEST8rr, X86::KTESTBrr); |
740 | // createReplacer(X86::TEST16rr, X86::KTESTWrr); |
741 | |
742 | createReplacer(X86::XOR8rr, X86::KXORBrr); |
743 | |
744 | if (HasNDD) { |
745 | createReplacer(X86::ADD8rr_ND, X86::KADDBrr); |
746 | createReplacer(X86::ADD16rr_ND, X86::KADDWrr); |
747 | createReplacer(X86::AND8rr_ND, X86::KANDBrr); |
748 | createReplacer(X86::NOT8r_ND, X86::KNOTBrr); |
749 | createReplacer(X86::OR8rr_ND, X86::KORBrr); |
750 | createReplacer(X86::SHR8ri_ND, X86::KSHIFTRBri); |
751 | createReplacer(X86::SHL8ri_ND, X86::KSHIFTLBri); |
752 | createReplacer(X86::XOR8rr_ND, X86::KXORBrr); |
753 | } |
754 | } |
755 | #undef GET_EGPR_IF_ENABLED |
756 | } |
757 | |
758 | bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { |
759 | if (skipFunction(F: MF.getFunction())) |
760 | return false; |
761 | if (DisableX86DomainReassignment) |
762 | return false; |
763 | |
764 | LLVM_DEBUG( |
765 | dbgs() << "***** Machine Function before Domain Reassignment *****\n" ); |
766 | LLVM_DEBUG(MF.print(dbgs())); |
767 | |
768 | STI = &MF.getSubtarget<X86Subtarget>(); |
769 | // GPR->K is the only transformation currently supported, bail out early if no |
770 | // AVX512. |
771 | // TODO: We're also bailing of AVX512BW isn't supported since we use VK32 and |
772 | // VK64 for GR32/GR64, but those aren't legal classes on KNL. If the register |
773 | // coalescer doesn't clean it up and we generate a spill we will crash. |
774 | if (!STI->hasAVX512() || !STI->hasBWI()) |
775 | return false; |
776 | |
777 | MRI = &MF.getRegInfo(); |
778 | assert(MRI->isSSA() && "Expected MIR to be in SSA form" ); |
779 | |
780 | TII = STI->getInstrInfo(); |
781 | initConverters(); |
782 | bool Changed = false; |
783 | |
784 | EnclosedEdges.clear(); |
785 | EnclosedEdges.resize(N: MRI->getNumVirtRegs()); |
786 | EnclosedInstrs.clear(); |
787 | |
788 | std::vector<Closure> Closures; |
789 | |
790 | // Go over all virtual registers and calculate a closure. |
791 | unsigned ClosureID = 0; |
792 | for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { |
793 | Register Reg = Register::index2VirtReg(Index: Idx); |
794 | |
795 | // Skip unused VRegs. |
796 | if (MRI->reg_nodbg_empty(RegNo: Reg)) |
797 | continue; |
798 | |
799 | // GPR only current source domain supported. |
800 | if (!isGPR(RC: MRI->getRegClass(Reg))) |
801 | continue; |
802 | |
803 | // Register already in closure. |
804 | if (EnclosedEdges.test(Idx)) |
805 | continue; |
806 | |
807 | // Calculate closure starting with Reg. |
808 | Closure C(ClosureID++, {MaskDomain}); |
809 | buildClosure(C, Reg); |
810 | |
811 | // Collect all closures that can potentially be converted. |
812 | if (!C.empty() && C.isLegal(RD: MaskDomain)) |
813 | Closures.push_back(x: std::move(C)); |
814 | } |
815 | |
816 | for (Closure &C : Closures) { |
817 | LLVM_DEBUG(C.dump(MRI)); |
818 | if (isReassignmentProfitable(C, Domain: MaskDomain)) { |
819 | reassign(C, Domain: MaskDomain); |
820 | ++NumClosuresConverted; |
821 | Changed = true; |
822 | } |
823 | } |
824 | |
825 | LLVM_DEBUG( |
826 | dbgs() << "***** Machine Function after Domain Reassignment *****\n" ); |
827 | LLVM_DEBUG(MF.print(dbgs())); |
828 | |
829 | return Changed; |
830 | } |
831 | |
832 | INITIALIZE_PASS(X86DomainReassignment, "x86-domain-reassignment" , |
833 | "X86 Domain Reassignment Pass" , false, false) |
834 | |
835 | /// Returns an instance of the Domain Reassignment pass. |
836 | FunctionPass *llvm::createX86DomainReassignmentPass() { |
837 | return new X86DomainReassignment(); |
838 | } |
839 | |