1 | //===--- X86DomainReassignment.cpp - Selectively switch register classes---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass attempts to find instruction chains (closures) in one domain, |
10 | // and convert them to equivalent instructions in a different domain, |
11 | // if profitable. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "X86.h" |
16 | #include "X86InstrInfo.h" |
17 | #include "X86Subtarget.h" |
18 | #include "llvm/ADT/DenseMap.h" |
19 | #include "llvm/ADT/STLExtras.h" |
20 | #include "llvm/ADT/SmallVector.h" |
21 | #include "llvm/ADT/Statistic.h" |
22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
23 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
25 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
26 | #include "llvm/Support/Debug.h" |
27 | #include "llvm/Support/Printable.h" |
28 | #include <bitset> |
29 | |
30 | using namespace llvm; |
31 | |
32 | #define DEBUG_TYPE "x86-domain-reassignment" |
33 | |
34 | STATISTIC(NumClosuresConverted, "Number of closures converted by the pass" ); |
35 | |
36 | static cl::opt<bool> DisableX86DomainReassignment( |
37 | "disable-x86-domain-reassignment" , cl::Hidden, |
38 | cl::desc("X86: Disable Virtual Register Reassignment." ), cl::init(Val: false)); |
39 | |
40 | namespace { |
41 | enum RegDomain { NoDomain = -1, GPRDomain, MaskDomain, OtherDomain, NumDomains }; |
42 | |
43 | static bool isMask(const TargetRegisterClass *RC, |
44 | const TargetRegisterInfo *TRI) { |
45 | return X86::VK16RegClass.hasSubClassEq(RC); |
46 | } |
47 | |
48 | static RegDomain getDomain(const TargetRegisterClass *RC, |
49 | const TargetRegisterInfo *TRI) { |
50 | if (TRI->isGeneralPurposeRegisterClass(RC)) |
51 | return GPRDomain; |
52 | if (isMask(RC, TRI)) |
53 | return MaskDomain; |
54 | return OtherDomain; |
55 | } |
56 | |
57 | /// Return a register class equivalent to \p SrcRC, in \p Domain. |
58 | static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC, |
59 | RegDomain Domain) { |
60 | assert(Domain == MaskDomain && "add domain" ); |
61 | if (X86::GR8RegClass.hasSubClassEq(RC: SrcRC)) |
62 | return &X86::VK8RegClass; |
63 | if (X86::GR16RegClass.hasSubClassEq(RC: SrcRC)) |
64 | return &X86::VK16RegClass; |
65 | if (X86::GR32RegClass.hasSubClassEq(RC: SrcRC)) |
66 | return &X86::VK32RegClass; |
67 | if (X86::GR64RegClass.hasSubClassEq(RC: SrcRC)) |
68 | return &X86::VK64RegClass; |
69 | llvm_unreachable("add register class" ); |
70 | return nullptr; |
71 | } |
72 | |
73 | /// Abstract Instruction Converter class. |
74 | class InstrConverterBase { |
75 | protected: |
76 | unsigned SrcOpcode; |
77 | |
78 | public: |
79 | InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {} |
80 | |
81 | virtual ~InstrConverterBase() = default; |
82 | |
83 | /// \returns true if \p MI is legal to convert. |
84 | virtual bool isLegal(const MachineInstr *MI, |
85 | const TargetInstrInfo *TII) const { |
86 | assert(MI->getOpcode() == SrcOpcode && |
87 | "Wrong instruction passed to converter" ); |
88 | return true; |
89 | } |
90 | |
91 | /// Applies conversion to \p MI. |
92 | /// |
93 | /// \returns true if \p MI is no longer need, and can be deleted. |
94 | virtual bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
95 | MachineRegisterInfo *MRI) const = 0; |
96 | |
97 | /// \returns the cost increment incurred by converting \p MI. |
98 | virtual double (const MachineInstr *MI, |
99 | MachineRegisterInfo *MRI) const = 0; |
100 | }; |
101 | |
102 | /// An Instruction Converter which ignores the given instruction. |
103 | /// For example, PHI instructions can be safely ignored since only the registers |
104 | /// need to change. |
105 | class InstrIgnore : public InstrConverterBase { |
106 | public: |
107 | InstrIgnore(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} |
108 | |
109 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
110 | MachineRegisterInfo *MRI) const override { |
111 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
112 | return false; |
113 | } |
114 | |
115 | double (const MachineInstr *MI, |
116 | MachineRegisterInfo *MRI) const override { |
117 | return 0; |
118 | } |
119 | }; |
120 | |
121 | /// An Instruction Converter which replaces an instruction with another. |
122 | class InstrReplacer : public InstrConverterBase { |
123 | public: |
124 | /// Opcode of the destination instruction. |
125 | unsigned DstOpcode; |
126 | |
127 | InstrReplacer(unsigned SrcOpcode, unsigned DstOpcode) |
128 | : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} |
129 | |
130 | bool isLegal(const MachineInstr *MI, |
131 | const TargetInstrInfo *TII) const override { |
132 | if (!InstrConverterBase::isLegal(MI, TII)) |
133 | return false; |
134 | // It's illegal to replace an instruction that implicitly defines a register |
135 | // with an instruction that doesn't, unless that register dead. |
136 | for (const auto &MO : MI->implicit_operands()) |
137 | if (MO.isReg() && MO.isDef() && !MO.isDead() && |
138 | !TII->get(Opcode: DstOpcode).hasImplicitDefOfPhysReg(Reg: MO.getReg())) |
139 | return false; |
140 | return true; |
141 | } |
142 | |
143 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
144 | MachineRegisterInfo *MRI) const override { |
145 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
146 | MachineInstrBuilder Bld = |
147 | BuildMI(BB&: *MI->getParent(), I: MI, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: DstOpcode)); |
148 | // Transfer explicit operands from original instruction. Implicit operands |
149 | // are handled by BuildMI. |
150 | for (auto &Op : MI->explicit_operands()) |
151 | Bld.add(MO: Op); |
152 | return true; |
153 | } |
154 | |
155 | double (const MachineInstr *MI, |
156 | MachineRegisterInfo *MRI) const override { |
157 | // Assuming instructions have the same cost. |
158 | return 0; |
159 | } |
160 | }; |
161 | |
162 | /// An Instruction Converter which replaces an instruction with another, and |
163 | /// adds a COPY from the new instruction's destination to the old one's. |
164 | class InstrReplacerDstCOPY : public InstrConverterBase { |
165 | public: |
166 | unsigned DstOpcode; |
167 | |
168 | InstrReplacerDstCOPY(unsigned SrcOpcode, unsigned DstOpcode) |
169 | : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} |
170 | |
171 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
172 | MachineRegisterInfo *MRI) const override { |
173 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
174 | MachineBasicBlock *MBB = MI->getParent(); |
175 | const DebugLoc &DL = MI->getDebugLoc(); |
176 | |
177 | Register Reg = MRI->createVirtualRegister( |
178 | RegClass: TII->getRegClass(MCID: TII->get(Opcode: DstOpcode), OpNum: 0, TRI: MRI->getTargetRegisterInfo(), |
179 | MF: *MBB->getParent())); |
180 | MachineInstrBuilder Bld = BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: DstOpcode), DestReg: Reg); |
181 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) |
182 | Bld.add(MO); |
183 | |
184 | BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY)) |
185 | .add(MO: MI->getOperand(i: 0)) |
186 | .addReg(RegNo: Reg); |
187 | |
188 | return true; |
189 | } |
190 | |
191 | double (const MachineInstr *MI, |
192 | MachineRegisterInfo *MRI) const override { |
193 | // Assuming instructions have the same cost, and that COPY is in the same |
194 | // domain so it will be eliminated. |
195 | return 0; |
196 | } |
197 | }; |
198 | |
199 | /// An Instruction Converter for replacing COPY instructions. |
200 | class InstrCOPYReplacer : public InstrReplacer { |
201 | public: |
202 | RegDomain DstDomain; |
203 | |
204 | InstrCOPYReplacer(unsigned SrcOpcode, RegDomain DstDomain, unsigned DstOpcode) |
205 | : InstrReplacer(SrcOpcode, DstOpcode), DstDomain(DstDomain) {} |
206 | |
207 | bool isLegal(const MachineInstr *MI, |
208 | const TargetInstrInfo *TII) const override { |
209 | if (!InstrConverterBase::isLegal(MI, TII)) |
210 | return false; |
211 | |
212 | // Don't allow copies to/flow GR8/GR16 physical registers. |
213 | // FIXME: Is there some better way to support this? |
214 | Register DstReg = MI->getOperand(i: 0).getReg(); |
215 | if (DstReg.isPhysical() && (X86::GR8RegClass.contains(Reg: DstReg) || |
216 | X86::GR16RegClass.contains(Reg: DstReg))) |
217 | return false; |
218 | Register SrcReg = MI->getOperand(i: 1).getReg(); |
219 | if (SrcReg.isPhysical() && (X86::GR8RegClass.contains(Reg: SrcReg) || |
220 | X86::GR16RegClass.contains(Reg: SrcReg))) |
221 | return false; |
222 | |
223 | return true; |
224 | } |
225 | |
226 | double (const MachineInstr *MI, |
227 | MachineRegisterInfo *MRI) const override { |
228 | assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY" ); |
229 | |
230 | for (const auto &MO : MI->operands()) { |
231 | // Physical registers will not be converted. Assume that converting the |
232 | // COPY to the destination domain will eventually result in a actual |
233 | // instruction. |
234 | if (MO.getReg().isPhysical()) |
235 | return 1; |
236 | |
237 | RegDomain OpDomain = getDomain(RC: MRI->getRegClass(Reg: MO.getReg()), |
238 | TRI: MRI->getTargetRegisterInfo()); |
239 | // Converting a cross domain COPY to a same domain COPY should eliminate |
240 | // an insturction |
241 | if (OpDomain == DstDomain) |
242 | return -1; |
243 | } |
244 | return 0; |
245 | } |
246 | }; |
247 | |
248 | /// An Instruction Converter which replaces an instruction with a COPY. |
249 | class InstrReplaceWithCopy : public InstrConverterBase { |
250 | public: |
251 | // Source instruction operand Index, to be used as the COPY source. |
252 | unsigned SrcOpIdx; |
253 | |
254 | InstrReplaceWithCopy(unsigned SrcOpcode, unsigned SrcOpIdx) |
255 | : InstrConverterBase(SrcOpcode), SrcOpIdx(SrcOpIdx) {} |
256 | |
257 | bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, |
258 | MachineRegisterInfo *MRI) const override { |
259 | assert(isLegal(MI, TII) && "Cannot convert instruction" ); |
260 | BuildMI(BB&: *MI->getParent(), I: MI, MIMD: MI->getDebugLoc(), |
261 | MCID: TII->get(Opcode: TargetOpcode::COPY)) |
262 | .add(MOs: {MI->getOperand(i: 0), MI->getOperand(i: SrcOpIdx)}); |
263 | return true; |
264 | } |
265 | |
266 | double (const MachineInstr *MI, |
267 | MachineRegisterInfo *MRI) const override { |
268 | return 0; |
269 | } |
270 | }; |
271 | |
272 | // Key type to be used by the Instruction Converters map. |
273 | // A converter is identified by <destination domain, source opcode> |
274 | typedef std::pair<int, unsigned> InstrConverterBaseKeyTy; |
275 | |
276 | typedef DenseMap<InstrConverterBaseKeyTy, std::unique_ptr<InstrConverterBase>> |
277 | InstrConverterBaseMap; |
278 | |
279 | /// A closure is a set of virtual register representing all of the edges in |
280 | /// the closure, as well as all of the instructions connected by those edges. |
281 | /// |
282 | /// A closure may encompass virtual registers in the same register bank that |
283 | /// have different widths. For example, it may contain 32-bit GPRs as well as |
284 | /// 64-bit GPRs. |
285 | /// |
286 | /// A closure that computes an address (i.e. defines a virtual register that is |
287 | /// used in a memory operand) excludes the instructions that contain memory |
288 | /// operands using the address. Such an instruction will be included in a |
289 | /// different closure that manipulates the loaded or stored value. |
290 | class Closure { |
291 | private: |
292 | /// Virtual registers in the closure. |
293 | DenseSet<Register> Edges; |
294 | |
295 | /// Instructions in the closure. |
296 | SmallVector<MachineInstr *, 8> Instrs; |
297 | |
298 | /// Domains which this closure can legally be reassigned to. |
299 | std::bitset<NumDomains> LegalDstDomains; |
300 | |
301 | /// An ID to uniquely identify this closure, even when it gets |
302 | /// moved around |
303 | unsigned ID; |
304 | |
305 | public: |
306 | Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) { |
307 | for (RegDomain D : LegalDstDomainList) |
308 | LegalDstDomains.set(position: D); |
309 | } |
310 | |
311 | /// Mark this closure as illegal for reassignment to all domains. |
312 | void setAllIllegal() { LegalDstDomains.reset(); } |
313 | |
314 | /// \returns true if this closure has domains which are legal to reassign to. |
315 | bool hasLegalDstDomain() const { return LegalDstDomains.any(); } |
316 | |
317 | /// \returns true if is legal to reassign this closure to domain \p RD. |
318 | bool isLegal(RegDomain RD) const { return LegalDstDomains[RD]; } |
319 | |
320 | /// Mark this closure as illegal for reassignment to domain \p RD. |
321 | void setIllegal(RegDomain RD) { LegalDstDomains[RD] = false; } |
322 | |
323 | bool empty() const { return Edges.empty(); } |
324 | |
325 | bool insertEdge(Register Reg) { return Edges.insert(V: Reg).second; } |
326 | |
327 | using const_edge_iterator = DenseSet<Register>::const_iterator; |
328 | iterator_range<const_edge_iterator> edges() const { |
329 | return iterator_range<const_edge_iterator>(Edges.begin(), Edges.end()); |
330 | } |
331 | |
332 | void addInstruction(MachineInstr *I) { |
333 | Instrs.push_back(Elt: I); |
334 | } |
335 | |
336 | ArrayRef<MachineInstr *> instructions() const { |
337 | return Instrs; |
338 | } |
339 | |
340 | LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const { |
341 | dbgs() << "Registers: " ; |
342 | bool First = true; |
343 | for (Register Reg : Edges) { |
344 | if (!First) |
345 | dbgs() << ", " ; |
346 | First = false; |
347 | dbgs() << printReg(Reg, TRI: MRI->getTargetRegisterInfo(), SubIdx: 0, MRI); |
348 | } |
349 | dbgs() << "\n" << "Instructions:" ; |
350 | for (MachineInstr *MI : Instrs) { |
351 | dbgs() << "\n " ; |
352 | MI->print(OS&: dbgs()); |
353 | } |
354 | dbgs() << "\n" ; |
355 | } |
356 | |
357 | unsigned getID() const { |
358 | return ID; |
359 | } |
360 | |
361 | }; |
362 | |
363 | class X86DomainReassignment : public MachineFunctionPass { |
364 | const X86Subtarget *STI = nullptr; |
365 | MachineRegisterInfo *MRI = nullptr; |
366 | const X86InstrInfo *TII = nullptr; |
367 | |
368 | /// All edges that are included in some closure |
369 | DenseMap<Register, unsigned> EnclosedEdges; |
370 | |
371 | /// All instructions that are included in some closure. |
372 | DenseMap<MachineInstr *, unsigned> EnclosedInstrs; |
373 | |
374 | public: |
375 | static char ID; |
376 | |
377 | X86DomainReassignment() : MachineFunctionPass(ID) { } |
378 | |
379 | bool runOnMachineFunction(MachineFunction &MF) override; |
380 | |
381 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
382 | AU.setPreservesCFG(); |
383 | MachineFunctionPass::getAnalysisUsage(AU); |
384 | } |
385 | |
386 | StringRef getPassName() const override { |
387 | return "X86 Domain Reassignment Pass" ; |
388 | } |
389 | |
390 | private: |
391 | /// A map of available Instruction Converters. |
392 | InstrConverterBaseMap Converters; |
393 | |
394 | /// Initialize Converters map. |
395 | void initConverters(); |
396 | |
397 | /// Starting from \Reg, expand the closure as much as possible. |
398 | void buildClosure(Closure &, Register Reg); |
399 | |
400 | /// Enqueue \p Reg to be considered for addition to the closure. |
401 | /// Return false if the closure becomes invalid. |
402 | bool visitRegister(Closure &, Register Reg, RegDomain &Domain, |
403 | SmallVectorImpl<Register> &Worklist); |
404 | |
405 | /// Reassign the closure to \p Domain. |
406 | void reassign(const Closure &C, RegDomain Domain) const; |
407 | |
408 | /// Add \p MI to the closure. |
409 | /// Return false if the closure becomes invalid. |
410 | bool encloseInstr(Closure &C, MachineInstr *MI); |
411 | |
412 | /// /returns true if it is profitable to reassign the closure to \p Domain. |
413 | bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const; |
414 | |
415 | /// Calculate the total cost of reassigning the closure to \p Domain. |
416 | double calculateCost(const Closure &C, RegDomain Domain) const; |
417 | }; |
418 | |
419 | char X86DomainReassignment::ID = 0; |
420 | |
421 | } // End anonymous namespace. |
422 | |
423 | bool X86DomainReassignment::visitRegister(Closure &C, Register Reg, |
424 | RegDomain &Domain, |
425 | SmallVectorImpl<Register> &Worklist) { |
426 | if (!Reg.isVirtual()) |
427 | return true; |
428 | |
429 | auto I = EnclosedEdges.find(Val: Reg); |
430 | if (I != EnclosedEdges.end()) { |
431 | if (I->second != C.getID()) { |
432 | C.setAllIllegal(); |
433 | return false; |
434 | } |
435 | return true; |
436 | } |
437 | |
438 | if (!MRI->hasOneDef(RegNo: Reg)) |
439 | return true; |
440 | |
441 | RegDomain RD = getDomain(RC: MRI->getRegClass(Reg), TRI: MRI->getTargetRegisterInfo()); |
442 | // First edge in closure sets the domain. |
443 | if (Domain == NoDomain) |
444 | Domain = RD; |
445 | |
446 | if (Domain != RD) |
447 | return true; |
448 | |
449 | Worklist.push_back(Elt: Reg); |
450 | return true; |
451 | } |
452 | |
453 | bool X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { |
454 | auto [I, Inserted] = EnclosedInstrs.try_emplace(Key: MI, Args: C.getID()); |
455 | if (!Inserted) { |
456 | if (I->second != C.getID()) { |
457 | // Instruction already belongs to another closure, avoid conflicts between |
458 | // closure and mark this closure as illegal. |
459 | C.setAllIllegal(); |
460 | return false; |
461 | } |
462 | return true; |
463 | } |
464 | |
465 | C.addInstruction(I: MI); |
466 | |
467 | // Mark closure as illegal for reassignment to domains, if there is no |
468 | // converter for the instruction or if the converter cannot convert the |
469 | // instruction. |
470 | for (int i = 0; i != NumDomains; ++i) { |
471 | if (C.isLegal(RD: (RegDomain)i)) { |
472 | auto I = Converters.find(Val: {i, MI->getOpcode()}); |
473 | if (I == Converters.end() || !I->second->isLegal(MI, TII)) |
474 | C.setIllegal((RegDomain)i); |
475 | } |
476 | } |
477 | return C.hasLegalDstDomain(); |
478 | } |
479 | |
480 | double X86DomainReassignment::calculateCost(const Closure &C, |
481 | RegDomain DstDomain) const { |
482 | assert(C.isLegal(DstDomain) && "Cannot calculate cost for illegal closure" ); |
483 | |
484 | double Cost = 0.0; |
485 | for (auto *MI : C.instructions()) |
486 | Cost += Converters.find(Val: {DstDomain, MI->getOpcode()}) |
487 | ->second->getExtraCost(MI, MRI); |
488 | return Cost; |
489 | } |
490 | |
491 | bool X86DomainReassignment::isReassignmentProfitable(const Closure &C, |
492 | RegDomain Domain) const { |
493 | return calculateCost(C, DstDomain: Domain) < 0.0; |
494 | } |
495 | |
496 | void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { |
497 | assert(C.isLegal(Domain) && "Cannot convert illegal closure" ); |
498 | |
499 | // Iterate all instructions in the closure, convert each one using the |
500 | // appropriate converter. |
501 | SmallVector<MachineInstr *, 8> ToErase; |
502 | for (auto *MI : C.instructions()) |
503 | if (Converters.find(Val: {Domain, MI->getOpcode()}) |
504 | ->second->convertInstr(MI, TII, MRI)) |
505 | ToErase.push_back(Elt: MI); |
506 | |
507 | // Iterate all registers in the closure, replace them with registers in the |
508 | // destination domain. |
509 | for (Register Reg : C.edges()) { |
510 | MRI->setRegClass(Reg, RC: getDstRC(SrcRC: MRI->getRegClass(Reg), Domain)); |
511 | for (auto &MO : MRI->use_operands(Reg)) { |
512 | if (MO.isReg()) |
513 | // Remove all subregister references as they are not valid in the |
514 | // destination domain. |
515 | MO.setSubReg(0); |
516 | } |
517 | } |
518 | |
519 | for (auto *MI : ToErase) |
520 | MI->eraseFromParent(); |
521 | } |
522 | |
523 | /// \returns true when \p Reg is used as part of an address calculation in \p |
524 | /// MI. |
525 | static bool usedAsAddr(const MachineInstr &MI, Register Reg, |
526 | const TargetInstrInfo *TII) { |
527 | if (!MI.mayLoadOrStore()) |
528 | return false; |
529 | |
530 | const MCInstrDesc &Desc = TII->get(Opcode: MI.getOpcode()); |
531 | int MemOpStart = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags); |
532 | if (MemOpStart == -1) |
533 | return false; |
534 | |
535 | MemOpStart += X86II::getOperandBias(Desc); |
536 | for (unsigned MemOpIdx = MemOpStart, |
537 | MemOpEnd = MemOpStart + X86::AddrNumOperands; |
538 | MemOpIdx < MemOpEnd; ++MemOpIdx) { |
539 | const MachineOperand &Op = MI.getOperand(i: MemOpIdx); |
540 | if (Op.isReg() && Op.getReg() == Reg) |
541 | return true; |
542 | } |
543 | return false; |
544 | } |
545 | |
546 | void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { |
547 | SmallVector<Register, 4> Worklist; |
548 | RegDomain Domain = NoDomain; |
549 | visitRegister(C, Reg, Domain, Worklist); |
550 | while (!Worklist.empty()) { |
551 | Register CurReg = Worklist.pop_back_val(); |
552 | |
553 | // Register already in this closure. |
554 | if (!C.insertEdge(Reg: CurReg)) |
555 | continue; |
556 | EnclosedEdges[Reg] = C.getID(); |
557 | |
558 | MachineInstr *DefMI = MRI->getVRegDef(Reg: CurReg); |
559 | if (!encloseInstr(C, MI: DefMI)) |
560 | return; |
561 | |
562 | // Add register used by the defining MI to the worklist. |
563 | // Do not add registers which are used in address calculation, they will be |
564 | // added to a different closure. |
565 | int OpEnd = DefMI->getNumOperands(); |
566 | const MCInstrDesc &Desc = DefMI->getDesc(); |
567 | int MemOp = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags); |
568 | if (MemOp != -1) |
569 | MemOp += X86II::getOperandBias(Desc); |
570 | for (int OpIdx = 0; OpIdx < OpEnd; ++OpIdx) { |
571 | if (OpIdx == MemOp) { |
572 | // skip address calculation. |
573 | OpIdx += (X86::AddrNumOperands - 1); |
574 | continue; |
575 | } |
576 | auto &Op = DefMI->getOperand(i: OpIdx); |
577 | if (!Op.isReg() || !Op.isUse()) |
578 | continue; |
579 | if (!visitRegister(C, Reg: Op.getReg(), Domain, Worklist)) |
580 | return; |
581 | } |
582 | |
583 | // Expand closure through register uses. |
584 | for (auto &UseMI : MRI->use_nodbg_instructions(Reg: CurReg)) { |
585 | // We would like to avoid converting closures which calculare addresses, |
586 | // as this should remain in GPRs. |
587 | if (usedAsAddr(MI: UseMI, Reg: CurReg, TII)) { |
588 | C.setAllIllegal(); |
589 | return; |
590 | } |
591 | if (!encloseInstr(C, MI: &UseMI)) |
592 | return; |
593 | |
594 | for (auto &DefOp : UseMI.defs()) { |
595 | if (!DefOp.isReg()) |
596 | continue; |
597 | |
598 | Register DefReg = DefOp.getReg(); |
599 | if (!DefReg.isVirtual()) { |
600 | C.setAllIllegal(); |
601 | return; |
602 | } |
603 | if (!visitRegister(C, Reg: DefReg, Domain, Worklist)) |
604 | return; |
605 | } |
606 | } |
607 | } |
608 | } |
609 | |
610 | void X86DomainReassignment::initConverters() { |
611 | Converters[{MaskDomain, TargetOpcode::PHI}] = |
612 | std::make_unique<InstrIgnore>(args: TargetOpcode::PHI); |
613 | |
614 | Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] = |
615 | std::make_unique<InstrIgnore>(args: TargetOpcode::IMPLICIT_DEF); |
616 | |
617 | Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] = |
618 | std::make_unique<InstrReplaceWithCopy>(args: TargetOpcode::INSERT_SUBREG, args: 2); |
619 | |
620 | Converters[{MaskDomain, TargetOpcode::COPY}] = |
621 | std::make_unique<InstrCOPYReplacer>(args: TargetOpcode::COPY, args: MaskDomain, |
622 | args: TargetOpcode::COPY); |
623 | |
624 | auto createReplacerDstCOPY = [&](unsigned From, unsigned To) { |
625 | Converters[{MaskDomain, From}] = |
626 | std::make_unique<InstrReplacerDstCOPY>(args&: From, args&: To); |
627 | }; |
628 | |
629 | #define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC |
630 | createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
631 | createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
632 | |
633 | createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
634 | createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
635 | |
636 | if (STI->hasDQI()) { |
637 | createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
638 | createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
639 | createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
640 | |
641 | createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
642 | createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
643 | createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
644 | } |
645 | |
646 | auto createReplacer = [&](unsigned From, unsigned To) { |
647 | Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(args&: From, args&: To); |
648 | }; |
649 | |
650 | createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); |
651 | createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk)); |
652 | createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); |
653 | createReplacer(X86::SHR16ri, X86::KSHIFTRWki); |
654 | createReplacer(X86::SHL16ri, X86::KSHIFTLWki); |
655 | createReplacer(X86::NOT16r, X86::KNOTWkk); |
656 | createReplacer(X86::OR16rr, X86::KORWkk); |
657 | createReplacer(X86::AND16rr, X86::KANDWkk); |
658 | createReplacer(X86::XOR16rr, X86::KXORWkk); |
659 | |
660 | bool HasNDD = STI->hasNDD(); |
661 | if (HasNDD) { |
662 | createReplacer(X86::SHR16ri_ND, X86::KSHIFTRWki); |
663 | createReplacer(X86::SHL16ri_ND, X86::KSHIFTLWki); |
664 | createReplacer(X86::NOT16r_ND, X86::KNOTWkk); |
665 | createReplacer(X86::OR16rr_ND, X86::KORWkk); |
666 | createReplacer(X86::AND16rr_ND, X86::KANDWkk); |
667 | createReplacer(X86::XOR16rr_ND, X86::KXORWkk); |
668 | } |
669 | |
670 | if (STI->hasBWI()) { |
671 | createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm)); |
672 | createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm)); |
673 | |
674 | createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk)); |
675 | createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk)); |
676 | |
677 | createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk)); |
678 | createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk)); |
679 | |
680 | createReplacer(X86::SHR32ri, X86::KSHIFTRDki); |
681 | createReplacer(X86::SHR64ri, X86::KSHIFTRQki); |
682 | |
683 | createReplacer(X86::SHL32ri, X86::KSHIFTLDki); |
684 | createReplacer(X86::SHL64ri, X86::KSHIFTLQki); |
685 | |
686 | createReplacer(X86::ADD32rr, X86::KADDDkk); |
687 | createReplacer(X86::ADD64rr, X86::KADDQkk); |
688 | |
689 | createReplacer(X86::NOT32r, X86::KNOTDkk); |
690 | createReplacer(X86::NOT64r, X86::KNOTQkk); |
691 | |
692 | createReplacer(X86::OR32rr, X86::KORDkk); |
693 | createReplacer(X86::OR64rr, X86::KORQkk); |
694 | |
695 | createReplacer(X86::AND32rr, X86::KANDDkk); |
696 | createReplacer(X86::AND64rr, X86::KANDQkk); |
697 | |
698 | createReplacer(X86::ANDN32rr, X86::KANDNDkk); |
699 | createReplacer(X86::ANDN64rr, X86::KANDNQkk); |
700 | |
701 | createReplacer(X86::XOR32rr, X86::KXORDkk); |
702 | createReplacer(X86::XOR64rr, X86::KXORQkk); |
703 | |
704 | if (HasNDD) { |
705 | createReplacer(X86::SHR32ri_ND, X86::KSHIFTRDki); |
706 | createReplacer(X86::SHL32ri_ND, X86::KSHIFTLDki); |
707 | createReplacer(X86::ADD32rr_ND, X86::KADDDkk); |
708 | createReplacer(X86::NOT32r_ND, X86::KNOTDkk); |
709 | createReplacer(X86::OR32rr_ND, X86::KORDkk); |
710 | createReplacer(X86::AND32rr_ND, X86::KANDDkk); |
711 | createReplacer(X86::XOR32rr_ND, X86::KXORDkk); |
712 | createReplacer(X86::SHR64ri_ND, X86::KSHIFTRQki); |
713 | createReplacer(X86::SHL64ri_ND, X86::KSHIFTLQki); |
714 | createReplacer(X86::ADD64rr_ND, X86::KADDQkk); |
715 | createReplacer(X86::NOT64r_ND, X86::KNOTQkk); |
716 | createReplacer(X86::OR64rr_ND, X86::KORQkk); |
717 | createReplacer(X86::AND64rr_ND, X86::KANDQkk); |
718 | createReplacer(X86::XOR64rr_ND, X86::KXORQkk); |
719 | } |
720 | |
721 | // TODO: KTEST is not a replacement for TEST due to flag differences. Need |
722 | // to prove only Z flag is used. |
723 | // createReplacer(X86::TEST32rr, X86::KTESTDkk); |
724 | // createReplacer(X86::TEST64rr, X86::KTESTQkk); |
725 | } |
726 | |
727 | if (STI->hasDQI()) { |
728 | createReplacer(X86::ADD8rr, X86::KADDBkk); |
729 | createReplacer(X86::ADD16rr, X86::KADDWkk); |
730 | |
731 | createReplacer(X86::AND8rr, X86::KANDBkk); |
732 | |
733 | createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); |
734 | createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk)); |
735 | createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); |
736 | |
737 | createReplacer(X86::NOT8r, X86::KNOTBkk); |
738 | |
739 | createReplacer(X86::OR8rr, X86::KORBkk); |
740 | |
741 | createReplacer(X86::SHR8ri, X86::KSHIFTRBki); |
742 | createReplacer(X86::SHL8ri, X86::KSHIFTLBki); |
743 | |
744 | // TODO: KTEST is not a replacement for TEST due to flag differences. Need |
745 | // to prove only Z flag is used. |
746 | // createReplacer(X86::TEST8rr, X86::KTESTBkk); |
747 | // createReplacer(X86::TEST16rr, X86::KTESTWkk); |
748 | |
749 | createReplacer(X86::XOR8rr, X86::KXORBkk); |
750 | |
751 | if (HasNDD) { |
752 | createReplacer(X86::ADD8rr_ND, X86::KADDBkk); |
753 | createReplacer(X86::ADD16rr_ND, X86::KADDWkk); |
754 | createReplacer(X86::AND8rr_ND, X86::KANDBkk); |
755 | createReplacer(X86::NOT8r_ND, X86::KNOTBkk); |
756 | createReplacer(X86::OR8rr_ND, X86::KORBkk); |
757 | createReplacer(X86::SHR8ri_ND, X86::KSHIFTRBki); |
758 | createReplacer(X86::SHL8ri_ND, X86::KSHIFTLBki); |
759 | createReplacer(X86::XOR8rr_ND, X86::KXORBkk); |
760 | } |
761 | } |
762 | #undef GET_EGPR_IF_ENABLED |
763 | } |
764 | |
765 | bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { |
766 | if (skipFunction(F: MF.getFunction())) |
767 | return false; |
768 | if (DisableX86DomainReassignment) |
769 | return false; |
770 | |
771 | LLVM_DEBUG( |
772 | dbgs() << "***** Machine Function before Domain Reassignment *****\n" ); |
773 | LLVM_DEBUG(MF.print(dbgs())); |
774 | |
775 | STI = &MF.getSubtarget<X86Subtarget>(); |
776 | // GPR->K is the only transformation currently supported, bail out early if no |
777 | // AVX512. |
778 | // TODO: We're also bailing of AVX512BW isn't supported since we use VK32 and |
779 | // VK64 for GR32/GR64, but those aren't legal classes on KNL. If the register |
780 | // coalescer doesn't clean it up and we generate a spill we will crash. |
781 | if (!STI->hasAVX512() || !STI->hasBWI()) |
782 | return false; |
783 | |
784 | MRI = &MF.getRegInfo(); |
785 | assert(MRI->isSSA() && "Expected MIR to be in SSA form" ); |
786 | |
787 | TII = STI->getInstrInfo(); |
788 | initConverters(); |
789 | bool Changed = false; |
790 | |
791 | EnclosedEdges.clear(); |
792 | EnclosedInstrs.clear(); |
793 | |
794 | std::vector<Closure> Closures; |
795 | |
796 | // Go over all virtual registers and calculate a closure. |
797 | unsigned ClosureID = 0; |
798 | for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { |
799 | Register Reg = Register::index2VirtReg(Index: Idx); |
800 | |
801 | // Skip unused VRegs. |
802 | if (MRI->reg_nodbg_empty(RegNo: Reg)) |
803 | continue; |
804 | |
805 | // GPR only current source domain supported. |
806 | if (!MRI->getTargetRegisterInfo()->isGeneralPurposeRegisterClass( |
807 | RC: MRI->getRegClass(Reg))) |
808 | continue; |
809 | |
810 | // Register already in closure. |
811 | if (EnclosedEdges.contains(Val: Reg)) |
812 | continue; |
813 | |
814 | // Calculate closure starting with Reg. |
815 | Closure C(ClosureID++, {MaskDomain}); |
816 | buildClosure(C, Reg); |
817 | |
818 | // Collect all closures that can potentially be converted. |
819 | if (!C.empty() && C.isLegal(RD: MaskDomain)) |
820 | Closures.push_back(x: std::move(C)); |
821 | } |
822 | |
823 | for (Closure &C : Closures) { |
824 | LLVM_DEBUG(C.dump(MRI)); |
825 | if (isReassignmentProfitable(C, Domain: MaskDomain)) { |
826 | reassign(C, Domain: MaskDomain); |
827 | ++NumClosuresConverted; |
828 | Changed = true; |
829 | } |
830 | } |
831 | |
832 | LLVM_DEBUG( |
833 | dbgs() << "***** Machine Function after Domain Reassignment *****\n" ); |
834 | LLVM_DEBUG(MF.print(dbgs())); |
835 | |
836 | return Changed; |
837 | } |
838 | |
839 | INITIALIZE_PASS(X86DomainReassignment, "x86-domain-reassignment" , |
840 | "X86 Domain Reassignment Pass" , false, false) |
841 | |
842 | /// Returns an instance of the Domain Reassignment pass. |
843 | FunctionPass *llvm::createX86DomainReassignmentPass() { |
844 | return new X86DomainReassignment(); |
845 | } |
846 | |