//===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// The RenameIndependentSubregs pass leaves large partially used super
/// registers, for example:
///   undef %0.sub4:VReg_1024 = ...
///   %0.sub5:VReg_1024 = ...
///   %0.sub6:VReg_1024 = ...
///   %0.sub7:VReg_1024 = ...
///   use %0.sub4_sub5_sub6_sub7
///   use %0.sub6_sub7
///
/// GCNRewritePartialRegUses runs right after RenameIndependentSubregs and
/// rewrites such partially used super registers with registers of minimal
/// size:
///   undef %0.sub0:VReg_128 = ...
///   %0.sub1:VReg_128 = ...
///   %0.sub2:VReg_128 = ...
///   %0.sub3:VReg_128 = ...
///   use %0.sub0_sub1_sub2_sub3
///   use %0.sub2_sub3
///
/// This avoids the need to track subreg lanemasks during register pressure
/// calculation and creates more opportunities for code that is unaware of
/// lanemasks.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "rewrite-partial-reg-uses"

namespace {

class GCNRewritePartialRegUses : public MachineFunctionPass {
public:
  static char ID;
  GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "Rewrite Partial Register Uses";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const TargetInstrInfo *TII;
  LiveIntervals *LIS;

  /// Rewrite partially used register Reg by shifting all its subregisters to
  /// the right and replacing the original register with a register of minimal
  /// size. Return true if the change has been made.
  bool rewriteReg(Register Reg) const;
  /// Value type for SubRegMap below.
  struct SubRegInfo {
    /// Register class required to hold the value stored in the SubReg.
    const TargetRegisterClass *RC;

    /// Index of the right-shifted subregister. If 0, this is the "covering"
    /// subreg, i.e. the subreg that covers all others. The covering subreg
    /// becomes the whole register after the replacement.
    unsigned SubReg = AMDGPU::NoSubRegister;
    SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {}
  };

  /// Map OldSubReg -> { RC, NewSubReg }. Used as an in/out container.
  using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>;

  /// Given the register class RC and the set of used subregs (as keys in the
  /// SubRegs map), return a new register class and the indexes of the
  /// right-shifted subregs (as values in the SubRegs map) such that the
  /// resulting regclass contains registers of minimal size.
  const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                           SubRegMap &SubRegs) const;

  /// Given the regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs, try
  /// to find a new regclass such that:
  /// 1. It has subregs obtained by shifting each OldSubReg by RShift number
  ///    of bits to the right. Every "shifted" subreg should have the same
  ///    SubRegRC. If CoverSubregIdx is not zero, it is a subreg that "covers"
  ///    all other subregs in the pairs; such a subreg becomes the whole
  ///    register.
  /// 2. The resulting register class contains registers of minimal size but
  ///    not less than RegNumBits.
  ///
  /// SubRegs is a map of OldSubReg -> [SubRegRC, NewSubReg] and is used as an
  /// in/out parameter:
  ///   OldSubReg - input parameter,
  ///   SubRegRC  - input parameter (cannot be null),
  ///   NewSubReg - output, contains the shifted subregs on return.
  const TargetRegisterClass *
  getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
                                unsigned RegNumBits, unsigned CoverSubregIdx,
                                SubRegMap &SubRegs) const;

  /// Update live intervals after rewriting OldReg to NewReg with the SubRegs
  /// map describing the OldSubReg -> NewSubReg mapping.
  void updateLiveIntervals(Register OldReg, Register NewReg,
                           SubRegMap &SubRegs) const;

  /// Helper methods.

  /// Return the reg class expected by MO's parent instruction for the given
  /// operand MO.
  const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;

  /// Find the version of SubReg right-shifted by RShift bits if it exists;
  /// return 0 otherwise.
  unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;

  /// Find the subreg index with the given Offset and Size; return 0 if there
  /// is no such subregister index. The result is cached in the SubRegs data
  /// member.
  unsigned getSubReg(unsigned Offset, unsigned Size) const;

  /// Cache for the getSubReg method: {Offset, Size} -> SubReg index.
  mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;

  /// Return a bit mask containing all register classes that are projected
  /// into RC by SubRegIdx. The result is cached in the SuperRegMasks data
  /// member.
  const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
                                       unsigned SubRegIdx) const;

  /// Cache for the getSuperRegClassMask method:
  /// {RC, SubRegIdx} -> class bitmask.
  mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
                        const uint32_t *>
      SuperRegMasks;

  /// Return a bitmask containing all allocatable register classes with
  /// registers aligned at AlignNumBits. The result is cached in the
  /// AllocatableAndAlignedRegClassMasks data member.
  const BitVector &
  getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;

  /// Cache for the getAllocatableAndAlignedRegClassMask method:
  /// AlignNumBits -> class bitmask.
  mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
};

} // end anonymous namespace

// TODO: Move this to TableGen and use binary search by Offset.
unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
                                             unsigned Size) const {
  const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
  if (Inserted) {
    for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
      if (TRI->getSubRegIdxOffset(Idx) == Offset &&
          TRI->getSubRegIdxSize(Idx) == Size) {
        I->second = Idx;
        break;
      }
    }
  }
  return I->second;
}

unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
                                               unsigned RShift) const {
  unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
  return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}

const uint32_t *
GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
                                               unsigned SubRegIdx) const {
  const auto [I, Inserted] =
      SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
  if (Inserted) {
    for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
      if (RCI.getSubReg() == SubRegIdx) {
        I->second = RCI.getMask();
        break;
      }
    }
  }
  return I->second;
}

const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
    unsigned AlignNumBits) const {
  const auto [I, Inserted] =
      AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
  if (Inserted) {
    BitVector &BV = I->second;
    BV.resize(TRI->getNumRegClasses());
    for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
      auto *RC = TRI->getRegClass(ClassID);
      if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
        BV.set(ClassID);
    }
  }
  return I->second;
}

const TargetRegisterClass *
GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
    const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
    unsigned CoverSubregIdx, SubRegMap &SubRegs) const {

  unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
  LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
                    << '\n');

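  // Start from the set of all allocatable register classes with the required
  // alignment and narrow it down: each loop iteration below intersects away
  // the classes that cannot provide one of the shifted subregs with its
  // required subreg class.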
  BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
  for (auto &[OldSubReg, SRI] : SubRegs) {
    auto &[SubRegRC, NewSubReg] = SRI;
    assert(SubRegRC);

    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':'
                      << TRI->getRegClassName(SubRegRC)
                      << (SubRegRC->isAllocatable() ? "" : " not alloc")
                      << " -> ");

    if (OldSubReg == CoverSubregIdx) {
      // The covering subreg will become a full register, RC should be
      // allocatable.
      assert(SubRegRC->isAllocatable());
      NewSubReg = AMDGPU::NoSubRegister;
      LLVM_DEBUG(dbgs() << "whole reg");
    } else {
      NewSubReg = shiftSubReg(OldSubReg, RShift);
      if (!NewSubReg) {
        LLVM_DEBUG(dbgs() << "none\n");
        return nullptr;
      }
      LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
    }

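    // For a shifted subreg, the candidate classes are the super-register
    // classes whose NewSubReg subregister lands in SubRegRC; for the covering
    // subreg (NewSubReg == 0), they are the subclasses of SubRegRC itself.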
    const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
                                     : SubRegRC->getSubClassMask();
    if (!Mask)
      llvm_unreachable("no register class mask?");

    ClassMask.clearBitsNotInMask(Mask);
    // Don't try to early exit because checking if ClassMask has set bits isn't
    // that cheap and we expect it to pass in most cases.
    LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
  }

  // ClassMask is now the set of all register classes such that each class is
  // allocatable, aligned, has all the shifted subregs, and each subreg has the
  // required register class (see SubRegRC above). Select the first (that is,
  // the largest) register class whose registers are of minimal size but not
  // smaller than RegNumBits. We have to check the register size because we may
  // encounter classes of smaller registers, like VReg_1, in some situations.
  const TargetRegisterClass *MinRC = nullptr;
  unsigned MinNumBits = std::numeric_limits<unsigned>::max();
  for (unsigned ClassID : ClassMask.set_bits()) {
    auto *RC = TRI->getRegClass(ClassID);
    unsigned NumBits = TRI->getRegSizeInBits(*RC);
    if (NumBits < MinNumBits && NumBits >= RegNumBits) {
      MinNumBits = NumBits;
      MinRC = RC;
    }
    if (MinNumBits == RegNumBits)
      break;
  }
#ifndef NDEBUG
  if (MinRC) {
    assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
    for (auto [SubReg, SRI] : SubRegs)
      // Check that all registers in MinRC support the SRI.SubReg subregister.
      assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
  }
#endif
  // RShift may be zero; in that case we are just trying to find a smaller
  // register.
  return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
}

const TargetRegisterClass *
GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
                                        SubRegMap &SubRegs) const {
  unsigned CoverSubreg = AMDGPU::NoSubRegister;
  unsigned Offset = std::numeric_limits<unsigned>::max();
  unsigned End = 0;
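  // Look for a covering subreg: a used subreg whose offset equals the minimal
  // offset and whose end equals the maximal end over all used subregs, i.e.
  // one that spans all the others.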
  for (auto [SubReg, SRI] : SubRegs) {
    unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
    unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
    if (SubRegOffset < Offset) {
      Offset = SubRegOffset;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegEnd > End) {
      End = SubRegEnd;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegOffset == Offset && SubRegEnd == End)
      CoverSubreg = SubReg;
  }
  // If a covering subreg is found, shift everything so that the covering
  // subreg ends up in the rightmost position.
  if (CoverSubreg != AMDGPU::NoSubRegister)
    return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
                                         SubRegs);

  // Otherwise find the subreg with the maximum required alignment and shift it
  // and all other subregs to the rightmost possible position with respect to
  // that alignment.
  unsigned MaxAlign = 0;
  for (auto [SubReg, SRI] : SubRegs)
    MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));

  unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
  for (auto [SubReg, SRI] : SubRegs) {
    if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
      continue;
    FirstMaxAlignedSubRegOffset =
        std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
    if (FirstMaxAlignedSubRegOffset == Offset)
      break;
  }

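  // Worked example with hypothetical values: if the minimal used offset is bit
  // 32, the first max-aligned subreg starts at bit 96, and MaxAlign is 64,
  // then the tentative shifted offset is 96 - 32 = 64, which alignTo keeps at
  // 64, so RShift = 96 - 64 = 32 and every subreg moves right by 32 bits.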
  unsigned NewOffsetOfMaxAlignedSubReg =
      alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);

  if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
    llvm_unreachable("misaligned subreg");

  unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
  return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
}

// Only the subranges' lanemasks of the original interval need to be modified.
// The subrange for a covering subreg becomes the main range.
void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
                                                   Register NewReg,
                                                   SubRegMap &SubRegs) const {
  if (!LIS->hasInterval(OldReg))
    return;

  auto &OldLI = LIS->getInterval(OldReg);
  auto &NewLI = LIS->createEmptyInterval(NewReg);

  auto &Allocator = LIS->getVNInfoAllocator();
  NewLI.setWeight(OldLI.weight());

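  // For each subrange of the old interval, find the used subreg whose lane
  // mask matches the subrange's lane mask, and transfer the subrange to the
  // new interval under the corresponding new subreg index.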
  for (auto &SR : OldLI.subranges()) {
    auto I = find_if(SubRegs, [&](auto &P) {
      return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
    });

    if (I == SubRegs.end()) {
      // There might be a situation when subranges don't exactly match the used
      // subregs, for example:
      //   %120 [160r,1392r:0) 0@160r
      //     L000000000000C000 [160r,1392r:0) 0@160r
      //     L0000000000003000 [160r,1392r:0) 0@160r
      //     L0000000000000C00 [160r,1392r:0) 0@160r
      //     L0000000000000300 [160r,1392r:0) 0@160r
      //     L0000000000000003 [160r,1104r:0) 0@160r
      //     L000000000000000C [160r,1104r:0) 0@160r
      //     L0000000000000030 [160r,1104r:0) 0@160r
      //     L00000000000000C0 [160r,1104r:0) 0@160r
      // but the used subregs are:
      //   sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF
      //   sub0_sub1_sub2_sub3, L00000000000000FF
      //   sub4_sub5_sub6_sub7, L000000000000FF00
      // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7
      // have several subranges with the same lifetime. For such cases just
      // recreate the interval.
      LIS->removeInterval(OldReg);
      LIS->removeInterval(NewReg);
      LIS->createAndComputeVirtRegInterval(NewReg);
      return;
    }

    if (unsigned NewSubReg = I->second.SubReg)
      NewLI.createSubRangeFrom(Allocator,
                               TRI->getSubRegIndexLaneMask(NewSubReg), SR);
    else // This is the covering subreg (0 index) - set it as the main range.
      NewLI.assign(SR, Allocator);

    SubRegs.erase(I);
  }
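  // If the old interval had no subranges at all, nothing was transferred
  // above; copy the old main range directly.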
  if (NewLI.empty())
    NewLI.assign(OldLI, Allocator);
  NewLI.verify(MRI);
  LIS->removeInterval(OldReg);
}

const TargetRegisterClass *
GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
  MachineInstr *MI = MO.getParent();
  return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO),
                          TRI, *MI->getParent()->getParent());
}

bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
  auto Range = MRI->reg_nodbg_operands(Reg);
  if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
        return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
      }))
    return false;

  auto *RC = MRI->getRegClass(Reg);
  LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
                    << ':' << TRI->getRegClassName(RC) << '\n');

  // Collect the used subregs and their reg classes inferred from instruction
  // operands.
  SubRegMap SubRegs;
  for (MachineOperand &MO : Range) {
    const unsigned SubReg = MO.getSubReg();
    assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(SubReg) << ':');

    const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
    const TargetRegisterClass *&SubRegRC = I->second.RC;

    if (Inserted)
      SubRegRC = TRI->getSubRegisterClass(RC, SubReg);

    if (SubRegRC) {
      if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
        LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
                          << TRI->getRegClassName(OpDescRC) << " = ");
        SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
      }
    }

    if (!SubRegRC) {
      LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
      return false;
    }
    LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
  }

  auto *NewRC = getMinSizeReg(RC, SubRegs);
  if (!NewRC) {
    LLVM_DEBUG(dbgs() << "  No improvement achieved\n");
    return false;
  }

  Register NewReg = MRI->createVirtualRegister(NewRC);
  LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
                    << TRI->getRegClassName(RC) << " -> "
                    << printReg(NewReg, TRI) << ':'
                    << TRI->getRegClassName(NewRC) << '\n');

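  // setReg moves the operand to NewReg's use list and thus invalidates the
  // current iterator, hence the early-increment range.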
  for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
    MO.setReg(NewReg);
    // Debug info can refer to the whole reg, just leave it as it is for now.
    // TODO: create some DI shift expression?
    if (MO.isDebug() && MO.getSubReg() == 0)
      continue;
    unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
    MO.setSubReg(SubReg);
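    // A def of the covering subreg becomes a def of the whole register, which
    // can no longer carry the undef (read-undef) flag.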
    if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
      MO.setIsUndef(false);
  }

  if (LIS)
    updateLiveIntervals(Reg, NewReg, SubRegs);

  return true;
}

bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
  TII = MF.getSubtarget().getInstrInfo();
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  bool Changed = false;
  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
    Changed |= rewriteReg(Register::index2VirtReg(I));
  }
  return Changed;
}

char GCNRewritePartialRegUses::ID;

char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
                      "Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
                    "Rewrite Partial Register Uses", false, false)