1 | //===- HexagonExpandCondsets.cpp ------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // Replace mux instructions with the corresponding legal instructions. |
10 | // It is meant to work post-SSA, but still on virtual registers. It was |
11 | // originally placed between register coalescing and machine instruction |
12 | // scheduler. |
13 | // In this place in the optimization sequence, live interval analysis had |
14 | // been performed, and the live intervals should be preserved. A large part |
15 | // of the code deals with preserving the liveness information. |
16 | // |
17 | // Liveness tracking aside, the main functionality of this pass is divided |
18 | // into two steps. The first step is to replace an instruction |
19 | // %0 = C2_mux %1, %2, %3 |
20 | // with a pair of conditional transfers |
21 | // %0 = A2_tfrt %1, %2 |
22 | // %0 = A2_tfrf %1, %3 |
23 | // It is the intention that the execution of this pass could be terminated |
24 | // after this step, and the code generated would be functionally correct. |
25 | // |
26 | // If the uses of the source values %1 and %2 are kills, and their |
27 | // definitions are predicable, then in the second step, the conditional |
28 | // transfers will then be rewritten as predicated instructions. E.g. |
29 | // %0 = A2_or %1, %2 |
30 | // %3 = A2_tfrt %99, killed %0 |
31 | // will be rewritten as |
32 | // %3 = A2_port %99, %1, %2 |
33 | // |
34 | // This replacement has two variants: "up" and "down". Consider this case: |
35 | // %0 = A2_or %1, %2 |
36 | // ... [intervening instructions] ... |
37 | // %3 = A2_tfrt %99, killed %0 |
38 | // variant "up": |
39 | // %3 = A2_port %99, %1, %2 |
40 | // ... [intervening instructions, %0->vreg3] ... |
41 | // [deleted] |
42 | // variant "down": |
43 | // [deleted] |
44 | // ... [intervening instructions] ... |
45 | // %3 = A2_port %99, %1, %2 |
46 | // |
47 | // Both, one or none of these variants may be valid, and checks are made |
48 | // to rule out inapplicable variants. |
49 | // |
50 | // As an additional optimization, before either of the two steps above is |
51 | // executed, the pass attempts to coalesce the target register with one of |
52 | // the source registers, e.g. given an instruction |
53 | // %3 = C2_mux %0, %1, %2 |
54 | // %3 will be coalesced with either %1 or %2. If this succeeds, |
55 | // the instruction would then be (for example) |
56 | // %3 = C2_mux %0, %3, %2 |
57 | // and, under certain circumstances, this could result in only one predicated |
58 | // instruction: |
59 | // %3 = A2_tfrf %0, %2 |
60 | // |
61 | |
62 | // Splitting a definition of a register into two predicated transfers |
63 | // creates a complication in liveness tracking. Live interval computation |
64 | // will see both instructions as actual definitions, and will mark the |
65 | // first one as dead. The definition is not actually dead, and this |
66 | // situation will need to be fixed. For example: |
67 | // dead %1 = A2_tfrt ... ; marked as dead |
68 | // %1 = A2_tfrf ... |
69 | // |
70 | // Since any of the individual predicated transfers may end up getting |
71 | // removed (in case it is an identity copy), some pre-existing def may |
72 | // be marked as dead after live interval recomputation: |
73 | // dead %1 = ... ; marked as dead |
74 | // ... |
75 | // %1 = A2_tfrf ... ; if A2_tfrt is removed |
76 | // This case happens if %1 was used as a source in A2_tfrt, which means |
77 | // that is it actually live at the A2_tfrf, and so the now dead definition |
78 | // of %1 will need to be updated to non-dead at some point. |
79 | // |
80 | // This issue could be remedied by adding implicit uses to the predicated |
81 | // transfers, but this will create a problem with subsequent predication, |
82 | // since the transfers will no longer be possible to reorder. To avoid |
83 | // that, the initial splitting will not add any implicit uses. These |
84 | // implicit uses will be added later, after predication. The extra price, |
85 | // however, is that finding the locations where the implicit uses need |
86 | // to be added, and updating the live ranges will be more involved. |
87 | |
88 | #include "HexagonInstrInfo.h" |
89 | #include "HexagonRegisterInfo.h" |
90 | #include "llvm/ADT/DenseMap.h" |
91 | #include "llvm/ADT/SetVector.h" |
92 | #include "llvm/ADT/SmallVector.h" |
93 | #include "llvm/ADT/StringRef.h" |
94 | #include "llvm/CodeGen/LiveInterval.h" |
95 | #include "llvm/CodeGen/LiveIntervals.h" |
96 | #include "llvm/CodeGen/MachineBasicBlock.h" |
97 | #include "llvm/CodeGen/MachineDominators.h" |
98 | #include "llvm/CodeGen/MachineFunction.h" |
99 | #include "llvm/CodeGen/MachineFunctionPass.h" |
100 | #include "llvm/CodeGen/MachineInstr.h" |
101 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
102 | #include "llvm/CodeGen/MachineOperand.h" |
103 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
104 | #include "llvm/CodeGen/SlotIndexes.h" |
105 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
106 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
107 | #include "llvm/IR/DebugLoc.h" |
108 | #include "llvm/IR/Function.h" |
109 | #include "llvm/InitializePasses.h" |
110 | #include "llvm/MC/LaneBitmask.h" |
111 | #include "llvm/Pass.h" |
112 | #include "llvm/Support/CommandLine.h" |
113 | #include "llvm/Support/Debug.h" |
114 | #include "llvm/Support/ErrorHandling.h" |
115 | #include "llvm/Support/raw_ostream.h" |
116 | #include <cassert> |
117 | #include <iterator> |
118 | #include <map> |
119 | #include <set> |
120 | #include <utility> |
121 | |
122 | #define DEBUG_TYPE "expand-condsets" |
123 | |
124 | using namespace llvm; |
125 | |
126 | static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit" , |
127 | cl::init(Val: ~0U), cl::Hidden, cl::desc("Max number of mux expansions" )); |
128 | static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit" , |
129 | cl::init(Val: ~0U), cl::Hidden, cl::desc("Max number of segment coalescings" )); |
130 | |
131 | namespace llvm { |
132 | |
133 | void initializeHexagonExpandCondsetsPass(PassRegistry&); |
134 | FunctionPass *createHexagonExpandCondsets(); |
135 | |
136 | } // end namespace llvm |
137 | |
138 | namespace { |
139 | |
140 | class HexagonExpandCondsets : public MachineFunctionPass { |
141 | public: |
142 | static char ID; |
143 | |
144 | HexagonExpandCondsets() : MachineFunctionPass(ID) { |
145 | if (OptCoaLimit.getPosition()) |
146 | CoaLimitActive = true, CoaLimit = OptCoaLimit; |
147 | if (OptTfrLimit.getPosition()) |
148 | TfrLimitActive = true, TfrLimit = OptTfrLimit; |
149 | initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); |
150 | } |
151 | |
152 | StringRef getPassName() const override { return "Hexagon Expand Condsets" ; } |
153 | |
154 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
155 | AU.addRequired<LiveIntervalsWrapperPass>(); |
156 | AU.addPreserved<LiveIntervalsWrapperPass>(); |
157 | AU.addPreserved<SlotIndexesWrapperPass>(); |
158 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
159 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
160 | MachineFunctionPass::getAnalysisUsage(AU); |
161 | } |
162 | |
163 | bool runOnMachineFunction(MachineFunction &MF) override; |
164 | |
165 | private: |
166 | const HexagonInstrInfo *HII = nullptr; |
167 | const TargetRegisterInfo *TRI = nullptr; |
168 | MachineDominatorTree *MDT; |
169 | MachineRegisterInfo *MRI = nullptr; |
170 | LiveIntervals *LIS = nullptr; |
171 | bool CoaLimitActive = false; |
172 | bool TfrLimitActive = false; |
173 | unsigned CoaLimit; |
174 | unsigned TfrLimit; |
175 | unsigned CoaCounter = 0; |
176 | unsigned TfrCounter = 0; |
177 | |
178 | // FIXME: Consolidate duplicate definitions of RegisterRef |
179 | struct RegisterRef { |
180 | RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), |
181 | Sub(Op.getSubReg()) {} |
182 | RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} |
183 | |
184 | bool operator== (RegisterRef RR) const { |
185 | return Reg == RR.Reg && Sub == RR.Sub; |
186 | } |
187 | bool operator!= (RegisterRef RR) const { return !operator==(RR); } |
188 | bool operator< (RegisterRef RR) const { |
189 | return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub); |
190 | } |
191 | |
192 | Register Reg; |
193 | unsigned Sub; |
194 | }; |
195 | |
196 | using ReferenceMap = DenseMap<unsigned, unsigned>; |
197 | enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; |
198 | enum { Exec_Then = 0x10, Exec_Else = 0x20 }; |
199 | |
200 | unsigned getMaskForSub(unsigned Sub); |
201 | bool isCondset(const MachineInstr &MI); |
202 | LaneBitmask getLaneMask(Register Reg, unsigned Sub); |
203 | |
204 | void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); |
205 | bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); |
206 | |
207 | void updateDeadsInRange(Register Reg, LaneBitmask LM, LiveRange &Range); |
208 | void updateKillFlags(Register Reg); |
209 | void updateDeadFlags(Register Reg); |
210 | void recalculateLiveInterval(Register Reg); |
211 | void removeInstr(MachineInstr &MI); |
212 | void updateLiveness(const std::set<Register> &RegSet, bool Recalc, |
213 | bool UpdateKills, bool UpdateDeads); |
214 | void distributeLiveIntervals(const std::set<Register> &Regs); |
215 | |
216 | unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); |
217 | MachineInstr *genCondTfrFor(MachineOperand &SrcOp, |
218 | MachineBasicBlock::iterator At, unsigned DstR, |
219 | unsigned DstSR, const MachineOperand &PredOp, bool PredSense, |
220 | bool ReadUndef, bool ImpUse); |
221 | bool split(MachineInstr &MI, std::set<Register> &UpdRegs); |
222 | |
223 | bool isPredicable(MachineInstr *MI); |
224 | MachineInstr *getReachingDefForPred(RegisterRef RD, |
225 | MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); |
226 | bool canMoveOver(MachineInstr &MI, ReferenceMap &Defs, ReferenceMap &Uses); |
227 | bool canMoveMemTo(MachineInstr &MI, MachineInstr &ToI, bool IsDown); |
228 | void predicateAt(const MachineOperand &DefOp, MachineInstr &MI, |
229 | MachineBasicBlock::iterator Where, |
230 | const MachineOperand &PredOp, bool Cond, |
231 | std::set<Register> &UpdRegs); |
232 | void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, |
233 | bool Cond, MachineBasicBlock::iterator First, |
234 | MachineBasicBlock::iterator Last); |
235 | bool predicate(MachineInstr &TfrI, bool Cond, std::set<Register> &UpdRegs); |
236 | bool predicateInBlock(MachineBasicBlock &B, std::set<Register> &UpdRegs); |
237 | |
238 | bool isIntReg(RegisterRef RR, unsigned &BW); |
239 | bool isIntraBlocks(LiveInterval &LI); |
240 | bool coalesceRegisters(RegisterRef R1, RegisterRef R2); |
241 | bool coalesceSegments(const SmallVectorImpl<MachineInstr *> &Condsets, |
242 | std::set<Register> &UpdRegs); |
243 | }; |
244 | |
245 | } // end anonymous namespace |
246 | |
247 | char HexagonExpandCondsets::ID = 0; |
248 | |
249 | namespace llvm { |
250 | |
251 | char &HexagonExpandCondsetsID = HexagonExpandCondsets::ID; |
252 | |
253 | } // end namespace llvm |
254 | |
255 | INITIALIZE_PASS_BEGIN(HexagonExpandCondsets, "expand-condsets" , |
256 | "Hexagon Expand Condsets" , false, false) |
257 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
258 | INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) |
259 | INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) |
260 | INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets" , |
261 | "Hexagon Expand Condsets" , false, false) |
262 | |
263 | unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { |
264 | switch (Sub) { |
265 | case Hexagon::isub_lo: |
266 | case Hexagon::vsub_lo: |
267 | return Sub_Low; |
268 | case Hexagon::isub_hi: |
269 | case Hexagon::vsub_hi: |
270 | return Sub_High; |
271 | case Hexagon::NoSubRegister: |
272 | return Sub_None; |
273 | } |
274 | llvm_unreachable("Invalid subregister" ); |
275 | } |
276 | |
277 | bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) { |
278 | unsigned Opc = MI.getOpcode(); |
279 | switch (Opc) { |
280 | case Hexagon::C2_mux: |
281 | case Hexagon::C2_muxii: |
282 | case Hexagon::C2_muxir: |
283 | case Hexagon::C2_muxri: |
284 | case Hexagon::PS_pselect: |
285 | return true; |
286 | break; |
287 | } |
288 | return false; |
289 | } |
290 | |
291 | LaneBitmask HexagonExpandCondsets::getLaneMask(Register Reg, unsigned Sub) { |
292 | assert(Reg.isVirtual()); |
293 | return Sub != 0 ? TRI->getSubRegIndexLaneMask(SubIdx: Sub) |
294 | : MRI->getMaxLaneMaskForVReg(Reg); |
295 | } |
296 | |
297 | void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, |
298 | unsigned Exec) { |
299 | unsigned Mask = getMaskForSub(Sub: RR.Sub) | Exec; |
300 | ReferenceMap::iterator F = Map.find(Val: RR.Reg); |
301 | if (F == Map.end()) |
302 | Map.insert(KV: std::make_pair(x&: RR.Reg, y&: Mask)); |
303 | else |
304 | F->second |= Mask; |
305 | } |
306 | |
307 | bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, |
308 | unsigned Exec) { |
309 | ReferenceMap::iterator F = Map.find(Val: RR.Reg); |
310 | if (F == Map.end()) |
311 | return false; |
312 | unsigned Mask = getMaskForSub(Sub: RR.Sub) | Exec; |
313 | if (Mask & F->second) |
314 | return true; |
315 | return false; |
316 | } |
317 | |
318 | void HexagonExpandCondsets::updateKillFlags(Register Reg) { |
319 | auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void { |
320 | // Set the <kill> flag on a use of Reg whose lane mask is contained in LM. |
321 | MachineInstr *MI = LIS->getInstructionFromIndex(index: K); |
322 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { |
323 | MachineOperand &Op = MI->getOperand(i); |
324 | if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg || |
325 | MI->isRegTiedToDefOperand(UseOpIdx: i)) |
326 | continue; |
327 | LaneBitmask SLM = getLaneMask(Reg, Sub: Op.getSubReg()); |
328 | if ((SLM & LM) == SLM) { |
329 | // Only set the kill flag on the first encountered use of Reg in this |
330 | // instruction. |
331 | Op.setIsKill(true); |
332 | break; |
333 | } |
334 | } |
335 | }; |
336 | |
337 | LiveInterval &LI = LIS->getInterval(Reg); |
338 | for (auto I = LI.begin(), E = LI.end(); I != E; ++I) { |
339 | if (!I->end.isRegister()) |
340 | continue; |
341 | // Do not mark the end of the segment as <kill>, if the next segment |
342 | // starts with a predicated instruction. |
343 | auto NextI = std::next(x: I); |
344 | if (NextI != E && NextI->start.isRegister()) { |
345 | MachineInstr *DefI = LIS->getInstructionFromIndex(index: NextI->start); |
346 | if (HII->isPredicated(MI: *DefI)) |
347 | continue; |
348 | } |
349 | bool WholeReg = true; |
350 | if (LI.hasSubRanges()) { |
351 | auto EndsAtI = [I] (LiveInterval::SubRange &S) -> bool { |
352 | LiveRange::iterator F = S.find(Pos: I->end); |
353 | return F != S.end() && I->end == F->end; |
354 | }; |
355 | // Check if all subranges end at I->end. If so, make sure to kill |
356 | // the whole register. |
357 | for (LiveInterval::SubRange &S : LI.subranges()) { |
358 | if (EndsAtI(S)) |
359 | KillAt(I->end, S.LaneMask); |
360 | else |
361 | WholeReg = false; |
362 | } |
363 | } |
364 | if (WholeReg) |
365 | KillAt(I->end, MRI->getMaxLaneMaskForVReg(Reg)); |
366 | } |
367 | } |
368 | |
369 | void HexagonExpandCondsets::updateDeadsInRange(Register Reg, LaneBitmask LM, |
370 | LiveRange &Range) { |
371 | assert(Reg.isVirtual()); |
372 | if (Range.empty()) |
373 | return; |
374 | |
375 | // Return two booleans: { def-modifes-reg, def-covers-reg }. |
376 | auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> { |
377 | if (!Op.isReg() || !Op.isDef()) |
378 | return { false, false }; |
379 | Register DR = Op.getReg(), DSR = Op.getSubReg(); |
380 | if (!DR.isVirtual() || DR != Reg) |
381 | return { false, false }; |
382 | LaneBitmask SLM = getLaneMask(Reg: DR, Sub: DSR); |
383 | LaneBitmask A = SLM & LM; |
384 | return { A.any(), A == SLM }; |
385 | }; |
386 | |
387 | // The splitting step will create pairs of predicated definitions without |
388 | // any implicit uses (since implicit uses would interfere with predication). |
389 | // This can cause the reaching defs to become dead after live range |
390 | // recomputation, even though they are not really dead. |
391 | // We need to identify predicated defs that need implicit uses, and |
392 | // dead defs that are not really dead, and correct both problems. |
393 | |
394 | auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs, |
395 | MachineBasicBlock *Dest) -> bool { |
396 | for (MachineBasicBlock *D : Defs) { |
397 | if (D != Dest && MDT->dominates(A: D, B: Dest)) |
398 | return true; |
399 | } |
400 | MachineBasicBlock *Entry = &Dest->getParent()->front(); |
401 | SetVector<MachineBasicBlock*> Work(Dest->pred_begin(), Dest->pred_end()); |
402 | for (unsigned i = 0; i < Work.size(); ++i) { |
403 | MachineBasicBlock *B = Work[i]; |
404 | if (Defs.count(key: B)) |
405 | continue; |
406 | if (B == Entry) |
407 | return false; |
408 | for (auto *P : B->predecessors()) |
409 | Work.insert(X: P); |
410 | } |
411 | return true; |
412 | }; |
413 | |
414 | // First, try to extend live range within individual basic blocks. This |
415 | // will leave us only with dead defs that do not reach any predicated |
416 | // defs in the same block. |
417 | SetVector<MachineBasicBlock*> Defs; |
418 | SmallVector<SlotIndex,4> PredDefs; |
419 | for (auto &Seg : Range) { |
420 | if (!Seg.start.isRegister()) |
421 | continue; |
422 | MachineInstr *DefI = LIS->getInstructionFromIndex(index: Seg.start); |
423 | Defs.insert(X: DefI->getParent()); |
424 | if (HII->isPredicated(MI: *DefI)) |
425 | PredDefs.push_back(Elt: Seg.start); |
426 | } |
427 | |
428 | SmallVector<SlotIndex,8> Undefs; |
429 | LiveInterval &LI = LIS->getInterval(Reg); |
430 | LI.computeSubRangeUndefs(Undefs, LaneMask: LM, MRI: *MRI, Indexes: *LIS->getSlotIndexes()); |
431 | |
432 | for (auto &SI : PredDefs) { |
433 | MachineBasicBlock *BB = LIS->getMBBFromIndex(index: SI); |
434 | auto P = Range.extendInBlock(Undefs, StartIdx: LIS->getMBBStartIdx(mbb: BB), Kill: SI); |
435 | if (P.first != nullptr || P.second) |
436 | SI = SlotIndex(); |
437 | } |
438 | |
439 | // Calculate reachability for those predicated defs that were not handled |
440 | // by the in-block extension. |
441 | SmallVector<SlotIndex,4> ExtTo; |
442 | for (auto &SI : PredDefs) { |
443 | if (!SI.isValid()) |
444 | continue; |
445 | MachineBasicBlock *BB = LIS->getMBBFromIndex(index: SI); |
446 | if (BB->pred_empty()) |
447 | continue; |
448 | // If the defs from this range reach SI via all predecessors, it is live. |
449 | // It can happen that SI is reached by the defs through some paths, but |
450 | // not all. In the IR coming into this optimization, SI would not be |
451 | // considered live, since the defs would then not jointly dominate SI. |
452 | // That means that SI is an overwriting def, and no implicit use is |
453 | // needed at this point. Do not add SI to the extension points, since |
454 | // extendToIndices will abort if there is no joint dominance. |
455 | // If the abort was avoided by adding extra undefs added to Undefs, |
456 | // extendToIndices could actually indicate that SI is live, contrary |
457 | // to the original IR. |
458 | if (Dominate(Defs, BB)) |
459 | ExtTo.push_back(Elt: SI); |
460 | } |
461 | |
462 | if (!ExtTo.empty()) |
463 | LIS->extendToIndices(LR&: Range, Indices: ExtTo, Undefs); |
464 | |
465 | // Remove <dead> flags from all defs that are not dead after live range |
466 | // extension, and collect all def operands. They will be used to generate |
467 | // the necessary implicit uses. |
468 | // At the same time, add <dead> flag to all defs that are actually dead. |
469 | // This can happen, for example, when a mux with identical inputs is |
470 | // replaced with a COPY: the use of the predicate register disappears and |
471 | // the dead can become dead. |
472 | std::set<RegisterRef> DefRegs; |
473 | for (auto &Seg : Range) { |
474 | if (!Seg.start.isRegister()) |
475 | continue; |
476 | MachineInstr *DefI = LIS->getInstructionFromIndex(index: Seg.start); |
477 | for (auto &Op : DefI->operands()) { |
478 | auto P = IsRegDef(Op); |
479 | if (P.second && Seg.end.isDead()) { |
480 | Op.setIsDead(true); |
481 | } else if (P.first) { |
482 | DefRegs.insert(x: Op); |
483 | Op.setIsDead(false); |
484 | } |
485 | } |
486 | } |
487 | |
488 | // Now, add implicit uses to each predicated def that is reached |
489 | // by other defs. |
490 | for (auto &Seg : Range) { |
491 | if (!Seg.start.isRegister() || !Range.liveAt(index: Seg.start.getPrevSlot())) |
492 | continue; |
493 | MachineInstr *DefI = LIS->getInstructionFromIndex(index: Seg.start); |
494 | if (!HII->isPredicated(MI: *DefI)) |
495 | continue; |
496 | // Construct the set of all necessary implicit uses, based on the def |
497 | // operands in the instruction. We need to tie the implicit uses to |
498 | // the corresponding defs. |
499 | std::map<RegisterRef,unsigned> ImpUses; |
500 | for (unsigned i = 0, e = DefI->getNumOperands(); i != e; ++i) { |
501 | MachineOperand &Op = DefI->getOperand(i); |
502 | if (!Op.isReg() || !DefRegs.count(x: Op)) |
503 | continue; |
504 | if (Op.isDef()) { |
505 | // Tied defs will always have corresponding uses, so no extra |
506 | // implicit uses are needed. |
507 | if (!Op.isTied()) |
508 | ImpUses.insert(x: {Op, i}); |
509 | } else { |
510 | // This function can be called for the same register with different |
511 | // lane masks. If the def in this instruction was for the whole |
512 | // register, we can get here more than once. Avoid adding multiple |
513 | // implicit uses (or adding an implicit use when an explicit one is |
514 | // present). |
515 | if (Op.isTied()) |
516 | ImpUses.erase(x: Op); |
517 | } |
518 | } |
519 | if (ImpUses.empty()) |
520 | continue; |
521 | MachineFunction &MF = *DefI->getParent()->getParent(); |
522 | for (auto [R, DefIdx] : ImpUses) { |
523 | MachineInstrBuilder(MF, DefI).addReg(RegNo: R.Reg, flags: RegState::Implicit, SubReg: R.Sub); |
524 | DefI->tieOperands(DefIdx, UseIdx: DefI->getNumOperands()-1); |
525 | } |
526 | } |
527 | } |
528 | |
529 | void HexagonExpandCondsets::updateDeadFlags(Register Reg) { |
530 | LiveInterval &LI = LIS->getInterval(Reg); |
531 | if (LI.hasSubRanges()) { |
532 | for (LiveInterval::SubRange &S : LI.subranges()) { |
533 | updateDeadsInRange(Reg, LM: S.LaneMask, Range&: S); |
534 | LIS->shrinkToUses(SR&: S, Reg); |
535 | } |
536 | LI.clear(); |
537 | LIS->constructMainRangeFromSubranges(LI); |
538 | } else { |
539 | updateDeadsInRange(Reg, LM: MRI->getMaxLaneMaskForVReg(Reg), Range&: LI); |
540 | } |
541 | } |
542 | |
543 | void HexagonExpandCondsets::recalculateLiveInterval(Register Reg) { |
544 | LIS->removeInterval(Reg); |
545 | LIS->createAndComputeVirtRegInterval(Reg); |
546 | } |
547 | |
548 | void HexagonExpandCondsets::removeInstr(MachineInstr &MI) { |
549 | LIS->RemoveMachineInstrFromMaps(MI); |
550 | MI.eraseFromParent(); |
551 | } |
552 | |
553 | void HexagonExpandCondsets::updateLiveness(const std::set<Register> &RegSet, |
554 | bool Recalc, bool UpdateKills, |
555 | bool UpdateDeads) { |
556 | UpdateKills |= UpdateDeads; |
557 | for (Register R : RegSet) { |
558 | if (!R.isVirtual()) { |
559 | assert(R.isPhysical()); |
560 | // There shouldn't be any physical registers as operands, except |
561 | // possibly reserved registers. |
562 | assert(MRI->isReserved(R)); |
563 | continue; |
564 | } |
565 | if (Recalc) |
566 | recalculateLiveInterval(Reg: R); |
567 | if (UpdateKills) |
568 | MRI->clearKillFlags(Reg: R); |
569 | if (UpdateDeads) |
570 | updateDeadFlags(Reg: R); |
571 | // Fixing <dead> flags may extend live ranges, so reset <kill> flags |
572 | // after that. |
573 | if (UpdateKills) |
574 | updateKillFlags(Reg: R); |
575 | LIS->getInterval(Reg: R).verify(); |
576 | } |
577 | } |
578 | |
579 | void HexagonExpandCondsets::distributeLiveIntervals( |
580 | const std::set<Register> &Regs) { |
581 | ConnectedVNInfoEqClasses EQC(*LIS); |
582 | for (Register R : Regs) { |
583 | if (!R.isVirtual()) |
584 | continue; |
585 | LiveInterval &LI = LIS->getInterval(Reg: R); |
586 | unsigned NumComp = EQC.Classify(LR: LI); |
587 | if (NumComp == 1) |
588 | continue; |
589 | |
590 | SmallVector<LiveInterval*> NewLIs; |
591 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: LI.reg()); |
592 | for (unsigned I = 1; I < NumComp; ++I) { |
593 | Register NewR = MRI->createVirtualRegister(RegClass: RC); |
594 | NewLIs.push_back(Elt: &LIS->createEmptyInterval(Reg: NewR)); |
595 | } |
596 | EQC.Distribute(LI, LIV: NewLIs.begin(), MRI&: *MRI); |
597 | } |
598 | } |
599 | |
600 | /// Get the opcode for a conditional transfer of the value in SO (source |
601 | /// operand). The condition (true/false) is given in Cond. |
602 | unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, |
603 | bool IfTrue) { |
604 | if (SO.isReg()) { |
605 | MCRegister PhysR; |
606 | RegisterRef RS = SO; |
607 | if (RS.Reg.isVirtual()) { |
608 | const TargetRegisterClass *VC = MRI->getRegClass(Reg: RS.Reg); |
609 | assert(VC->begin() != VC->end() && "Empty register class" ); |
610 | PhysR = *VC->begin(); |
611 | } else { |
612 | PhysR = RS.Reg; |
613 | } |
614 | MCRegister PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(Reg: PhysR, Idx: RS.Sub); |
615 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg: PhysS); |
616 | switch (TRI->getRegSizeInBits(RC: *RC)) { |
617 | case 32: |
618 | return IfTrue ? Hexagon::A2_tfrt : Hexagon::A2_tfrf; |
619 | case 64: |
620 | return IfTrue ? Hexagon::A2_tfrpt : Hexagon::A2_tfrpf; |
621 | } |
622 | llvm_unreachable("Invalid register operand" ); |
623 | } |
624 | switch (SO.getType()) { |
625 | case MachineOperand::MO_Immediate: |
626 | case MachineOperand::MO_FPImmediate: |
627 | case MachineOperand::MO_ConstantPoolIndex: |
628 | case MachineOperand::MO_TargetIndex: |
629 | case MachineOperand::MO_JumpTableIndex: |
630 | case MachineOperand::MO_ExternalSymbol: |
631 | case MachineOperand::MO_GlobalAddress: |
632 | case MachineOperand::MO_BlockAddress: |
633 | return IfTrue ? Hexagon::C2_cmoveit : Hexagon::C2_cmoveif; |
634 | default: |
635 | break; |
636 | } |
637 | llvm_unreachable("Unexpected source operand" ); |
638 | } |
639 | |
640 | /// Generate a conditional transfer, copying the value SrcOp to the |
641 | /// destination register DstR:DstSR, and using the predicate register from |
642 | /// PredOp. The Cond argument specifies whether the predicate is to be |
643 | /// if(PredOp), or if(!PredOp). |
644 | MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp, |
645 | MachineBasicBlock::iterator At, |
646 | unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, |
647 | bool PredSense, bool ReadUndef, bool ImpUse) { |
648 | MachineInstr *MI = SrcOp.getParent(); |
649 | MachineBasicBlock &B = *At->getParent(); |
650 | const DebugLoc &DL = MI->getDebugLoc(); |
651 | |
652 | // Don't avoid identity copies here (i.e. if the source and the destination |
653 | // are the same registers). It is actually better to generate them here, |
654 | // since this would cause the copy to potentially be predicated in the next |
655 | // step. The predication will remove such a copy if it is unable to |
656 | /// predicate. |
657 | |
658 | unsigned Opc = getCondTfrOpcode(SO: SrcOp, IfTrue: PredSense); |
659 | unsigned DstState = RegState::Define | (ReadUndef ? RegState::Undef : 0); |
660 | unsigned PredState = getRegState(RegOp: PredOp) & ~RegState::Kill; |
661 | MachineInstrBuilder MIB; |
662 | |
663 | if (SrcOp.isReg()) { |
664 | unsigned SrcState = getRegState(RegOp: SrcOp); |
665 | if (RegisterRef(SrcOp) == RegisterRef(DstR, DstSR)) |
666 | SrcState &= ~RegState::Kill; |
667 | MIB = BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII->get(Opcode: Opc)) |
668 | .addReg(RegNo: DstR, flags: DstState, SubReg: DstSR) |
669 | .addReg(RegNo: PredOp.getReg(), flags: PredState, SubReg: PredOp.getSubReg()) |
670 | .addReg(RegNo: SrcOp.getReg(), flags: SrcState, SubReg: SrcOp.getSubReg()); |
671 | } else { |
672 | MIB = BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII->get(Opcode: Opc)) |
673 | .addReg(RegNo: DstR, flags: DstState, SubReg: DstSR) |
674 | .addReg(RegNo: PredOp.getReg(), flags: PredState, SubReg: PredOp.getSubReg()) |
675 | .add(MO: SrcOp); |
676 | } |
677 | |
678 | LLVM_DEBUG(dbgs() << "created an initial copy: " << *MIB); |
679 | return &*MIB; |
680 | } |
681 | |
682 | /// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function |
683 | /// performs all necessary changes to complete the replacement. |
684 | bool HexagonExpandCondsets::split(MachineInstr &MI, |
685 | std::set<Register> &UpdRegs) { |
686 | if (TfrLimitActive) { |
687 | if (TfrCounter >= TfrLimit) |
688 | return false; |
689 | TfrCounter++; |
690 | } |
691 | LLVM_DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent()) |
692 | << ": " << MI); |
693 | MachineOperand &MD = MI.getOperand(i: 0); // Definition |
694 | MachineOperand &MP = MI.getOperand(i: 1); // Predicate register |
695 | assert(MD.isDef()); |
696 | Register DR = MD.getReg(), DSR = MD.getSubReg(); |
697 | bool ReadUndef = MD.isUndef(); |
698 | MachineBasicBlock::iterator At = MI; |
699 | |
700 | auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void { |
701 | for (auto &Op : MI.operands()) { |
702 | if (Op.isReg()) |
703 | UpdRegs.insert(x: Op.getReg()); |
704 | } |
705 | }; |
706 | |
707 | // If this is a mux of the same register, just replace it with COPY. |
708 | // Ideally, this would happen earlier, so that register coalescing would |
709 | // see it. |
710 | MachineOperand &ST = MI.getOperand(i: 2); |
711 | MachineOperand &SF = MI.getOperand(i: 3); |
712 | if (ST.isReg() && SF.isReg()) { |
713 | RegisterRef RT(ST); |
714 | if (RT == RegisterRef(SF)) { |
715 | // Copy regs to update first. |
716 | updateRegs(MI); |
717 | MI.setDesc(HII->get(Opcode: TargetOpcode::COPY)); |
718 | unsigned S = getRegState(RegOp: ST); |
719 | while (MI.getNumOperands() > 1) |
720 | MI.removeOperand(OpNo: MI.getNumOperands()-1); |
721 | MachineFunction &MF = *MI.getParent()->getParent(); |
722 | MachineInstrBuilder(MF, MI).addReg(RegNo: RT.Reg, flags: S, SubReg: RT.Sub); |
723 | return true; |
724 | } |
725 | } |
726 | |
727 | // First, create the two invididual conditional transfers, and add each |
728 | // of them to the live intervals information. Do that first and then remove |
729 | // the old instruction from live intervals. |
730 | MachineInstr *TfrT = |
731 | genCondTfrFor(SrcOp&: ST, At, DstR: DR, DstSR: DSR, PredOp: MP, PredSense: true, ReadUndef, ImpUse: false); |
732 | MachineInstr *TfrF = |
733 | genCondTfrFor(SrcOp&: SF, At, DstR: DR, DstSR: DSR, PredOp: MP, PredSense: false, ReadUndef, ImpUse: true); |
734 | LIS->InsertMachineInstrInMaps(MI&: *TfrT); |
735 | LIS->InsertMachineInstrInMaps(MI&: *TfrF); |
736 | |
737 | // Will need to recalculate live intervals for all registers in MI. |
738 | updateRegs(MI); |
739 | |
740 | removeInstr(MI); |
741 | return true; |
742 | } |
743 | |
744 | bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { |
745 | if (HII->isPredicated(MI: *MI) || !HII->isPredicable(MI: *MI)) |
746 | return false; |
747 | if (MI->hasUnmodeledSideEffects() || MI->mayStore()) |
748 | return false; |
749 | // Reject instructions with multiple defs (e.g. post-increment loads). |
750 | bool HasDef = false; |
751 | for (auto &Op : MI->operands()) { |
752 | if (!Op.isReg() || !Op.isDef()) |
753 | continue; |
754 | if (HasDef) |
755 | return false; |
756 | HasDef = true; |
757 | } |
758 | for (auto &Mo : MI->memoperands()) { |
759 | if (Mo->isVolatile() || Mo->isAtomic()) |
760 | return false; |
761 | } |
762 | return true; |
763 | } |
764 | |
765 | /// Find the reaching definition for a predicated use of RD. The RD is used |
766 | /// under the conditions given by PredR and Cond, and this function will ignore |
767 | /// definitions that set RD under the opposite conditions. |
768 | MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, |
769 | MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { |
770 | MachineBasicBlock &B = *UseIt->getParent(); |
771 | MachineBasicBlock::iterator I = UseIt, S = B.begin(); |
772 | if (I == S) |
773 | return nullptr; |
774 | |
775 | bool PredValid = true; |
776 | do { |
777 | --I; |
778 | MachineInstr *MI = &*I; |
779 | // Check if this instruction can be ignored, i.e. if it is predicated |
780 | // on the complementary condition. |
781 | if (PredValid && HII->isPredicated(MI: *MI)) { |
782 | if (MI->readsRegister(Reg: PredR, /*TRI=*/nullptr) && |
783 | (Cond != HII->isPredicatedTrue(MI: *MI))) |
784 | continue; |
785 | } |
786 | |
787 | // Check the defs. If the PredR is defined, invalidate it. If RD is |
788 | // defined, return the instruction or 0, depending on the circumstances. |
789 | for (auto &Op : MI->operands()) { |
790 | if (!Op.isReg() || !Op.isDef()) |
791 | continue; |
792 | RegisterRef RR = Op; |
793 | if (RR.Reg == PredR) { |
794 | PredValid = false; |
795 | continue; |
796 | } |
797 | if (RR.Reg != RD.Reg) |
798 | continue; |
799 | // If the "Reg" part agrees, there is still the subregister to check. |
800 | // If we are looking for %1:loreg, we can skip %1:hireg, but |
801 | // not %1 (w/o subregisters). |
802 | if (RR.Sub == RD.Sub) |
803 | return MI; |
804 | if (RR.Sub == 0 || RD.Sub == 0) |
805 | return nullptr; |
806 | // We have different subregisters, so we can continue looking. |
807 | } |
808 | } while (I != S); |
809 | |
810 | return nullptr; |
811 | } |
812 | |
813 | /// Check if the instruction MI can be safely moved over a set of instructions |
814 | /// whose side-effects (in terms of register defs and uses) are expressed in |
815 | /// the maps Defs and Uses. These maps reflect the conditional defs and uses |
816 | /// that depend on the same predicate register to allow moving instructions |
817 | /// over instructions predicated on the opposite condition. |
818 | bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs, |
819 | ReferenceMap &Uses) { |
820 | // In order to be able to safely move MI over instructions that define |
821 | // "Defs" and use "Uses", no def operand from MI can be defined or used |
822 | // and no use operand can be defined. |
823 | for (auto &Op : MI.operands()) { |
824 | if (!Op.isReg()) |
825 | continue; |
826 | RegisterRef RR = Op; |
827 | // For physical register we would need to check register aliases, etc. |
828 | // and we don't want to bother with that. It would be of little value |
829 | // before the actual register rewriting (from virtual to physical). |
830 | if (!RR.Reg.isVirtual()) |
831 | return false; |
832 | // No redefs for any operand. |
833 | if (isRefInMap(RR, Map&: Defs, Exec: Exec_Then)) |
834 | return false; |
835 | // For defs, there cannot be uses. |
836 | if (Op.isDef() && isRefInMap(RR, Map&: Uses, Exec: Exec_Then)) |
837 | return false; |
838 | } |
839 | return true; |
840 | } |
841 | |
842 | /// Check if the instruction accessing memory (TheI) can be moved to the |
843 | /// location ToI. |
844 | bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI, |
845 | bool IsDown) { |
846 | bool IsLoad = TheI.mayLoad(), IsStore = TheI.mayStore(); |
847 | if (!IsLoad && !IsStore) |
848 | return true; |
849 | if (HII->areMemAccessesTriviallyDisjoint(MIa: TheI, MIb: ToI)) |
850 | return true; |
851 | if (TheI.hasUnmodeledSideEffects()) |
852 | return false; |
853 | |
854 | MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; |
855 | MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; |
856 | bool Ordered = TheI.hasOrderedMemoryRef(); |
857 | |
858 | // Search for aliased memory reference in (StartI, EndI). |
859 | for (MachineInstr &MI : llvm::make_range(x: std::next(x: StartI), y: EndI)) { |
860 | if (MI.hasUnmodeledSideEffects()) |
861 | return false; |
862 | bool L = MI.mayLoad(), S = MI.mayStore(); |
863 | if (!L && !S) |
864 | continue; |
865 | if (Ordered && MI.hasOrderedMemoryRef()) |
866 | return false; |
867 | |
868 | bool Conflict = (L && IsStore) || S; |
869 | if (Conflict) |
870 | return false; |
871 | } |
872 | return true; |
873 | } |
874 | |
875 | /// Generate a predicated version of MI (where the condition is given via |
876 | /// PredR and Cond) at the point indicated by Where. |
877 | void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp, |
878 | MachineInstr &MI, |
879 | MachineBasicBlock::iterator Where, |
880 | const MachineOperand &PredOp, bool Cond, |
881 | std::set<Register> &UpdRegs) { |
882 | // The problem with updating live intervals is that we can move one def |
883 | // past another def. In particular, this can happen when moving an A2_tfrt |
884 | // over an A2_tfrf defining the same register. From the point of view of |
885 | // live intervals, these two instructions are two separate definitions, |
886 | // and each one starts another live segment. LiveIntervals's "handleMove" |
887 | // does not allow such moves, so we need to handle it ourselves. To avoid |
888 | // invalidating liveness data while we are using it, the move will be |
889 | // implemented in 4 steps: (1) add a clone of the instruction MI at the |
890 | // target location, (2) update liveness, (3) delete the old instruction, |
891 | // and (4) update liveness again. |
892 | |
893 | MachineBasicBlock &B = *MI.getParent(); |
894 | DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. |
895 | unsigned Opc = MI.getOpcode(); |
896 | unsigned PredOpc = HII->getCondOpcode(Opc, sense: !Cond); |
897 | MachineInstrBuilder MB = BuildMI(BB&: B, I: Where, MIMD: DL, MCID: HII->get(Opcode: PredOpc)); |
898 | unsigned Ox = 0, NP = MI.getNumOperands(); |
899 | // Skip all defs from MI first. |
900 | while (Ox < NP) { |
901 | MachineOperand &MO = MI.getOperand(i: Ox); |
902 | if (!MO.isReg() || !MO.isDef()) |
903 | break; |
904 | Ox++; |
905 | } |
906 | // Add the new def, then the predicate register, then the rest of the |
907 | // operands. |
908 | MB.addReg(RegNo: DefOp.getReg(), flags: getRegState(RegOp: DefOp), SubReg: DefOp.getSubReg()); |
909 | MB.addReg(RegNo: PredOp.getReg(), flags: PredOp.isUndef() ? RegState::Undef : 0, |
910 | SubReg: PredOp.getSubReg()); |
911 | while (Ox < NP) { |
912 | MachineOperand &MO = MI.getOperand(i: Ox); |
913 | if (!MO.isReg() || !MO.isImplicit()) |
914 | MB.add(MO); |
915 | Ox++; |
916 | } |
917 | MB.cloneMemRefs(OtherMI: MI); |
918 | |
919 | MachineInstr *NewI = MB; |
920 | NewI->clearKillInfo(); |
921 | LIS->InsertMachineInstrInMaps(MI&: *NewI); |
922 | |
923 | for (auto &Op : NewI->operands()) { |
924 | if (Op.isReg()) |
925 | UpdRegs.insert(x: Op.getReg()); |
926 | } |
927 | } |
928 | |
929 | /// In the range [First, Last], rename all references to the "old" register RO |
930 | /// to the "new" register RN, but only in instructions predicated on the given |
931 | /// condition. |
932 | void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, |
933 | unsigned PredR, bool Cond, MachineBasicBlock::iterator First, |
934 | MachineBasicBlock::iterator Last) { |
935 | MachineBasicBlock::iterator End = std::next(x: Last); |
936 | for (MachineInstr &MI : llvm::make_range(x: First, y: End)) { |
937 | // Do not touch instructions that are not predicated, or are predicated |
938 | // on the opposite condition. |
939 | if (!HII->isPredicated(MI)) |
940 | continue; |
941 | if (!MI.readsRegister(Reg: PredR, /*TRI=*/nullptr) || |
942 | (Cond != HII->isPredicatedTrue(MI))) |
943 | continue; |
944 | |
945 | for (auto &Op : MI.operands()) { |
946 | if (!Op.isReg() || RO != RegisterRef(Op)) |
947 | continue; |
948 | Op.setReg(RN.Reg); |
949 | Op.setSubReg(RN.Sub); |
950 | // In practice, this isn't supposed to see any defs. |
951 | assert(!Op.isDef() && "Not expecting a def" ); |
952 | } |
953 | } |
954 | } |
955 | |
956 | /// For a given conditional copy, predicate the definition of the source of |
957 | /// the copy under the given condition (using the same predicate register as |
958 | /// the copy). |
959 | bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, |
960 | std::set<Register> &UpdRegs) { |
961 | // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). |
962 | unsigned Opc = TfrI.getOpcode(); |
963 | (void)Opc; |
964 | assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); |
965 | LLVM_DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false" ) |
966 | << ": " << TfrI); |
967 | |
968 | MachineOperand &MD = TfrI.getOperand(i: 0); |
969 | MachineOperand &MP = TfrI.getOperand(i: 1); |
970 | MachineOperand &MS = TfrI.getOperand(i: 2); |
971 | // The source operand should be a <kill>. This is not strictly necessary, |
972 | // but it makes things a lot simpler. Otherwise, we would need to rename |
973 | // some registers, which would complicate the transformation considerably. |
974 | if (!MS.isKill()) |
975 | return false; |
976 | // Avoid predicating instructions that define a subregister if subregister |
977 | // liveness tracking is not enabled. |
978 | if (MD.getSubReg() && !MRI->shouldTrackSubRegLiveness(VReg: MD.getReg())) |
979 | return false; |
980 | |
981 | RegisterRef RT(MS); |
982 | Register PredR = MP.getReg(); |
983 | MachineInstr *DefI = getReachingDefForPred(RD: RT, UseIt: TfrI, PredR, Cond); |
984 | if (!DefI || !isPredicable(MI: DefI)) |
985 | return false; |
986 | |
987 | LLVM_DEBUG(dbgs() << "Source def: " << *DefI); |
988 | |
989 | // Collect the information about registers defined and used between the |
990 | // DefI and the TfrI. |
991 | // Map: reg -> bitmask of subregs |
992 | ReferenceMap Uses, Defs; |
993 | MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; |
994 | |
995 | // Check if the predicate register is valid between DefI and TfrI. |
996 | // If it is, we can then ignore instructions predicated on the negated |
997 | // conditions when collecting def and use information. |
998 | bool PredValid = true; |
999 | for (MachineInstr &MI : llvm::make_range(x: std::next(x: DefIt), y: TfrIt)) { |
1000 | if (!MI.modifiesRegister(Reg: PredR, TRI: nullptr)) |
1001 | continue; |
1002 | PredValid = false; |
1003 | break; |
1004 | } |
1005 | |
1006 | for (MachineInstr &MI : llvm::make_range(x: std::next(x: DefIt), y: TfrIt)) { |
1007 | // If this instruction is predicated on the same register, it could |
1008 | // potentially be ignored. |
1009 | // By default assume that the instruction executes on the same condition |
1010 | // as TfrI (Exec_Then), and also on the opposite one (Exec_Else). |
1011 | unsigned Exec = Exec_Then | Exec_Else; |
1012 | if (PredValid && HII->isPredicated(MI) && |
1013 | MI.readsRegister(Reg: PredR, /*TRI=*/nullptr)) |
1014 | Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else; |
1015 | |
1016 | for (auto &Op : MI.operands()) { |
1017 | if (!Op.isReg()) |
1018 | continue; |
1019 | // We don't want to deal with physical registers. The reason is that |
1020 | // they can be aliased with other physical registers. Aliased virtual |
1021 | // registers must share the same register number, and can only differ |
1022 | // in the subregisters, which we are keeping track of. Physical |
1023 | // registers ters no longer have subregisters---their super- and |
1024 | // subregisters are other physical registers, and we are not checking |
1025 | // that. |
1026 | RegisterRef RR = Op; |
1027 | if (!RR.Reg.isVirtual()) |
1028 | return false; |
1029 | |
1030 | ReferenceMap &Map = Op.isDef() ? Defs : Uses; |
1031 | if (Op.isDef() && Op.isUndef()) { |
1032 | assert(RR.Sub && "Expecting a subregister on <def,read-undef>" ); |
1033 | // If this is a <def,read-undef>, then it invalidates the non-written |
1034 | // part of the register. For the purpose of checking the validity of |
1035 | // the move, assume that it modifies the whole register. |
1036 | RR.Sub = 0; |
1037 | } |
1038 | addRefToMap(RR, Map, Exec); |
1039 | } |
1040 | } |
1041 | |
1042 | // The situation: |
1043 | // RT = DefI |
1044 | // ... |
1045 | // RD = TfrI ..., RT |
1046 | |
1047 | // If the register-in-the-middle (RT) is used or redefined between |
1048 | // DefI and TfrI, we may not be able proceed with this transformation. |
1049 | // We can ignore a def that will not execute together with TfrI, and a |
1050 | // use that will. If there is such a use (that does execute together with |
1051 | // TfrI), we will not be able to move DefI down. If there is a use that |
1052 | // executed if TfrI's condition is false, then RT must be available |
1053 | // unconditionally (cannot be predicated). |
1054 | // Essentially, we need to be able to rename RT to RD in this segment. |
1055 | if (isRefInMap(RR: RT, Map&: Defs, Exec: Exec_Then) || isRefInMap(RR: RT, Map&: Uses, Exec: Exec_Else)) |
1056 | return false; |
1057 | RegisterRef RD = MD; |
1058 | // If the predicate register is defined between DefI and TfrI, the only |
1059 | // potential thing to do would be to move the DefI down to TfrI, and then |
1060 | // predicate. The reaching def (DefI) must be movable down to the location |
1061 | // of the TfrI. |
1062 | // If the target register of the TfrI (RD) is not used or defined between |
1063 | // DefI and TfrI, consider moving TfrI up to DefI. |
1064 | bool CanUp = canMoveOver(MI&: TfrI, Defs, Uses); |
1065 | bool CanDown = canMoveOver(MI&: *DefI, Defs, Uses); |
1066 | // The TfrI does not access memory, but DefI could. Check if it's safe |
1067 | // to move DefI down to TfrI. |
1068 | if (DefI->mayLoadOrStore()) { |
1069 | if (!canMoveMemTo(TheI&: *DefI, ToI&: TfrI, IsDown: true)) |
1070 | CanDown = false; |
1071 | } |
1072 | |
1073 | LLVM_DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no" ) |
1074 | << ", can move down: " << (CanDown ? "yes\n" : "no\n" )); |
1075 | MachineBasicBlock::iterator PastDefIt = std::next(x: DefIt); |
1076 | if (CanUp) |
1077 | predicateAt(DefOp: MD, MI&: *DefI, Where: PastDefIt, PredOp: MP, Cond, UpdRegs); |
1078 | else if (CanDown) |
1079 | predicateAt(DefOp: MD, MI&: *DefI, Where: TfrIt, PredOp: MP, Cond, UpdRegs); |
1080 | else |
1081 | return false; |
1082 | |
1083 | if (RT != RD) { |
1084 | renameInRange(RO: RT, RN: RD, PredR, Cond, First: PastDefIt, Last: TfrIt); |
1085 | UpdRegs.insert(x: RT.Reg); |
1086 | } |
1087 | |
1088 | removeInstr(MI&: TfrI); |
1089 | removeInstr(MI&: *DefI); |
1090 | return true; |
1091 | } |
1092 | |
1093 | /// Predicate all cases of conditional copies in the specified block. |
1094 | bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, |
1095 | std::set<Register> &UpdRegs) { |
1096 | bool Changed = false; |
1097 | for (MachineInstr &MI : llvm::make_early_inc_range(Range&: B)) { |
1098 | unsigned Opc = MI.getOpcode(); |
1099 | if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { |
1100 | bool Done = predicate(TfrI&: MI, Cond: (Opc == Hexagon::A2_tfrt), UpdRegs); |
1101 | if (!Done) { |
1102 | // If we didn't predicate I, we may need to remove it in case it is |
1103 | // an "identity" copy, e.g. %1 = A2_tfrt %2, %1. |
1104 | if (RegisterRef(MI.getOperand(i: 0)) == RegisterRef(MI.getOperand(i: 2))) { |
1105 | for (auto &Op : MI.operands()) { |
1106 | if (Op.isReg()) |
1107 | UpdRegs.insert(x: Op.getReg()); |
1108 | } |
1109 | removeInstr(MI); |
1110 | } |
1111 | } |
1112 | Changed |= Done; |
1113 | } |
1114 | } |
1115 | return Changed; |
1116 | } |
1117 | |
1118 | bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { |
1119 | if (!RR.Reg.isVirtual()) |
1120 | return false; |
1121 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: RR.Reg); |
1122 | if (RC == &Hexagon::IntRegsRegClass) { |
1123 | BW = 32; |
1124 | return true; |
1125 | } |
1126 | if (RC == &Hexagon::DoubleRegsRegClass) { |
1127 | BW = (RR.Sub != 0) ? 32 : 64; |
1128 | return true; |
1129 | } |
1130 | return false; |
1131 | } |
1132 | |
1133 | bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { |
1134 | for (LiveRange::Segment &LR : LI) { |
1135 | // Range must start at a register... |
1136 | if (!LR.start.isRegister()) |
1137 | return false; |
1138 | // ...and end in a register or in a dead slot. |
1139 | if (!LR.end.isRegister() && !LR.end.isDead()) |
1140 | return false; |
1141 | } |
1142 | return true; |
1143 | } |
1144 | |
1145 | bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { |
1146 | if (CoaLimitActive) { |
1147 | if (CoaCounter >= CoaLimit) |
1148 | return false; |
1149 | CoaCounter++; |
1150 | } |
1151 | unsigned BW1, BW2; |
1152 | if (!isIntReg(RR: R1, BW&: BW1) || !isIntReg(RR: R2, BW&: BW2) || BW1 != BW2) |
1153 | return false; |
1154 | if (MRI->isLiveIn(Reg: R1.Reg)) |
1155 | return false; |
1156 | if (MRI->isLiveIn(Reg: R2.Reg)) |
1157 | return false; |
1158 | |
1159 | LiveInterval &L1 = LIS->getInterval(Reg: R1.Reg); |
1160 | LiveInterval &L2 = LIS->getInterval(Reg: R2.Reg); |
1161 | if (L2.empty()) |
1162 | return false; |
1163 | if (L1.hasSubRanges() || L2.hasSubRanges()) |
1164 | return false; |
1165 | bool Overlap = L1.overlaps(other: L2); |
1166 | |
1167 | LLVM_DEBUG(dbgs() << "compatible registers: (" |
1168 | << (Overlap ? "overlap" : "disjoint" ) << ")\n " |
1169 | << printReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " |
1170 | << printReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n" ); |
1171 | if (R1.Sub || R2.Sub) |
1172 | return false; |
1173 | if (Overlap) |
1174 | return false; |
1175 | |
1176 | // Coalescing could have a negative impact on scheduling, so try to limit |
1177 | // to some reasonable extent. Only consider coalescing segments, when one |
1178 | // of them does not cross basic block boundaries. |
1179 | if (!isIntraBlocks(LI&: L1) && !isIntraBlocks(LI&: L2)) |
1180 | return false; |
1181 | |
1182 | MRI->replaceRegWith(FromReg: R2.Reg, ToReg: R1.Reg); |
1183 | |
1184 | // Move all live segments from L2 to L1. |
1185 | using ValueInfoMap = DenseMap<VNInfo *, VNInfo *>; |
1186 | ValueInfoMap VM; |
1187 | for (LiveRange::Segment &I : L2) { |
1188 | VNInfo *NewVN, *OldVN = I.valno; |
1189 | ValueInfoMap::iterator F = VM.find(Val: OldVN); |
1190 | if (F == VM.end()) { |
1191 | NewVN = L1.getNextValue(Def: I.valno->def, VNInfoAllocator&: LIS->getVNInfoAllocator()); |
1192 | VM.insert(KV: std::make_pair(x&: OldVN, y&: NewVN)); |
1193 | } else { |
1194 | NewVN = F->second; |
1195 | } |
1196 | L1.addSegment(S: LiveRange::Segment(I.start, I.end, NewVN)); |
1197 | } |
1198 | while (!L2.empty()) |
1199 | L2.removeSegment(S: *L2.begin()); |
1200 | LIS->removeInterval(Reg: R2.Reg); |
1201 | |
1202 | updateKillFlags(Reg: R1.Reg); |
1203 | LLVM_DEBUG(dbgs() << "coalesced: " << L1 << "\n" ); |
1204 | L1.verify(); |
1205 | |
1206 | return true; |
1207 | } |
1208 | |
1209 | /// Attempt to coalesce one of the source registers to a MUX instruction with |
1210 | /// the destination register. This could lead to having only one predicated |
1211 | /// instruction in the end instead of two. |
1212 | bool HexagonExpandCondsets::coalesceSegments( |
1213 | const SmallVectorImpl<MachineInstr *> &Condsets, |
1214 | std::set<Register> &UpdRegs) { |
1215 | SmallVector<MachineInstr*,16> TwoRegs; |
1216 | for (MachineInstr *MI : Condsets) { |
1217 | MachineOperand &S1 = MI->getOperand(i: 2), &S2 = MI->getOperand(i: 3); |
1218 | if (!S1.isReg() && !S2.isReg()) |
1219 | continue; |
1220 | TwoRegs.push_back(Elt: MI); |
1221 | } |
1222 | |
1223 | bool Changed = false; |
1224 | for (MachineInstr *CI : TwoRegs) { |
1225 | RegisterRef RD = CI->getOperand(i: 0); |
1226 | RegisterRef RP = CI->getOperand(i: 1); |
1227 | MachineOperand &S1 = CI->getOperand(i: 2), &S2 = CI->getOperand(i: 3); |
1228 | bool Done = false; |
1229 | // Consider this case: |
1230 | // %1 = instr1 ... |
1231 | // %2 = instr2 ... |
1232 | // %0 = C2_mux ..., %1, %2 |
1233 | // If %0 was coalesced with %1, we could end up with the following |
1234 | // code: |
1235 | // %0 = instr1 ... |
1236 | // %2 = instr2 ... |
1237 | // %0 = A2_tfrf ..., %2 |
1238 | // which will later become: |
1239 | // %0 = instr1 ... |
1240 | // %0 = instr2_cNotPt ... |
1241 | // i.e. there will be an unconditional definition (instr1) of %0 |
1242 | // followed by a conditional one. The output dependency was there before |
1243 | // and it unavoidable, but if instr1 is predicable, we will no longer be |
1244 | // able to predicate it here. |
1245 | // To avoid this scenario, don't coalesce the destination register with |
1246 | // a source register that is defined by a predicable instruction. |
1247 | if (S1.isReg()) { |
1248 | RegisterRef RS = S1; |
1249 | MachineInstr *RDef = getReachingDefForPred(RD: RS, UseIt: CI, PredR: RP.Reg, Cond: true); |
1250 | if (!RDef || !HII->isPredicable(MI: *RDef)) { |
1251 | Done = coalesceRegisters(R1: RD, R2: RegisterRef(S1)); |
1252 | if (Done) { |
1253 | UpdRegs.insert(x: RD.Reg); |
1254 | UpdRegs.insert(x: S1.getReg()); |
1255 | } |
1256 | } |
1257 | } |
1258 | if (!Done && S2.isReg()) { |
1259 | RegisterRef RS = S2; |
1260 | MachineInstr *RDef = getReachingDefForPred(RD: RS, UseIt: CI, PredR: RP.Reg, Cond: false); |
1261 | if (!RDef || !HII->isPredicable(MI: *RDef)) { |
1262 | Done = coalesceRegisters(R1: RD, R2: RegisterRef(S2)); |
1263 | if (Done) { |
1264 | UpdRegs.insert(x: RD.Reg); |
1265 | UpdRegs.insert(x: S2.getReg()); |
1266 | } |
1267 | } |
1268 | } |
1269 | Changed |= Done; |
1270 | } |
1271 | return Changed; |
1272 | } |
1273 | |
1274 | bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { |
1275 | if (skipFunction(F: MF.getFunction())) |
1276 | return false; |
1277 | |
1278 | HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo()); |
1279 | TRI = MF.getSubtarget().getRegisterInfo(); |
1280 | MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
1281 | LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); |
1282 | MRI = &MF.getRegInfo(); |
1283 | |
1284 | LLVM_DEBUG(LIS->print(dbgs() << "Before expand-condsets\n" )); |
1285 | |
1286 | bool Changed = false; |
1287 | std::set<Register> CoalUpd, PredUpd; |
1288 | |
1289 | SmallVector<MachineInstr*,16> Condsets; |
1290 | for (auto &B : MF) { |
1291 | for (auto &I : B) { |
1292 | if (isCondset(MI: I)) |
1293 | Condsets.push_back(Elt: &I); |
1294 | } |
1295 | } |
1296 | |
1297 | // Try to coalesce the target of a mux with one of its sources. |
1298 | // This could eliminate a register copy in some circumstances. |
1299 | Changed |= coalesceSegments(Condsets, UpdRegs&: CoalUpd); |
1300 | |
1301 | // Update kill flags on all source operands. This is done here because |
1302 | // at this moment (when expand-condsets runs), there are no kill flags |
1303 | // in the IR (they have been removed by live range analysis). |
1304 | // Updating them right before we split is the easiest, because splitting |
1305 | // adds definitions which would interfere with updating kills afterwards. |
1306 | std::set<Register> KillUpd; |
1307 | for (MachineInstr *MI : Condsets) { |
1308 | for (MachineOperand &Op : MI->operands()) { |
1309 | if (Op.isReg() && Op.isUse()) { |
1310 | if (!CoalUpd.count(x: Op.getReg())) |
1311 | KillUpd.insert(x: Op.getReg()); |
1312 | } |
1313 | } |
1314 | } |
1315 | updateLiveness(RegSet: KillUpd, Recalc: false, UpdateKills: true, UpdateDeads: false); |
1316 | LLVM_DEBUG(LIS->print(dbgs() << "After coalescing\n" )); |
1317 | |
1318 | // First, simply split all muxes into a pair of conditional transfers |
1319 | // and update the live intervals to reflect the new arrangement. The |
1320 | // goal is to update the kill flags, since predication will rely on |
1321 | // them. |
1322 | for (MachineInstr *MI : Condsets) |
1323 | Changed |= split(MI&: *MI, UpdRegs&: PredUpd); |
1324 | Condsets.clear(); // The contents of Condsets are invalid here anyway. |
1325 | |
1326 | // Do not update live ranges after splitting. Recalculation of live |
1327 | // intervals removes kill flags, which were preserved by splitting on |
1328 | // the source operands of condsets. These kill flags are needed by |
1329 | // predication, and after splitting they are difficult to recalculate |
1330 | // (because of predicated defs), so make sure they are left untouched. |
1331 | // Predication does not use live intervals. |
1332 | LLVM_DEBUG(LIS->print(dbgs() << "After splitting\n" )); |
1333 | |
1334 | // Traverse all blocks and collapse predicable instructions feeding |
1335 | // conditional transfers into predicated instructions. |
1336 | // Walk over all the instructions again, so we may catch pre-existing |
1337 | // cases that were not created in the previous step. |
1338 | for (auto &B : MF) |
1339 | Changed |= predicateInBlock(B, UpdRegs&: PredUpd); |
1340 | LLVM_DEBUG(LIS->print(dbgs() << "After predicating\n" )); |
1341 | |
1342 | PredUpd.insert(first: CoalUpd.begin(), last: CoalUpd.end()); |
1343 | updateLiveness(RegSet: PredUpd, Recalc: true, UpdateKills: true, UpdateDeads: true); |
1344 | |
1345 | if (Changed) |
1346 | distributeLiveIntervals(Regs: PredUpd); |
1347 | |
1348 | LLVM_DEBUG({ |
1349 | if (Changed) |
1350 | LIS->print(dbgs() << "After expand-condsets\n" ); |
1351 | }); |
1352 | |
1353 | return Changed; |
1354 | } |
1355 | |
1356 | //===----------------------------------------------------------------------===// |
1357 | // Public Constructor Functions |
1358 | //===----------------------------------------------------------------------===// |
1359 | FunctionPass *llvm::createHexagonExpandCondsets() { |
1360 | return new HexagonExpandCondsets(); |
1361 | } |
1362 | |