1 | //===- HexagonBitSimplify.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "BitTracker.h" |
10 | #include "Hexagon.h" |
11 | #include "HexagonBitTracker.h" |
12 | #include "HexagonInstrInfo.h" |
13 | #include "HexagonRegisterInfo.h" |
14 | #include "HexagonSubtarget.h" |
15 | #include "llvm/ADT/BitVector.h" |
16 | #include "llvm/ADT/SmallVector.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/CodeGen/MachineBasicBlock.h" |
19 | #include "llvm/CodeGen/MachineDominators.h" |
20 | #include "llvm/CodeGen/MachineFunction.h" |
21 | #include "llvm/CodeGen/MachineFunctionPass.h" |
22 | #include "llvm/CodeGen/MachineInstr.h" |
23 | #include "llvm/CodeGen/MachineOperand.h" |
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
25 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
26 | #include "llvm/IR/DebugLoc.h" |
27 | #include "llvm/InitializePasses.h" |
28 | #include "llvm/MC/MCInstrDesc.h" |
29 | #include "llvm/Pass.h" |
30 | #include "llvm/Support/Debug.h" |
31 | #include "llvm/Support/raw_ostream.h" |
32 | |
33 | #define DEBUG_TYPE "hexbit" |
34 | |
35 | using namespace llvm; |
36 | |
static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
  cl::init(true), cl::desc("Preserve subregisters in tied operands"));
static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
  cl::init(true), cl::desc("Generate extract instructions"));
static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
  cl::init(true), cl::desc("Generate bitsplit instructions"));

static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
  cl::init(std::numeric_limits<unsigned>::max()));
static unsigned CountExtract = 0;
static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
  cl::init(std::numeric_limits<unsigned>::max()));
static unsigned CountBitSplit = 0;

static cl::opt<unsigned> RegisterSetLimit("hexbit-registerset-limit",
  cl::Hidden, cl::init(1000));
53 | |
54 | namespace { |
55 | |
56 | // Set of virtual registers, based on BitVector. |
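// The set is kept bounded: once more than RegisterSetLimit registers have
// been inserted, the least recently inserted one is evicted (see insert()).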
57 | struct RegisterSet { |
58 | RegisterSet() = default; |
59 | explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {} |
60 | RegisterSet(const RegisterSet &RS) = default; |
61 | |
62 | void clear() { |
63 | Bits.clear(); |
64 | LRU.clear(); |
65 | } |
66 | |
67 | unsigned count() const { |
68 | return Bits.count(); |
69 | } |
70 | |
71 | unsigned find_first() const { |
72 | int First = Bits.find_first(); |
73 | if (First < 0) |
74 | return 0; |
75 | return x2v(x: First); |
76 | } |
77 | |
78 | unsigned find_next(unsigned Prev) const { |
79 | int Next = Bits.find_next(Prev: v2x(v: Prev)); |
80 | if (Next < 0) |
81 | return 0; |
82 | return x2v(x: Next); |
83 | } |
84 | |
85 | RegisterSet &insert(unsigned R) { |
86 | unsigned Idx = v2x(v: R); |
87 | ensure(Idx); |
88 | bool Exists = Bits.test(Idx); |
89 | Bits.set(Idx); |
90 | if (!Exists) { |
91 | LRU.push_back(x: Idx); |
92 | if (LRU.size() > RegisterSetLimit) { |
93 | unsigned T = LRU.front(); |
94 | Bits.reset(Idx: T); |
95 | LRU.pop_front(); |
96 | } |
97 | } |
98 | return *this; |
99 | } |
100 | RegisterSet &remove(unsigned R) { |
101 | unsigned Idx = v2x(v: R); |
102 | if (Idx < Bits.size()) { |
103 | bool Exists = Bits.test(Idx); |
104 | Bits.reset(Idx); |
105 | if (Exists) { |
106 | auto F = llvm::find(Range&: LRU, Val: Idx); |
107 | assert(F != LRU.end()); |
108 | LRU.erase(position: F); |
109 | } |
110 | } |
111 | return *this; |
112 | } |
113 | |
114 | RegisterSet &insert(const RegisterSet &Rs) { |
115 | for (unsigned R = Rs.find_first(); R; R = Rs.find_next(Prev: R)) |
116 | insert(R); |
117 | return *this; |
118 | } |
119 | RegisterSet &remove(const RegisterSet &Rs) { |
120 | for (unsigned R = Rs.find_first(); R; R = Rs.find_next(Prev: R)) |
121 | remove(R); |
122 | return *this; |
123 | } |
124 | |
125 | bool operator[](unsigned R) const { |
126 | unsigned Idx = v2x(v: R); |
127 | return Idx < Bits.size() ? Bits[Idx] : false; |
128 | } |
129 | bool has(unsigned R) const { |
130 | unsigned Idx = v2x(v: R); |
131 | if (Idx >= Bits.size()) |
132 | return false; |
133 | return Bits.test(Idx); |
134 | } |
135 | |
136 | bool empty() const { |
137 | return !Bits.any(); |
138 | } |
139 | bool includes(const RegisterSet &Rs) const { |
140 | // A.test(B) <=> A-B != {} |
141 | return !Rs.Bits.test(RHS: Bits); |
142 | } |
143 | bool intersects(const RegisterSet &Rs) const { |
144 | return Bits.anyCommon(RHS: Rs.Bits); |
145 | } |
146 | |
147 | private: |
148 | BitVector Bits; |
149 | std::deque<unsigned> LRU; |
150 | |
151 | void ensure(unsigned Idx) { |
152 | if (Bits.size() <= Idx) |
153 | Bits.resize(N: std::max(a: Idx+1, b: 32U)); |
154 | } |
155 | |
156 | static inline unsigned v2x(unsigned v) { |
157 | return Register(v).virtRegIndex(); |
158 | } |
159 | |
160 | static inline unsigned x2v(unsigned x) { |
161 | return Register::index2VirtReg(Index: x); |
162 | } |
163 | }; |
164 | |
165 | struct PrintRegSet { |
166 | PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) |
167 | : RS(S), TRI(RI) {} |
168 | |
169 | friend raw_ostream &operator<< (raw_ostream &OS, |
170 | const PrintRegSet &P); |
171 | |
172 | private: |
173 | const RegisterSet &RS; |
174 | const TargetRegisterInfo *TRI; |
175 | }; |
176 | |
177 | raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) |
178 | LLVM_ATTRIBUTE_UNUSED; |
179 | raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { |
180 | OS << '{'; |
181 | for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(Prev: R)) |
182 | OS << ' ' << printReg(Reg: R, TRI: P.TRI); |
183 | OS << " }" ; |
184 | return OS; |
185 | } |
186 | |
187 | class Transformation; |
188 | |
189 | class HexagonBitSimplify : public MachineFunctionPass { |
190 | public: |
191 | static char ID; |
192 | |
193 | HexagonBitSimplify() : MachineFunctionPass(ID) {} |
194 | |
195 | StringRef getPassName() const override { |
196 | return "Hexagon bit simplification" ; |
197 | } |
198 | |
199 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
200 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
201 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
202 | MachineFunctionPass::getAnalysisUsage(AU); |
203 | } |
204 | |
205 | bool runOnMachineFunction(MachineFunction &MF) override; |
206 | |
207 | static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); |
208 | static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); |
209 | static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, |
210 | const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); |
211 | static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B, |
212 | uint16_t W); |
213 | static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, |
214 | uint16_t W, uint64_t &U); |
215 | static bool replaceReg(Register OldR, Register NewR, |
216 | MachineRegisterInfo &MRI); |
217 | static bool getSubregMask(const BitTracker::RegisterRef &RR, |
218 | unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI); |
219 | static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR, |
220 | MachineRegisterInfo &MRI); |
221 | static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR, |
222 | unsigned NewSR, MachineRegisterInfo &MRI); |
223 | static bool parseRegSequence(const MachineInstr &I, |
224 | BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, |
225 | const MachineRegisterInfo &MRI); |
226 | |
227 | static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, |
228 | uint16_t Begin); |
229 | static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, |
230 | uint16_t Begin, const HexagonInstrInfo &HII); |
231 | |
232 | static const TargetRegisterClass *getFinalVRegClass( |
233 | const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI); |
234 | static bool isTransparentCopy(const BitTracker::RegisterRef &RD, |
235 | const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI); |
236 | |
237 | private: |
238 | MachineDominatorTree *MDT = nullptr; |
239 | |
240 | bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); |
241 | static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, |
242 | unsigned NewSub = Hexagon::NoSubRegister); |
243 | }; |
244 | |
245 | using HBS = HexagonBitSimplify; |
246 | |
247 | // The purpose of this class is to provide a common facility to traverse |
248 | // the function top-down or bottom-up via the dominator tree, and keep |
249 | // track of the available registers. |
250 | class Transformation { |
251 | public: |
252 | bool TopDown; |
253 | |
254 | Transformation(bool TD) : TopDown(TD) {} |
255 | virtual ~Transformation() = default; |
256 | |
257 | virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; |
258 | }; |
259 | |
260 | } // end anonymous namespace |
261 | |
262 | char HexagonBitSimplify::ID = 0; |
263 | |
INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify",
                      "Hexagon bit simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify",
                    "Hexagon bit simplification", false, false)
269 | |
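// Visit block B and, recursively, all blocks dominated by it, applying the
// transformation T. Top-down transformations process B before its dominator
// tree children, bottom-up ones after them. AVs is the set of registers
// whose definitions are available on entry to B; the registers defined in B
// are added to it before visiting the children.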
270 | bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, |
271 | RegisterSet &AVs) { |
272 | bool Changed = false; |
273 | |
274 | if (T.TopDown) |
275 | Changed = T.processBlock(B, AVs); |
276 | |
277 | RegisterSet Defs; |
278 | for (auto &I : B) |
279 | getInstrDefs(MI: I, Defs); |
280 | RegisterSet NewAVs = AVs; |
281 | NewAVs.insert(Rs: Defs); |
282 | |
283 | for (auto *DTN : children<MachineDomTreeNode*>(G: MDT->getNode(BB: &B))) |
284 | Changed |= visitBlock(B&: *(DTN->getBlock()), T, AVs&: NewAVs); |
285 | |
286 | if (!T.TopDown) |
287 | Changed |= T.processBlock(B, AVs); |
288 | |
289 | return Changed; |
290 | } |
291 | |
292 | // |
293 | // Utility functions: |
294 | // |
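// Collect all virtual registers defined by MI into Defs.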
295 | void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, |
296 | RegisterSet &Defs) { |
297 | for (auto &Op : MI.operands()) { |
298 | if (!Op.isReg() || !Op.isDef()) |
299 | continue; |
300 | Register R = Op.getReg(); |
301 | if (!R.isVirtual()) |
302 | continue; |
303 | Defs.insert(R); |
304 | } |
305 | } |
306 | |
307 | void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, |
308 | RegisterSet &Uses) { |
309 | for (auto &Op : MI.operands()) { |
310 | if (!Op.isReg() || !Op.isUse()) |
311 | continue; |
312 | Register R = Op.getReg(); |
313 | if (!R.isVirtual()) |
314 | continue; |
315 | Uses.insert(R); |
316 | } |
317 | } |
318 | |
// Check if the W bits of RC1 starting at B1 are equal to the W bits of RC2
// starting at B2.
320 | bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, |
321 | uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, |
322 | uint16_t W) { |
323 | for (uint16_t i = 0; i < W; ++i) { |
324 | // If RC1[i] is "bottom", it cannot be proven equal to RC2[i]. |
325 | if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) |
326 | return false; |
327 | // Same for RC2[i]. |
328 | if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) |
329 | return false; |
330 | if (RC1[B1+i] != RC2[B2+i]) |
331 | return false; |
332 | } |
333 | return true; |
334 | } |
335 | |
336 | bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, |
337 | uint16_t B, uint16_t W) { |
338 | assert(B < RC.width() && B+W <= RC.width()); |
339 | for (uint16_t i = B; i < B+W; ++i) |
340 | if (!RC[i].is(T: 0)) |
341 | return false; |
342 | return true; |
343 | } |
344 | |
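// Try to interpret the W bits of RC starting at position B as a constant.
// If every one of these bits has a known value (0 or 1), store the value in
// U and return true; otherwise return false.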
345 | bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, |
346 | uint16_t B, uint16_t W, uint64_t &U) { |
347 | assert(B < RC.width() && B+W <= RC.width()); |
348 | int64_t T = 0; |
349 | for (uint16_t i = B+W; i > B; --i) { |
350 | const BitTracker::BitValue &BV = RC[i-1]; |
351 | T <<= 1; |
352 | if (BV.is(T: 1)) |
353 | T |= 1; |
354 | else if (!BV.is(T: 0)) |
355 | return false; |
356 | } |
357 | U = T; |
358 | return true; |
359 | } |
360 | |
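// Rewrite all uses of the virtual register OldR to refer to NewR instead.
// Return true if OldR had any uses.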
361 | bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR, |
362 | MachineRegisterInfo &MRI) { |
363 | if (!OldR.isVirtual() || !NewR.isVirtual()) |
364 | return false; |
365 | auto Begin = MRI.use_begin(RegNo: OldR), End = MRI.use_end(); |
366 | decltype(End) NextI; |
367 | for (auto I = Begin; I != End; I = NextI) { |
368 | NextI = std::next(x: I); |
369 | I->setReg(NewR); |
370 | } |
371 | return Begin != End; |
372 | } |
373 | |
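// Rewrite all uses of OldR to use NewR with the subregister NewSR. Fail
// (and return false) if OldR has a tied use that would have to change its
// subregister.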
374 | bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR, |
375 | unsigned NewSR, |
376 | MachineRegisterInfo &MRI) { |
377 | if (!OldR.isVirtual() || !NewR.isVirtual()) |
378 | return false; |
379 | if (hasTiedUse(Reg: OldR, MRI, NewSub: NewSR)) |
380 | return false; |
381 | auto Begin = MRI.use_begin(RegNo: OldR), End = MRI.use_end(); |
382 | decltype(End) NextI; |
383 | for (auto I = Begin; I != End; I = NextI) { |
384 | NextI = std::next(x: I); |
385 | I->setReg(NewR); |
386 | I->setSubReg(NewSR); |
387 | } |
388 | return Begin != End; |
389 | } |
390 | |
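// Rewrite the uses of OldR that carry the subregister OldSR to use NewR
// with the subregister NewSR. Uses of OldR with a different subregister are
// left unchanged.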
391 | bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR, |
392 | Register NewR, unsigned NewSR, |
393 | MachineRegisterInfo &MRI) { |
394 | if (!OldR.isVirtual() || !NewR.isVirtual()) |
395 | return false; |
396 | if (OldSR != NewSR && hasTiedUse(Reg: OldR, MRI, NewSub: NewSR)) |
397 | return false; |
398 | auto Begin = MRI.use_begin(RegNo: OldR), End = MRI.use_end(); |
399 | decltype(End) NextI; |
400 | for (auto I = Begin; I != End; I = NextI) { |
401 | NextI = std::next(x: I); |
402 | if (I->getSubReg() != OldSR) |
403 | continue; |
404 | I->setReg(NewR); |
405 | I->setSubReg(NewSR); |
406 | } |
407 | return Begin != End; |
408 | } |
409 | |
410 | // For a register ref (pair Reg:Sub), set Begin to the position of the LSB |
411 | // of Sub in Reg, and set Width to the size of Sub in bits. Return true, |
412 | // if this succeeded, otherwise return false. |
413 | bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, |
414 | unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { |
415 | const TargetRegisterClass *RC = MRI.getRegClass(Reg: RR.Reg); |
416 | if (RR.Sub == 0) { |
417 | Begin = 0; |
418 | Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(RC: *RC); |
419 | return true; |
420 | } |
421 | |
422 | Begin = 0; |
423 | |
424 | switch (RC->getID()) { |
425 | case Hexagon::DoubleRegsRegClassID: |
426 | case Hexagon::HvxWRRegClassID: |
427 | Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(RC: *RC) / 2; |
428 | if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) |
429 | Begin = Width; |
430 | break; |
431 | default: |
432 | return false; |
433 | } |
434 | return true; |
435 | } |
436 | |
437 | |
438 | // For a REG_SEQUENCE, set SL to the low subregister and SH to the high |
439 | // subregister. |
440 | bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, |
441 | BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, |
442 | const MachineRegisterInfo &MRI) { |
443 | assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); |
444 | unsigned Sub1 = I.getOperand(i: 2).getImm(), Sub2 = I.getOperand(i: 4).getImm(); |
445 | auto &DstRC = *MRI.getRegClass(Reg: I.getOperand(i: 0).getReg()); |
446 | auto &HRI = static_cast<const HexagonRegisterInfo&>( |
447 | *MRI.getTargetRegisterInfo()); |
448 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC: DstRC, GenIdx: Hexagon::ps_sub_lo); |
449 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC: DstRC, GenIdx: Hexagon::ps_sub_hi); |
450 | assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo)); |
451 | if (Sub1 == SubLo && Sub2 == SubHi) { |
452 | SL = I.getOperand(i: 1); |
453 | SH = I.getOperand(i: 3); |
454 | return true; |
455 | } |
456 | if (Sub1 == SubHi && Sub2 == SubLo) { |
457 | SH = I.getOperand(i: 1); |
458 | SL = I.getOperand(i: 3); |
459 | return true; |
460 | } |
461 | return false; |
462 | } |
463 | |
464 | // All stores (except 64-bit stores) take a 32-bit register as the source |
465 | // of the value to be stored. If the instruction stores into a location |
466 | // that is shorter than 32 bits, some bits of the source register are not |
467 | // used. For each store instruction, calculate the set of used bits in |
468 | // the source register, and set appropriate bits in Bits. Return true if |
469 | // the bits are calculated, false otherwise. |
470 | bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, |
471 | uint16_t Begin) { |
472 | using namespace Hexagon; |
473 | |
474 | switch (Opc) { |
475 | // Store byte |
476 | case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 |
477 | case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new |
478 | case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 |
479 | case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 |
480 | case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 |
481 | case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 |
482 | case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new |
483 | case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new |
484 | case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new |
485 | case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new |
486 | case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 |
487 | case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new |
488 | case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 |
489 | case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 |
490 | case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 |
491 | case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 |
492 | case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new |
493 | case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new |
494 | case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new |
495 | case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new |
496 | case S4_storerb_ap: // memb(Re32=#U6)=Rt32 |
497 | case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new |
498 | case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 |
499 | case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new |
500 | case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 |
501 | case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new |
502 | case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 |
503 | case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new |
504 | case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 |
505 | case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new |
506 | case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 |
507 | case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new |
508 | case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 |
509 | case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new |
510 | case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 |
511 | case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 |
512 | case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 |
513 | case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 |
514 | case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new |
515 | case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new |
516 | case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new |
517 | case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new |
518 | case S2_storerbgp: // memb(gp+#u16:0)=Rt32 |
519 | case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new |
520 | case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 |
521 | case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 |
522 | case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 |
523 | case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 |
524 | case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new |
525 | case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new |
526 | case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new |
527 | case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new |
528 | Bits.set(I: Begin, E: Begin+8); |
529 | return true; |
530 | |
531 | // Store low half |
532 | case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 |
533 | case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new |
534 | case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 |
535 | case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 |
536 | case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 |
537 | case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 |
538 | case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new |
539 | case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new |
540 | case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new |
541 | case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new |
542 | case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 |
543 | case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new |
544 | case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 |
545 | case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 |
546 | case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 |
547 | case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 |
548 | case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new |
549 | case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new |
550 | case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new |
551 | case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new |
552 | case S4_storerh_ap: // memh(Re32=#U6)=Rt32 |
553 | case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new |
554 | case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 |
555 | case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new |
556 | case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 |
557 | case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new |
558 | case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 |
559 | case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new |
560 | case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 |
561 | case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new |
562 | case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 |
563 | case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new |
564 | case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 |
565 | case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 |
566 | case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 |
567 | case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 |
568 | case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 |
569 | case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new |
570 | case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new |
571 | case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new |
572 | case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new |
573 | case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new |
574 | case S2_storerhgp: // memh(gp+#u16:1)=Rt32 |
575 | case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new |
576 | case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 |
577 | case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32 |
578 | case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 |
579 | case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 |
580 | case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new |
581 | case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new |
582 | case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new |
583 | case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new |
584 | Bits.set(I: Begin, E: Begin+16); |
585 | return true; |
586 | |
587 | // Store high half |
588 | case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 |
589 | case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 |
590 | case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 |
591 | case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 |
592 | case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 |
593 | case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 |
594 | case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 |
595 | case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 |
596 | case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 |
597 | case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 |
598 | case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 |
599 | case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 |
600 | case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 |
601 | case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 |
602 | case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 |
603 | case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 |
604 | case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 |
605 | case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 |
606 | case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 |
607 | case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 |
608 | case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 |
609 | case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 |
610 | case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 |
611 | case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 |
612 | case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 |
613 | case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 |
614 | Bits.set(I: Begin+16, E: Begin+32); |
615 | return true; |
616 | } |
617 | |
618 | return false; |
619 | } |
620 | |
621 | // For an instruction with opcode Opc, calculate the set of bits that it |
622 | // uses in a register in operand OpN. This only calculates the set of used |
623 | // bits for cases where it does not depend on any operands (as is the case |
624 | // in shifts, for example). For concrete instructions from a program, the |
625 | // operand may be a subregister of a larger register, while Bits would |
626 | // correspond to the larger register in its entirety. Because of that, |
627 | // the parameter Begin can be used to indicate which bit of Bits should be |
628 | // considered the LSB of the operand. |
629 | bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, |
630 | BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { |
631 | using namespace Hexagon; |
632 | |
633 | const MCInstrDesc &D = HII.get(Opcode: Opc); |
634 | if (D.mayStore()) { |
635 | if (OpN == D.getNumOperands()-1) |
636 | return getUsedBitsInStore(Opc, Bits, Begin); |
637 | return false; |
638 | } |
639 | |
640 | switch (Opc) { |
641 | // One register source. Used bits: R1[0-7]. |
642 | case A2_sxtb: |
643 | case A2_zxtb: |
644 | case A4_cmpbeqi: |
645 | case A4_cmpbgti: |
646 | case A4_cmpbgtui: |
647 | if (OpN == 1) { |
648 | Bits.set(I: Begin, E: Begin+8); |
649 | return true; |
650 | } |
651 | break; |
652 | |
653 | // One register source. Used bits: R1[0-15]. |
654 | case A2_aslh: |
655 | case A2_sxth: |
656 | case A2_zxth: |
657 | case A4_cmpheqi: |
658 | case A4_cmphgti: |
659 | case A4_cmphgtui: |
660 | if (OpN == 1) { |
661 | Bits.set(I: Begin, E: Begin+16); |
662 | return true; |
663 | } |
664 | break; |
665 | |
666 | // One register source. Used bits: R1[16-31]. |
667 | case A2_asrh: |
668 | if (OpN == 1) { |
669 | Bits.set(I: Begin+16, E: Begin+32); |
670 | return true; |
671 | } |
672 | break; |
673 | |
674 | // Two register sources. Used bits: R1[0-7], R2[0-7]. |
675 | case A4_cmpbeq: |
676 | case A4_cmpbgt: |
677 | case A4_cmpbgtu: |
678 | if (OpN == 1) { |
679 | Bits.set(I: Begin, E: Begin+8); |
680 | return true; |
681 | } |
682 | break; |
683 | |
684 | // Two register sources. Used bits: R1[0-15], R2[0-15]. |
685 | case A4_cmpheq: |
686 | case A4_cmphgt: |
687 | case A4_cmphgtu: |
688 | case A2_addh_h16_ll: |
689 | case A2_addh_h16_sat_ll: |
690 | case A2_addh_l16_ll: |
691 | case A2_addh_l16_sat_ll: |
692 | case A2_combine_ll: |
693 | case A2_subh_h16_ll: |
694 | case A2_subh_h16_sat_ll: |
695 | case A2_subh_l16_ll: |
696 | case A2_subh_l16_sat_ll: |
697 | case M2_mpy_acc_ll_s0: |
698 | case M2_mpy_acc_ll_s1: |
699 | case M2_mpy_acc_sat_ll_s0: |
700 | case M2_mpy_acc_sat_ll_s1: |
701 | case M2_mpy_ll_s0: |
702 | case M2_mpy_ll_s1: |
703 | case M2_mpy_nac_ll_s0: |
704 | case M2_mpy_nac_ll_s1: |
705 | case M2_mpy_nac_sat_ll_s0: |
706 | case M2_mpy_nac_sat_ll_s1: |
707 | case M2_mpy_rnd_ll_s0: |
708 | case M2_mpy_rnd_ll_s1: |
709 | case M2_mpy_sat_ll_s0: |
710 | case M2_mpy_sat_ll_s1: |
711 | case M2_mpy_sat_rnd_ll_s0: |
712 | case M2_mpy_sat_rnd_ll_s1: |
713 | case M2_mpyd_acc_ll_s0: |
714 | case M2_mpyd_acc_ll_s1: |
715 | case M2_mpyd_ll_s0: |
716 | case M2_mpyd_ll_s1: |
717 | case M2_mpyd_nac_ll_s0: |
718 | case M2_mpyd_nac_ll_s1: |
719 | case M2_mpyd_rnd_ll_s0: |
720 | case M2_mpyd_rnd_ll_s1: |
721 | case M2_mpyu_acc_ll_s0: |
722 | case M2_mpyu_acc_ll_s1: |
723 | case M2_mpyu_ll_s0: |
724 | case M2_mpyu_ll_s1: |
725 | case M2_mpyu_nac_ll_s0: |
726 | case M2_mpyu_nac_ll_s1: |
727 | case M2_mpyud_acc_ll_s0: |
728 | case M2_mpyud_acc_ll_s1: |
729 | case M2_mpyud_ll_s0: |
730 | case M2_mpyud_ll_s1: |
731 | case M2_mpyud_nac_ll_s0: |
732 | case M2_mpyud_nac_ll_s1: |
733 | if (OpN == 1 || OpN == 2) { |
734 | Bits.set(I: Begin, E: Begin+16); |
735 | return true; |
736 | } |
737 | break; |
738 | |
739 | // Two register sources. Used bits: R1[0-15], R2[16-31]. |
740 | case A2_addh_h16_lh: |
741 | case A2_addh_h16_sat_lh: |
742 | case A2_combine_lh: |
743 | case A2_subh_h16_lh: |
744 | case A2_subh_h16_sat_lh: |
745 | case M2_mpy_acc_lh_s0: |
746 | case M2_mpy_acc_lh_s1: |
747 | case M2_mpy_acc_sat_lh_s0: |
748 | case M2_mpy_acc_sat_lh_s1: |
749 | case M2_mpy_lh_s0: |
750 | case M2_mpy_lh_s1: |
751 | case M2_mpy_nac_lh_s0: |
752 | case M2_mpy_nac_lh_s1: |
753 | case M2_mpy_nac_sat_lh_s0: |
754 | case M2_mpy_nac_sat_lh_s1: |
755 | case M2_mpy_rnd_lh_s0: |
756 | case M2_mpy_rnd_lh_s1: |
757 | case M2_mpy_sat_lh_s0: |
758 | case M2_mpy_sat_lh_s1: |
759 | case M2_mpy_sat_rnd_lh_s0: |
760 | case M2_mpy_sat_rnd_lh_s1: |
761 | case M2_mpyd_acc_lh_s0: |
762 | case M2_mpyd_acc_lh_s1: |
763 | case M2_mpyd_lh_s0: |
764 | case M2_mpyd_lh_s1: |
765 | case M2_mpyd_nac_lh_s0: |
766 | case M2_mpyd_nac_lh_s1: |
767 | case M2_mpyd_rnd_lh_s0: |
768 | case M2_mpyd_rnd_lh_s1: |
769 | case M2_mpyu_acc_lh_s0: |
770 | case M2_mpyu_acc_lh_s1: |
771 | case M2_mpyu_lh_s0: |
772 | case M2_mpyu_lh_s1: |
773 | case M2_mpyu_nac_lh_s0: |
774 | case M2_mpyu_nac_lh_s1: |
775 | case M2_mpyud_acc_lh_s0: |
776 | case M2_mpyud_acc_lh_s1: |
777 | case M2_mpyud_lh_s0: |
778 | case M2_mpyud_lh_s1: |
779 | case M2_mpyud_nac_lh_s0: |
780 | case M2_mpyud_nac_lh_s1: |
781 | // These four are actually LH. |
782 | case A2_addh_l16_hl: |
783 | case A2_addh_l16_sat_hl: |
784 | case A2_subh_l16_hl: |
785 | case A2_subh_l16_sat_hl: |
786 | if (OpN == 1) { |
787 | Bits.set(I: Begin, E: Begin+16); |
788 | return true; |
789 | } |
790 | if (OpN == 2) { |
791 | Bits.set(I: Begin+16, E: Begin+32); |
792 | return true; |
793 | } |
794 | break; |
795 | |
796 | // Two register sources, used bits: R1[16-31], R2[0-15]. |
797 | case A2_addh_h16_hl: |
798 | case A2_addh_h16_sat_hl: |
799 | case A2_combine_hl: |
800 | case A2_subh_h16_hl: |
801 | case A2_subh_h16_sat_hl: |
802 | case M2_mpy_acc_hl_s0: |
803 | case M2_mpy_acc_hl_s1: |
804 | case M2_mpy_acc_sat_hl_s0: |
805 | case M2_mpy_acc_sat_hl_s1: |
806 | case M2_mpy_hl_s0: |
807 | case M2_mpy_hl_s1: |
808 | case M2_mpy_nac_hl_s0: |
809 | case M2_mpy_nac_hl_s1: |
810 | case M2_mpy_nac_sat_hl_s0: |
811 | case M2_mpy_nac_sat_hl_s1: |
812 | case M2_mpy_rnd_hl_s0: |
813 | case M2_mpy_rnd_hl_s1: |
814 | case M2_mpy_sat_hl_s0: |
815 | case M2_mpy_sat_hl_s1: |
816 | case M2_mpy_sat_rnd_hl_s0: |
817 | case M2_mpy_sat_rnd_hl_s1: |
818 | case M2_mpyd_acc_hl_s0: |
819 | case M2_mpyd_acc_hl_s1: |
820 | case M2_mpyd_hl_s0: |
821 | case M2_mpyd_hl_s1: |
822 | case M2_mpyd_nac_hl_s0: |
823 | case M2_mpyd_nac_hl_s1: |
824 | case M2_mpyd_rnd_hl_s0: |
825 | case M2_mpyd_rnd_hl_s1: |
826 | case M2_mpyu_acc_hl_s0: |
827 | case M2_mpyu_acc_hl_s1: |
828 | case M2_mpyu_hl_s0: |
829 | case M2_mpyu_hl_s1: |
830 | case M2_mpyu_nac_hl_s0: |
831 | case M2_mpyu_nac_hl_s1: |
832 | case M2_mpyud_acc_hl_s0: |
833 | case M2_mpyud_acc_hl_s1: |
834 | case M2_mpyud_hl_s0: |
835 | case M2_mpyud_hl_s1: |
836 | case M2_mpyud_nac_hl_s0: |
837 | case M2_mpyud_nac_hl_s1: |
838 | if (OpN == 1) { |
839 | Bits.set(I: Begin+16, E: Begin+32); |
840 | return true; |
841 | } |
842 | if (OpN == 2) { |
843 | Bits.set(I: Begin, E: Begin+16); |
844 | return true; |
845 | } |
846 | break; |
847 | |
848 | // Two register sources, used bits: R1[16-31], R2[16-31]. |
849 | case A2_addh_h16_hh: |
850 | case A2_addh_h16_sat_hh: |
851 | case A2_combine_hh: |
852 | case A2_subh_h16_hh: |
853 | case A2_subh_h16_sat_hh: |
854 | case M2_mpy_acc_hh_s0: |
855 | case M2_mpy_acc_hh_s1: |
856 | case M2_mpy_acc_sat_hh_s0: |
857 | case M2_mpy_acc_sat_hh_s1: |
858 | case M2_mpy_hh_s0: |
859 | case M2_mpy_hh_s1: |
860 | case M2_mpy_nac_hh_s0: |
861 | case M2_mpy_nac_hh_s1: |
862 | case M2_mpy_nac_sat_hh_s0: |
863 | case M2_mpy_nac_sat_hh_s1: |
864 | case M2_mpy_rnd_hh_s0: |
865 | case M2_mpy_rnd_hh_s1: |
866 | case M2_mpy_sat_hh_s0: |
867 | case M2_mpy_sat_hh_s1: |
868 | case M2_mpy_sat_rnd_hh_s0: |
869 | case M2_mpy_sat_rnd_hh_s1: |
870 | case M2_mpyd_acc_hh_s0: |
871 | case M2_mpyd_acc_hh_s1: |
872 | case M2_mpyd_hh_s0: |
873 | case M2_mpyd_hh_s1: |
874 | case M2_mpyd_nac_hh_s0: |
875 | case M2_mpyd_nac_hh_s1: |
876 | case M2_mpyd_rnd_hh_s0: |
877 | case M2_mpyd_rnd_hh_s1: |
878 | case M2_mpyu_acc_hh_s0: |
879 | case M2_mpyu_acc_hh_s1: |
880 | case M2_mpyu_hh_s0: |
881 | case M2_mpyu_hh_s1: |
882 | case M2_mpyu_nac_hh_s0: |
883 | case M2_mpyu_nac_hh_s1: |
884 | case M2_mpyud_acc_hh_s0: |
885 | case M2_mpyud_acc_hh_s1: |
886 | case M2_mpyud_hh_s0: |
887 | case M2_mpyud_hh_s1: |
888 | case M2_mpyud_nac_hh_s0: |
889 | case M2_mpyud_nac_hh_s1: |
890 | if (OpN == 1 || OpN == 2) { |
891 | Bits.set(I: Begin+16, E: Begin+32); |
892 | return true; |
893 | } |
894 | break; |
895 | } |
896 | |
897 | return false; |
898 | } |
899 | |
900 | // Calculate the register class that matches Reg:Sub. For example, if |
901 | // %1 is a double register, then %1:isub_hi would match the "int" |
902 | // register class. |
903 | const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( |
904 | const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { |
905 | if (!RR.Reg.isVirtual()) |
906 | return nullptr; |
907 | auto *RC = MRI.getRegClass(Reg: RR.Reg); |
908 | if (RR.Sub == 0) |
909 | return RC; |
910 | auto &HRI = static_cast<const HexagonRegisterInfo&>( |
911 | *MRI.getTargetRegisterInfo()); |
912 | |
913 | auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void { |
914 | (void)HRI; |
915 | assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) || |
916 | Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi)); |
917 | }; |
918 | |
919 | switch (RC->getID()) { |
920 | case Hexagon::DoubleRegsRegClassID: |
921 | VerifySR(RC, RR.Sub); |
922 | return &Hexagon::IntRegsRegClass; |
923 | case Hexagon::HvxWRRegClassID: |
924 | VerifySR(RC, RR.Sub); |
925 | return &Hexagon::HvxVRRegClass; |
926 | } |
927 | return nullptr; |
928 | } |
929 | |
930 | // Check if RD could be replaced with RS at any possible use of RD. |
// For example, a predicate register cannot be replaced with an integer
932 | // register, but a 64-bit register with a subregister can be replaced |
933 | // with a 32-bit register. |
934 | bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, |
935 | const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { |
936 | if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual()) |
937 | return false; |
938 | // Return false if one (or both) classes are nullptr. |
939 | auto *DRC = getFinalVRegClass(RR: RD, MRI); |
940 | if (!DRC) |
941 | return false; |
942 | |
943 | return DRC == getFinalVRegClass(RR: RS, MRI); |
944 | } |
945 | |
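// Check if any use of Reg is tied and carries a subregister other than
// NewSub. Such uses prevent register replacement when PreserveTiedOps is
// set.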
946 | bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, |
947 | unsigned NewSub) { |
948 | if (!PreserveTiedOps) |
949 | return false; |
950 | return llvm::any_of(Range: MRI.use_operands(Reg), |
951 | P: [NewSub] (const MachineOperand &Op) -> bool { |
952 | return Op.getSubReg() != NewSub && Op.isTied(); |
953 | }); |
954 | } |
955 | |
956 | namespace { |
957 | |
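// Dead code elimination that is aware of lifetime markers: it walks the
// dominator tree in post order and erases instructions (including PHIs)
// whose defined virtual registers have no meaningful uses, while leaving
// LIFETIME_START/LIFETIME_END markers and inline asm untouched.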
958 | class DeadCodeElimination { |
959 | public: |
960 | DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt) |
961 | : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()), |
962 | MDT(mdt), MRI(mf.getRegInfo()) {} |
963 | |
964 | bool run() { |
965 | return runOnNode(N: MDT.getRootNode()); |
966 | } |
967 | |
968 | private: |
969 | bool isDead(unsigned R) const; |
970 | bool runOnNode(MachineDomTreeNode *N); |
971 | |
972 | MachineFunction &MF; |
973 | const HexagonInstrInfo &HII; |
974 | MachineDominatorTree &MDT; |
975 | MachineRegisterInfo &MRI; |
976 | }; |
977 | |
978 | } // end anonymous namespace |
979 | |
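// A virtual register is considered dead if its only non-debug uses are in
// PHI instructions that define the same register (i.e. self-referencing
// PHIs).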
980 | bool DeadCodeElimination::isDead(unsigned R) const { |
981 | for (const MachineOperand &MO : MRI.use_operands(Reg: R)) { |
982 | const MachineInstr *UseI = MO.getParent(); |
983 | if (UseI->isDebugInstr()) |
984 | continue; |
985 | if (UseI->isPHI()) { |
986 | assert(!UseI->getOperand(0).getSubReg()); |
987 | Register DR = UseI->getOperand(i: 0).getReg(); |
988 | if (DR == R) |
989 | continue; |
990 | } |
991 | return false; |
992 | } |
993 | return true; |
994 | } |
995 | |
996 | bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { |
997 | bool Changed = false; |
998 | |
999 | for (auto *DTN : children<MachineDomTreeNode*>(G: N)) |
1000 | Changed |= runOnNode(N: DTN); |
1001 | |
1002 | MachineBasicBlock *B = N->getBlock(); |
1003 | std::vector<MachineInstr*> Instrs; |
1004 | for (MachineInstr &MI : llvm::reverse(C&: *B)) |
1005 | Instrs.push_back(x: &MI); |
1006 | |
1007 | for (auto *MI : Instrs) { |
1008 | unsigned Opc = MI->getOpcode(); |
1009 | // Do not touch lifetime markers. This is why the target-independent DCE |
1010 | // cannot be used. |
1011 | if (Opc == TargetOpcode::LIFETIME_START || |
1012 | Opc == TargetOpcode::LIFETIME_END) |
1013 | continue; |
1014 | bool Store = false; |
1015 | if (MI->isInlineAsm()) |
1016 | continue; |
1017 | // Delete PHIs if possible. |
1018 | if (!MI->isPHI() && !MI->isSafeToMove(SawStore&: Store)) |
1019 | continue; |
1020 | |
1021 | bool AllDead = true; |
1022 | SmallVector<unsigned,2> Regs; |
1023 | for (auto &Op : MI->operands()) { |
1024 | if (!Op.isReg() || !Op.isDef()) |
1025 | continue; |
1026 | Register R = Op.getReg(); |
1027 | if (!R.isVirtual() || !isDead(R)) { |
1028 | AllDead = false; |
1029 | break; |
1030 | } |
1031 | Regs.push_back(Elt: R); |
1032 | } |
1033 | if (!AllDead) |
1034 | continue; |
1035 | |
1036 | B->erase(I: MI); |
1037 | for (unsigned Reg : Regs) |
1038 | MRI.markUsesInDebugValueAsUndef(Reg); |
1039 | Changed = true; |
1040 | } |
1041 | |
1042 | return Changed; |
1043 | } |
1044 | |
1045 | namespace { |
1046 | |
1047 | // Eliminate redundant instructions |
1048 | // |
1049 | // This transformation will identify instructions where the output register |
1050 | // is the same as one of its input registers. This only works on instructions |
1051 | // that define a single register (unlike post-increment loads, for example). |
1052 | // The equality check is actually more detailed: the code calculates which |
1053 | // bits of the output are used, and only compares these bits with the input |
1054 | // registers. |
1055 | // If the output matches an input, the instruction is replaced with COPY. |
1056 | // The copies will be removed by another transformation. |
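// For example, if the bit tracker can prove that in
//   %1 = inst %2, ...
// every bit of %1 that is actually used downstream equals the corresponding
// bit of %2, then the instruction is replaced with a copy of %2, even if
// the remaining (unused) bits differ.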
1057 | class RedundantInstrElimination : public Transformation { |
1058 | public: |
1059 | RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, |
1060 | const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1061 | : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} |
1062 | |
1063 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1064 | |
1065 | private: |
1066 | bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, |
1067 | unsigned &LostB, unsigned &LostE); |
1068 | bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, |
1069 | unsigned &LostB, unsigned &LostE); |
1070 | bool computeUsedBits(unsigned Reg, BitVector &Bits); |
1071 | bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, |
1072 | uint16_t Begin); |
1073 | bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); |
1074 | |
1075 | const HexagonInstrInfo &HII; |
1076 | const HexagonRegisterInfo &HRI; |
1077 | MachineRegisterInfo &MRI; |
1078 | BitTracker &BT; |
1079 | }; |
1080 | |
1081 | } // end anonymous namespace |
1082 | |
1083 | // Check if the instruction is a lossy shift left, where the input being |
1084 | // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range |
1085 | // of bit indices that are lost. |
1086 | bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, |
1087 | unsigned OpN, unsigned &LostB, unsigned &LostE) { |
1088 | using namespace Hexagon; |
1089 | |
1090 | unsigned Opc = MI.getOpcode(); |
1091 | unsigned ImN, RegN, Width; |
1092 | switch (Opc) { |
1093 | case S2_asl_i_p: |
1094 | ImN = 2; |
1095 | RegN = 1; |
1096 | Width = 64; |
1097 | break; |
1098 | case S2_asl_i_p_acc: |
1099 | case S2_asl_i_p_and: |
1100 | case S2_asl_i_p_nac: |
1101 | case S2_asl_i_p_or: |
1102 | case S2_asl_i_p_xacc: |
1103 | ImN = 3; |
1104 | RegN = 2; |
1105 | Width = 64; |
1106 | break; |
1107 | case S2_asl_i_r: |
1108 | ImN = 2; |
1109 | RegN = 1; |
1110 | Width = 32; |
1111 | break; |
1112 | case S2_addasl_rrri: |
1113 | case S4_andi_asl_ri: |
1114 | case S4_ori_asl_ri: |
1115 | case S4_addi_asl_ri: |
1116 | case S4_subi_asl_ri: |
1117 | case S2_asl_i_r_acc: |
1118 | case S2_asl_i_r_and: |
1119 | case S2_asl_i_r_nac: |
1120 | case S2_asl_i_r_or: |
1121 | case S2_asl_i_r_sat: |
1122 | case S2_asl_i_r_xacc: |
1123 | ImN = 3; |
1124 | RegN = 2; |
1125 | Width = 32; |
1126 | break; |
1127 | default: |
1128 | return false; |
1129 | } |
1130 | |
1131 | if (RegN != OpN) |
1132 | return false; |
1133 | |
1134 | assert(MI.getOperand(ImN).isImm()); |
1135 | unsigned S = MI.getOperand(i: ImN).getImm(); |
1136 | if (S == 0) |
1137 | return false; |
1138 | LostB = Width-S; |
1139 | LostE = Width; |
1140 | return true; |
1141 | } |
1142 | |
1143 | // Check if the instruction is a lossy shift right, where the input being |
1144 | // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range |
1145 | // of bit indices that are lost. |
1146 | bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, |
1147 | unsigned OpN, unsigned &LostB, unsigned &LostE) { |
1148 | using namespace Hexagon; |
1149 | |
1150 | unsigned Opc = MI.getOpcode(); |
1151 | unsigned ImN, RegN; |
1152 | switch (Opc) { |
1153 | case S2_asr_i_p: |
1154 | case S2_lsr_i_p: |
1155 | ImN = 2; |
1156 | RegN = 1; |
1157 | break; |
1158 | case S2_asr_i_p_acc: |
1159 | case S2_asr_i_p_and: |
1160 | case S2_asr_i_p_nac: |
1161 | case S2_asr_i_p_or: |
1162 | case S2_lsr_i_p_acc: |
1163 | case S2_lsr_i_p_and: |
1164 | case S2_lsr_i_p_nac: |
1165 | case S2_lsr_i_p_or: |
1166 | case S2_lsr_i_p_xacc: |
1167 | ImN = 3; |
1168 | RegN = 2; |
1169 | break; |
1170 | case S2_asr_i_r: |
1171 | case S2_lsr_i_r: |
1172 | ImN = 2; |
1173 | RegN = 1; |
1174 | break; |
1175 | case S4_andi_lsr_ri: |
1176 | case S4_ori_lsr_ri: |
1177 | case S4_addi_lsr_ri: |
1178 | case S4_subi_lsr_ri: |
1179 | case S2_asr_i_r_acc: |
1180 | case S2_asr_i_r_and: |
1181 | case S2_asr_i_r_nac: |
1182 | case S2_asr_i_r_or: |
1183 | case S2_lsr_i_r_acc: |
1184 | case S2_lsr_i_r_and: |
1185 | case S2_lsr_i_r_nac: |
1186 | case S2_lsr_i_r_or: |
1187 | case S2_lsr_i_r_xacc: |
1188 | ImN = 3; |
1189 | RegN = 2; |
1190 | break; |
1191 | |
1192 | default: |
1193 | return false; |
1194 | } |
1195 | |
1196 | if (RegN != OpN) |
1197 | return false; |
1198 | |
1199 | assert(MI.getOperand(ImN).isImm()); |
1200 | unsigned S = MI.getOperand(i: ImN).getImm(); |
1201 | LostB = 0; |
1202 | LostE = S; |
1203 | return true; |
1204 | } |
1205 | |
// Calculate the bit vector that corresponds to the used bits of register Reg.
// The vector Bits has the same size as Reg in bits. If the calculation
// fails (i.e. the used bits are unknown), return false. Otherwise, return
// true and set the corresponding bits in Bits.
1210 | bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { |
1211 | BitVector Used(Bits.size()); |
1212 | RegisterSet Visited; |
1213 | std::vector<unsigned> Pending; |
1214 | Pending.push_back(x: Reg); |
1215 | |
1216 | for (unsigned i = 0; i < Pending.size(); ++i) { |
1217 | unsigned R = Pending[i]; |
1218 | if (Visited.has(R)) |
1219 | continue; |
1220 | Visited.insert(R); |
1221 | for (auto I = MRI.use_begin(RegNo: R), E = MRI.use_end(); I != E; ++I) { |
1222 | BitTracker::RegisterRef UR = *I; |
1223 | unsigned B, W; |
1224 | if (!HBS::getSubregMask(RR: UR, Begin&: B, Width&: W, MRI)) |
1225 | return false; |
1226 | MachineInstr &UseI = *I->getParent(); |
1227 | if (UseI.isPHI() || UseI.isCopy()) { |
1228 | Register DefR = UseI.getOperand(i: 0).getReg(); |
1229 | if (!DefR.isVirtual()) |
1230 | return false; |
1231 | Pending.push_back(x: DefR); |
1232 | } else { |
1233 | if (!computeUsedBits(MI: UseI, OpN: I.getOperandNo(), Bits&: Used, Begin: B)) |
1234 | return false; |
1235 | } |
1236 | } |
1237 | } |
1238 | Bits |= Used; |
1239 | return true; |
1240 | } |
1241 | |
1242 | // Calculate the bits used by instruction MI in a register in operand OpN. |
// Return true/false if the calculation succeeds/fails. If it succeeds, set
1244 | // used bits in Bits. This function does not reset any bits in Bits, so |
1245 | // subsequent calls over different instructions will result in the union |
1246 | // of the used bits in all these instructions. |
1247 | // The register in question may be used with a sub-register, whereas Bits |
1248 | // holds the bits for the entire register. To keep track of that, the |
1249 | // argument Begin indicates where in Bits is the lowest-significant bit |
1250 | // of the register used in operand OpN. For example, in instruction: |
1251 | // %1 = S2_lsr_i_r %2:isub_hi, 10 |
1252 | // the operand 1 is a 32-bit register, which happens to be a subregister |
1253 | // of the 64-bit register %2, and that subregister starts at position 32. |
1254 | // In this case Begin=32, since Bits[32] would be the lowest-significant bit |
1255 | // of %2:isub_hi. |
1256 | bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, |
1257 | unsigned OpN, BitVector &Bits, uint16_t Begin) { |
1258 | unsigned Opc = MI.getOpcode(); |
1259 | BitVector T(Bits.size()); |
1260 | bool GotBits = HBS::getUsedBits(Opc, OpN, Bits&: T, Begin, HII); |
1261 | // Even if we don't have bits yet, we could still provide some information |
1262 | // if the instruction is a lossy shift: the lost bits will be marked as |
1263 | // not used. |
1264 | unsigned LB, LE; |
1265 | if (isLossyShiftLeft(MI, OpN, LostB&: LB, LostE&: LE) || isLossyShiftRight(MI, OpN, LostB&: LB, LostE&: LE)) { |
1266 | assert(MI.getOperand(OpN).isReg()); |
1267 | BitTracker::RegisterRef RR = MI.getOperand(i: OpN); |
1268 | const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); |
1269 | uint16_t Width = HRI.getRegSizeInBits(RC: *RC); |
1270 | |
1271 | if (!GotBits) |
1272 | T.set(I: Begin, E: Begin+Width); |
1273 | assert(LB <= LE && LB < Width && LE <= Width); |
1274 | T.reset(I: Begin+LB, E: Begin+LE); |
1275 | GotBits = true; |
1276 | } |
1277 | if (GotBits) |
1278 | Bits |= T; |
1279 | return GotBits; |
1280 | } |
1281 | |
1282 | // Calculates the used bits in RD ("defined register"), and checks if these |
1283 | // bits in RS ("used register") and RD are identical. |
1284 | bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, |
1285 | BitTracker::RegisterRef RS) { |
1286 | const BitTracker::RegisterCell &DC = BT.lookup(Reg: RD.Reg); |
1287 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: RS.Reg); |
1288 | |
1289 | unsigned DB, DW; |
1290 | if (!HBS::getSubregMask(RR: RD, Begin&: DB, Width&: DW, MRI)) |
1291 | return false; |
1292 | unsigned SB, SW; |
1293 | if (!HBS::getSubregMask(RR: RS, Begin&: SB, Width&: SW, MRI)) |
1294 | return false; |
1295 | if (SW != DW) |
1296 | return false; |
1297 | |
1298 | BitVector Used(DC.width()); |
1299 | if (!computeUsedBits(Reg: RD.Reg, Bits&: Used)) |
1300 | return false; |
1301 | |
1302 | for (unsigned i = 0; i != DW; ++i) |
1303 | if (Used[i+DB] && DC[DB+i] != SC[SB+i]) |
1304 | return false; |
1305 | return true; |
1306 | } |
1307 | |
1308 | bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, |
1309 | const RegisterSet&) { |
1310 | if (!BT.reached(B: &B)) |
1311 | return false; |
1312 | bool Changed = false; |
1313 | |
1314 | for (auto I = B.begin(), E = B.end(); I != E; ++I) { |
1315 | MachineInstr *MI = &*I; |
1316 | |
1317 | if (MI->getOpcode() == TargetOpcode::COPY) |
1318 | continue; |
1319 | if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) |
1320 | continue; |
1321 | unsigned NumD = MI->getDesc().getNumDefs(); |
1322 | if (NumD != 1) |
1323 | continue; |
1324 | |
1325 | BitTracker::RegisterRef RD = MI->getOperand(i: 0); |
1326 | if (!BT.has(Reg: RD.Reg)) |
1327 | continue; |
1328 | const BitTracker::RegisterCell &DC = BT.lookup(Reg: RD.Reg); |
1329 | auto At = MachineBasicBlock::iterator(MI); |
1330 | |
1331 | // Find a source operand that is equal to the result. |
1332 | for (auto &Op : MI->uses()) { |
1333 | if (!Op.isReg()) |
1334 | continue; |
1335 | BitTracker::RegisterRef RS = Op; |
1336 | if (!BT.has(Reg: RS.Reg)) |
1337 | continue; |
1338 | if (!HBS::isTransparentCopy(RD, RS, MRI)) |
1339 | continue; |
1340 | |
1341 | unsigned BN, BW; |
1342 | if (!HBS::getSubregMask(RR: RS, Begin&: BN, Width&: BW, MRI)) |
1343 | continue; |
1344 | |
1345 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: RS.Reg); |
1346 | if (!usedBitsEqual(RD, RS) && !HBS::isEqual(RC1: DC, B1: 0, RC2: SC, B2: BN, W: BW)) |
1347 | continue; |
1348 | |
1349 | // If found, replace the instruction with a COPY. |
1350 | const DebugLoc &DL = MI->getDebugLoc(); |
1351 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RR: RD, MRI); |
1352 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
1353 | MachineInstr *CopyI = |
1354 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: TargetOpcode::COPY), DestReg: NewR) |
1355 | .addReg(RegNo: RS.Reg, flags: 0, SubReg: RS.Sub); |
1356 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
1357 | // This pass can create copies between registers that don't have the |
1358 | // exact same values. Updating the tracker has to involve updating |
1359 | // all dependent cells. Example: |
1360 | // %1 = inst %2 ; %1 != %2, but used bits are equal |
1361 | // |
1362 | // %3 = copy %2 ; <- inserted |
1363 | // ... = %3 ; <- replaced from %2 |
1364 | // Indirectly, we can create a "copy" between %1 and %2 even |
1365 | // though their exact values do not match. |
1366 | BT.visit(MI: *CopyI); |
1367 | Changed = true; |
1368 | break; |
1369 | } |
1370 | } |
1371 | |
1372 | return Changed; |
1373 | } |
1374 | |
1375 | namespace { |
1376 | |
1377 | // Recognize instructions that produce constant values known at compile-time. |
1378 | // Replace them with register definitions that load these constants directly. |
1379 | class ConstGeneration : public Transformation { |
1380 | public: |
1381 | ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, |
1382 | MachineRegisterInfo &mri) |
1383 | : Transformation(true), HII(hii), MRI(mri), BT(bt) {} |
1384 | |
1385 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1386 | static bool isTfrConst(const MachineInstr &MI); |
1387 | |
1388 | private: |
1389 | Register genTfrConst(const TargetRegisterClass *RC, int64_t C, |
1390 | MachineBasicBlock &B, MachineBasicBlock::iterator At, |
1391 | DebugLoc &DL); |
1392 | |
1393 | const HexagonInstrInfo &HII; |
1394 | MachineRegisterInfo &MRI; |
1395 | BitTracker &BT; |
1396 | }; |
1397 | |
1398 | } // end anonymous namespace |
1399 | |
1400 | bool ConstGeneration::isTfrConst(const MachineInstr &MI) { |
1401 | unsigned Opc = MI.getOpcode(); |
1402 | switch (Opc) { |
1403 | case Hexagon::A2_combineii: |
1404 | case Hexagon::A4_combineii: |
1405 | case Hexagon::A2_tfrsi: |
1406 | case Hexagon::A2_tfrpi: |
1407 | case Hexagon::PS_true: |
1408 | case Hexagon::PS_false: |
1409 | case Hexagon::CONST32: |
1410 | case Hexagon::CONST64: |
1411 | return true; |
1412 | } |
1413 | return false; |
1414 | } |
1415 | |
1416 | // Generate a transfer-immediate instruction that is appropriate for the |
1417 | // register class and the actual value being transferred. |
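// For 32-bit registers this is A2_tfrsi; for 64-bit registers it is
// A2_tfrpi, A2_combineii/A4_combineii, or CONST64, depending on the value;
// for predicate registers it is PS_true or PS_false.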
1418 | Register ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, |
1419 | MachineBasicBlock &B, |
1420 | MachineBasicBlock::iterator At, |
1421 | DebugLoc &DL) { |
1422 | Register Reg = MRI.createVirtualRegister(RegClass: RC); |
1423 | if (RC == &Hexagon::IntRegsRegClass) { |
1424 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A2_tfrsi), DestReg: Reg) |
1425 | .addImm(Val: int32_t(C)); |
1426 | return Reg; |
1427 | } |
1428 | |
1429 | if (RC == &Hexagon::DoubleRegsRegClass) { |
1430 | if (isInt<8>(x: C)) { |
1431 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A2_tfrpi), DestReg: Reg) |
1432 | .addImm(Val: C); |
1433 | return Reg; |
1434 | } |
1435 | |
1436 | unsigned Lo = Lo_32(Value: C), Hi = Hi_32(Value: C); |
1437 | if (isInt<8>(x: Lo) || isInt<8>(x: Hi)) { |
1438 | unsigned Opc = isInt<8>(x: Lo) ? Hexagon::A2_combineii |
1439 | : Hexagon::A4_combineii; |
1440 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Opc), DestReg: Reg) |
1441 | .addImm(Val: int32_t(Hi)) |
1442 | .addImm(Val: int32_t(Lo)); |
1443 | return Reg; |
1444 | } |
1445 | MachineFunction *MF = B.getParent(); |
1446 | auto &HST = MF->getSubtarget<HexagonSubtarget>(); |
1447 | |
// Disable CONST64 for tiny core since it takes an LD resource.
1449 | if (!HST.isTinyCore() || |
1450 | MF->getFunction().hasOptSize()) { |
1451 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::CONST64), DestReg: Reg) |
1452 | .addImm(Val: C); |
1453 | return Reg; |
1454 | } |
1455 | } |
1456 | |
1457 | if (RC == &Hexagon::PredRegsRegClass) { |
1458 | unsigned Opc; |
1459 | if (C == 0) |
1460 | Opc = Hexagon::PS_false; |
1461 | else if ((C & 0xFF) == 0xFF) |
1462 | Opc = Hexagon::PS_true; |
1463 | else |
1464 | return 0; |
1465 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Opc), DestReg: Reg); |
1466 | return Reg; |
1467 | } |
1468 | |
1469 | return 0; |
1470 | } |
1471 | |
1472 | bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { |
1473 | if (!BT.reached(B: &B)) |
1474 | return false; |
1475 | bool Changed = false; |
1476 | RegisterSet Defs; |
1477 | |
1478 | for (auto I = B.begin(), E = B.end(); I != E; ++I) { |
1479 | if (isTfrConst(MI: *I)) |
1480 | continue; |
1481 | Defs.clear(); |
1482 | HBS::getInstrDefs(MI: *I, Defs); |
1483 | if (Defs.count() != 1) |
1484 | continue; |
1485 | Register DR = Defs.find_first(); |
1486 | if (!DR.isVirtual()) |
1487 | continue; |
1488 | uint64_t U; |
1489 | const BitTracker::RegisterCell &DRC = BT.lookup(Reg: DR); |
1490 | if (HBS::getConst(RC: DRC, B: 0, W: DRC.width(), U)) { |
1491 | int64_t C = U; |
1492 | DebugLoc DL = I->getDebugLoc(); |
1493 | auto At = I->isPHI() ? B.getFirstNonPHI() : I; |
1494 | Register ImmReg = genTfrConst(RC: MRI.getRegClass(Reg: DR), C, B, At, DL); |
1495 | if (ImmReg) { |
1496 | HBS::replaceReg(OldR: DR, NewR: ImmReg, MRI); |
1497 | BT.put(RR: ImmReg, RC: DRC); |
1498 | Changed = true; |
1499 | } |
1500 | } |
1501 | } |
1502 | return Changed; |
1503 | } |
1504 | |
1505 | namespace { |
1506 | |
1507 | // Identify pairs of available registers which hold identical values. |
1508 | // In such cases, only one of them needs to be calculated, the other one |
1509 | // will be defined as a copy of the first. |
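// For example (register numbers are illustrative): if %7 is defined by some
// computation, but the bit tracker shows it to be identical to an already
// available %5, a "%8 = COPY %5" is emitted and uses of %7 are rewritten to
// use %8, allowing the original computation of %7 to become dead.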
1510 | class CopyGeneration : public Transformation { |
1511 | public: |
1512 | CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii, |
1513 | const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1514 | : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} |
1515 | |
1516 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1517 | |
1518 | private: |
1519 | bool findMatch(const BitTracker::RegisterRef &Inp, |
1520 | BitTracker::RegisterRef &Out, const RegisterSet &AVs); |
1521 | |
1522 | const HexagonInstrInfo &HII; |
1523 | const HexagonRegisterInfo &HRI; |
1524 | MachineRegisterInfo &MRI; |
1525 | BitTracker &BT; |
1526 | RegisterSet Forbidden; |
1527 | }; |
1528 | |
// Eliminate register copies RD = RS, by replacing the uses of RD with
// uses of RS.
1531 | class CopyPropagation : public Transformation { |
1532 | public: |
1533 | CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1534 | : Transformation(false), HRI(hri), MRI(mri) {} |
1535 | |
1536 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1537 | |
1538 | static bool isCopyReg(unsigned Opc, bool NoConv); |
1539 | |
1540 | private: |
1541 | bool propagateRegCopy(MachineInstr &MI); |
1542 | |
1543 | const HexagonRegisterInfo &HRI; |
1544 | MachineRegisterInfo &MRI; |
1545 | }; |
1546 | |
1547 | } // end anonymous namespace |
1548 | |
1549 | /// Check if there is a register in AVs that is identical to Inp. If so, |
1550 | /// set Out to the found register. The output may be a pair Reg:Sub. |
1551 | bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, |
1552 | BitTracker::RegisterRef &Out, const RegisterSet &AVs) { |
1553 | if (!BT.has(Reg: Inp.Reg)) |
1554 | return false; |
1555 | const BitTracker::RegisterCell &InpRC = BT.lookup(Reg: Inp.Reg); |
1556 | auto *FRC = HBS::getFinalVRegClass(RR: Inp, MRI); |
1557 | unsigned B, W; |
1558 | if (!HBS::getSubregMask(RR: Inp, Begin&: B, Width&: W, MRI)) |
1559 | return false; |
1560 | |
1561 | for (Register R = AVs.find_first(); R; R = AVs.find_next(Prev: R)) { |
1562 | if (!BT.has(Reg: R) || Forbidden[R]) |
1563 | continue; |
1564 | const BitTracker::RegisterCell &RC = BT.lookup(Reg: R); |
1565 | unsigned RW = RC.width(); |
1566 | if (W == RW) { |
1567 | if (FRC != MRI.getRegClass(Reg: R)) |
1568 | continue; |
1569 | if (!HBS::isTransparentCopy(RD: R, RS: Inp, MRI)) |
1570 | continue; |
1571 | if (!HBS::isEqual(RC1: InpRC, B1: B, RC2: RC, B2: 0, W)) |
1572 | continue; |
1573 | Out.Reg = R; |
1574 | Out.Sub = 0; |
1575 | return true; |
1576 | } |
1577 | // Check if there is a super-register, whose part (with a subregister) |
1578 | // is equal to the input. |
1579 | // Only do double registers for now. |
1580 | if (W*2 != RW) |
1581 | continue; |
1582 | if (MRI.getRegClass(Reg: R) != &Hexagon::DoubleRegsRegClass) |
1583 | continue; |
1584 | |
1585 | if (HBS::isEqual(RC1: InpRC, B1: B, RC2: RC, B2: 0, W)) |
1586 | Out.Sub = Hexagon::isub_lo; |
1587 | else if (HBS::isEqual(RC1: InpRC, B1: B, RC2: RC, B2: W, W)) |
1588 | Out.Sub = Hexagon::isub_hi; |
1589 | else |
1590 | continue; |
1591 | Out.Reg = R; |
1592 | if (HBS::isTransparentCopy(RD: Out, RS: Inp, MRI)) |
1593 | return true; |
1594 | } |
1595 | return false; |
1596 | } |
1597 | |
1598 | bool CopyGeneration::processBlock(MachineBasicBlock &B, |
1599 | const RegisterSet &AVs) { |
1600 | if (!BT.reached(B: &B)) |
1601 | return false; |
1602 | RegisterSet AVB(AVs); |
1603 | bool Changed = false; |
1604 | RegisterSet Defs; |
1605 | |
1606 | for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Rs: Defs)) { |
1607 | Defs.clear(); |
1608 | HBS::getInstrDefs(MI: *I, Defs); |
1609 | |
1610 | unsigned Opc = I->getOpcode(); |
1611 | if (CopyPropagation::isCopyReg(Opc, NoConv: false) || |
1612 | ConstGeneration::isTfrConst(MI: *I)) |
1613 | continue; |
1614 | |
1615 | DebugLoc DL = I->getDebugLoc(); |
1616 | auto At = I->isPHI() ? B.getFirstNonPHI() : I; |
1617 | |
1618 | for (Register R = Defs.find_first(); R; R = Defs.find_next(Prev: R)) { |
1619 | BitTracker::RegisterRef MR; |
1620 | auto *FRC = HBS::getFinalVRegClass(RR: R, MRI); |
1621 | |
1622 | if (findMatch(Inp: R, Out&: MR, AVs: AVB)) { |
1623 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
1624 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: TargetOpcode::COPY), DestReg: NewR) |
1625 | .addReg(RegNo: MR.Reg, flags: 0, SubReg: MR.Sub); |
1626 | BT.put(RR: BitTracker::RegisterRef(NewR), RC: BT.get(RR: MR)); |
1627 | HBS::replaceReg(OldR: R, NewR, MRI); |
1628 | Forbidden.insert(R); |
1629 | continue; |
1630 | } |
1631 | |
1632 | if (FRC == &Hexagon::DoubleRegsRegClass || |
1633 | FRC == &Hexagon::HvxWRRegClass) { |
1634 | // Try to generate REG_SEQUENCE. |
1635 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC: *FRC, GenIdx: Hexagon::ps_sub_lo); |
1636 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC: *FRC, GenIdx: Hexagon::ps_sub_hi); |
1637 | BitTracker::RegisterRef TL = { R, SubLo }; |
1638 | BitTracker::RegisterRef TH = { R, SubHi }; |
1639 | BitTracker::RegisterRef ML, MH; |
1640 | if (findMatch(Inp: TL, Out&: ML, AVs: AVB) && findMatch(Inp: TH, Out&: MH, AVs: AVB)) { |
1641 | auto *FRC = HBS::getFinalVRegClass(RR: R, MRI); |
1642 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
1643 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: TargetOpcode::REG_SEQUENCE), DestReg: NewR) |
1644 | .addReg(RegNo: ML.Reg, flags: 0, SubReg: ML.Sub) |
1645 | .addImm(Val: SubLo) |
1646 | .addReg(RegNo: MH.Reg, flags: 0, SubReg: MH.Sub) |
1647 | .addImm(Val: SubHi); |
1648 | BT.put(RR: BitTracker::RegisterRef(NewR), RC: BT.get(RR: R)); |
1649 | HBS::replaceReg(OldR: R, NewR, MRI); |
1650 | Forbidden.insert(R); |
1651 | } |
1652 | } |
1653 | } |
1654 | } |
1655 | |
1656 | return Changed; |
1657 | } |
1658 | |
1659 | bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) { |
1660 | switch (Opc) { |
1661 | case TargetOpcode::COPY: |
1662 | case TargetOpcode::REG_SEQUENCE: |
1663 | case Hexagon::A4_combineir: |
1664 | case Hexagon::A4_combineri: |
1665 | return true; |
1666 | case Hexagon::A2_tfr: |
1667 | case Hexagon::A2_tfrp: |
1668 | case Hexagon::A2_combinew: |
1669 | case Hexagon::V6_vcombine: |
1670 | return NoConv; |
1671 | default: |
1672 | break; |
1673 | } |
1674 | return false; |
1675 | } |
1676 | |
1677 | bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { |
1678 | bool Changed = false; |
1679 | unsigned Opc = MI.getOpcode(); |
1680 | BitTracker::RegisterRef RD = MI.getOperand(i: 0); |
1681 | assert(MI.getOperand(0).getSubReg() == 0); |
1682 | |
1683 | switch (Opc) { |
1684 | case TargetOpcode::COPY: |
1685 | case Hexagon::A2_tfr: |
1686 | case Hexagon::A2_tfrp: { |
1687 | BitTracker::RegisterRef RS = MI.getOperand(i: 1); |
1688 | if (!HBS::isTransparentCopy(RD, RS, MRI)) |
1689 | break; |
1690 | if (RS.Sub != 0) |
1691 | Changed = HBS::replaceRegWithSub(OldR: RD.Reg, NewR: RS.Reg, NewSR: RS.Sub, MRI); |
1692 | else |
1693 | Changed = HBS::replaceReg(OldR: RD.Reg, NewR: RS.Reg, MRI); |
1694 | break; |
1695 | } |
1696 | case TargetOpcode::REG_SEQUENCE: { |
1697 | BitTracker::RegisterRef SL, SH; |
1698 | if (HBS::parseRegSequence(I: MI, SL, SH, MRI)) { |
1699 | const TargetRegisterClass &RC = *MRI.getRegClass(Reg: RD.Reg); |
1700 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC, GenIdx: Hexagon::ps_sub_lo); |
1701 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC, GenIdx: Hexagon::ps_sub_hi); |
1702 | Changed = HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: SubLo, NewR: SL.Reg, NewSR: SL.Sub, MRI); |
1703 | Changed |= HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: SubHi, NewR: SH.Reg, NewSR: SH.Sub, MRI); |
1704 | } |
1705 | break; |
1706 | } |
1707 | case Hexagon::A2_combinew: |
1708 | case Hexagon::V6_vcombine: { |
1709 | const TargetRegisterClass &RC = *MRI.getRegClass(Reg: RD.Reg); |
1710 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC, GenIdx: Hexagon::ps_sub_lo); |
1711 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC, GenIdx: Hexagon::ps_sub_hi); |
1712 | BitTracker::RegisterRef RH = MI.getOperand(i: 1), RL = MI.getOperand(i: 2); |
1713 | Changed = HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: SubLo, NewR: RL.Reg, NewSR: RL.Sub, MRI); |
1714 | Changed |= HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: SubHi, NewR: RH.Reg, NewSR: RH.Sub, MRI); |
1715 | break; |
1716 | } |
1717 | case Hexagon::A4_combineir: |
1718 | case Hexagon::A4_combineri: { |
1719 | unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1; |
1720 | unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo |
1721 | : Hexagon::isub_hi; |
1722 | BitTracker::RegisterRef RS = MI.getOperand(i: SrcX); |
1723 | Changed = HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: Sub, NewR: RS.Reg, NewSR: RS.Sub, MRI); |
1724 | break; |
1725 | } |
1726 | } |
1727 | return Changed; |
1728 | } |
1729 | |
1730 | bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { |
1731 | std::vector<MachineInstr*> Instrs; |
1732 | for (MachineInstr &MI : llvm::reverse(C&: B)) |
1733 | Instrs.push_back(x: &MI); |
1734 | |
1735 | bool Changed = false; |
1736 | for (auto *I : Instrs) { |
1737 | unsigned Opc = I->getOpcode(); |
1738 | if (!CopyPropagation::isCopyReg(Opc, NoConv: true)) |
1739 | continue; |
1740 | Changed |= propagateRegCopy(MI&: *I); |
1741 | } |
1742 | |
1743 | return Changed; |
1744 | } |
1745 | |
1746 | namespace { |
1747 | |
1748 | // Recognize patterns that can be simplified and replace them with the |
1749 | // simpler forms. |
// This is by no means complete.
1751 | class BitSimplification : public Transformation { |
1752 | public: |
1753 | BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt, |
1754 | const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri, |
1755 | MachineRegisterInfo &mri, MachineFunction &mf) |
1756 | : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri), |
1757 | MF(mf), BT(bt) {} |
1758 | |
1759 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1760 | |
1761 | private: |
1762 | struct RegHalf : public BitTracker::RegisterRef { |
1763 | bool Low; // Low/High halfword. |
1764 | }; |
1765 | |
1766 | bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, |
1767 | unsigned B, RegHalf &RH); |
1768 | bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum); |
1769 | |
1770 | bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, |
1771 | BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); |
1772 | unsigned getCombineOpcode(bool HLow, bool LLow); |
1773 | |
1774 | bool genStoreUpperHalf(MachineInstr *MI); |
1775 | bool genStoreImmediate(MachineInstr *MI); |
1776 | bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, |
1777 | const BitTracker::RegisterCell &RC); |
1778 | bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, |
1779 | const BitTracker::RegisterCell &RC); |
1780 | bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, |
1781 | const BitTracker::RegisterCell &RC); |
1782 | bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, |
1783 | const BitTracker::RegisterCell &RC); |
1784 | bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD, |
1785 | const BitTracker::RegisterCell &RC, const RegisterSet &AVs); |
1786 | bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, |
1787 | const BitTracker::RegisterCell &RC); |
1788 | bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, |
1789 | const BitTracker::RegisterCell &RC, const RegisterSet &AVs); |
1790 | bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD); |
1791 | |
1792 | // Cache of created instructions to avoid creating duplicates. |
1793 | // XXX Currently only used by genBitSplit. |
1794 | std::vector<MachineInstr*> NewMIs; |
1795 | |
1796 | const MachineDominatorTree &MDT; |
1797 | const HexagonInstrInfo &HII; |
1798 | const HexagonRegisterInfo &HRI; |
1799 | MachineRegisterInfo &MRI; |
1800 | MachineFunction &MF; |
1801 | BitTracker &BT; |
1802 | }; |
1803 | |
1804 | } // end anonymous namespace |
1805 | |
1806 | // Check if the bits [B..B+16) in register cell RC form a valid halfword, |
1807 | // i.e. [0..16), [16..32), etc. of some register. If so, return true and |
1808 | // set the information about the found register in RH. |
1809 | bool BitSimplification::matchHalf(unsigned SelfR, |
1810 | const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { |
1811 | // XXX This could be searching in the set of available registers, in case |
1812 | // the match is not exact. |
1813 | |
1814 | // Match 16-bit chunks, where the RC[B..B+15] references exactly one |
1815 | // register and all the bits B..B+15 match between RC and the register. |
1816 | // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, |
1817 | // and RC = { [0]:0 [1-15]:v1[1-15]... }. |
1818 | bool Low = false; |
1819 | unsigned I = B; |
1820 | while (I < B+16 && RC[I].num()) |
1821 | I++; |
1822 | if (I == B+16) |
1823 | return false; |
1824 | |
1825 | Register Reg = RC[I].RefI.Reg; |
1826 | unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. |
1827 | if (P < I-B) |
1828 | return false; |
1829 | unsigned Pos = P - (I-B); |
1830 | |
1831 | if (Reg == 0 || Reg == SelfR) // Don't match "self". |
1832 | return false; |
1833 | if (!Reg.isVirtual()) |
1834 | return false; |
1835 | if (!BT.has(Reg)) |
1836 | return false; |
1837 | |
1838 | const BitTracker::RegisterCell &SC = BT.lookup(Reg); |
1839 | if (Pos+16 > SC.width()) |
1840 | return false; |
1841 | |
1842 | for (unsigned i = 0; i < 16; ++i) { |
1843 | const BitTracker::BitValue &RV = RC[i+B]; |
1844 | if (RV.Type == BitTracker::BitValue::Ref) { |
1845 | if (RV.RefI.Reg != Reg) |
1846 | return false; |
1847 | if (RV.RefI.Pos != i+Pos) |
1848 | return false; |
1849 | continue; |
1850 | } |
1851 | if (RC[i+B] != SC[i+Pos]) |
1852 | return false; |
1853 | } |
1854 | |
1855 | unsigned Sub = 0; |
1856 | switch (Pos) { |
1857 | case 0: |
1858 | Sub = Hexagon::isub_lo; |
1859 | Low = true; |
1860 | break; |
1861 | case 16: |
1862 | Sub = Hexagon::isub_lo; |
1863 | Low = false; |
1864 | break; |
1865 | case 32: |
1866 | Sub = Hexagon::isub_hi; |
1867 | Low = true; |
1868 | break; |
1869 | case 48: |
1870 | Sub = Hexagon::isub_hi; |
1871 | Low = false; |
1872 | break; |
1873 | default: |
1874 | return false; |
1875 | } |
1876 | |
1877 | RH.Reg = Reg; |
1878 | RH.Sub = Sub; |
1879 | RH.Low = Low; |
1880 | // If the subregister is not valid with the register, set it to 0. |
1881 | if (!HBS::getFinalVRegClass(RR: RH, MRI)) |
1882 | RH.Sub = 0; |
1883 | |
1884 | return true; |
1885 | } |
1886 | |
1887 | bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc, |
1888 | unsigned OpNum) { |
1889 | auto *OpRC = HII.getRegClass(MCID: HII.get(Opcode: Opc), OpNum, TRI: &HRI, MF); |
1890 | auto *RRC = HBS::getFinalVRegClass(RR: R, MRI); |
1891 | return OpRC->hasSubClassEq(RC: RRC); |
1892 | } |
1893 | |
1894 | // Check if RC matches the pattern of a S2_packhl. If so, return true and |
1895 | // set the inputs Rs and Rt. |
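// In terms of the checks below, the matched layout is (high to low halfwords)
//   RC = Rs.h : Rt.h : Rs.l : Rt.l
// i.e. bits [48..64) come from the high half of Rs, [32..48) from the high
// half of Rt, [16..32) from the low half of Rs, and [0..16) from the low
// half of Rt, which is the value produced by S2_packhl(Rs, Rt).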
1896 | bool BitSimplification::matchPackhl(unsigned SelfR, |
1897 | const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs, |
1898 | BitTracker::RegisterRef &Rt) { |
1899 | RegHalf L1, H1, L2, H2; |
1900 | |
1901 | if (!matchHalf(SelfR, RC, B: 0, RH&: L2) || !matchHalf(SelfR, RC, B: 16, RH&: L1)) |
1902 | return false; |
1903 | if (!matchHalf(SelfR, RC, B: 32, RH&: H2) || !matchHalf(SelfR, RC, B: 48, RH&: H1)) |
1904 | return false; |
1905 | |
1906 | // Rs = H1.L1, Rt = H2.L2 |
1907 | if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low) |
1908 | return false; |
1909 | if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low) |
1910 | return false; |
1911 | |
1912 | Rs = H1; |
1913 | Rt = H2; |
1914 | return true; |
1915 | } |
1916 | |
1917 | unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { |
1918 | return HLow ? LLow ? Hexagon::A2_combine_ll |
1919 | : Hexagon::A2_combine_lh |
1920 | : LLow ? Hexagon::A2_combine_hl |
1921 | : Hexagon::A2_combine_hh; |
1922 | } |
1923 | |
1924 | // If MI stores the upper halfword of a register (potentially obtained via |
1925 | // shifts or extracts), replace it with a storerf instruction. This could |
1926 | // cause the "extraction" code to become dead. |
1927 | bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { |
1928 | unsigned Opc = MI->getOpcode(); |
1929 | if (Opc != Hexagon::S2_storerh_io) |
1930 | return false; |
1931 | |
1932 | MachineOperand &ValOp = MI->getOperand(i: 2); |
1933 | BitTracker::RegisterRef RS = ValOp; |
1934 | if (!BT.has(Reg: RS.Reg)) |
1935 | return false; |
1936 | const BitTracker::RegisterCell &RC = BT.lookup(Reg: RS.Reg); |
1937 | RegHalf H; |
1938 | unsigned B = (RS.Sub == Hexagon::isub_hi) ? 32 : 0; |
1939 | if (!matchHalf(SelfR: 0, RC, B, RH&: H)) |
1940 | return false; |
1941 | if (H.Low) |
1942 | return false; |
1943 | MI->setDesc(HII.get(Opcode: Hexagon::S2_storerf_io)); |
1944 | ValOp.setReg(H.Reg); |
1945 | ValOp.setSubReg(H.Sub); |
1946 | return true; |
1947 | } |
1948 | |
1949 | // If MI stores a value known at compile-time, and the value is within a range |
1950 | // that avoids using constant-extenders, replace it with a store-immediate. |
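// For example (offset and value are illustrative):
//   %1 = A2_tfrsi 5
//   S2_storerb_io %0, 3, %1
// becomes
//   S4_storeirb_io %0, 3, 5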
1951 | bool BitSimplification::genStoreImmediate(MachineInstr *MI) { |
1952 | unsigned Opc = MI->getOpcode(); |
1953 | unsigned Align = 0; |
1954 | switch (Opc) { |
1955 | case Hexagon::S2_storeri_io: |
1956 | Align++; |
1957 | [[fallthrough]]; |
1958 | case Hexagon::S2_storerh_io: |
1959 | Align++; |
1960 | [[fallthrough]]; |
1961 | case Hexagon::S2_storerb_io: |
1962 | break; |
1963 | default: |
1964 | return false; |
1965 | } |
1966 | |
1967 | // Avoid stores to frame-indices (due to an unknown offset). |
1968 | if (!MI->getOperand(i: 0).isReg()) |
1969 | return false; |
1970 | MachineOperand &OffOp = MI->getOperand(i: 1); |
1971 | if (!OffOp.isImm()) |
1972 | return false; |
1973 | |
1974 | int64_t Off = OffOp.getImm(); |
1975 | // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x). |
1976 | if (!isUIntN(N: 6+Align, x: Off) || (Off & ((1<<Align)-1))) |
1977 | return false; |
1978 | // Source register: |
1979 | BitTracker::RegisterRef RS = MI->getOperand(i: 2); |
1980 | if (!BT.has(Reg: RS.Reg)) |
1981 | return false; |
1982 | const BitTracker::RegisterCell &RC = BT.lookup(Reg: RS.Reg); |
1983 | uint64_t U; |
1984 | if (!HBS::getConst(RC, B: 0, W: RC.width(), U)) |
1985 | return false; |
1986 | |
1987 | // Only consider 8-bit values to avoid constant-extenders. |
1988 | int V; |
1989 | switch (Opc) { |
1990 | case Hexagon::S2_storerb_io: |
1991 | V = int8_t(U); |
1992 | break; |
1993 | case Hexagon::S2_storerh_io: |
1994 | V = int16_t(U); |
1995 | break; |
1996 | case Hexagon::S2_storeri_io: |
1997 | V = int32_t(U); |
1998 | break; |
1999 | default: |
2000 | // Opc is already checked above to be one of the three store instructions. |
2001 | // This silences a -Wuninitialized false positive on GCC 5.4. |
2002 | llvm_unreachable("Unexpected store opcode" ); |
2003 | } |
2004 | if (!isInt<8>(x: V)) |
2005 | return false; |
2006 | |
2007 | MI->removeOperand(OpNo: 2); |
2008 | switch (Opc) { |
2009 | case Hexagon::S2_storerb_io: |
2010 | MI->setDesc(HII.get(Opcode: Hexagon::S4_storeirb_io)); |
2011 | break; |
2012 | case Hexagon::S2_storerh_io: |
2013 | MI->setDesc(HII.get(Opcode: Hexagon::S4_storeirh_io)); |
2014 | break; |
2015 | case Hexagon::S2_storeri_io: |
2016 | MI->setDesc(HII.get(Opcode: Hexagon::S4_storeiri_io)); |
2017 | break; |
2018 | } |
2019 | MI->addOperand(Op: MachineOperand::CreateImm(Val: V)); |
2020 | return true; |
2021 | } |
2022 | |
// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
2024 | // last instruction in a sequence that results in something equivalent to |
2025 | // the pack-halfwords. The intent is to cause the entire sequence to become |
2026 | // dead. |
2027 | bool BitSimplification::genPackhl(MachineInstr *MI, |
2028 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2029 | unsigned Opc = MI->getOpcode(); |
2030 | if (Opc == Hexagon::S2_packhl) |
2031 | return false; |
2032 | BitTracker::RegisterRef Rs, Rt; |
2033 | if (!matchPackhl(SelfR: RD.Reg, RC, Rs, Rt)) |
2034 | return false; |
2035 | if (!validateReg(R: Rs, Opc: Hexagon::S2_packhl, OpNum: 1) || |
2036 | !validateReg(R: Rt, Opc: Hexagon::S2_packhl, OpNum: 2)) |
2037 | return false; |
2038 | |
2039 | MachineBasicBlock &B = *MI->getParent(); |
2040 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::DoubleRegsRegClass); |
2041 | DebugLoc DL = MI->getDebugLoc(); |
2042 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2043 | : MachineBasicBlock::iterator(MI); |
2044 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::S2_packhl), DestReg: NewR) |
2045 | .addReg(RegNo: Rs.Reg, flags: 0, SubReg: Rs.Sub) |
2046 | .addReg(RegNo: Rt.Reg, flags: 0, SubReg: Rt.Sub); |
2047 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
2048 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2049 | return true; |
2050 | } |
2051 | |
// If MI produces a halfword of the input in the low half of the output,
2053 | // replace it with zero-extend or extractu. |
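// For example (register numbers are illustrative): if the cell of %1 is
// %0[0-15] in bits [0..16) and zero in bits [16..32), a new register defined
// as "A2_zxth %0" can replace %1; if the low bits are %0[16-31] instead, the
// replacement is "S2_lsr_i_r %0, 16".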
bool BitSimplification::genExtractHalf(MachineInstr *MI,
2055 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2056 | RegHalf L; |
2057 | // Check for halfword in low 16 bits, zeros elsewhere. |
2058 | if (!matchHalf(SelfR: RD.Reg, RC, B: 0, RH&: L) || !HBS::isZero(RC, B: 16, W: 16)) |
2059 | return false; |
2060 | |
2061 | unsigned Opc = MI->getOpcode(); |
2062 | MachineBasicBlock &B = *MI->getParent(); |
2063 | DebugLoc DL = MI->getDebugLoc(); |
2064 | |
2065 | // Prefer zxth, since zxth can go in any slot, while extractu only in |
2066 | // slots 2 and 3. |
2067 | unsigned NewR = 0; |
2068 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2069 | : MachineBasicBlock::iterator(MI); |
2070 | if (L.Low && Opc != Hexagon::A2_zxth) { |
2071 | if (validateReg(R: L, Opc: Hexagon::A2_zxth, OpNum: 1)) { |
2072 | NewR = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
2073 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A2_zxth), DestReg: NewR) |
2074 | .addReg(RegNo: L.Reg, flags: 0, SubReg: L.Sub); |
2075 | } |
2076 | } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) { |
2077 | if (validateReg(R: L, Opc: Hexagon::S2_lsr_i_r, OpNum: 1)) { |
2078 | NewR = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
2079 | BuildMI(BB&: B, I: MI, MIMD: DL, MCID: HII.get(Opcode: Hexagon::S2_lsr_i_r), DestReg: NewR) |
2080 | .addReg(RegNo: L.Reg, flags: 0, SubReg: L.Sub) |
2081 | .addImm(Val: 16); |
2082 | } |
2083 | } |
2084 | if (NewR == 0) |
2085 | return false; |
2086 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
2087 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2088 | return true; |
2089 | } |
2090 | |
// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
2092 | // combine. |
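// For example (register numbers are illustrative): if bits [16..32) of the
// cell are %0[0-15] and bits [0..16) are %1[16-31], the value can be
// produced by
//   %2 = A2_combine_lh %0, %1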
2093 | bool BitSimplification::genCombineHalf(MachineInstr *MI, |
2094 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2095 | RegHalf L, H; |
2096 | // Check for combine h/l |
2097 | if (!matchHalf(SelfR: RD.Reg, RC, B: 0, RH&: L) || !matchHalf(SelfR: RD.Reg, RC, B: 16, RH&: H)) |
2098 | return false; |
2099 | // Do nothing if this is just a reg copy. |
2100 | if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low) |
2101 | return false; |
2102 | |
2103 | unsigned Opc = MI->getOpcode(); |
2104 | unsigned COpc = getCombineOpcode(HLow: H.Low, LLow: L.Low); |
2105 | if (COpc == Opc) |
2106 | return false; |
2107 | if (!validateReg(R: H, Opc: COpc, OpNum: 1) || !validateReg(R: L, Opc: COpc, OpNum: 2)) |
2108 | return false; |
2109 | |
2110 | MachineBasicBlock &B = *MI->getParent(); |
2111 | DebugLoc DL = MI->getDebugLoc(); |
2112 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
2113 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2114 | : MachineBasicBlock::iterator(MI); |
2115 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: COpc), DestReg: NewR) |
2116 | .addReg(RegNo: H.Reg, flags: 0, SubReg: H.Sub) |
2117 | .addReg(RegNo: L.Reg, flags: 0, SubReg: L.Sub); |
2118 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
2119 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2120 | return true; |
2121 | } |
2122 | |
2123 | // If MI resets high bits of a register and keeps the lower ones, replace it |
2124 | // with zero-extend byte/half, and-immediate, or extractu, as appropriate. |
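// For example, if only the low W bits of the result can be nonzero and they
// equal the low W bits of some source register %0 (illustrative), the
// replacement is "A2_zxtb %0" for W == 8, "A2_zxth %0" for W == 16,
// "A2_andir %0, (1<<W)-1" for W < 10, and "S2_extractu %0, W, 0" otherwise.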
bool BitSimplification::genExtractLow(MachineInstr *MI,
2126 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2127 | unsigned Opc = MI->getOpcode(); |
2128 | switch (Opc) { |
2129 | case Hexagon::A2_zxtb: |
2130 | case Hexagon::A2_zxth: |
2131 | case Hexagon::S2_extractu: |
2132 | return false; |
2133 | } |
2134 | if (Opc == Hexagon::A2_andir && MI->getOperand(i: 2).isImm()) { |
2135 | int32_t Imm = MI->getOperand(i: 2).getImm(); |
2136 | if (isInt<10>(x: Imm)) |
2137 | return false; |
2138 | } |
2139 | |
2140 | if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) |
2141 | return false; |
2142 | unsigned W = RC.width(); |
2143 | while (W > 0 && RC[W-1].is(T: 0)) |
2144 | W--; |
2145 | if (W == 0 || W == RC.width()) |
2146 | return false; |
2147 | unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb |
2148 | : (W == 16) ? Hexagon::A2_zxth |
2149 | : (W < 10) ? Hexagon::A2_andir |
2150 | : Hexagon::S2_extractu; |
2151 | MachineBasicBlock &B = *MI->getParent(); |
2152 | DebugLoc DL = MI->getDebugLoc(); |
2153 | |
2154 | for (auto &Op : MI->uses()) { |
2155 | if (!Op.isReg()) |
2156 | continue; |
2157 | BitTracker::RegisterRef RS = Op; |
2158 | if (!BT.has(Reg: RS.Reg)) |
2159 | continue; |
2160 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: RS.Reg); |
2161 | unsigned BN, BW; |
2162 | if (!HBS::getSubregMask(RR: RS, Begin&: BN, Width&: BW, MRI)) |
2163 | continue; |
2164 | if (BW < W || !HBS::isEqual(RC1: RC, B1: 0, RC2: SC, B2: BN, W)) |
2165 | continue; |
2166 | if (!validateReg(R: RS, Opc: NewOpc, OpNum: 1)) |
2167 | continue; |
2168 | |
2169 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
2170 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2171 | : MachineBasicBlock::iterator(MI); |
2172 | auto MIB = BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: NewOpc), DestReg: NewR) |
2173 | .addReg(RegNo: RS.Reg, flags: 0, SubReg: RS.Sub); |
2174 | if (NewOpc == Hexagon::A2_andir) |
2175 | MIB.addImm(Val: (1 << W) - 1); |
2176 | else if (NewOpc == Hexagon::S2_extractu) |
2177 | MIB.addImm(Val: W).addImm(Val: 0); |
2178 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
2179 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2180 | return true; |
2181 | } |
2182 | return false; |
2183 | } |
2184 | |
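// Check if the register defined by MI holds the low bits of some source
// register (zero-extended), while another available register holds the
// remaining high bits of the same source (also zero-extended). If so, both
// values can be produced at once by a single A4_bitspliti, whose low
// subregister receives the low field and whose high subregister receives
// the high field.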
2185 | bool BitSimplification::genBitSplit(MachineInstr *MI, |
2186 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, |
2187 | const RegisterSet &AVs) { |
2188 | if (!GenBitSplit) |
2189 | return false; |
2190 | if (MaxBitSplit.getNumOccurrences()) { |
2191 | if (CountBitSplit >= MaxBitSplit) |
2192 | return false; |
2193 | } |
2194 | |
2195 | unsigned Opc = MI->getOpcode(); |
2196 | switch (Opc) { |
2197 | case Hexagon::A4_bitsplit: |
2198 | case Hexagon::A4_bitspliti: |
2199 | return false; |
2200 | } |
2201 | |
2202 | unsigned W = RC.width(); |
2203 | if (W != 32) |
2204 | return false; |
2205 | |
2206 | auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned { |
2207 | unsigned Z = C.width(); |
2208 | while (Z > 0 && C[Z-1].is(T: 0)) |
2209 | --Z; |
2210 | return C.width() - Z; |
2211 | }; |
2212 | |
2213 | // Count the number of leading zeros in the target RC. |
2214 | unsigned Z = ctlz(RC); |
2215 | if (Z == 0 || Z == W) |
2216 | return false; |
2217 | |
2218 | // A simplistic analysis: assume the source register (the one being split) |
2219 | // is fully unknown, and that all its bits are self-references. |
2220 | const BitTracker::BitValue &B0 = RC[0]; |
2221 | if (B0.Type != BitTracker::BitValue::Ref) |
2222 | return false; |
2223 | |
2224 | unsigned SrcR = B0.RefI.Reg; |
2225 | unsigned SrcSR = 0; |
2226 | unsigned Pos = B0.RefI.Pos; |
2227 | |
2228 | // All the non-zero bits should be consecutive bits from the same register. |
2229 | for (unsigned i = 1; i < W-Z; ++i) { |
2230 | const BitTracker::BitValue &V = RC[i]; |
2231 | if (V.Type != BitTracker::BitValue::Ref) |
2232 | return false; |
2233 | if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i) |
2234 | return false; |
2235 | } |
2236 | |
2237 | // Now, find the other bitfield among AVs. |
2238 | for (unsigned S = AVs.find_first(); S; S = AVs.find_next(Prev: S)) { |
2239 | // The number of leading zeros here should be the number of trailing |
2240 | // non-zeros in RC. |
2241 | unsigned SRC = MRI.getRegClass(Reg: S)->getID(); |
2242 | if (SRC != Hexagon::IntRegsRegClassID && |
2243 | SRC != Hexagon::DoubleRegsRegClassID) |
2244 | continue; |
2245 | if (!BT.has(Reg: S)) |
2246 | continue; |
2247 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: S); |
2248 | if (SC.width() != W || ctlz(SC) != W-Z) |
2249 | continue; |
2250 | // The Z lower bits should now match SrcR. |
2251 | const BitTracker::BitValue &S0 = SC[0]; |
2252 | if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR) |
2253 | continue; |
2254 | unsigned P = S0.RefI.Pos; |
2255 | |
2256 | if (Pos <= P && (Pos + W-Z) != P) |
2257 | continue; |
2258 | if (P < Pos && (P + Z) != Pos) |
2259 | continue; |
2260 | // The starting bitfield position must be at a subregister boundary. |
2261 | if (std::min(a: P, b: Pos) != 0 && std::min(a: P, b: Pos) != 32) |
2262 | continue; |
2263 | |
2264 | unsigned I; |
2265 | for (I = 1; I < Z; ++I) { |
2266 | const BitTracker::BitValue &V = SC[I]; |
2267 | if (V.Type != BitTracker::BitValue::Ref) |
2268 | break; |
2269 | if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I) |
2270 | break; |
2271 | } |
2272 | if (I != Z) |
2273 | continue; |
2274 | |
2275 | // Generate bitsplit where S is defined. |
2276 | if (MaxBitSplit.getNumOccurrences()) |
2277 | CountBitSplit++; |
2278 | MachineInstr *DefS = MRI.getVRegDef(Reg: S); |
2279 | assert(DefS != nullptr); |
2280 | DebugLoc DL = DefS->getDebugLoc(); |
2281 | MachineBasicBlock &B = *DefS->getParent(); |
2282 | auto At = DefS->isPHI() ? B.getFirstNonPHI() |
2283 | : MachineBasicBlock::iterator(DefS); |
2284 | if (MRI.getRegClass(Reg: SrcR)->getID() == Hexagon::DoubleRegsRegClassID) |
2285 | SrcSR = (std::min(a: Pos, b: P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo; |
2286 | if (!validateReg(R: {SrcR,SrcSR}, Opc: Hexagon::A4_bitspliti, OpNum: 1)) |
2287 | continue; |
2288 | unsigned ImmOp = Pos <= P ? W-Z : Z; |
2289 | |
2290 | // Find an existing bitsplit instruction if one already exists. |
2291 | unsigned NewR = 0; |
2292 | for (MachineInstr *In : NewMIs) { |
2293 | if (In->getOpcode() != Hexagon::A4_bitspliti) |
2294 | continue; |
2295 | MachineOperand &Op1 = In->getOperand(i: 1); |
2296 | if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR) |
2297 | continue; |
2298 | if (In->getOperand(i: 2).getImm() != ImmOp) |
2299 | continue; |
2300 | // Check if the target register is available here. |
2301 | MachineOperand &Op0 = In->getOperand(i: 0); |
2302 | MachineInstr *DefI = MRI.getVRegDef(Reg: Op0.getReg()); |
2303 | assert(DefI != nullptr); |
2304 | if (!MDT.dominates(A: DefI, B: &*At)) |
2305 | continue; |
2306 | |
2307 | // Found one that can be reused. |
2308 | assert(Op0.getSubReg() == 0); |
2309 | NewR = Op0.getReg(); |
2310 | break; |
2311 | } |
2312 | if (!NewR) { |
2313 | NewR = MRI.createVirtualRegister(RegClass: &Hexagon::DoubleRegsRegClass); |
2314 | auto NewBS = BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A4_bitspliti), DestReg: NewR) |
2315 | .addReg(RegNo: SrcR, flags: 0, SubReg: SrcSR) |
2316 | .addImm(Val: ImmOp); |
2317 | NewMIs.push_back(x: NewBS); |
2318 | } |
2319 | if (Pos <= P) { |
2320 | HBS::replaceRegWithSub(OldR: RD.Reg, NewR, NewSR: Hexagon::isub_lo, MRI); |
2321 | HBS::replaceRegWithSub(OldR: S, NewR, NewSR: Hexagon::isub_hi, MRI); |
2322 | } else { |
2323 | HBS::replaceRegWithSub(OldR: S, NewR, NewSR: Hexagon::isub_lo, MRI); |
2324 | HBS::replaceRegWithSub(OldR: RD.Reg, NewR, NewSR: Hexagon::isub_hi, MRI); |
2325 | } |
2326 | return true; |
2327 | } |
2328 | |
2329 | return false; |
2330 | } |
2331 | |
2332 | // Check for tstbit simplification opportunity, where the bit being checked |
2333 | // can be tracked back to another register. For example: |
2334 | // %2 = S2_lsr_i_r %1, 5 |
2335 | // %3 = S2_tstbit_i %2, 0 |
2336 | // => |
2337 | // %3 = S2_tstbit_i %1, 5 |
2338 | bool BitSimplification::simplifyTstbit(MachineInstr *MI, |
2339 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2340 | unsigned Opc = MI->getOpcode(); |
2341 | if (Opc != Hexagon::S2_tstbit_i) |
2342 | return false; |
2343 | |
2344 | unsigned BN = MI->getOperand(i: 2).getImm(); |
2345 | BitTracker::RegisterRef RS = MI->getOperand(i: 1); |
2346 | unsigned F, W; |
2347 | DebugLoc DL = MI->getDebugLoc(); |
2348 | if (!BT.has(Reg: RS.Reg) || !HBS::getSubregMask(RR: RS, Begin&: F, Width&: W, MRI)) |
2349 | return false; |
2350 | MachineBasicBlock &B = *MI->getParent(); |
2351 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2352 | : MachineBasicBlock::iterator(MI); |
2353 | |
2354 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: RS.Reg); |
2355 | const BitTracker::BitValue &V = SC[F+BN]; |
2356 | if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { |
2357 | const TargetRegisterClass *TC = MRI.getRegClass(Reg: V.RefI.Reg); |
2358 | // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is |
2359 | // a double register, need to use a subregister and adjust bit |
2360 | // number. |
2361 | unsigned P = std::numeric_limits<unsigned>::max(); |
2362 | BitTracker::RegisterRef RR(V.RefI.Reg, 0); |
2363 | if (TC == &Hexagon::DoubleRegsRegClass) { |
2364 | P = V.RefI.Pos; |
2365 | RR.Sub = Hexagon::isub_lo; |
2366 | if (P >= 32) { |
2367 | P -= 32; |
2368 | RR.Sub = Hexagon::isub_hi; |
2369 | } |
2370 | } else if (TC == &Hexagon::IntRegsRegClass) { |
2371 | P = V.RefI.Pos; |
2372 | } |
2373 | if (P != std::numeric_limits<unsigned>::max()) { |
2374 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::PredRegsRegClass); |
2375 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::S2_tstbit_i), DestReg: NewR) |
2376 | .addReg(RegNo: RR.Reg, flags: 0, SubReg: RR.Sub) |
2377 | .addImm(Val: P); |
2378 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2379 | BT.put(RR: NewR, RC); |
2380 | return true; |
2381 | } |
2382 | } else if (V.is(T: 0) || V.is(T: 1)) { |
2383 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::PredRegsRegClass); |
2384 | unsigned NewOpc = V.is(T: 0) ? Hexagon::PS_false : Hexagon::PS_true; |
2385 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: NewOpc), DestReg: NewR); |
2386 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2387 | return true; |
2388 | } |
2389 | |
2390 | return false; |
2391 | } |
2392 | |
2393 | // Detect whether RD is a bitfield extract (sign- or zero-extended) of |
2394 | // some register from the AVs set. Create a new corresponding instruction |
2395 | // at the location of MI. The intent is to recognize situations where |
2396 | // a sequence of instructions performs an operation that is equivalent to |
2397 | // an extract operation, such as a shift left followed by a shift right. |
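// For example (register numbers are illustrative):
//   %1 = S2_asl_i_r %0, 24
//   %2 = S2_asr_i_r %1, 24
// is a sign-extended extract of the low 8 bits of %0, and can be replaced
// with
//   %2 = A2_sxtb %0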
bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
2399 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, |
2400 | const RegisterSet &AVs) { |
2401 | if (!GenExtract) |
2402 | return false; |
2403 | if (MaxExtract.getNumOccurrences()) { |
2404 | if (CountExtract >= MaxExtract) |
2405 | return false; |
2406 | CountExtract++; |
2407 | } |
2408 | |
2409 | unsigned W = RC.width(); |
2410 | unsigned RW = W; |
2411 | unsigned Len; |
2412 | bool Signed; |
2413 | |
2414 | // The code is mostly class-independent, except for the part that generates |
2415 | // the extract instruction, and establishes the source register (in case it |
2416 | // needs to use a subregister). |
2417 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RR: RD, MRI); |
2418 | if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass) |
2419 | return false; |
2420 | assert(RD.Sub == 0); |
2421 | |
2422 | // Observation: |
2423 | // If the cell has a form of 00..0xx..x with k zeros and n remaining |
2424 | // bits, this could be an extractu of the n bits, but it could also be |
2425 | // an extractu of a longer field which happens to have 0s in the top |
2426 | // bit positions. |
2427 | // The same logic applies to sign-extended fields. |
2428 | // |
2429 | // Do not check for the extended extracts, since it would expand the |
2430 | // search space quite a bit. The search may be expensive as it is. |
2431 | |
2432 | const BitTracker::BitValue &TopV = RC[W-1]; |
2433 | |
2434 | // Eliminate candidates that have self-referential bits, since they |
2435 | // cannot be extracts from other registers. Also, skip registers that |
2436 | // have compile-time constant values. |
2437 | bool IsConst = true; |
2438 | for (unsigned I = 0; I != W; ++I) { |
2439 | const BitTracker::BitValue &V = RC[I]; |
2440 | if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg) |
2441 | return false; |
2442 | IsConst = IsConst && (V.is(T: 0) || V.is(T: 1)); |
2443 | } |
2444 | if (IsConst) |
2445 | return false; |
2446 | |
2447 | if (TopV.is(T: 0) || TopV.is(T: 1)) { |
2448 | bool S = TopV.is(T: 1); |
2449 | for (--W; W > 0 && RC[W-1].is(T: S); --W) |
2450 | ; |
2451 | Len = W; |
2452 | Signed = S; |
2453 | // The sign bit must be a part of the field being extended. |
2454 | if (Signed) |
2455 | ++Len; |
2456 | } else { |
2457 | // This could still be a sign-extended extract. |
2458 | assert(TopV.Type == BitTracker::BitValue::Ref); |
2459 | if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1) |
2460 | return false; |
2461 | for (--W; W > 0 && RC[W-1] == TopV; --W) |
2462 | ; |
2463 | // The top bits of RC are copies of TopV. One occurrence of TopV will |
2464 | // be a part of the field. |
2465 | Len = W + 1; |
2466 | Signed = true; |
2467 | } |
2468 | |
2469 | // This would be just a copy. It should be handled elsewhere. |
2470 | if (Len == RW) |
2471 | return false; |
2472 | |
2473 | LLVM_DEBUG({ |
2474 | dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub) |
2475 | << ", MI: " << *MI; |
2476 | dbgs() << "Cell: " << RC << '\n'; |
2477 | dbgs() << "Expected bitfield size: " << Len << " bits, " |
2478 | << (Signed ? "sign" : "zero" ) << "-extended\n" ; |
2479 | }); |
2480 | |
2481 | bool Changed = false; |
2482 | |
2483 | for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(Prev: R)) { |
2484 | if (!BT.has(Reg: R)) |
2485 | continue; |
2486 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: R); |
2487 | unsigned SW = SC.width(); |
2488 | |
2489 | // The source can be longer than the destination, as long as its size is |
2490 | // a multiple of the size of the destination. Also, we would need to be |
2491 | // able to refer to the subregister in the source that would be of the |
2492 | // same size as the destination, but only check the sizes here. |
2493 | if (SW < RW || (SW % RW) != 0) |
2494 | continue; |
2495 | |
2496 | // The field can start at any offset in SC as long as it contains Len |
2497 | // bits and does not cross subregister boundary (if the source register |
2498 | // is longer than the destination). |
2499 | unsigned Off = 0; |
2500 | while (Off <= SW-Len) { |
2501 | unsigned OE = (Off+Len)/RW; |
2502 | if (OE != Off/RW) { |
// The assumption here is that if the source (R) is longer than the
// destination, then the source is a sequence of words of
// size RW, and each such word in R can be accessed via a subregister.
2506 | // |
2507 | // If the beginning and the end of the field cross the subregister |
2508 | // boundary, advance to the next subregister. |
2509 | Off = OE*RW; |
2510 | continue; |
2511 | } |
2512 | if (HBS::isEqual(RC1: RC, B1: 0, RC2: SC, B2: Off, W: Len)) |
2513 | break; |
2514 | ++Off; |
2515 | } |
2516 | |
2517 | if (Off > SW-Len) |
2518 | continue; |
2519 | |
2520 | // Found match. |
2521 | unsigned ExtOpc = 0; |
2522 | if (Off == 0) { |
2523 | if (Len == 8) |
2524 | ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb; |
2525 | else if (Len == 16) |
2526 | ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth; |
2527 | else if (Len < 10 && !Signed) |
2528 | ExtOpc = Hexagon::A2_andir; |
2529 | } |
2530 | if (ExtOpc == 0) { |
2531 | ExtOpc = |
2532 | Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp) |
2533 | : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup); |
2534 | } |
2535 | unsigned SR = 0; |
2536 | // This only recognizes isub_lo and isub_hi. |
2537 | if (RW != SW && RW*2 != SW) |
2538 | continue; |
2539 | if (RW != SW) |
2540 | SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi; |
2541 | Off = Off % RW; |
2542 | |
2543 | if (!validateReg(R: {R,SR}, Opc: ExtOpc, OpNum: 1)) |
2544 | continue; |
2545 | |
2546 | // Don't generate the same instruction as the one being optimized. |
2547 | if (MI->getOpcode() == ExtOpc) { |
2548 | // All possible ExtOpc's have the source in operand(1). |
2549 | const MachineOperand &SrcOp = MI->getOperand(i: 1); |
2550 | if (SrcOp.getReg() == R) |
2551 | continue; |
2552 | } |
2553 | |
2554 | DebugLoc DL = MI->getDebugLoc(); |
2555 | MachineBasicBlock &B = *MI->getParent(); |
2556 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
2557 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2558 | : MachineBasicBlock::iterator(MI); |
2559 | auto MIB = BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: ExtOpc), DestReg: NewR) |
2560 | .addReg(RegNo: R, flags: 0, SubReg: SR); |
2561 | switch (ExtOpc) { |
2562 | case Hexagon::A2_sxtb: |
2563 | case Hexagon::A2_zxtb: |
2564 | case Hexagon::A2_sxth: |
2565 | case Hexagon::A2_zxth: |
2566 | break; |
2567 | case Hexagon::A2_andir: |
2568 | MIB.addImm(Val: (1u << Len) - 1); |
2569 | break; |
2570 | case Hexagon::S4_extract: |
2571 | case Hexagon::S2_extractu: |
2572 | case Hexagon::S4_extractp: |
2573 | case Hexagon::S2_extractup: |
2574 | MIB.addImm(Val: Len) |
2575 | .addImm(Val: Off); |
2576 | break; |
2577 | default: |
2578 | llvm_unreachable("Unexpected opcode" ); |
2579 | } |
2580 | |
2581 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2582 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2583 | Changed = true; |
2584 | break; |
2585 | } |
2586 | |
2587 | return Changed; |
2588 | } |
2589 | |
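// Simplify a register-compare-with-zero (A4_rcmpeqi/A4_rcmpneqi with an
// immediate operand of 0) when the bit tracker knows whether the compared
// register is zero or non-zero, or when it is defined by a C2_muxii whose
// two constant inputs are each known to be zero or non-zero.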
2590 | bool BitSimplification::simplifyRCmp0(MachineInstr *MI, |
2591 | BitTracker::RegisterRef RD) { |
2592 | unsigned Opc = MI->getOpcode(); |
2593 | if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi) |
2594 | return false; |
2595 | MachineOperand &CmpOp = MI->getOperand(i: 2); |
2596 | if (!CmpOp.isImm() || CmpOp.getImm() != 0) |
2597 | return false; |
2598 | |
2599 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RR: RD, MRI); |
2600 | if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass) |
2601 | return false; |
2602 | assert(RD.Sub == 0); |
2603 | |
2604 | MachineBasicBlock &B = *MI->getParent(); |
2605 | const DebugLoc &DL = MI->getDebugLoc(); |
2606 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2607 | : MachineBasicBlock::iterator(MI); |
2608 | bool KnownZ = true; |
2609 | bool KnownNZ = false; |
2610 | |
2611 | BitTracker::RegisterRef SR = MI->getOperand(i: 1); |
2612 | if (!BT.has(Reg: SR.Reg)) |
2613 | return false; |
2614 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: SR.Reg); |
2615 | unsigned F, W; |
2616 | if (!HBS::getSubregMask(RR: SR, Begin&: F, Width&: W, MRI)) |
2617 | return false; |
2618 | |
2619 | for (uint16_t I = F; I != F+W; ++I) { |
2620 | const BitTracker::BitValue &V = SC[I]; |
2621 | if (!V.is(T: 0)) |
2622 | KnownZ = false; |
2623 | if (V.is(T: 1)) |
2624 | KnownNZ = true; |
2625 | } |
2626 | |
2627 | auto ReplaceWithConst = [&](int C) { |
2628 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
2629 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A2_tfrsi), DestReg: NewR) |
2630 | .addImm(Val: C); |
2631 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2632 | BitTracker::RegisterCell NewRC(W); |
2633 | for (uint16_t I = 0; I != W; ++I) { |
2634 | NewRC[I] = BitTracker::BitValue(C & 1); |
2635 | C = unsigned(C) >> 1; |
2636 | } |
2637 | BT.put(RR: BitTracker::RegisterRef(NewR), RC: NewRC); |
2638 | return true; |
2639 | }; |
2640 | |
2641 | auto IsNonZero = [] (const MachineOperand &Op) { |
2642 | if (Op.isGlobal() || Op.isBlockAddress()) |
2643 | return true; |
2644 | if (Op.isImm()) |
2645 | return Op.getImm() != 0; |
2646 | if (Op.isCImm()) |
2647 | return !Op.getCImm()->isZero(); |
2648 | if (Op.isFPImm()) |
2649 | return !Op.getFPImm()->isZero(); |
2650 | return false; |
2651 | }; |
2652 | |
2653 | auto IsZero = [] (const MachineOperand &Op) { |
2654 | if (Op.isGlobal() || Op.isBlockAddress()) |
2655 | return false; |
2656 | if (Op.isImm()) |
2657 | return Op.getImm() == 0; |
2658 | if (Op.isCImm()) |
2659 | return Op.getCImm()->isZero(); |
2660 | if (Op.isFPImm()) |
2661 | return Op.getFPImm()->isZero(); |
2662 | return false; |
2663 | }; |
2664 | |
2665 | // If the source register is known to be 0 or non-0, the comparison can |
2666 | // be folded to a load of a constant. |
2667 | if (KnownZ || KnownNZ) { |
2668 | assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0" ); |
2669 | return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi)); |
2670 | } |
2671 | |
2672 | // Special case: if the compare comes from a C2_muxii, then we know the |
2673 | // two possible constants that can be the source value. |
2674 | MachineInstr *InpDef = MRI.getVRegDef(Reg: SR.Reg); |
2675 | if (!InpDef) |
2676 | return false; |
2677 | if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) { |
2678 | MachineOperand &Src1 = InpDef->getOperand(i: 2); |
2679 | MachineOperand &Src2 = InpDef->getOperand(i: 3); |
2680 | // Check if both are non-zero. |
2681 | bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2); |
2682 | if (KnownNZ1 && KnownNZ2) |
2683 | return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi); |
2684 | // Check if both are zero. |
2685 | bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2); |
2686 | if (KnownZ1 && KnownZ2) |
2687 | return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi); |
2688 | |
2689 | // If for both operands we know that they are either 0 or non-0, |
2690 | // replace the comparison with a C2_muxii, using the same predicate |
2691 | // register, but with operands substituted with 0/1 accordingly. |
2692 | if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) { |
2693 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
2694 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::C2_muxii), DestReg: NewR) |
2695 | .addReg(RegNo: InpDef->getOperand(i: 1).getReg()) |
2696 | .addImm(Val: KnownZ1 == (Opc == Hexagon::A4_rcmpeqi)) |
2697 | .addImm(Val: KnownZ2 == (Opc == Hexagon::A4_rcmpeqi)); |
2698 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2699 | // Create a new cell with only the least significant bit unknown. |
2700 | BitTracker::RegisterCell NewRC(W); |
2701 | NewRC[0] = BitTracker::BitValue::self(); |
2702 | NewRC.fill(B: 1, E: W, V: BitTracker::BitValue::Zero); |
2703 | BT.put(RR: BitTracker::RegisterRef(NewR), RC: NewRC); |
2704 | return true; |
2705 | } |
2706 | } |
2707 | |
2708 | return false; |
2709 | } |
2710 | |
2711 | bool BitSimplification::processBlock(MachineBasicBlock &B, |
2712 | const RegisterSet &AVs) { |
2713 | if (!BT.reached(B: &B)) |
2714 | return false; |
2715 | bool Changed = false; |
2716 | RegisterSet AVB = AVs; |
2717 | RegisterSet Defs; |
2718 | |
2719 | for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Rs: Defs)) { |
2720 | MachineInstr *MI = &*I; |
2721 | Defs.clear(); |
2722 | HBS::getInstrDefs(MI: *MI, Defs); |
2723 | |
2724 | unsigned Opc = MI->getOpcode(); |
2725 | if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE) |
2726 | continue; |
2727 | |
2728 | if (MI->mayStore()) { |
2729 | bool T = genStoreUpperHalf(MI); |
2730 | T = T || genStoreImmediate(MI); |
2731 | Changed |= T; |
2732 | continue; |
2733 | } |
2734 | |
2735 | if (Defs.count() != 1) |
2736 | continue; |
2737 | const MachineOperand &Op0 = MI->getOperand(i: 0); |
2738 | if (!Op0.isReg() || !Op0.isDef()) |
2739 | continue; |
2740 | BitTracker::RegisterRef RD = Op0; |
2741 | if (!BT.has(Reg: RD.Reg)) |
2742 | continue; |
2743 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RR: RD, MRI); |
2744 | const BitTracker::RegisterCell &RC = BT.lookup(Reg: RD.Reg); |
2745 | |
2746 | if (FRC->getID() == Hexagon::DoubleRegsRegClassID) { |
2747 | bool T = genPackhl(MI, RD, RC); |
2748 | T = T || simplifyExtractLow(MI, RD, RC, AVs: AVB); |
2749 | Changed |= T; |
2750 | continue; |
2751 | } |
2752 | |
2753 | if (FRC->getID() == Hexagon::IntRegsRegClassID) { |
2754 | bool T = genBitSplit(MI, RD, RC, AVs: AVB); |
2755 | T = T || simplifyExtractLow(MI, RD, RC, AVs: AVB); |
2756 | T = T || genExtractHalf(MI, RD, RC); |
2757 | T = T || genCombineHalf(MI, RD, RC); |
2758 | T = T || genExtractLow(MI, RD, RC); |
2759 | T = T || simplifyRCmp0(MI, RD); |
2760 | Changed |= T; |
2761 | continue; |
2762 | } |
2763 | |
2764 | if (FRC->getID() == Hexagon::PredRegsRegClassID) { |
2765 | bool T = simplifyTstbit(MI, RD, RC); |
2766 | Changed |= T; |
2767 | continue; |
2768 | } |
2769 | } |
2770 | return Changed; |
2771 | } |
2772 | |
2773 | bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { |
2774 | if (skipFunction(F: MF.getFunction())) |
2775 | return false; |
2776 | |
2777 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
2778 | auto &HRI = *HST.getRegisterInfo(); |
2779 | auto &HII = *HST.getInstrInfo(); |
2780 | |
2781 | MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
2782 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2783 | bool Changed; |
2784 | |
2785 | Changed = DeadCodeElimination(MF, *MDT).run(); |
2786 | |
2787 | const HexagonEvaluator HE(HRI, MRI, HII, MF); |
2788 | BitTracker BT(HE, MF); |
2789 | LLVM_DEBUG(BT.trace(true)); |
2790 | BT.run(); |
2791 | |
2792 | MachineBasicBlock &Entry = MF.front(); |
2793 | |
2794 | RegisterSet AIG; // Available registers for IG. |
2795 | ConstGeneration ImmG(BT, HII, MRI); |
2796 | Changed |= visitBlock(B&: Entry, T&: ImmG, AVs&: AIG); |
2797 | |
2798 | RegisterSet ARE; // Available registers for RIE. |
2799 | RedundantInstrElimination RIE(BT, HII, HRI, MRI); |
2800 | bool Ried = visitBlock(B&: Entry, T&: RIE, AVs&: ARE); |
2801 | if (Ried) { |
2802 | Changed = true; |
2803 | BT.run(); |
2804 | } |
2805 | |
2806 | RegisterSet ACG; // Available registers for CG. |
2807 | CopyGeneration CopyG(BT, HII, HRI, MRI); |
2808 | Changed |= visitBlock(B&: Entry, T&: CopyG, AVs&: ACG); |
2809 | |
2810 | RegisterSet ACP; // Available registers for CP. |
2811 | CopyPropagation CopyP(HRI, MRI); |
2812 | Changed |= visitBlock(B&: Entry, T&: CopyP, AVs&: ACP); |
2813 | |
2814 | Changed = DeadCodeElimination(MF, *MDT).run() || Changed; |
2815 | |
2816 | BT.run(); |
2817 | RegisterSet ABS; // Available registers for BS. |
2818 | BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF); |
2819 | Changed |= visitBlock(B&: Entry, T&: BitS, AVs&: ABS); |
2820 | |
2821 | Changed = DeadCodeElimination(MF, *MDT).run() || Changed; |
2822 | |
2823 | if (Changed) { |
2824 | for (auto &B : MF) |
2825 | for (auto &I : B) |
2826 | I.clearKillInfo(); |
2827 | DeadCodeElimination(MF, *MDT).run(); |
2828 | } |
2829 | return Changed; |
2830 | } |
2831 | |
2832 | // Recognize loops where the code at the end of the loop matches the code |
// before the entry of the loop, and the matching code is such that it can
2834 | // be simplified. This pass relies on the bit simplification above and only |
2835 | // prepares code in a way that can be handled by the bit simplification. |
2836 | // |
2837 | // This is the motivating testcase (and explanation): |
2838 | // |
2839 | // { |
2840 | // loop0(.LBB0_2, r1) // %for.body.preheader |
2841 | // r5:4 = memd(r0++#8) |
2842 | // } |
2843 | // { |
2844 | // r3 = lsr(r4, #16) |
2845 | // r7:6 = combine(r5, r5) |
2846 | // } |
2847 | // { |
2848 | // r3 = insert(r5, #16, #16) |
2849 | // r7:6 = vlsrw(r7:6, #16) |
2850 | // } |
2851 | // .LBB0_2: |
2852 | // { |
2853 | // memh(r2+#4) = r5 |
2854 | // memh(r2+#6) = r6 # R6 is really R5.H |
2855 | // } |
2856 | // { |
2857 | // r2 = add(r2, #8) |
2858 | // memh(r2+#0) = r4 |
2859 | // memh(r2+#2) = r3 # R3 is really R4.H |
2860 | // } |
2861 | // { |
2862 | // r5:4 = memd(r0++#8) |
2863 | // } |
2864 | // { # "Shuffling" code that sets up R3 and R6 |
2865 | // r3 = lsr(r4, #16) # so that their halves can be stored in the |
2866 | // r7:6 = combine(r5, r5) # next iteration. This could be folded into |
2867 | // } # the stores if the code was at the beginning |
2868 | // { # of the loop iteration. Since the same code |
2869 | // r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved |
2870 | // r7:6 = vlsrw(r7:6, #16) # there. |
2871 | // }:endloop0 |
2872 | // |
2873 | // |
2874 | // The outcome: |
2875 | // |
2876 | // { |
2877 | // loop0(.LBB0_2, r1) |
2878 | // r5:4 = memd(r0++#8) |
2879 | // } |
2880 | // .LBB0_2: |
2881 | // { |
2882 | // memh(r2+#4) = r5 |
2883 | // memh(r2+#6) = r5.h |
2884 | // } |
2885 | // { |
2886 | // r2 = add(r2, #8) |
2887 | // memh(r2+#0) = r4 |
2888 | // memh(r2+#2) = r4.h |
2889 | // } |
2890 | // { |
2891 | // r5:4 = memd(r0++#8) |
2892 | // }:endloop0 |
2893 | |
2894 | namespace { |
2895 | |
2896 | class HexagonLoopRescheduling : public MachineFunctionPass { |
2897 | public: |
2898 | static char ID; |
2899 | |
2900 | HexagonLoopRescheduling() : MachineFunctionPass(ID) {} |
2901 | |
2902 | bool runOnMachineFunction(MachineFunction &MF) override; |
2903 | |
2904 | private: |
2905 | const HexagonInstrInfo *HII = nullptr; |
2906 | const HexagonRegisterInfo *HRI = nullptr; |
2907 | MachineRegisterInfo *MRI = nullptr; |
2908 | BitTracker *BTP = nullptr; |
2909 | |
2910 | struct LoopCand { |
2911 | LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb, |
2912 | MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {} |
2913 | |
2914 | MachineBasicBlock *LB, *PB, *EB; |
2915 | }; |
2916 | using InstrList = std::vector<MachineInstr *>; |
2917 | struct InstrGroup { |
2918 | BitTracker::RegisterRef Inp, Out; |
2919 | InstrList Ins; |
2920 | }; |
2921 | struct PhiInfo { |
2922 | PhiInfo(MachineInstr &P, MachineBasicBlock &B); |
2923 | |
2924 | unsigned DefR; |
2925 | BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register |
2926 | MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block |
2927 | }; |
2928 | |
2929 | static unsigned getDefReg(const MachineInstr *MI); |
2930 | bool isConst(unsigned Reg) const; |
2931 | bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const; |
2932 | bool isStoreInput(const MachineInstr *MI, unsigned DefR) const; |
2933 | bool isShuffleOf(unsigned OutR, unsigned InpR) const; |
2934 | bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, |
2935 | unsigned &InpR2) const; |
2936 | void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, |
2937 | MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); |
2938 | bool processLoop(LoopCand &C); |
2939 | }; |
2940 | |
2941 | } // end anonymous namespace |
2942 | |
2943 | char HexagonLoopRescheduling::ID = 0; |
2944 | |
2945 | INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched-pass" , |
2946 | "Hexagon Loop Rescheduling" , false, false) |
2947 | |
2948 | HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, |
2949 | MachineBasicBlock &B) { |
2950 | DefR = HexagonLoopRescheduling::getDefReg(MI: &P); |
2951 | LB = &B; |
2952 | PB = nullptr; |
2953 | for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) { |
2954 | const MachineOperand &OpB = P.getOperand(i: i+1); |
2955 | if (OpB.getMBB() == &B) { |
2956 | LR = P.getOperand(i); |
2957 | continue; |
2958 | } |
2959 | PB = OpB.getMBB(); |
2960 | PR = P.getOperand(i); |
2961 | } |
2962 | } |
2963 | |
2964 | unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { |
2965 | RegisterSet Defs; |
2966 | HBS::getInstrDefs(MI: *MI, Defs); |
2967 | if (Defs.count() != 1) |
2968 | return 0; |
2969 | return Defs.find_first(); |
2970 | } |
2971 | |
2972 | bool HexagonLoopRescheduling::isConst(unsigned Reg) const { |
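  // A register is constant if the bit tracker knows every one of its bits
  // to be either 0 or 1.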
2973 | if (!BTP->has(Reg)) |
2974 | return false; |
2975 | const BitTracker::RegisterCell &RC = BTP->lookup(Reg); |
2976 | for (unsigned i = 0, w = RC.width(); i < w; ++i) { |
2977 | const BitTracker::BitValue &V = RC[i]; |
2978 | if (!V.is(T: 0) && !V.is(T: 1)) |
2979 | return false; |
2980 | } |
2981 | return true; |
2982 | } |
2983 | |
2984 | bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, |
2985 | unsigned DefR) const { |
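  // Opcodes treated as "bit shuffles": instructions whose output bits are
  // copies of input bits or constants (copies, immediate shifts, insert,
  // bitwise and/or, and the combine forms). Only these instructions are
  // candidates for being moved by the rescheduling.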
2986 | unsigned Opc = MI->getOpcode(); |
2987 | switch (Opc) { |
2988 | case TargetOpcode::COPY: |
2989 | case Hexagon::S2_lsr_i_r: |
2990 | case Hexagon::S2_asr_i_r: |
2991 | case Hexagon::S2_asl_i_r: |
2992 | case Hexagon::S2_lsr_i_p: |
2993 | case Hexagon::S2_asr_i_p: |
2994 | case Hexagon::S2_asl_i_p: |
2995 | case Hexagon::S2_insert: |
2996 | case Hexagon::A2_or: |
2997 | case Hexagon::A2_orp: |
2998 | case Hexagon::A2_and: |
2999 | case Hexagon::A2_andp: |
3000 | case Hexagon::A2_combinew: |
3001 | case Hexagon::A4_combineri: |
3002 | case Hexagon::A4_combineir: |
3003 | case Hexagon::A2_combineii: |
3004 | case Hexagon::A4_combineii: |
3005 | case Hexagon::A2_combine_ll: |
3006 | case Hexagon::A2_combine_lh: |
3007 | case Hexagon::A2_combine_hl: |
3008 | case Hexagon::A2_combine_hh: |
3009 | return true; |
3010 | } |
3011 | return false; |
3012 | } |
3013 | |
3014 | bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, |
3015 | unsigned InpR) const { |
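  // Check if MI uses InpR as its last operand, i.e. (for a store) as the
  // value being stored.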
3016 | for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { |
3017 | const MachineOperand &Op = MI->getOperand(i); |
3018 | if (!Op.isReg()) |
3019 | continue; |
3020 | if (Op.getReg() == InpR) |
3021 | return i == n-1; |
3022 | } |
3023 | return false; |
3024 | } |
3025 | |
3026 | bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { |
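  // OutR is a "shuffle" of InpR if every bit of OutR that refers to some
  // register refers to InpR; the remaining bits may be constants. For
  // example, a combine of the high and low halves of InpR qualifies, since
  // each referring bit of the output traces back to InpR.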
3027 | if (!BTP->has(Reg: OutR) || !BTP->has(Reg: InpR)) |
3028 | return false; |
3029 | const BitTracker::RegisterCell &OutC = BTP->lookup(Reg: OutR); |
3030 | for (unsigned i = 0, w = OutC.width(); i < w; ++i) { |
3031 | const BitTracker::BitValue &V = OutC[i]; |
3032 | if (V.Type != BitTracker::BitValue::Ref) |
3033 | continue; |
3034 | if (V.RefI.Reg != InpR) |
3035 | return false; |
3036 | } |
3037 | return true; |
3038 | } |
3039 | |
3040 | bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, |
3041 | unsigned OutR2, unsigned &InpR2) const { |
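  // Check if OutR2 is produced from some single register by the same bit
  // rearrangement that produces OutR1 from InpR1. On success, set InpR2 to
  // that register.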
3042 | if (!BTP->has(Reg: OutR1) || !BTP->has(Reg: InpR1) || !BTP->has(Reg: OutR2)) |
3043 | return false; |
3044 | const BitTracker::RegisterCell &OutC1 = BTP->lookup(Reg: OutR1); |
3045 | const BitTracker::RegisterCell &OutC2 = BTP->lookup(Reg: OutR2); |
3046 | unsigned W = OutC1.width(); |
3047 | unsigned MatchR = 0; |
3048 | if (W != OutC2.width()) |
3049 | return false; |
3050 | for (unsigned i = 0; i < W; ++i) { |
3051 | const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; |
3052 | if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One) |
3053 | return false; |
3054 | if (V1.Type != BitTracker::BitValue::Ref) |
3055 | continue; |
3056 | if (V1.RefI.Pos != V2.RefI.Pos) |
3057 | return false; |
3058 | if (V1.RefI.Reg != InpR1) |
3059 | return false; |
3060 | if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2) |
3061 | return false; |
3062 | if (!MatchR) |
3063 | MatchR = V2.RefI.Reg; |
3064 | else if (V2.RefI.Reg != MatchR) |
3065 | return false; |
3066 | } |
3067 | InpR2 = MatchR; |
3068 | return true; |
3069 | } |
3070 | |
3071 | void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, |
3072 | MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, |
3073 | unsigned NewPredR) { |
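  // Clone the group G at the front of the loop block LB: build a new phi
  // that merges NewPredR (incoming from the preheader PB) with the group's
  // original in-loop input, recreate each instruction of G before At using
  // the remapped registers, and redirect all uses of OldPhiR to the clone
  // of the group's output.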
3074 | DenseMap<unsigned,unsigned> RegMap; |
3075 | |
3076 | const TargetRegisterClass *PhiRC = MRI->getRegClass(Reg: NewPredR); |
3077 | Register PhiR = MRI->createVirtualRegister(RegClass: PhiRC); |
3078 | BuildMI(BB&: LB, I: At, MIMD: At->getDebugLoc(), MCID: HII->get(Opcode: TargetOpcode::PHI), DestReg: PhiR) |
3079 | .addReg(RegNo: NewPredR) |
3080 | .addMBB(MBB: &PB) |
3081 | .addReg(RegNo: G.Inp.Reg) |
3082 | .addMBB(MBB: &LB); |
3083 | RegMap.insert(KV: std::make_pair(x&: G.Inp.Reg, y&: PhiR)); |
3084 | |
3085 | for (const MachineInstr *SI : llvm::reverse(C&: G.Ins)) { |
3086 | unsigned DR = getDefReg(MI: SI); |
3087 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: DR); |
3088 | Register NewDR = MRI->createVirtualRegister(RegClass: RC); |
3089 | DebugLoc DL = SI->getDebugLoc(); |
3090 | |
3091 | auto MIB = BuildMI(BB&: LB, I: At, MIMD: DL, MCID: HII->get(Opcode: SI->getOpcode()), DestReg: NewDR); |
3092 | for (const MachineOperand &Op : SI->operands()) { |
3093 | if (!Op.isReg()) { |
3094 | MIB.add(MO: Op); |
3095 | continue; |
3096 | } |
3097 | if (!Op.isUse()) |
3098 | continue; |
3099 | unsigned UseR = RegMap[Op.getReg()]; |
3100 | MIB.addReg(RegNo: UseR, flags: 0, SubReg: Op.getSubReg()); |
3101 | } |
3102 | RegMap.insert(KV: std::make_pair(x&: DR, y&: NewDR)); |
3103 | } |
3104 | |
3105 | HBS::replaceReg(OldR: OldPhiR, NewR: RegMap[G.Out.Reg], MRI&: *MRI); |
3106 | } |
3107 | |
3108 | bool HexagonLoopRescheduling::processLoop(LoopCand &C) { |
3109 | LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB) |
                    << "\n");
3111 | std::vector<PhiInfo> Phis; |
3112 | for (auto &I : *C.LB) { |
3113 | if (!I.isPHI()) |
3114 | break; |
3115 | unsigned PR = getDefReg(MI: &I); |
3116 | if (isConst(Reg: PR)) |
3117 | continue; |
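    // Keep only phis whose value is used entirely within the loop block,
    // and is consumed by at least one bit shuffle or store.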
3118 | bool BadUse = false, GoodUse = false; |
3119 | for (const MachineOperand &MO : MRI->use_operands(Reg: PR)) { |
3120 | const MachineInstr *UseI = MO.getParent(); |
3121 | if (UseI->getParent() != C.LB) { |
3122 | BadUse = true; |
3123 | break; |
3124 | } |
3125 | if (isBitShuffle(MI: UseI, DefR: PR) || isStoreInput(MI: UseI, InpR: PR)) |
3126 | GoodUse = true; |
3127 | } |
3128 | if (BadUse || !GoodUse) |
3129 | continue; |
3130 | |
3131 | Phis.push_back(x: PhiInfo(I, *C.LB)); |
3132 | } |
3133 | |
3134 | LLVM_DEBUG({ |
    dbgs() << "Phis: {";
3136 | for (auto &I : Phis) { |
3137 | dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi(" |
3138 | << printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber() |
3139 | << ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b" |
3140 | << I.LB->getNumber() << ')'; |
3141 | } |
    dbgs() << " }\n";
3143 | }); |
3144 | |
3145 | if (Phis.empty()) |
3146 | return false; |
3147 | |
3148 | bool Changed = false; |
3149 | InstrList ShufIns; |
3150 | |
  // Go backwards in the block: for each bit shuffling instruction, check
  // whether it could potentially be moved to the front of the loop: its
  // output must not be used by a non-shuffling instruction in this loop.
3155 | for (MachineInstr &MI : llvm::reverse(C&: *C.LB)) { |
3156 | if (MI.isTerminator()) |
3157 | continue; |
3158 | if (MI.isPHI()) |
3159 | break; |
3160 | |
3161 | RegisterSet Defs; |
3162 | HBS::getInstrDefs(MI, Defs); |
3163 | if (Defs.count() != 1) |
3164 | continue; |
3165 | Register DefR = Defs.find_first(); |
3166 | if (!DefR.isVirtual()) |
3167 | continue; |
3168 | if (!isBitShuffle(MI: &MI, DefR)) |
3169 | continue; |
3170 | |
3171 | bool BadUse = false; |
3172 | for (auto UI = MRI->use_begin(RegNo: DefR), UE = MRI->use_end(); UI != UE; ++UI) { |
3173 | MachineInstr *UseI = UI->getParent(); |
3174 | if (UseI->getParent() == C.LB) { |
3175 | if (UseI->isPHI()) { |
3176 | // If the use is in a phi node in this loop, then it should be |
3177 | // the value corresponding to the back edge. |
3178 | unsigned Idx = UI.getOperandNo(); |
3179 | if (UseI->getOperand(i: Idx+1).getMBB() != C.LB) |
3180 | BadUse = true; |
3181 | } else { |
3182 | if (!llvm::is_contained(Range&: ShufIns, Element: UseI)) |
3183 | BadUse = true; |
3184 | } |
3185 | } else { |
3186 | // There is a use outside of the loop, but there is no epilog block |
3187 | // suitable for a copy-out. |
3188 | if (C.EB == nullptr) |
3189 | BadUse = true; |
3190 | } |
3191 | if (BadUse) |
3192 | break; |
3193 | } |
3194 | |
3195 | if (BadUse) |
3196 | continue; |
3197 | ShufIns.push_back(x: &MI); |
3198 | } |
3199 | |
3200 | // Partition the list of shuffling instructions into instruction groups, |
3201 | // where each group has to be moved as a whole (i.e. a group is a chain of |
3202 | // dependent instructions). A group produces a single live output register, |
3203 | // which is meant to be the input of the loop phi node (although this is |
3204 | // not checked here yet). It also uses a single register as its input, |
3205 | // which is some value produced in the loop body. After moving the group |
3206 | // to the beginning of the loop, that input register would need to be |
3207 | // the loop-carried register (through a phi node) instead of the (currently |
3208 | // loop-carried) output register. |
3209 | using InstrGroupList = std::vector<InstrGroup>; |
3210 | InstrGroupList Groups; |
3211 | |
3212 | for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) { |
3213 | MachineInstr *SI = ShufIns[i]; |
3214 | if (SI == nullptr) |
3215 | continue; |
3216 | |
3217 | InstrGroup G; |
3218 | G.Ins.push_back(x: SI); |
3219 | G.Out.Reg = getDefReg(MI: SI); |
3220 | RegisterSet Inputs; |
3221 | HBS::getInstrUses(MI: *SI, Uses&: Inputs); |
3222 | |
3223 | for (unsigned j = i+1; j < n; ++j) { |
3224 | MachineInstr *MI = ShufIns[j]; |
3225 | if (MI == nullptr) |
3226 | continue; |
3227 | RegisterSet Defs; |
3228 | HBS::getInstrDefs(MI: *MI, Defs); |
3229 | // If this instruction does not define any pending inputs, skip it. |
3230 | if (!Defs.intersects(Rs: Inputs)) |
3231 | continue; |
3232 | // Otherwise, add it to the current group and remove the inputs that |
3233 | // are defined by MI. |
3234 | G.Ins.push_back(x: MI); |
3235 | Inputs.remove(Rs: Defs); |
3236 | // Then add all registers used by MI. |
3237 | HBS::getInstrUses(MI: *MI, Uses&: Inputs); |
3238 | ShufIns[j] = nullptr; |
3239 | } |
3240 | |
3241 | // Only add a group if it requires at most one register. |
3242 | if (Inputs.count() > 1) |
3243 | continue; |
3244 | auto LoopInpEq = [G] (const PhiInfo &P) -> bool { |
3245 | return G.Out.Reg == P.LR.Reg; |
3246 | }; |
3247 | if (llvm::none_of(Range&: Phis, P: LoopInpEq)) |
3248 | continue; |
3249 | |
3250 | G.Inp.Reg = Inputs.find_first(); |
3251 | Groups.push_back(x: G); |
3252 | } |
3253 | |
3254 | LLVM_DEBUG({ |
3255 | for (unsigned i = 0, n = Groups.size(); i < n; ++i) { |
3256 | InstrGroup &G = Groups[i]; |
3257 | dbgs() << "Group[" << i << "] inp: " |
3258 | << printReg(G.Inp.Reg, HRI, G.Inp.Sub) |
             << " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
3260 | for (const MachineInstr *MI : G.Ins) |
        dbgs() << " " << *MI;
3262 | } |
3263 | }); |
3264 | |
3265 | for (InstrGroup &G : Groups) { |
3266 | if (!isShuffleOf(OutR: G.Out.Reg, InpR: G.Inp.Reg)) |
3267 | continue; |
3268 | auto LoopInpEq = [G] (const PhiInfo &P) -> bool { |
3269 | return G.Out.Reg == P.LR.Reg; |
3270 | }; |
3271 | auto F = llvm::find_if(Range&: Phis, P: LoopInpEq); |
3272 | if (F == Phis.end()) |
3273 | continue; |
3274 | unsigned PrehR = 0; |
3275 | if (!isSameShuffle(OutR1: G.Out.Reg, InpR1: G.Inp.Reg, OutR2: F->PR.Reg, InpR2&: PrehR)) { |
3276 | const MachineInstr *DefPrehR = MRI->getVRegDef(Reg: F->PR.Reg); |
3277 | unsigned Opc = DefPrehR->getOpcode(); |
3278 | if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi) |
3279 | continue; |
3280 | if (!DefPrehR->getOperand(i: 1).isImm()) |
3281 | continue; |
3282 | if (DefPrehR->getOperand(i: 1).getImm() != 0) |
3283 | continue; |
3284 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: G.Inp.Reg); |
3285 | if (RC != MRI->getRegClass(Reg: F->PR.Reg)) { |
3286 | PrehR = MRI->createVirtualRegister(RegClass: RC); |
3287 | unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi |
3288 | : Hexagon::A2_tfrpi; |
3289 | auto T = C.PB->getFirstTerminator(); |
3290 | DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc(); |
3291 | BuildMI(BB&: *C.PB, I: T, MIMD: DL, MCID: HII->get(Opcode: TfrI), DestReg: PrehR) |
3292 | .addImm(Val: 0); |
3293 | } else { |
3294 | PrehR = F->PR.Reg; |
3295 | } |
3296 | } |
3297 | // isSameShuffle could match with PrehR being of a wider class than |
3298 | // G.Inp.Reg, for example if G shuffles the low 32 bits of its input, |
3299 | // it would match for the input being a 32-bit register, and PrehR |
3300 | // being a 64-bit register (where the low 32 bits match). This could |
3301 | // be handled, but for now skip these cases. |
3302 | if (MRI->getRegClass(Reg: PrehR) != MRI->getRegClass(Reg: G.Inp.Reg)) |
3303 | continue; |
3304 | moveGroup(G, LB&: *F->LB, PB&: *F->PB, At: F->LB->getFirstNonPHI(), OldPhiR: F->DefR, NewPredR: PrehR); |
3305 | Changed = true; |
3306 | } |
3307 | |
3308 | return Changed; |
3309 | } |
3310 | |
3311 | bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { |
3312 | if (skipFunction(F: MF.getFunction())) |
3313 | return false; |
3314 | |
3315 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
3316 | HII = HST.getInstrInfo(); |
3317 | HRI = HST.getRegisterInfo(); |
3318 | MRI = &MF.getRegInfo(); |
3319 | const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); |
3320 | BitTracker BT(HE, MF); |
3321 | LLVM_DEBUG(BT.trace(true)); |
3322 | BT.run(); |
3323 | BTP = &BT; |
3324 | |
3325 | std::vector<LoopCand> Cand; |
3326 | |
3327 | for (auto &B : MF) { |
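    // Only single-block loops are candidates: the block must have exactly
    // two predecessors (itself and the preheader) and two successors
    // (itself and the exit block).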
3328 | if (B.pred_size() != 2 || B.succ_size() != 2) |
3329 | continue; |
3330 | MachineBasicBlock *PB = nullptr; |
3331 | bool IsLoop = false; |
3332 | for (MachineBasicBlock *Pred : B.predecessors()) { |
3333 | if (Pred != &B) |
3334 | PB = Pred; |
3335 | else |
3336 | IsLoop = true; |
3337 | } |
3338 | if (!IsLoop) |
3339 | continue; |
3340 | |
3341 | MachineBasicBlock *EB = nullptr; |
3342 | for (MachineBasicBlock *Succ : B.successors()) { |
3343 | if (Succ == &B) |
3344 | continue; |
      // Set EB to the epilog block, if it has only 1 predecessor (i.e. the
      // edge from B to EB is non-critical).
3347 | if (Succ->pred_size() == 1) |
3348 | EB = Succ; |
3349 | break; |
3350 | } |
3351 | |
3352 | Cand.push_back(x: LoopCand(&B, PB, EB)); |
3353 | } |
3354 | |
3355 | bool Changed = false; |
3356 | for (auto &C : Cand) |
3357 | Changed |= processLoop(C); |
3358 | |
3359 | return Changed; |
3360 | } |
3361 | |
3362 | //===----------------------------------------------------------------------===// |
3363 | // Public Constructor Functions |
3364 | //===----------------------------------------------------------------------===// |
3365 | |
3366 | FunctionPass *llvm::createHexagonLoopRescheduling() { |
3367 | return new HexagonLoopRescheduling(); |
3368 | } |
3369 | |
3370 | FunctionPass *llvm::createHexagonBitSimplify() { |
3371 | return new HexagonBitSimplify(); |
3372 | } |
3373 | |