1 | //===- HexagonBitSimplify.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "BitTracker.h" |
10 | #include "HexagonBitTracker.h" |
11 | #include "HexagonInstrInfo.h" |
12 | #include "HexagonRegisterInfo.h" |
13 | #include "HexagonSubtarget.h" |
14 | #include "llvm/ADT/BitVector.h" |
15 | #include "llvm/ADT/DenseMap.h" |
16 | #include "llvm/ADT/GraphTraits.h" |
17 | #include "llvm/ADT/STLExtras.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include "llvm/CodeGen/MachineBasicBlock.h" |
21 | #include "llvm/CodeGen/MachineDominators.h" |
22 | #include "llvm/CodeGen/MachineFunction.h" |
23 | #include "llvm/CodeGen/MachineFunctionPass.h" |
24 | #include "llvm/CodeGen/MachineInstr.h" |
25 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
26 | #include "llvm/CodeGen/MachineOperand.h" |
27 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
28 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
29 | #include "llvm/IR/DebugLoc.h" |
30 | #include "llvm/InitializePasses.h" |
31 | #include "llvm/MC/MCInstrDesc.h" |
32 | #include "llvm/Pass.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | #include "llvm/Support/Compiler.h" |
35 | #include "llvm/Support/Debug.h" |
36 | #include "llvm/Support/ErrorHandling.h" |
37 | #include "llvm/Support/MathExtras.h" |
38 | #include "llvm/Support/raw_ostream.h" |
39 | #include <algorithm> |
40 | #include <cassert> |
41 | #include <cstdint> |
42 | #include <deque> |
43 | #include <iterator> |
44 | #include <limits> |
45 | #include <utility> |
46 | #include <vector> |
47 | |
48 | #define DEBUG_TYPE "hexbit" |
49 | |
50 | using namespace llvm; |
51 | |
52 | static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied" , cl::Hidden, |
53 | cl::init(Val: true), cl::desc("Preserve subregisters in tied operands" )); |
54 | static cl::opt<bool> ("hexbit-extract" , cl::Hidden, |
55 | cl::init(Val: true), cl::desc("Generate extract instructions" )); |
56 | static cl::opt<bool> GenBitSplit("hexbit-bitsplit" , cl::Hidden, |
57 | cl::init(Val: true), cl::desc("Generate bitsplit instructions" )); |
58 | |
59 | static cl::opt<unsigned> ("hexbit-max-extract" , cl::Hidden, |
60 | cl::init(Val: std::numeric_limits<unsigned>::max())); |
61 | static unsigned = 0; |
62 | static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit" , cl::Hidden, |
63 | cl::init(Val: std::numeric_limits<unsigned>::max())); |
64 | static unsigned CountBitSplit = 0; |
65 | |
66 | static cl::opt<unsigned> RegisterSetLimit("hexbit-registerset-limit" , |
67 | cl::Hidden, cl::init(Val: 1000)); |
68 | |
69 | namespace llvm { |
70 | |
71 | void initializeHexagonBitSimplifyPass(PassRegistry& Registry); |
72 | FunctionPass *createHexagonBitSimplify(); |
73 | |
74 | } // end namespace llvm |
75 | |
76 | namespace { |
77 | |
78 | // Set of virtual registers, based on BitVector. |
79 | struct RegisterSet { |
80 | RegisterSet() = default; |
81 | explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {} |
82 | RegisterSet(const RegisterSet &RS) = default; |
83 | |
84 | void clear() { |
85 | Bits.clear(); |
86 | LRU.clear(); |
87 | } |
88 | |
89 | unsigned count() const { |
90 | return Bits.count(); |
91 | } |
92 | |
93 | unsigned find_first() const { |
94 | int First = Bits.find_first(); |
95 | if (First < 0) |
96 | return 0; |
97 | return x2v(x: First); |
98 | } |
99 | |
100 | unsigned find_next(unsigned Prev) const { |
101 | int Next = Bits.find_next(Prev: v2x(v: Prev)); |
102 | if (Next < 0) |
103 | return 0; |
104 | return x2v(x: Next); |
105 | } |
106 | |
107 | RegisterSet &insert(unsigned R) { |
108 | unsigned Idx = v2x(v: R); |
109 | ensure(Idx); |
110 | bool Exists = Bits.test(Idx); |
111 | Bits.set(Idx); |
112 | if (!Exists) { |
113 | LRU.push_back(x: Idx); |
114 | if (LRU.size() > RegisterSetLimit) { |
115 | unsigned T = LRU.front(); |
116 | Bits.reset(Idx: T); |
117 | LRU.pop_front(); |
118 | } |
119 | } |
120 | return *this; |
121 | } |
122 | RegisterSet &remove(unsigned R) { |
123 | unsigned Idx = v2x(v: R); |
124 | if (Idx < Bits.size()) { |
125 | bool Exists = Bits.test(Idx); |
126 | Bits.reset(Idx); |
127 | if (Exists) { |
128 | auto F = llvm::find(Range&: LRU, Val: Idx); |
129 | assert(F != LRU.end()); |
130 | LRU.erase(position: F); |
131 | } |
132 | } |
133 | return *this; |
134 | } |
135 | |
136 | RegisterSet &insert(const RegisterSet &Rs) { |
137 | for (unsigned R = Rs.find_first(); R; R = Rs.find_next(Prev: R)) |
138 | insert(R); |
139 | return *this; |
140 | } |
141 | RegisterSet &remove(const RegisterSet &Rs) { |
142 | for (unsigned R = Rs.find_first(); R; R = Rs.find_next(Prev: R)) |
143 | remove(R); |
144 | return *this; |
145 | } |
146 | |
147 | bool operator[](unsigned R) const { |
148 | unsigned Idx = v2x(v: R); |
149 | return Idx < Bits.size() ? Bits[Idx] : false; |
150 | } |
151 | bool has(unsigned R) const { |
152 | unsigned Idx = v2x(v: R); |
153 | if (Idx >= Bits.size()) |
154 | return false; |
155 | return Bits.test(Idx); |
156 | } |
157 | |
158 | bool empty() const { |
159 | return !Bits.any(); |
160 | } |
161 | bool includes(const RegisterSet &Rs) const { |
162 | // A.test(B) <=> A-B != {} |
163 | return !Rs.Bits.test(RHS: Bits); |
164 | } |
165 | bool intersects(const RegisterSet &Rs) const { |
166 | return Bits.anyCommon(RHS: Rs.Bits); |
167 | } |
168 | |
169 | private: |
170 | BitVector Bits; |
171 | std::deque<unsigned> LRU; |
172 | |
173 | void ensure(unsigned Idx) { |
174 | if (Bits.size() <= Idx) |
175 | Bits.resize(N: std::max(a: Idx+1, b: 32U)); |
176 | } |
177 | |
178 | static inline unsigned v2x(unsigned v) { |
179 | return Register::virtReg2Index(Reg: v); |
180 | } |
181 | |
182 | static inline unsigned x2v(unsigned x) { |
183 | return Register::index2VirtReg(Index: x); |
184 | } |
185 | }; |
186 | |
187 | struct PrintRegSet { |
188 | PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) |
189 | : RS(S), TRI(RI) {} |
190 | |
191 | friend raw_ostream &operator<< (raw_ostream &OS, |
192 | const PrintRegSet &P); |
193 | |
194 | private: |
195 | const RegisterSet &RS; |
196 | const TargetRegisterInfo *TRI; |
197 | }; |
198 | |
199 | raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) |
200 | LLVM_ATTRIBUTE_UNUSED; |
201 | raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { |
202 | OS << '{'; |
203 | for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(Prev: R)) |
204 | OS << ' ' << printReg(Reg: R, TRI: P.TRI); |
205 | OS << " }" ; |
206 | return OS; |
207 | } |
208 | |
209 | class Transformation; |
210 | |
211 | class HexagonBitSimplify : public MachineFunctionPass { |
212 | public: |
213 | static char ID; |
214 | |
215 | HexagonBitSimplify() : MachineFunctionPass(ID) {} |
216 | |
217 | StringRef getPassName() const override { |
218 | return "Hexagon bit simplification" ; |
219 | } |
220 | |
221 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
222 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
223 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
224 | MachineFunctionPass::getAnalysisUsage(AU); |
225 | } |
226 | |
227 | bool runOnMachineFunction(MachineFunction &MF) override; |
228 | |
229 | static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); |
230 | static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); |
231 | static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, |
232 | const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); |
233 | static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B, |
234 | uint16_t W); |
235 | static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, |
236 | uint16_t W, uint64_t &U); |
237 | static bool replaceReg(Register OldR, Register NewR, |
238 | MachineRegisterInfo &MRI); |
239 | static bool getSubregMask(const BitTracker::RegisterRef &RR, |
240 | unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI); |
241 | static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR, |
242 | MachineRegisterInfo &MRI); |
243 | static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR, |
244 | unsigned NewSR, MachineRegisterInfo &MRI); |
245 | static bool parseRegSequence(const MachineInstr &I, |
246 | BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, |
247 | const MachineRegisterInfo &MRI); |
248 | |
249 | static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, |
250 | uint16_t Begin); |
251 | static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, |
252 | uint16_t Begin, const HexagonInstrInfo &HII); |
253 | |
254 | static const TargetRegisterClass *getFinalVRegClass( |
255 | const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI); |
256 | static bool isTransparentCopy(const BitTracker::RegisterRef &RD, |
257 | const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI); |
258 | |
259 | private: |
260 | MachineDominatorTree *MDT = nullptr; |
261 | |
262 | bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); |
263 | static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, |
264 | unsigned NewSub = Hexagon::NoSubRegister); |
265 | }; |
266 | |
267 | using HBS = HexagonBitSimplify; |
268 | |
269 | // The purpose of this class is to provide a common facility to traverse |
270 | // the function top-down or bottom-up via the dominator tree, and keep |
271 | // track of the available registers. |
272 | class Transformation { |
273 | public: |
274 | bool TopDown; |
275 | |
276 | Transformation(bool TD) : TopDown(TD) {} |
277 | virtual ~Transformation() = default; |
278 | |
279 | virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; |
280 | }; |
281 | |
282 | } // end anonymous namespace |
283 | |
284 | char HexagonBitSimplify::ID = 0; |
285 | |
286 | INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify" , |
287 | "Hexagon bit simplification" , false, false) |
288 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
289 | INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify" , |
290 | "Hexagon bit simplification" , false, false) |
291 | |
292 | bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, |
293 | RegisterSet &AVs) { |
294 | bool Changed = false; |
295 | |
296 | if (T.TopDown) |
297 | Changed = T.processBlock(B, AVs); |
298 | |
299 | RegisterSet Defs; |
300 | for (auto &I : B) |
301 | getInstrDefs(MI: I, Defs); |
302 | RegisterSet NewAVs = AVs; |
303 | NewAVs.insert(Rs: Defs); |
304 | |
305 | for (auto *DTN : children<MachineDomTreeNode*>(G: MDT->getNode(BB: &B))) |
306 | Changed |= visitBlock(B&: *(DTN->getBlock()), T, AVs&: NewAVs); |
307 | |
308 | if (!T.TopDown) |
309 | Changed |= T.processBlock(B, AVs); |
310 | |
311 | return Changed; |
312 | } |
313 | |
314 | // |
315 | // Utility functions: |
316 | // |
317 | void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, |
318 | RegisterSet &Defs) { |
319 | for (auto &Op : MI.operands()) { |
320 | if (!Op.isReg() || !Op.isDef()) |
321 | continue; |
322 | Register R = Op.getReg(); |
323 | if (!R.isVirtual()) |
324 | continue; |
325 | Defs.insert(R); |
326 | } |
327 | } |
328 | |
329 | void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, |
330 | RegisterSet &Uses) { |
331 | for (auto &Op : MI.operands()) { |
332 | if (!Op.isReg() || !Op.isUse()) |
333 | continue; |
334 | Register R = Op.getReg(); |
335 | if (!R.isVirtual()) |
336 | continue; |
337 | Uses.insert(R); |
338 | } |
339 | } |
340 | |
341 | // Check if all the bits in range [B, E) in both cells are equal. |
342 | bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, |
343 | uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, |
344 | uint16_t W) { |
345 | for (uint16_t i = 0; i < W; ++i) { |
346 | // If RC1[i] is "bottom", it cannot be proven equal to RC2[i]. |
347 | if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) |
348 | return false; |
349 | // Same for RC2[i]. |
350 | if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) |
351 | return false; |
352 | if (RC1[B1+i] != RC2[B2+i]) |
353 | return false; |
354 | } |
355 | return true; |
356 | } |
357 | |
358 | bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, |
359 | uint16_t B, uint16_t W) { |
360 | assert(B < RC.width() && B+W <= RC.width()); |
361 | for (uint16_t i = B; i < B+W; ++i) |
362 | if (!RC[i].is(T: 0)) |
363 | return false; |
364 | return true; |
365 | } |
366 | |
367 | bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, |
368 | uint16_t B, uint16_t W, uint64_t &U) { |
369 | assert(B < RC.width() && B+W <= RC.width()); |
370 | int64_t T = 0; |
371 | for (uint16_t i = B+W; i > B; --i) { |
372 | const BitTracker::BitValue &BV = RC[i-1]; |
373 | T <<= 1; |
374 | if (BV.is(T: 1)) |
375 | T |= 1; |
376 | else if (!BV.is(T: 0)) |
377 | return false; |
378 | } |
379 | U = T; |
380 | return true; |
381 | } |
382 | |
383 | bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR, |
384 | MachineRegisterInfo &MRI) { |
385 | if (!OldR.isVirtual() || !NewR.isVirtual()) |
386 | return false; |
387 | auto Begin = MRI.use_begin(RegNo: OldR), End = MRI.use_end(); |
388 | decltype(End) NextI; |
389 | for (auto I = Begin; I != End; I = NextI) { |
390 | NextI = std::next(x: I); |
391 | I->setReg(NewR); |
392 | } |
393 | return Begin != End; |
394 | } |
395 | |
396 | bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR, |
397 | unsigned NewSR, |
398 | MachineRegisterInfo &MRI) { |
399 | if (!OldR.isVirtual() || !NewR.isVirtual()) |
400 | return false; |
401 | if (hasTiedUse(Reg: OldR, MRI, NewSub: NewSR)) |
402 | return false; |
403 | auto Begin = MRI.use_begin(RegNo: OldR), End = MRI.use_end(); |
404 | decltype(End) NextI; |
405 | for (auto I = Begin; I != End; I = NextI) { |
406 | NextI = std::next(x: I); |
407 | I->setReg(NewR); |
408 | I->setSubReg(NewSR); |
409 | } |
410 | return Begin != End; |
411 | } |
412 | |
413 | bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR, |
414 | Register NewR, unsigned NewSR, |
415 | MachineRegisterInfo &MRI) { |
416 | if (!OldR.isVirtual() || !NewR.isVirtual()) |
417 | return false; |
418 | if (OldSR != NewSR && hasTiedUse(Reg: OldR, MRI, NewSub: NewSR)) |
419 | return false; |
420 | auto Begin = MRI.use_begin(RegNo: OldR), End = MRI.use_end(); |
421 | decltype(End) NextI; |
422 | for (auto I = Begin; I != End; I = NextI) { |
423 | NextI = std::next(x: I); |
424 | if (I->getSubReg() != OldSR) |
425 | continue; |
426 | I->setReg(NewR); |
427 | I->setSubReg(NewSR); |
428 | } |
429 | return Begin != End; |
430 | } |
431 | |
432 | // For a register ref (pair Reg:Sub), set Begin to the position of the LSB |
433 | // of Sub in Reg, and set Width to the size of Sub in bits. Return true, |
434 | // if this succeeded, otherwise return false. |
435 | bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, |
436 | unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { |
437 | const TargetRegisterClass *RC = MRI.getRegClass(Reg: RR.Reg); |
438 | if (RR.Sub == 0) { |
439 | Begin = 0; |
440 | Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(RC: *RC); |
441 | return true; |
442 | } |
443 | |
444 | Begin = 0; |
445 | |
446 | switch (RC->getID()) { |
447 | case Hexagon::DoubleRegsRegClassID: |
448 | case Hexagon::HvxWRRegClassID: |
449 | Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(RC: *RC) / 2; |
450 | if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) |
451 | Begin = Width; |
452 | break; |
453 | default: |
454 | return false; |
455 | } |
456 | return true; |
457 | } |
458 | |
459 | |
460 | // For a REG_SEQUENCE, set SL to the low subregister and SH to the high |
461 | // subregister. |
462 | bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, |
463 | BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH, |
464 | const MachineRegisterInfo &MRI) { |
465 | assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); |
466 | unsigned Sub1 = I.getOperand(i: 2).getImm(), Sub2 = I.getOperand(i: 4).getImm(); |
467 | auto &DstRC = *MRI.getRegClass(Reg: I.getOperand(i: 0).getReg()); |
468 | auto &HRI = static_cast<const HexagonRegisterInfo&>( |
469 | *MRI.getTargetRegisterInfo()); |
470 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC: DstRC, GenIdx: Hexagon::ps_sub_lo); |
471 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC: DstRC, GenIdx: Hexagon::ps_sub_hi); |
472 | assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo)); |
473 | if (Sub1 == SubLo && Sub2 == SubHi) { |
474 | SL = I.getOperand(i: 1); |
475 | SH = I.getOperand(i: 3); |
476 | return true; |
477 | } |
478 | if (Sub1 == SubHi && Sub2 == SubLo) { |
479 | SH = I.getOperand(i: 1); |
480 | SL = I.getOperand(i: 3); |
481 | return true; |
482 | } |
483 | return false; |
484 | } |
485 | |
486 | // All stores (except 64-bit stores) take a 32-bit register as the source |
487 | // of the value to be stored. If the instruction stores into a location |
488 | // that is shorter than 32 bits, some bits of the source register are not |
489 | // used. For each store instruction, calculate the set of used bits in |
490 | // the source register, and set appropriate bits in Bits. Return true if |
491 | // the bits are calculated, false otherwise. |
492 | bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, |
493 | uint16_t Begin) { |
494 | using namespace Hexagon; |
495 | |
496 | switch (Opc) { |
497 | // Store byte |
498 | case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 |
499 | case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new |
500 | case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 |
501 | case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 |
502 | case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 |
503 | case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 |
504 | case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new |
505 | case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new |
506 | case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new |
507 | case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new |
508 | case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 |
509 | case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new |
510 | case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 |
511 | case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 |
512 | case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 |
513 | case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 |
514 | case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new |
515 | case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new |
516 | case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new |
517 | case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new |
518 | case S4_storerb_ap: // memb(Re32=#U6)=Rt32 |
519 | case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new |
520 | case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 |
521 | case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new |
522 | case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 |
523 | case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new |
524 | case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 |
525 | case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new |
526 | case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 |
527 | case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new |
528 | case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 |
529 | case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new |
530 | case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 |
531 | case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new |
532 | case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 |
533 | case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 |
534 | case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 |
535 | case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 |
536 | case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new |
537 | case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new |
538 | case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new |
539 | case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new |
540 | case S2_storerbgp: // memb(gp+#u16:0)=Rt32 |
541 | case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new |
542 | case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 |
543 | case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 |
544 | case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 |
545 | case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 |
546 | case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new |
547 | case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new |
548 | case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new |
549 | case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new |
550 | Bits.set(I: Begin, E: Begin+8); |
551 | return true; |
552 | |
553 | // Store low half |
554 | case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 |
555 | case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new |
556 | case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 |
557 | case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 |
558 | case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 |
559 | case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 |
560 | case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new |
561 | case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new |
562 | case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new |
563 | case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new |
564 | case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 |
565 | case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new |
566 | case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 |
567 | case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 |
568 | case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 |
569 | case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 |
570 | case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new |
571 | case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new |
572 | case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new |
573 | case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new |
574 | case S4_storerh_ap: // memh(Re32=#U6)=Rt32 |
575 | case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new |
576 | case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 |
577 | case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new |
578 | case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 |
579 | case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new |
580 | case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 |
581 | case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new |
582 | case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 |
583 | case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new |
584 | case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 |
585 | case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new |
586 | case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 |
587 | case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 |
588 | case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 |
589 | case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 |
590 | case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 |
591 | case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new |
592 | case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new |
593 | case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new |
594 | case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new |
595 | case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new |
596 | case S2_storerhgp: // memh(gp+#u16:1)=Rt32 |
597 | case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new |
598 | case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 |
599 | case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32 |
600 | case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 |
601 | case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 |
602 | case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new |
603 | case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new |
604 | case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new |
605 | case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new |
606 | Bits.set(I: Begin, E: Begin+16); |
607 | return true; |
608 | |
609 | // Store high half |
610 | case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 |
611 | case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 |
612 | case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 |
613 | case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 |
614 | case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 |
615 | case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 |
616 | case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 |
617 | case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 |
618 | case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 |
619 | case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 |
620 | case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 |
621 | case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 |
622 | case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 |
623 | case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 |
624 | case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 |
625 | case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 |
626 | case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 |
627 | case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 |
628 | case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 |
629 | case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 |
630 | case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 |
631 | case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 |
632 | case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 |
633 | case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 |
634 | case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 |
635 | case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 |
636 | Bits.set(I: Begin+16, E: Begin+32); |
637 | return true; |
638 | } |
639 | |
640 | return false; |
641 | } |
642 | |
643 | // For an instruction with opcode Opc, calculate the set of bits that it |
644 | // uses in a register in operand OpN. This only calculates the set of used |
645 | // bits for cases where it does not depend on any operands (as is the case |
646 | // in shifts, for example). For concrete instructions from a program, the |
647 | // operand may be a subregister of a larger register, while Bits would |
648 | // correspond to the larger register in its entirety. Because of that, |
649 | // the parameter Begin can be used to indicate which bit of Bits should be |
650 | // considered the LSB of the operand. |
651 | bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, |
652 | BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { |
653 | using namespace Hexagon; |
654 | |
655 | const MCInstrDesc &D = HII.get(Opcode: Opc); |
656 | if (D.mayStore()) { |
657 | if (OpN == D.getNumOperands()-1) |
658 | return getUsedBitsInStore(Opc, Bits, Begin); |
659 | return false; |
660 | } |
661 | |
662 | switch (Opc) { |
663 | // One register source. Used bits: R1[0-7]. |
664 | case A2_sxtb: |
665 | case A2_zxtb: |
666 | case A4_cmpbeqi: |
667 | case A4_cmpbgti: |
668 | case A4_cmpbgtui: |
669 | if (OpN == 1) { |
670 | Bits.set(I: Begin, E: Begin+8); |
671 | return true; |
672 | } |
673 | break; |
674 | |
675 | // One register source. Used bits: R1[0-15]. |
676 | case A2_aslh: |
677 | case A2_sxth: |
678 | case A2_zxth: |
679 | case A4_cmpheqi: |
680 | case A4_cmphgti: |
681 | case A4_cmphgtui: |
682 | if (OpN == 1) { |
683 | Bits.set(I: Begin, E: Begin+16); |
684 | return true; |
685 | } |
686 | break; |
687 | |
688 | // One register source. Used bits: R1[16-31]. |
689 | case A2_asrh: |
690 | if (OpN == 1) { |
691 | Bits.set(I: Begin+16, E: Begin+32); |
692 | return true; |
693 | } |
694 | break; |
695 | |
696 | // Two register sources. Used bits: R1[0-7], R2[0-7]. |
697 | case A4_cmpbeq: |
698 | case A4_cmpbgt: |
699 | case A4_cmpbgtu: |
700 | if (OpN == 1) { |
701 | Bits.set(I: Begin, E: Begin+8); |
702 | return true; |
703 | } |
704 | break; |
705 | |
706 | // Two register sources. Used bits: R1[0-15], R2[0-15]. |
707 | case A4_cmpheq: |
708 | case A4_cmphgt: |
709 | case A4_cmphgtu: |
710 | case A2_addh_h16_ll: |
711 | case A2_addh_h16_sat_ll: |
712 | case A2_addh_l16_ll: |
713 | case A2_addh_l16_sat_ll: |
714 | case A2_combine_ll: |
715 | case A2_subh_h16_ll: |
716 | case A2_subh_h16_sat_ll: |
717 | case A2_subh_l16_ll: |
718 | case A2_subh_l16_sat_ll: |
719 | case M2_mpy_acc_ll_s0: |
720 | case M2_mpy_acc_ll_s1: |
721 | case M2_mpy_acc_sat_ll_s0: |
722 | case M2_mpy_acc_sat_ll_s1: |
723 | case M2_mpy_ll_s0: |
724 | case M2_mpy_ll_s1: |
725 | case M2_mpy_nac_ll_s0: |
726 | case M2_mpy_nac_ll_s1: |
727 | case M2_mpy_nac_sat_ll_s0: |
728 | case M2_mpy_nac_sat_ll_s1: |
729 | case M2_mpy_rnd_ll_s0: |
730 | case M2_mpy_rnd_ll_s1: |
731 | case M2_mpy_sat_ll_s0: |
732 | case M2_mpy_sat_ll_s1: |
733 | case M2_mpy_sat_rnd_ll_s0: |
734 | case M2_mpy_sat_rnd_ll_s1: |
735 | case M2_mpyd_acc_ll_s0: |
736 | case M2_mpyd_acc_ll_s1: |
737 | case M2_mpyd_ll_s0: |
738 | case M2_mpyd_ll_s1: |
739 | case M2_mpyd_nac_ll_s0: |
740 | case M2_mpyd_nac_ll_s1: |
741 | case M2_mpyd_rnd_ll_s0: |
742 | case M2_mpyd_rnd_ll_s1: |
743 | case M2_mpyu_acc_ll_s0: |
744 | case M2_mpyu_acc_ll_s1: |
745 | case M2_mpyu_ll_s0: |
746 | case M2_mpyu_ll_s1: |
747 | case M2_mpyu_nac_ll_s0: |
748 | case M2_mpyu_nac_ll_s1: |
749 | case M2_mpyud_acc_ll_s0: |
750 | case M2_mpyud_acc_ll_s1: |
751 | case M2_mpyud_ll_s0: |
752 | case M2_mpyud_ll_s1: |
753 | case M2_mpyud_nac_ll_s0: |
754 | case M2_mpyud_nac_ll_s1: |
755 | if (OpN == 1 || OpN == 2) { |
756 | Bits.set(I: Begin, E: Begin+16); |
757 | return true; |
758 | } |
759 | break; |
760 | |
761 | // Two register sources. Used bits: R1[0-15], R2[16-31]. |
762 | case A2_addh_h16_lh: |
763 | case A2_addh_h16_sat_lh: |
764 | case A2_combine_lh: |
765 | case A2_subh_h16_lh: |
766 | case A2_subh_h16_sat_lh: |
767 | case M2_mpy_acc_lh_s0: |
768 | case M2_mpy_acc_lh_s1: |
769 | case M2_mpy_acc_sat_lh_s0: |
770 | case M2_mpy_acc_sat_lh_s1: |
771 | case M2_mpy_lh_s0: |
772 | case M2_mpy_lh_s1: |
773 | case M2_mpy_nac_lh_s0: |
774 | case M2_mpy_nac_lh_s1: |
775 | case M2_mpy_nac_sat_lh_s0: |
776 | case M2_mpy_nac_sat_lh_s1: |
777 | case M2_mpy_rnd_lh_s0: |
778 | case M2_mpy_rnd_lh_s1: |
779 | case M2_mpy_sat_lh_s0: |
780 | case M2_mpy_sat_lh_s1: |
781 | case M2_mpy_sat_rnd_lh_s0: |
782 | case M2_mpy_sat_rnd_lh_s1: |
783 | case M2_mpyd_acc_lh_s0: |
784 | case M2_mpyd_acc_lh_s1: |
785 | case M2_mpyd_lh_s0: |
786 | case M2_mpyd_lh_s1: |
787 | case M2_mpyd_nac_lh_s0: |
788 | case M2_mpyd_nac_lh_s1: |
789 | case M2_mpyd_rnd_lh_s0: |
790 | case M2_mpyd_rnd_lh_s1: |
791 | case M2_mpyu_acc_lh_s0: |
792 | case M2_mpyu_acc_lh_s1: |
793 | case M2_mpyu_lh_s0: |
794 | case M2_mpyu_lh_s1: |
795 | case M2_mpyu_nac_lh_s0: |
796 | case M2_mpyu_nac_lh_s1: |
797 | case M2_mpyud_acc_lh_s0: |
798 | case M2_mpyud_acc_lh_s1: |
799 | case M2_mpyud_lh_s0: |
800 | case M2_mpyud_lh_s1: |
801 | case M2_mpyud_nac_lh_s0: |
802 | case M2_mpyud_nac_lh_s1: |
803 | // These four are actually LH. |
804 | case A2_addh_l16_hl: |
805 | case A2_addh_l16_sat_hl: |
806 | case A2_subh_l16_hl: |
807 | case A2_subh_l16_sat_hl: |
808 | if (OpN == 1) { |
809 | Bits.set(I: Begin, E: Begin+16); |
810 | return true; |
811 | } |
812 | if (OpN == 2) { |
813 | Bits.set(I: Begin+16, E: Begin+32); |
814 | return true; |
815 | } |
816 | break; |
817 | |
818 | // Two register sources, used bits: R1[16-31], R2[0-15]. |
819 | case A2_addh_h16_hl: |
820 | case A2_addh_h16_sat_hl: |
821 | case A2_combine_hl: |
822 | case A2_subh_h16_hl: |
823 | case A2_subh_h16_sat_hl: |
824 | case M2_mpy_acc_hl_s0: |
825 | case M2_mpy_acc_hl_s1: |
826 | case M2_mpy_acc_sat_hl_s0: |
827 | case M2_mpy_acc_sat_hl_s1: |
828 | case M2_mpy_hl_s0: |
829 | case M2_mpy_hl_s1: |
830 | case M2_mpy_nac_hl_s0: |
831 | case M2_mpy_nac_hl_s1: |
832 | case M2_mpy_nac_sat_hl_s0: |
833 | case M2_mpy_nac_sat_hl_s1: |
834 | case M2_mpy_rnd_hl_s0: |
835 | case M2_mpy_rnd_hl_s1: |
836 | case M2_mpy_sat_hl_s0: |
837 | case M2_mpy_sat_hl_s1: |
838 | case M2_mpy_sat_rnd_hl_s0: |
839 | case M2_mpy_sat_rnd_hl_s1: |
840 | case M2_mpyd_acc_hl_s0: |
841 | case M2_mpyd_acc_hl_s1: |
842 | case M2_mpyd_hl_s0: |
843 | case M2_mpyd_hl_s1: |
844 | case M2_mpyd_nac_hl_s0: |
845 | case M2_mpyd_nac_hl_s1: |
846 | case M2_mpyd_rnd_hl_s0: |
847 | case M2_mpyd_rnd_hl_s1: |
848 | case M2_mpyu_acc_hl_s0: |
849 | case M2_mpyu_acc_hl_s1: |
850 | case M2_mpyu_hl_s0: |
851 | case M2_mpyu_hl_s1: |
852 | case M2_mpyu_nac_hl_s0: |
853 | case M2_mpyu_nac_hl_s1: |
854 | case M2_mpyud_acc_hl_s0: |
855 | case M2_mpyud_acc_hl_s1: |
856 | case M2_mpyud_hl_s0: |
857 | case M2_mpyud_hl_s1: |
858 | case M2_mpyud_nac_hl_s0: |
859 | case M2_mpyud_nac_hl_s1: |
860 | if (OpN == 1) { |
861 | Bits.set(I: Begin+16, E: Begin+32); |
862 | return true; |
863 | } |
864 | if (OpN == 2) { |
865 | Bits.set(I: Begin, E: Begin+16); |
866 | return true; |
867 | } |
868 | break; |
869 | |
870 | // Two register sources, used bits: R1[16-31], R2[16-31]. |
871 | case A2_addh_h16_hh: |
872 | case A2_addh_h16_sat_hh: |
873 | case A2_combine_hh: |
874 | case A2_subh_h16_hh: |
875 | case A2_subh_h16_sat_hh: |
876 | case M2_mpy_acc_hh_s0: |
877 | case M2_mpy_acc_hh_s1: |
878 | case M2_mpy_acc_sat_hh_s0: |
879 | case M2_mpy_acc_sat_hh_s1: |
880 | case M2_mpy_hh_s0: |
881 | case M2_mpy_hh_s1: |
882 | case M2_mpy_nac_hh_s0: |
883 | case M2_mpy_nac_hh_s1: |
884 | case M2_mpy_nac_sat_hh_s0: |
885 | case M2_mpy_nac_sat_hh_s1: |
886 | case M2_mpy_rnd_hh_s0: |
887 | case M2_mpy_rnd_hh_s1: |
888 | case M2_mpy_sat_hh_s0: |
889 | case M2_mpy_sat_hh_s1: |
890 | case M2_mpy_sat_rnd_hh_s0: |
891 | case M2_mpy_sat_rnd_hh_s1: |
892 | case M2_mpyd_acc_hh_s0: |
893 | case M2_mpyd_acc_hh_s1: |
894 | case M2_mpyd_hh_s0: |
895 | case M2_mpyd_hh_s1: |
896 | case M2_mpyd_nac_hh_s0: |
897 | case M2_mpyd_nac_hh_s1: |
898 | case M2_mpyd_rnd_hh_s0: |
899 | case M2_mpyd_rnd_hh_s1: |
900 | case M2_mpyu_acc_hh_s0: |
901 | case M2_mpyu_acc_hh_s1: |
902 | case M2_mpyu_hh_s0: |
903 | case M2_mpyu_hh_s1: |
904 | case M2_mpyu_nac_hh_s0: |
905 | case M2_mpyu_nac_hh_s1: |
906 | case M2_mpyud_acc_hh_s0: |
907 | case M2_mpyud_acc_hh_s1: |
908 | case M2_mpyud_hh_s0: |
909 | case M2_mpyud_hh_s1: |
910 | case M2_mpyud_nac_hh_s0: |
911 | case M2_mpyud_nac_hh_s1: |
912 | if (OpN == 1 || OpN == 2) { |
913 | Bits.set(I: Begin+16, E: Begin+32); |
914 | return true; |
915 | } |
916 | break; |
917 | } |
918 | |
919 | return false; |
920 | } |
921 | |
922 | // Calculate the register class that matches Reg:Sub. For example, if |
923 | // %1 is a double register, then %1:isub_hi would match the "int" |
924 | // register class. |
925 | const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( |
926 | const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { |
927 | if (!RR.Reg.isVirtual()) |
928 | return nullptr; |
929 | auto *RC = MRI.getRegClass(Reg: RR.Reg); |
930 | if (RR.Sub == 0) |
931 | return RC; |
932 | auto &HRI = static_cast<const HexagonRegisterInfo&>( |
933 | *MRI.getTargetRegisterInfo()); |
934 | |
935 | auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void { |
936 | (void)HRI; |
937 | assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) || |
938 | Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi)); |
939 | }; |
940 | |
941 | switch (RC->getID()) { |
942 | case Hexagon::DoubleRegsRegClassID: |
943 | VerifySR(RC, RR.Sub); |
944 | return &Hexagon::IntRegsRegClass; |
945 | case Hexagon::HvxWRRegClassID: |
946 | VerifySR(RC, RR.Sub); |
947 | return &Hexagon::HvxVRRegClass; |
948 | } |
949 | return nullptr; |
950 | } |
951 | |
952 | // Check if RD could be replaced with RS at any possible use of RD. |
953 | // For example a predicate register cannot be replaced with a integer |
954 | // register, but a 64-bit register with a subregister can be replaced |
955 | // with a 32-bit register. |
956 | bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, |
957 | const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { |
958 | if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual()) |
959 | return false; |
960 | // Return false if one (or both) classes are nullptr. |
961 | auto *DRC = getFinalVRegClass(RR: RD, MRI); |
962 | if (!DRC) |
963 | return false; |
964 | |
965 | return DRC == getFinalVRegClass(RR: RS, MRI); |
966 | } |
967 | |
968 | bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI, |
969 | unsigned NewSub) { |
970 | if (!PreserveTiedOps) |
971 | return false; |
972 | return llvm::any_of(Range: MRI.use_operands(Reg), |
973 | P: [NewSub] (const MachineOperand &Op) -> bool { |
974 | return Op.getSubReg() != NewSub && Op.isTied(); |
975 | }); |
976 | } |
977 | |
978 | namespace { |
979 | |
980 | class DeadCodeElimination { |
981 | public: |
982 | DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt) |
983 | : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()), |
984 | MDT(mdt), MRI(mf.getRegInfo()) {} |
985 | |
986 | bool run() { |
987 | return runOnNode(N: MDT.getRootNode()); |
988 | } |
989 | |
990 | private: |
991 | bool isDead(unsigned R) const; |
992 | bool runOnNode(MachineDomTreeNode *N); |
993 | |
994 | MachineFunction &MF; |
995 | const HexagonInstrInfo &HII; |
996 | MachineDominatorTree &MDT; |
997 | MachineRegisterInfo &MRI; |
998 | }; |
999 | |
1000 | } // end anonymous namespace |
1001 | |
1002 | bool DeadCodeElimination::isDead(unsigned R) const { |
1003 | for (const MachineOperand &MO : MRI.use_operands(Reg: R)) { |
1004 | const MachineInstr *UseI = MO.getParent(); |
1005 | if (UseI->isDebugInstr()) |
1006 | continue; |
1007 | if (UseI->isPHI()) { |
1008 | assert(!UseI->getOperand(0).getSubReg()); |
1009 | Register DR = UseI->getOperand(i: 0).getReg(); |
1010 | if (DR == R) |
1011 | continue; |
1012 | } |
1013 | return false; |
1014 | } |
1015 | return true; |
1016 | } |
1017 | |
1018 | bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { |
1019 | bool Changed = false; |
1020 | |
1021 | for (auto *DTN : children<MachineDomTreeNode*>(G: N)) |
1022 | Changed |= runOnNode(N: DTN); |
1023 | |
1024 | MachineBasicBlock *B = N->getBlock(); |
1025 | std::vector<MachineInstr*> Instrs; |
1026 | for (MachineInstr &MI : llvm::reverse(C&: *B)) |
1027 | Instrs.push_back(x: &MI); |
1028 | |
1029 | for (auto *MI : Instrs) { |
1030 | unsigned Opc = MI->getOpcode(); |
1031 | // Do not touch lifetime markers. This is why the target-independent DCE |
1032 | // cannot be used. |
1033 | if (Opc == TargetOpcode::LIFETIME_START || |
1034 | Opc == TargetOpcode::LIFETIME_END) |
1035 | continue; |
1036 | bool Store = false; |
1037 | if (MI->isInlineAsm()) |
1038 | continue; |
1039 | // Delete PHIs if possible. |
1040 | if (!MI->isPHI() && !MI->isSafeToMove(AA: nullptr, SawStore&: Store)) |
1041 | continue; |
1042 | |
1043 | bool AllDead = true; |
1044 | SmallVector<unsigned,2> Regs; |
1045 | for (auto &Op : MI->operands()) { |
1046 | if (!Op.isReg() || !Op.isDef()) |
1047 | continue; |
1048 | Register R = Op.getReg(); |
1049 | if (!R.isVirtual() || !isDead(R)) { |
1050 | AllDead = false; |
1051 | break; |
1052 | } |
1053 | Regs.push_back(Elt: R); |
1054 | } |
1055 | if (!AllDead) |
1056 | continue; |
1057 | |
1058 | B->erase(I: MI); |
1059 | for (unsigned Reg : Regs) |
1060 | MRI.markUsesInDebugValueAsUndef(Reg); |
1061 | Changed = true; |
1062 | } |
1063 | |
1064 | return Changed; |
1065 | } |
1066 | |
1067 | namespace { |
1068 | |
1069 | // Eliminate redundant instructions |
1070 | // |
1071 | // This transformation will identify instructions where the output register |
1072 | // is the same as one of its input registers. This only works on instructions |
1073 | // that define a single register (unlike post-increment loads, for example). |
1074 | // The equality check is actually more detailed: the code calculates which |
1075 | // bits of the output are used, and only compares these bits with the input |
1076 | // registers. |
1077 | // If the output matches an input, the instruction is replaced with COPY. |
1078 | // The copies will be removed by another transformation. |
1079 | class RedundantInstrElimination : public Transformation { |
1080 | public: |
1081 | RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, |
1082 | const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1083 | : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} |
1084 | |
1085 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1086 | |
1087 | private: |
1088 | bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, |
1089 | unsigned &LostB, unsigned &LostE); |
1090 | bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, |
1091 | unsigned &LostB, unsigned &LostE); |
1092 | bool computeUsedBits(unsigned Reg, BitVector &Bits); |
1093 | bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, |
1094 | uint16_t Begin); |
1095 | bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); |
1096 | |
1097 | const HexagonInstrInfo &HII; |
1098 | const HexagonRegisterInfo &HRI; |
1099 | MachineRegisterInfo &MRI; |
1100 | BitTracker &BT; |
1101 | }; |
1102 | |
1103 | } // end anonymous namespace |
1104 | |
1105 | // Check if the instruction is a lossy shift left, where the input being |
1106 | // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range |
1107 | // of bit indices that are lost. |
1108 | bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, |
1109 | unsigned OpN, unsigned &LostB, unsigned &LostE) { |
1110 | using namespace Hexagon; |
1111 | |
1112 | unsigned Opc = MI.getOpcode(); |
1113 | unsigned ImN, RegN, Width; |
1114 | switch (Opc) { |
1115 | case S2_asl_i_p: |
1116 | ImN = 2; |
1117 | RegN = 1; |
1118 | Width = 64; |
1119 | break; |
1120 | case S2_asl_i_p_acc: |
1121 | case S2_asl_i_p_and: |
1122 | case S2_asl_i_p_nac: |
1123 | case S2_asl_i_p_or: |
1124 | case S2_asl_i_p_xacc: |
1125 | ImN = 3; |
1126 | RegN = 2; |
1127 | Width = 64; |
1128 | break; |
1129 | case S2_asl_i_r: |
1130 | ImN = 2; |
1131 | RegN = 1; |
1132 | Width = 32; |
1133 | break; |
1134 | case S2_addasl_rrri: |
1135 | case S4_andi_asl_ri: |
1136 | case S4_ori_asl_ri: |
1137 | case S4_addi_asl_ri: |
1138 | case S4_subi_asl_ri: |
1139 | case S2_asl_i_r_acc: |
1140 | case S2_asl_i_r_and: |
1141 | case S2_asl_i_r_nac: |
1142 | case S2_asl_i_r_or: |
1143 | case S2_asl_i_r_sat: |
1144 | case S2_asl_i_r_xacc: |
1145 | ImN = 3; |
1146 | RegN = 2; |
1147 | Width = 32; |
1148 | break; |
1149 | default: |
1150 | return false; |
1151 | } |
1152 | |
1153 | if (RegN != OpN) |
1154 | return false; |
1155 | |
1156 | assert(MI.getOperand(ImN).isImm()); |
1157 | unsigned S = MI.getOperand(i: ImN).getImm(); |
1158 | if (S == 0) |
1159 | return false; |
1160 | LostB = Width-S; |
1161 | LostE = Width; |
1162 | return true; |
1163 | } |
1164 | |
1165 | // Check if the instruction is a lossy shift right, where the input being |
1166 | // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range |
1167 | // of bit indices that are lost. |
1168 | bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, |
1169 | unsigned OpN, unsigned &LostB, unsigned &LostE) { |
1170 | using namespace Hexagon; |
1171 | |
1172 | unsigned Opc = MI.getOpcode(); |
1173 | unsigned ImN, RegN; |
1174 | switch (Opc) { |
1175 | case S2_asr_i_p: |
1176 | case S2_lsr_i_p: |
1177 | ImN = 2; |
1178 | RegN = 1; |
1179 | break; |
1180 | case S2_asr_i_p_acc: |
1181 | case S2_asr_i_p_and: |
1182 | case S2_asr_i_p_nac: |
1183 | case S2_asr_i_p_or: |
1184 | case S2_lsr_i_p_acc: |
1185 | case S2_lsr_i_p_and: |
1186 | case S2_lsr_i_p_nac: |
1187 | case S2_lsr_i_p_or: |
1188 | case S2_lsr_i_p_xacc: |
1189 | ImN = 3; |
1190 | RegN = 2; |
1191 | break; |
1192 | case S2_asr_i_r: |
1193 | case S2_lsr_i_r: |
1194 | ImN = 2; |
1195 | RegN = 1; |
1196 | break; |
1197 | case S4_andi_lsr_ri: |
1198 | case S4_ori_lsr_ri: |
1199 | case S4_addi_lsr_ri: |
1200 | case S4_subi_lsr_ri: |
1201 | case S2_asr_i_r_acc: |
1202 | case S2_asr_i_r_and: |
1203 | case S2_asr_i_r_nac: |
1204 | case S2_asr_i_r_or: |
1205 | case S2_lsr_i_r_acc: |
1206 | case S2_lsr_i_r_and: |
1207 | case S2_lsr_i_r_nac: |
1208 | case S2_lsr_i_r_or: |
1209 | case S2_lsr_i_r_xacc: |
1210 | ImN = 3; |
1211 | RegN = 2; |
1212 | break; |
1213 | |
1214 | default: |
1215 | return false; |
1216 | } |
1217 | |
1218 | if (RegN != OpN) |
1219 | return false; |
1220 | |
1221 | assert(MI.getOperand(ImN).isImm()); |
1222 | unsigned S = MI.getOperand(i: ImN).getImm(); |
1223 | LostB = 0; |
1224 | LostE = S; |
1225 | return true; |
1226 | } |
1227 | |
1228 | // Calculate the bit vector that corresponds to the used bits of register Reg. |
1229 | // The vector Bits has the same size, as the size of Reg in bits. If the cal- |
1230 | // culation fails (i.e. the used bits are unknown), it returns false. Other- |
1231 | // wise, it returns true and sets the corresponding bits in Bits. |
1232 | bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { |
1233 | BitVector Used(Bits.size()); |
1234 | RegisterSet Visited; |
1235 | std::vector<unsigned> Pending; |
1236 | Pending.push_back(x: Reg); |
1237 | |
1238 | for (unsigned i = 0; i < Pending.size(); ++i) { |
1239 | unsigned R = Pending[i]; |
1240 | if (Visited.has(R)) |
1241 | continue; |
1242 | Visited.insert(R); |
1243 | for (auto I = MRI.use_begin(RegNo: R), E = MRI.use_end(); I != E; ++I) { |
1244 | BitTracker::RegisterRef UR = *I; |
1245 | unsigned B, W; |
1246 | if (!HBS::getSubregMask(RR: UR, Begin&: B, Width&: W, MRI)) |
1247 | return false; |
1248 | MachineInstr &UseI = *I->getParent(); |
1249 | if (UseI.isPHI() || UseI.isCopy()) { |
1250 | Register DefR = UseI.getOperand(i: 0).getReg(); |
1251 | if (!DefR.isVirtual()) |
1252 | return false; |
1253 | Pending.push_back(x: DefR); |
1254 | } else { |
1255 | if (!computeUsedBits(MI: UseI, OpN: I.getOperandNo(), Bits&: Used, Begin: B)) |
1256 | return false; |
1257 | } |
1258 | } |
1259 | } |
1260 | Bits |= Used; |
1261 | return true; |
1262 | } |
1263 | |
1264 | // Calculate the bits used by instruction MI in a register in operand OpN. |
1265 | // Return true/false if the calculation succeeds/fails. If is succeeds, set |
1266 | // used bits in Bits. This function does not reset any bits in Bits, so |
1267 | // subsequent calls over different instructions will result in the union |
1268 | // of the used bits in all these instructions. |
1269 | // The register in question may be used with a sub-register, whereas Bits |
1270 | // holds the bits for the entire register. To keep track of that, the |
1271 | // argument Begin indicates where in Bits is the lowest-significant bit |
1272 | // of the register used in operand OpN. For example, in instruction: |
1273 | // %1 = S2_lsr_i_r %2:isub_hi, 10 |
1274 | // the operand 1 is a 32-bit register, which happens to be a subregister |
1275 | // of the 64-bit register %2, and that subregister starts at position 32. |
1276 | // In this case Begin=32, since Bits[32] would be the lowest-significant bit |
1277 | // of %2:isub_hi. |
1278 | bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, |
1279 | unsigned OpN, BitVector &Bits, uint16_t Begin) { |
1280 | unsigned Opc = MI.getOpcode(); |
1281 | BitVector T(Bits.size()); |
1282 | bool GotBits = HBS::getUsedBits(Opc, OpN, Bits&: T, Begin, HII); |
1283 | // Even if we don't have bits yet, we could still provide some information |
1284 | // if the instruction is a lossy shift: the lost bits will be marked as |
1285 | // not used. |
1286 | unsigned LB, LE; |
1287 | if (isLossyShiftLeft(MI, OpN, LostB&: LB, LostE&: LE) || isLossyShiftRight(MI, OpN, LostB&: LB, LostE&: LE)) { |
1288 | assert(MI.getOperand(OpN).isReg()); |
1289 | BitTracker::RegisterRef RR = MI.getOperand(i: OpN); |
1290 | const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); |
1291 | uint16_t Width = HRI.getRegSizeInBits(RC: *RC); |
1292 | |
1293 | if (!GotBits) |
1294 | T.set(I: Begin, E: Begin+Width); |
1295 | assert(LB <= LE && LB < Width && LE <= Width); |
1296 | T.reset(I: Begin+LB, E: Begin+LE); |
1297 | GotBits = true; |
1298 | } |
1299 | if (GotBits) |
1300 | Bits |= T; |
1301 | return GotBits; |
1302 | } |
1303 | |
1304 | // Calculates the used bits in RD ("defined register"), and checks if these |
1305 | // bits in RS ("used register") and RD are identical. |
1306 | bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, |
1307 | BitTracker::RegisterRef RS) { |
1308 | const BitTracker::RegisterCell &DC = BT.lookup(Reg: RD.Reg); |
1309 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: RS.Reg); |
1310 | |
1311 | unsigned DB, DW; |
1312 | if (!HBS::getSubregMask(RR: RD, Begin&: DB, Width&: DW, MRI)) |
1313 | return false; |
1314 | unsigned SB, SW; |
1315 | if (!HBS::getSubregMask(RR: RS, Begin&: SB, Width&: SW, MRI)) |
1316 | return false; |
1317 | if (SW != DW) |
1318 | return false; |
1319 | |
1320 | BitVector Used(DC.width()); |
1321 | if (!computeUsedBits(Reg: RD.Reg, Bits&: Used)) |
1322 | return false; |
1323 | |
1324 | for (unsigned i = 0; i != DW; ++i) |
1325 | if (Used[i+DB] && DC[DB+i] != SC[SB+i]) |
1326 | return false; |
1327 | return true; |
1328 | } |
1329 | |
1330 | bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, |
1331 | const RegisterSet&) { |
1332 | if (!BT.reached(B: &B)) |
1333 | return false; |
1334 | bool Changed = false; |
1335 | |
1336 | for (auto I = B.begin(), E = B.end(); I != E; ++I) { |
1337 | MachineInstr *MI = &*I; |
1338 | |
1339 | if (MI->getOpcode() == TargetOpcode::COPY) |
1340 | continue; |
1341 | if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) |
1342 | continue; |
1343 | unsigned NumD = MI->getDesc().getNumDefs(); |
1344 | if (NumD != 1) |
1345 | continue; |
1346 | |
1347 | BitTracker::RegisterRef RD = MI->getOperand(i: 0); |
1348 | if (!BT.has(Reg: RD.Reg)) |
1349 | continue; |
1350 | const BitTracker::RegisterCell &DC = BT.lookup(Reg: RD.Reg); |
1351 | auto At = MachineBasicBlock::iterator(MI); |
1352 | |
1353 | // Find a source operand that is equal to the result. |
1354 | for (auto &Op : MI->uses()) { |
1355 | if (!Op.isReg()) |
1356 | continue; |
1357 | BitTracker::RegisterRef RS = Op; |
1358 | if (!BT.has(Reg: RS.Reg)) |
1359 | continue; |
1360 | if (!HBS::isTransparentCopy(RD, RS, MRI)) |
1361 | continue; |
1362 | |
1363 | unsigned BN, BW; |
1364 | if (!HBS::getSubregMask(RR: RS, Begin&: BN, Width&: BW, MRI)) |
1365 | continue; |
1366 | |
1367 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: RS.Reg); |
1368 | if (!usedBitsEqual(RD, RS) && !HBS::isEqual(RC1: DC, B1: 0, RC2: SC, B2: BN, W: BW)) |
1369 | continue; |
1370 | |
1371 | // If found, replace the instruction with a COPY. |
1372 | const DebugLoc &DL = MI->getDebugLoc(); |
1373 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RR: RD, MRI); |
1374 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
1375 | MachineInstr *CopyI = |
1376 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: TargetOpcode::COPY), DestReg: NewR) |
1377 | .addReg(RegNo: RS.Reg, flags: 0, SubReg: RS.Sub); |
1378 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
1379 | // This pass can create copies between registers that don't have the |
1380 | // exact same values. Updating the tracker has to involve updating |
1381 | // all dependent cells. Example: |
1382 | // %1 = inst %2 ; %1 != %2, but used bits are equal |
1383 | // |
1384 | // %3 = copy %2 ; <- inserted |
1385 | // ... = %3 ; <- replaced from %2 |
1386 | // Indirectly, we can create a "copy" between %1 and %2 even |
1387 | // though their exact values do not match. |
1388 | BT.visit(MI: *CopyI); |
1389 | Changed = true; |
1390 | break; |
1391 | } |
1392 | } |
1393 | |
1394 | return Changed; |
1395 | } |
1396 | |
1397 | namespace { |
1398 | |
1399 | // Recognize instructions that produce constant values known at compile-time. |
1400 | // Replace them with register definitions that load these constants directly. |
1401 | class ConstGeneration : public Transformation { |
1402 | public: |
1403 | ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, |
1404 | MachineRegisterInfo &mri) |
1405 | : Transformation(true), HII(hii), MRI(mri), BT(bt) {} |
1406 | |
1407 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1408 | static bool isTfrConst(const MachineInstr &MI); |
1409 | |
1410 | private: |
1411 | Register genTfrConst(const TargetRegisterClass *RC, int64_t C, |
1412 | MachineBasicBlock &B, MachineBasicBlock::iterator At, |
1413 | DebugLoc &DL); |
1414 | |
1415 | const HexagonInstrInfo &HII; |
1416 | MachineRegisterInfo &MRI; |
1417 | BitTracker &BT; |
1418 | }; |
1419 | |
1420 | } // end anonymous namespace |
1421 | |
1422 | bool ConstGeneration::isTfrConst(const MachineInstr &MI) { |
1423 | unsigned Opc = MI.getOpcode(); |
1424 | switch (Opc) { |
1425 | case Hexagon::A2_combineii: |
1426 | case Hexagon::A4_combineii: |
1427 | case Hexagon::A2_tfrsi: |
1428 | case Hexagon::A2_tfrpi: |
1429 | case Hexagon::PS_true: |
1430 | case Hexagon::PS_false: |
1431 | case Hexagon::CONST32: |
1432 | case Hexagon::CONST64: |
1433 | return true; |
1434 | } |
1435 | return false; |
1436 | } |
1437 | |
1438 | // Generate a transfer-immediate instruction that is appropriate for the |
1439 | // register class and the actual value being transferred. |
1440 | Register ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, |
1441 | MachineBasicBlock &B, |
1442 | MachineBasicBlock::iterator At, |
1443 | DebugLoc &DL) { |
1444 | Register Reg = MRI.createVirtualRegister(RegClass: RC); |
1445 | if (RC == &Hexagon::IntRegsRegClass) { |
1446 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A2_tfrsi), DestReg: Reg) |
1447 | .addImm(Val: int32_t(C)); |
1448 | return Reg; |
1449 | } |
1450 | |
1451 | if (RC == &Hexagon::DoubleRegsRegClass) { |
1452 | if (isInt<8>(x: C)) { |
1453 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A2_tfrpi), DestReg: Reg) |
1454 | .addImm(Val: C); |
1455 | return Reg; |
1456 | } |
1457 | |
1458 | unsigned Lo = Lo_32(Value: C), Hi = Hi_32(Value: C); |
1459 | if (isInt<8>(x: Lo) || isInt<8>(x: Hi)) { |
1460 | unsigned Opc = isInt<8>(x: Lo) ? Hexagon::A2_combineii |
1461 | : Hexagon::A4_combineii; |
1462 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Opc), DestReg: Reg) |
1463 | .addImm(Val: int32_t(Hi)) |
1464 | .addImm(Val: int32_t(Lo)); |
1465 | return Reg; |
1466 | } |
1467 | MachineFunction *MF = B.getParent(); |
1468 | auto &HST = MF->getSubtarget<HexagonSubtarget>(); |
1469 | |
1470 | // Disable CONST64 for tiny core since it takes a LD resource. |
1471 | if (!HST.isTinyCore() || |
1472 | MF->getFunction().hasOptSize()) { |
1473 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::CONST64), DestReg: Reg) |
1474 | .addImm(Val: C); |
1475 | return Reg; |
1476 | } |
1477 | } |
1478 | |
1479 | if (RC == &Hexagon::PredRegsRegClass) { |
1480 | unsigned Opc; |
1481 | if (C == 0) |
1482 | Opc = Hexagon::PS_false; |
1483 | else if ((C & 0xFF) == 0xFF) |
1484 | Opc = Hexagon::PS_true; |
1485 | else |
1486 | return 0; |
1487 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Opc), DestReg: Reg); |
1488 | return Reg; |
1489 | } |
1490 | |
1491 | return 0; |
1492 | } |
1493 | |
1494 | bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { |
1495 | if (!BT.reached(B: &B)) |
1496 | return false; |
1497 | bool Changed = false; |
1498 | RegisterSet Defs; |
1499 | |
1500 | for (auto I = B.begin(), E = B.end(); I != E; ++I) { |
1501 | if (isTfrConst(MI: *I)) |
1502 | continue; |
1503 | Defs.clear(); |
1504 | HBS::getInstrDefs(MI: *I, Defs); |
1505 | if (Defs.count() != 1) |
1506 | continue; |
1507 | Register DR = Defs.find_first(); |
1508 | if (!DR.isVirtual()) |
1509 | continue; |
1510 | uint64_t U; |
1511 | const BitTracker::RegisterCell &DRC = BT.lookup(Reg: DR); |
1512 | if (HBS::getConst(RC: DRC, B: 0, W: DRC.width(), U)) { |
1513 | int64_t C = U; |
1514 | DebugLoc DL = I->getDebugLoc(); |
1515 | auto At = I->isPHI() ? B.getFirstNonPHI() : I; |
1516 | Register ImmReg = genTfrConst(RC: MRI.getRegClass(Reg: DR), C, B, At, DL); |
1517 | if (ImmReg) { |
1518 | HBS::replaceReg(OldR: DR, NewR: ImmReg, MRI); |
1519 | BT.put(RR: ImmReg, RC: DRC); |
1520 | Changed = true; |
1521 | } |
1522 | } |
1523 | } |
1524 | return Changed; |
1525 | } |
1526 | |
1527 | namespace { |
1528 | |
1529 | // Identify pairs of available registers which hold identical values. |
1530 | // In such cases, only one of them needs to be calculated, the other one |
1531 | // will be defined as a copy of the first. |
1532 | class CopyGeneration : public Transformation { |
1533 | public: |
1534 | CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii, |
1535 | const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1536 | : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} |
1537 | |
1538 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1539 | |
1540 | private: |
1541 | bool findMatch(const BitTracker::RegisterRef &Inp, |
1542 | BitTracker::RegisterRef &Out, const RegisterSet &AVs); |
1543 | |
1544 | const HexagonInstrInfo &HII; |
1545 | const HexagonRegisterInfo &HRI; |
1546 | MachineRegisterInfo &MRI; |
1547 | BitTracker &BT; |
1548 | RegisterSet Forbidden; |
1549 | }; |
1550 | |
1551 | // Eliminate register copies RD = RS, by replacing the uses of RD with |
1552 | // with uses of RS. |
1553 | class CopyPropagation : public Transformation { |
1554 | public: |
1555 | CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) |
1556 | : Transformation(false), HRI(hri), MRI(mri) {} |
1557 | |
1558 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1559 | |
1560 | static bool isCopyReg(unsigned Opc, bool NoConv); |
1561 | |
1562 | private: |
1563 | bool propagateRegCopy(MachineInstr &MI); |
1564 | |
1565 | const HexagonRegisterInfo &HRI; |
1566 | MachineRegisterInfo &MRI; |
1567 | }; |
1568 | |
1569 | } // end anonymous namespace |
1570 | |
1571 | /// Check if there is a register in AVs that is identical to Inp. If so, |
1572 | /// set Out to the found register. The output may be a pair Reg:Sub. |
1573 | bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, |
1574 | BitTracker::RegisterRef &Out, const RegisterSet &AVs) { |
1575 | if (!BT.has(Reg: Inp.Reg)) |
1576 | return false; |
1577 | const BitTracker::RegisterCell &InpRC = BT.lookup(Reg: Inp.Reg); |
1578 | auto *FRC = HBS::getFinalVRegClass(RR: Inp, MRI); |
1579 | unsigned B, W; |
1580 | if (!HBS::getSubregMask(RR: Inp, Begin&: B, Width&: W, MRI)) |
1581 | return false; |
1582 | |
1583 | for (Register R = AVs.find_first(); R; R = AVs.find_next(Prev: R)) { |
1584 | if (!BT.has(Reg: R) || Forbidden[R]) |
1585 | continue; |
1586 | const BitTracker::RegisterCell &RC = BT.lookup(Reg: R); |
1587 | unsigned RW = RC.width(); |
1588 | if (W == RW) { |
1589 | if (FRC != MRI.getRegClass(Reg: R)) |
1590 | continue; |
1591 | if (!HBS::isTransparentCopy(RD: R, RS: Inp, MRI)) |
1592 | continue; |
1593 | if (!HBS::isEqual(RC1: InpRC, B1: B, RC2: RC, B2: 0, W)) |
1594 | continue; |
1595 | Out.Reg = R; |
1596 | Out.Sub = 0; |
1597 | return true; |
1598 | } |
1599 | // Check if there is a super-register, whose part (with a subregister) |
1600 | // is equal to the input. |
1601 | // Only do double registers for now. |
1602 | if (W*2 != RW) |
1603 | continue; |
1604 | if (MRI.getRegClass(Reg: R) != &Hexagon::DoubleRegsRegClass) |
1605 | continue; |
1606 | |
1607 | if (HBS::isEqual(RC1: InpRC, B1: B, RC2: RC, B2: 0, W)) |
1608 | Out.Sub = Hexagon::isub_lo; |
1609 | else if (HBS::isEqual(RC1: InpRC, B1: B, RC2: RC, B2: W, W)) |
1610 | Out.Sub = Hexagon::isub_hi; |
1611 | else |
1612 | continue; |
1613 | Out.Reg = R; |
1614 | if (HBS::isTransparentCopy(RD: Out, RS: Inp, MRI)) |
1615 | return true; |
1616 | } |
1617 | return false; |
1618 | } |
1619 | |
1620 | bool CopyGeneration::processBlock(MachineBasicBlock &B, |
1621 | const RegisterSet &AVs) { |
1622 | if (!BT.reached(B: &B)) |
1623 | return false; |
1624 | RegisterSet AVB(AVs); |
1625 | bool Changed = false; |
1626 | RegisterSet Defs; |
1627 | |
1628 | for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Rs: Defs)) { |
1629 | Defs.clear(); |
1630 | HBS::getInstrDefs(MI: *I, Defs); |
1631 | |
1632 | unsigned Opc = I->getOpcode(); |
1633 | if (CopyPropagation::isCopyReg(Opc, NoConv: false) || |
1634 | ConstGeneration::isTfrConst(MI: *I)) |
1635 | continue; |
1636 | |
1637 | DebugLoc DL = I->getDebugLoc(); |
1638 | auto At = I->isPHI() ? B.getFirstNonPHI() : I; |
1639 | |
1640 | for (Register R = Defs.find_first(); R; R = Defs.find_next(Prev: R)) { |
1641 | BitTracker::RegisterRef MR; |
1642 | auto *FRC = HBS::getFinalVRegClass(RR: R, MRI); |
1643 | |
1644 | if (findMatch(Inp: R, Out&: MR, AVs: AVB)) { |
1645 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
1646 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: TargetOpcode::COPY), DestReg: NewR) |
1647 | .addReg(RegNo: MR.Reg, flags: 0, SubReg: MR.Sub); |
1648 | BT.put(RR: BitTracker::RegisterRef(NewR), RC: BT.get(RR: MR)); |
1649 | HBS::replaceReg(OldR: R, NewR, MRI); |
1650 | Forbidden.insert(R); |
1651 | continue; |
1652 | } |
1653 | |
1654 | if (FRC == &Hexagon::DoubleRegsRegClass || |
1655 | FRC == &Hexagon::HvxWRRegClass) { |
1656 | // Try to generate REG_SEQUENCE. |
1657 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC: *FRC, GenIdx: Hexagon::ps_sub_lo); |
1658 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC: *FRC, GenIdx: Hexagon::ps_sub_hi); |
1659 | BitTracker::RegisterRef TL = { R, SubLo }; |
1660 | BitTracker::RegisterRef TH = { R, SubHi }; |
1661 | BitTracker::RegisterRef ML, MH; |
1662 | if (findMatch(Inp: TL, Out&: ML, AVs: AVB) && findMatch(Inp: TH, Out&: MH, AVs: AVB)) { |
1663 | auto *FRC = HBS::getFinalVRegClass(RR: R, MRI); |
1664 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
1665 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: TargetOpcode::REG_SEQUENCE), DestReg: NewR) |
1666 | .addReg(RegNo: ML.Reg, flags: 0, SubReg: ML.Sub) |
1667 | .addImm(Val: SubLo) |
1668 | .addReg(RegNo: MH.Reg, flags: 0, SubReg: MH.Sub) |
1669 | .addImm(Val: SubHi); |
1670 | BT.put(RR: BitTracker::RegisterRef(NewR), RC: BT.get(RR: R)); |
1671 | HBS::replaceReg(OldR: R, NewR, MRI); |
1672 | Forbidden.insert(R); |
1673 | } |
1674 | } |
1675 | } |
1676 | } |
1677 | |
1678 | return Changed; |
1679 | } |
1680 | |
1681 | bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) { |
1682 | switch (Opc) { |
1683 | case TargetOpcode::COPY: |
1684 | case TargetOpcode::REG_SEQUENCE: |
1685 | case Hexagon::A4_combineir: |
1686 | case Hexagon::A4_combineri: |
1687 | return true; |
1688 | case Hexagon::A2_tfr: |
1689 | case Hexagon::A2_tfrp: |
1690 | case Hexagon::A2_combinew: |
1691 | case Hexagon::V6_vcombine: |
1692 | return NoConv; |
1693 | default: |
1694 | break; |
1695 | } |
1696 | return false; |
1697 | } |
1698 | |
1699 | bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { |
1700 | bool Changed = false; |
1701 | unsigned Opc = MI.getOpcode(); |
1702 | BitTracker::RegisterRef RD = MI.getOperand(i: 0); |
1703 | assert(MI.getOperand(0).getSubReg() == 0); |
1704 | |
1705 | switch (Opc) { |
1706 | case TargetOpcode::COPY: |
1707 | case Hexagon::A2_tfr: |
1708 | case Hexagon::A2_tfrp: { |
1709 | BitTracker::RegisterRef RS = MI.getOperand(i: 1); |
1710 | if (!HBS::isTransparentCopy(RD, RS, MRI)) |
1711 | break; |
1712 | if (RS.Sub != 0) |
1713 | Changed = HBS::replaceRegWithSub(OldR: RD.Reg, NewR: RS.Reg, NewSR: RS.Sub, MRI); |
1714 | else |
1715 | Changed = HBS::replaceReg(OldR: RD.Reg, NewR: RS.Reg, MRI); |
1716 | break; |
1717 | } |
1718 | case TargetOpcode::REG_SEQUENCE: { |
1719 | BitTracker::RegisterRef SL, SH; |
1720 | if (HBS::parseRegSequence(I: MI, SL, SH, MRI)) { |
1721 | const TargetRegisterClass &RC = *MRI.getRegClass(Reg: RD.Reg); |
1722 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC, GenIdx: Hexagon::ps_sub_lo); |
1723 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC, GenIdx: Hexagon::ps_sub_hi); |
1724 | Changed = HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: SubLo, NewR: SL.Reg, NewSR: SL.Sub, MRI); |
1725 | Changed |= HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: SubHi, NewR: SH.Reg, NewSR: SH.Sub, MRI); |
1726 | } |
1727 | break; |
1728 | } |
1729 | case Hexagon::A2_combinew: |
1730 | case Hexagon::V6_vcombine: { |
1731 | const TargetRegisterClass &RC = *MRI.getRegClass(Reg: RD.Reg); |
1732 | unsigned SubLo = HRI.getHexagonSubRegIndex(RC, GenIdx: Hexagon::ps_sub_lo); |
1733 | unsigned SubHi = HRI.getHexagonSubRegIndex(RC, GenIdx: Hexagon::ps_sub_hi); |
1734 | BitTracker::RegisterRef RH = MI.getOperand(i: 1), RL = MI.getOperand(i: 2); |
1735 | Changed = HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: SubLo, NewR: RL.Reg, NewSR: RL.Sub, MRI); |
1736 | Changed |= HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: SubHi, NewR: RH.Reg, NewSR: RH.Sub, MRI); |
1737 | break; |
1738 | } |
1739 | case Hexagon::A4_combineir: |
1740 | case Hexagon::A4_combineri: { |
1741 | unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1; |
1742 | unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo |
1743 | : Hexagon::isub_hi; |
1744 | BitTracker::RegisterRef RS = MI.getOperand(i: SrcX); |
1745 | Changed = HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: Sub, NewR: RS.Reg, NewSR: RS.Sub, MRI); |
1746 | break; |
1747 | } |
1748 | } |
1749 | return Changed; |
1750 | } |
1751 | |
1752 | bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { |
1753 | std::vector<MachineInstr*> Instrs; |
1754 | for (MachineInstr &MI : llvm::reverse(C&: B)) |
1755 | Instrs.push_back(x: &MI); |
1756 | |
1757 | bool Changed = false; |
1758 | for (auto *I : Instrs) { |
1759 | unsigned Opc = I->getOpcode(); |
1760 | if (!CopyPropagation::isCopyReg(Opc, NoConv: true)) |
1761 | continue; |
1762 | Changed |= propagateRegCopy(MI&: *I); |
1763 | } |
1764 | |
1765 | return Changed; |
1766 | } |
1767 | |
1768 | namespace { |
1769 | |
1770 | // Recognize patterns that can be simplified and replace them with the |
1771 | // simpler forms. |
1772 | // This is by no means complete |
1773 | class BitSimplification : public Transformation { |
1774 | public: |
1775 | BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt, |
1776 | const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri, |
1777 | MachineRegisterInfo &mri, MachineFunction &mf) |
1778 | : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri), |
1779 | MF(mf), BT(bt) {} |
1780 | |
1781 | bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; |
1782 | |
1783 | private: |
1784 | struct RegHalf : public BitTracker::RegisterRef { |
1785 | bool Low; // Low/High halfword. |
1786 | }; |
1787 | |
1788 | bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, |
1789 | unsigned B, RegHalf &RH); |
1790 | bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum); |
1791 | |
1792 | bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, |
1793 | BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); |
1794 | unsigned getCombineOpcode(bool HLow, bool LLow); |
1795 | |
1796 | bool genStoreUpperHalf(MachineInstr *MI); |
1797 | bool genStoreImmediate(MachineInstr *MI); |
1798 | bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, |
1799 | const BitTracker::RegisterCell &RC); |
1800 | bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, |
1801 | const BitTracker::RegisterCell &RC); |
1802 | bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, |
1803 | const BitTracker::RegisterCell &RC); |
1804 | bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, |
1805 | const BitTracker::RegisterCell &RC); |
1806 | bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD, |
1807 | const BitTracker::RegisterCell &RC, const RegisterSet &AVs); |
1808 | bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, |
1809 | const BitTracker::RegisterCell &RC); |
1810 | bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, |
1811 | const BitTracker::RegisterCell &RC, const RegisterSet &AVs); |
1812 | bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD); |
1813 | |
1814 | // Cache of created instructions to avoid creating duplicates. |
1815 | // XXX Currently only used by genBitSplit. |
1816 | std::vector<MachineInstr*> NewMIs; |
1817 | |
1818 | const MachineDominatorTree &MDT; |
1819 | const HexagonInstrInfo &HII; |
1820 | const HexagonRegisterInfo &HRI; |
1821 | MachineRegisterInfo &MRI; |
1822 | MachineFunction &MF; |
1823 | BitTracker &BT; |
1824 | }; |
1825 | |
1826 | } // end anonymous namespace |
1827 | |
1828 | // Check if the bits [B..B+16) in register cell RC form a valid halfword, |
1829 | // i.e. [0..16), [16..32), etc. of some register. If so, return true and |
1830 | // set the information about the found register in RH. |
1831 | bool BitSimplification::matchHalf(unsigned SelfR, |
1832 | const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { |
1833 | // XXX This could be searching in the set of available registers, in case |
1834 | // the match is not exact. |
1835 | |
1836 | // Match 16-bit chunks, where the RC[B..B+15] references exactly one |
1837 | // register and all the bits B..B+15 match between RC and the register. |
1838 | // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, |
1839 | // and RC = { [0]:0 [1-15]:v1[1-15]... }. |
1840 | bool Low = false; |
1841 | unsigned I = B; |
1842 | while (I < B+16 && RC[I].num()) |
1843 | I++; |
1844 | if (I == B+16) |
1845 | return false; |
1846 | |
1847 | Register Reg = RC[I].RefI.Reg; |
1848 | unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. |
1849 | if (P < I-B) |
1850 | return false; |
1851 | unsigned Pos = P - (I-B); |
1852 | |
1853 | if (Reg == 0 || Reg == SelfR) // Don't match "self". |
1854 | return false; |
1855 | if (!Reg.isVirtual()) |
1856 | return false; |
1857 | if (!BT.has(Reg)) |
1858 | return false; |
1859 | |
1860 | const BitTracker::RegisterCell &SC = BT.lookup(Reg); |
1861 | if (Pos+16 > SC.width()) |
1862 | return false; |
1863 | |
1864 | for (unsigned i = 0; i < 16; ++i) { |
1865 | const BitTracker::BitValue &RV = RC[i+B]; |
1866 | if (RV.Type == BitTracker::BitValue::Ref) { |
1867 | if (RV.RefI.Reg != Reg) |
1868 | return false; |
1869 | if (RV.RefI.Pos != i+Pos) |
1870 | return false; |
1871 | continue; |
1872 | } |
1873 | if (RC[i+B] != SC[i+Pos]) |
1874 | return false; |
1875 | } |
1876 | |
1877 | unsigned Sub = 0; |
1878 | switch (Pos) { |
1879 | case 0: |
1880 | Sub = Hexagon::isub_lo; |
1881 | Low = true; |
1882 | break; |
1883 | case 16: |
1884 | Sub = Hexagon::isub_lo; |
1885 | Low = false; |
1886 | break; |
1887 | case 32: |
1888 | Sub = Hexagon::isub_hi; |
1889 | Low = true; |
1890 | break; |
1891 | case 48: |
1892 | Sub = Hexagon::isub_hi; |
1893 | Low = false; |
1894 | break; |
1895 | default: |
1896 | return false; |
1897 | } |
1898 | |
1899 | RH.Reg = Reg; |
1900 | RH.Sub = Sub; |
1901 | RH.Low = Low; |
1902 | // If the subregister is not valid with the register, set it to 0. |
1903 | if (!HBS::getFinalVRegClass(RR: RH, MRI)) |
1904 | RH.Sub = 0; |
1905 | |
1906 | return true; |
1907 | } |
1908 | |
1909 | bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc, |
1910 | unsigned OpNum) { |
1911 | auto *OpRC = HII.getRegClass(MCID: HII.get(Opcode: Opc), OpNum, TRI: &HRI, MF); |
1912 | auto *RRC = HBS::getFinalVRegClass(RR: R, MRI); |
1913 | return OpRC->hasSubClassEq(RC: RRC); |
1914 | } |
1915 | |
1916 | // Check if RC matches the pattern of a S2_packhl. If so, return true and |
1917 | // set the inputs Rs and Rt. |
1918 | bool BitSimplification::matchPackhl(unsigned SelfR, |
1919 | const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs, |
1920 | BitTracker::RegisterRef &Rt) { |
1921 | RegHalf L1, H1, L2, H2; |
1922 | |
1923 | if (!matchHalf(SelfR, RC, B: 0, RH&: L2) || !matchHalf(SelfR, RC, B: 16, RH&: L1)) |
1924 | return false; |
1925 | if (!matchHalf(SelfR, RC, B: 32, RH&: H2) || !matchHalf(SelfR, RC, B: 48, RH&: H1)) |
1926 | return false; |
1927 | |
1928 | // Rs = H1.L1, Rt = H2.L2 |
1929 | if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low) |
1930 | return false; |
1931 | if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low) |
1932 | return false; |
1933 | |
1934 | Rs = H1; |
1935 | Rt = H2; |
1936 | return true; |
1937 | } |
1938 | |
1939 | unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { |
1940 | return HLow ? LLow ? Hexagon::A2_combine_ll |
1941 | : Hexagon::A2_combine_lh |
1942 | : LLow ? Hexagon::A2_combine_hl |
1943 | : Hexagon::A2_combine_hh; |
1944 | } |
1945 | |
1946 | // If MI stores the upper halfword of a register (potentially obtained via |
1947 | // shifts or extracts), replace it with a storerf instruction. This could |
1948 | // cause the "extraction" code to become dead. |
1949 | bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { |
1950 | unsigned Opc = MI->getOpcode(); |
1951 | if (Opc != Hexagon::S2_storerh_io) |
1952 | return false; |
1953 | |
1954 | MachineOperand &ValOp = MI->getOperand(i: 2); |
1955 | BitTracker::RegisterRef RS = ValOp; |
1956 | if (!BT.has(Reg: RS.Reg)) |
1957 | return false; |
1958 | const BitTracker::RegisterCell &RC = BT.lookup(Reg: RS.Reg); |
1959 | RegHalf H; |
1960 | unsigned B = (RS.Sub == Hexagon::isub_hi) ? 32 : 0; |
1961 | if (!matchHalf(SelfR: 0, RC, B, RH&: H)) |
1962 | return false; |
1963 | if (H.Low) |
1964 | return false; |
1965 | MI->setDesc(HII.get(Opcode: Hexagon::S2_storerf_io)); |
1966 | ValOp.setReg(H.Reg); |
1967 | ValOp.setSubReg(H.Sub); |
1968 | return true; |
1969 | } |
1970 | |
1971 | // If MI stores a value known at compile-time, and the value is within a range |
1972 | // that avoids using constant-extenders, replace it with a store-immediate. |
1973 | bool BitSimplification::genStoreImmediate(MachineInstr *MI) { |
1974 | unsigned Opc = MI->getOpcode(); |
1975 | unsigned Align = 0; |
1976 | switch (Opc) { |
1977 | case Hexagon::S2_storeri_io: |
1978 | Align++; |
1979 | [[fallthrough]]; |
1980 | case Hexagon::S2_storerh_io: |
1981 | Align++; |
1982 | [[fallthrough]]; |
1983 | case Hexagon::S2_storerb_io: |
1984 | break; |
1985 | default: |
1986 | return false; |
1987 | } |
1988 | |
1989 | // Avoid stores to frame-indices (due to an unknown offset). |
1990 | if (!MI->getOperand(i: 0).isReg()) |
1991 | return false; |
1992 | MachineOperand &OffOp = MI->getOperand(i: 1); |
1993 | if (!OffOp.isImm()) |
1994 | return false; |
1995 | |
1996 | int64_t Off = OffOp.getImm(); |
1997 | // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x). |
1998 | if (!isUIntN(N: 6+Align, x: Off) || (Off & ((1<<Align)-1))) |
1999 | return false; |
2000 | // Source register: |
2001 | BitTracker::RegisterRef RS = MI->getOperand(i: 2); |
2002 | if (!BT.has(Reg: RS.Reg)) |
2003 | return false; |
2004 | const BitTracker::RegisterCell &RC = BT.lookup(Reg: RS.Reg); |
2005 | uint64_t U; |
2006 | if (!HBS::getConst(RC, B: 0, W: RC.width(), U)) |
2007 | return false; |
2008 | |
2009 | // Only consider 8-bit values to avoid constant-extenders. |
2010 | int V; |
2011 | switch (Opc) { |
2012 | case Hexagon::S2_storerb_io: |
2013 | V = int8_t(U); |
2014 | break; |
2015 | case Hexagon::S2_storerh_io: |
2016 | V = int16_t(U); |
2017 | break; |
2018 | case Hexagon::S2_storeri_io: |
2019 | V = int32_t(U); |
2020 | break; |
2021 | default: |
2022 | // Opc is already checked above to be one of the three store instructions. |
2023 | // This silences a -Wuninitialized false positive on GCC 5.4. |
2024 | llvm_unreachable("Unexpected store opcode" ); |
2025 | } |
2026 | if (!isInt<8>(x: V)) |
2027 | return false; |
2028 | |
2029 | MI->removeOperand(OpNo: 2); |
2030 | switch (Opc) { |
2031 | case Hexagon::S2_storerb_io: |
2032 | MI->setDesc(HII.get(Opcode: Hexagon::S4_storeirb_io)); |
2033 | break; |
2034 | case Hexagon::S2_storerh_io: |
2035 | MI->setDesc(HII.get(Opcode: Hexagon::S4_storeirh_io)); |
2036 | break; |
2037 | case Hexagon::S2_storeri_io: |
2038 | MI->setDesc(HII.get(Opcode: Hexagon::S4_storeiri_io)); |
2039 | break; |
2040 | } |
2041 | MI->addOperand(Op: MachineOperand::CreateImm(Val: V)); |
2042 | return true; |
2043 | } |
2044 | |
2045 | // If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the |
2046 | // last instruction in a sequence that results in something equivalent to |
2047 | // the pack-halfwords. The intent is to cause the entire sequence to become |
2048 | // dead. |
2049 | bool BitSimplification::genPackhl(MachineInstr *MI, |
2050 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2051 | unsigned Opc = MI->getOpcode(); |
2052 | if (Opc == Hexagon::S2_packhl) |
2053 | return false; |
2054 | BitTracker::RegisterRef Rs, Rt; |
2055 | if (!matchPackhl(SelfR: RD.Reg, RC, Rs, Rt)) |
2056 | return false; |
2057 | if (!validateReg(R: Rs, Opc: Hexagon::S2_packhl, OpNum: 1) || |
2058 | !validateReg(R: Rt, Opc: Hexagon::S2_packhl, OpNum: 2)) |
2059 | return false; |
2060 | |
2061 | MachineBasicBlock &B = *MI->getParent(); |
2062 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::DoubleRegsRegClass); |
2063 | DebugLoc DL = MI->getDebugLoc(); |
2064 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2065 | : MachineBasicBlock::iterator(MI); |
2066 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::S2_packhl), DestReg: NewR) |
2067 | .addReg(RegNo: Rs.Reg, flags: 0, SubReg: Rs.Sub) |
2068 | .addReg(RegNo: Rt.Reg, flags: 0, SubReg: Rt.Sub); |
2069 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
2070 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2071 | return true; |
2072 | } |
2073 | |
2074 | // If MI produces halfword of the input in the low half of the output, |
2075 | // replace it with zero-extend or extractu. |
2076 | bool BitSimplification::(MachineInstr *MI, |
2077 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2078 | RegHalf L; |
2079 | // Check for halfword in low 16 bits, zeros elsewhere. |
2080 | if (!matchHalf(SelfR: RD.Reg, RC, B: 0, RH&: L) || !HBS::isZero(RC, B: 16, W: 16)) |
2081 | return false; |
2082 | |
2083 | unsigned Opc = MI->getOpcode(); |
2084 | MachineBasicBlock &B = *MI->getParent(); |
2085 | DebugLoc DL = MI->getDebugLoc(); |
2086 | |
2087 | // Prefer zxth, since zxth can go in any slot, while extractu only in |
2088 | // slots 2 and 3. |
2089 | unsigned NewR = 0; |
2090 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2091 | : MachineBasicBlock::iterator(MI); |
2092 | if (L.Low && Opc != Hexagon::A2_zxth) { |
2093 | if (validateReg(R: L, Opc: Hexagon::A2_zxth, OpNum: 1)) { |
2094 | NewR = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
2095 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A2_zxth), DestReg: NewR) |
2096 | .addReg(RegNo: L.Reg, flags: 0, SubReg: L.Sub); |
2097 | } |
2098 | } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) { |
2099 | if (validateReg(R: L, Opc: Hexagon::S2_lsr_i_r, OpNum: 1)) { |
2100 | NewR = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
2101 | BuildMI(BB&: B, I: MI, MIMD: DL, MCID: HII.get(Opcode: Hexagon::S2_lsr_i_r), DestReg: NewR) |
2102 | .addReg(RegNo: L.Reg, flags: 0, SubReg: L.Sub) |
2103 | .addImm(Val: 16); |
2104 | } |
2105 | } |
2106 | if (NewR == 0) |
2107 | return false; |
2108 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
2109 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2110 | return true; |
2111 | } |
2112 | |
2113 | // If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the |
2114 | // combine. |
2115 | bool BitSimplification::genCombineHalf(MachineInstr *MI, |
2116 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2117 | RegHalf L, H; |
2118 | // Check for combine h/l |
2119 | if (!matchHalf(SelfR: RD.Reg, RC, B: 0, RH&: L) || !matchHalf(SelfR: RD.Reg, RC, B: 16, RH&: H)) |
2120 | return false; |
2121 | // Do nothing if this is just a reg copy. |
2122 | if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low) |
2123 | return false; |
2124 | |
2125 | unsigned Opc = MI->getOpcode(); |
2126 | unsigned COpc = getCombineOpcode(HLow: H.Low, LLow: L.Low); |
2127 | if (COpc == Opc) |
2128 | return false; |
2129 | if (!validateReg(R: H, Opc: COpc, OpNum: 1) || !validateReg(R: L, Opc: COpc, OpNum: 2)) |
2130 | return false; |
2131 | |
2132 | MachineBasicBlock &B = *MI->getParent(); |
2133 | DebugLoc DL = MI->getDebugLoc(); |
2134 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
2135 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2136 | : MachineBasicBlock::iterator(MI); |
2137 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: COpc), DestReg: NewR) |
2138 | .addReg(RegNo: H.Reg, flags: 0, SubReg: H.Sub) |
2139 | .addReg(RegNo: L.Reg, flags: 0, SubReg: L.Sub); |
2140 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
2141 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2142 | return true; |
2143 | } |
2144 | |
2145 | // If MI resets high bits of a register and keeps the lower ones, replace it |
2146 | // with zero-extend byte/half, and-immediate, or extractu, as appropriate. |
2147 | bool BitSimplification::(MachineInstr *MI, |
2148 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2149 | unsigned Opc = MI->getOpcode(); |
2150 | switch (Opc) { |
2151 | case Hexagon::A2_zxtb: |
2152 | case Hexagon::A2_zxth: |
2153 | case Hexagon::S2_extractu: |
2154 | return false; |
2155 | } |
2156 | if (Opc == Hexagon::A2_andir && MI->getOperand(i: 2).isImm()) { |
2157 | int32_t Imm = MI->getOperand(i: 2).getImm(); |
2158 | if (isInt<10>(x: Imm)) |
2159 | return false; |
2160 | } |
2161 | |
2162 | if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) |
2163 | return false; |
2164 | unsigned W = RC.width(); |
2165 | while (W > 0 && RC[W-1].is(T: 0)) |
2166 | W--; |
2167 | if (W == 0 || W == RC.width()) |
2168 | return false; |
2169 | unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb |
2170 | : (W == 16) ? Hexagon::A2_zxth |
2171 | : (W < 10) ? Hexagon::A2_andir |
2172 | : Hexagon::S2_extractu; |
2173 | MachineBasicBlock &B = *MI->getParent(); |
2174 | DebugLoc DL = MI->getDebugLoc(); |
2175 | |
2176 | for (auto &Op : MI->uses()) { |
2177 | if (!Op.isReg()) |
2178 | continue; |
2179 | BitTracker::RegisterRef RS = Op; |
2180 | if (!BT.has(Reg: RS.Reg)) |
2181 | continue; |
2182 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: RS.Reg); |
2183 | unsigned BN, BW; |
2184 | if (!HBS::getSubregMask(RR: RS, Begin&: BN, Width&: BW, MRI)) |
2185 | continue; |
2186 | if (BW < W || !HBS::isEqual(RC1: RC, B1: 0, RC2: SC, B2: BN, W)) |
2187 | continue; |
2188 | if (!validateReg(R: RS, Opc: NewOpc, OpNum: 1)) |
2189 | continue; |
2190 | |
2191 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
2192 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2193 | : MachineBasicBlock::iterator(MI); |
2194 | auto MIB = BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: NewOpc), DestReg: NewR) |
2195 | .addReg(RegNo: RS.Reg, flags: 0, SubReg: RS.Sub); |
2196 | if (NewOpc == Hexagon::A2_andir) |
2197 | MIB.addImm(Val: (1 << W) - 1); |
2198 | else if (NewOpc == Hexagon::S2_extractu) |
2199 | MIB.addImm(Val: W).addImm(Val: 0); |
2200 | HBS::replaceSubWithSub(OldR: RD.Reg, OldSR: RD.Sub, NewR, NewSR: 0, MRI); |
2201 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2202 | return true; |
2203 | } |
2204 | return false; |
2205 | } |
2206 | |
2207 | bool BitSimplification::genBitSplit(MachineInstr *MI, |
2208 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, |
2209 | const RegisterSet &AVs) { |
2210 | if (!GenBitSplit) |
2211 | return false; |
2212 | if (MaxBitSplit.getNumOccurrences()) { |
2213 | if (CountBitSplit >= MaxBitSplit) |
2214 | return false; |
2215 | } |
2216 | |
2217 | unsigned Opc = MI->getOpcode(); |
2218 | switch (Opc) { |
2219 | case Hexagon::A4_bitsplit: |
2220 | case Hexagon::A4_bitspliti: |
2221 | return false; |
2222 | } |
2223 | |
2224 | unsigned W = RC.width(); |
2225 | if (W != 32) |
2226 | return false; |
2227 | |
2228 | auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned { |
2229 | unsigned Z = C.width(); |
2230 | while (Z > 0 && C[Z-1].is(T: 0)) |
2231 | --Z; |
2232 | return C.width() - Z; |
2233 | }; |
2234 | |
2235 | // Count the number of leading zeros in the target RC. |
2236 | unsigned Z = ctlz(RC); |
2237 | if (Z == 0 || Z == W) |
2238 | return false; |
2239 | |
2240 | // A simplistic analysis: assume the source register (the one being split) |
2241 | // is fully unknown, and that all its bits are self-references. |
2242 | const BitTracker::BitValue &B0 = RC[0]; |
2243 | if (B0.Type != BitTracker::BitValue::Ref) |
2244 | return false; |
2245 | |
2246 | unsigned SrcR = B0.RefI.Reg; |
2247 | unsigned SrcSR = 0; |
2248 | unsigned Pos = B0.RefI.Pos; |
2249 | |
2250 | // All the non-zero bits should be consecutive bits from the same register. |
2251 | for (unsigned i = 1; i < W-Z; ++i) { |
2252 | const BitTracker::BitValue &V = RC[i]; |
2253 | if (V.Type != BitTracker::BitValue::Ref) |
2254 | return false; |
2255 | if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i) |
2256 | return false; |
2257 | } |
2258 | |
2259 | // Now, find the other bitfield among AVs. |
2260 | for (unsigned S = AVs.find_first(); S; S = AVs.find_next(Prev: S)) { |
2261 | // The number of leading zeros here should be the number of trailing |
2262 | // non-zeros in RC. |
2263 | unsigned SRC = MRI.getRegClass(Reg: S)->getID(); |
2264 | if (SRC != Hexagon::IntRegsRegClassID && |
2265 | SRC != Hexagon::DoubleRegsRegClassID) |
2266 | continue; |
2267 | if (!BT.has(Reg: S)) |
2268 | continue; |
2269 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: S); |
2270 | if (SC.width() != W || ctlz(SC) != W-Z) |
2271 | continue; |
2272 | // The Z lower bits should now match SrcR. |
2273 | const BitTracker::BitValue &S0 = SC[0]; |
2274 | if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR) |
2275 | continue; |
2276 | unsigned P = S0.RefI.Pos; |
2277 | |
2278 | if (Pos <= P && (Pos + W-Z) != P) |
2279 | continue; |
2280 | if (P < Pos && (P + Z) != Pos) |
2281 | continue; |
2282 | // The starting bitfield position must be at a subregister boundary. |
2283 | if (std::min(a: P, b: Pos) != 0 && std::min(a: P, b: Pos) != 32) |
2284 | continue; |
2285 | |
2286 | unsigned I; |
2287 | for (I = 1; I < Z; ++I) { |
2288 | const BitTracker::BitValue &V = SC[I]; |
2289 | if (V.Type != BitTracker::BitValue::Ref) |
2290 | break; |
2291 | if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I) |
2292 | break; |
2293 | } |
2294 | if (I != Z) |
2295 | continue; |
2296 | |
2297 | // Generate bitsplit where S is defined. |
2298 | if (MaxBitSplit.getNumOccurrences()) |
2299 | CountBitSplit++; |
2300 | MachineInstr *DefS = MRI.getVRegDef(Reg: S); |
2301 | assert(DefS != nullptr); |
2302 | DebugLoc DL = DefS->getDebugLoc(); |
2303 | MachineBasicBlock &B = *DefS->getParent(); |
2304 | auto At = DefS->isPHI() ? B.getFirstNonPHI() |
2305 | : MachineBasicBlock::iterator(DefS); |
2306 | if (MRI.getRegClass(Reg: SrcR)->getID() == Hexagon::DoubleRegsRegClassID) |
2307 | SrcSR = (std::min(a: Pos, b: P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo; |
2308 | if (!validateReg(R: {SrcR,SrcSR}, Opc: Hexagon::A4_bitspliti, OpNum: 1)) |
2309 | continue; |
2310 | unsigned ImmOp = Pos <= P ? W-Z : Z; |
2311 | |
2312 | // Find an existing bitsplit instruction if one already exists. |
2313 | unsigned NewR = 0; |
2314 | for (MachineInstr *In : NewMIs) { |
2315 | if (In->getOpcode() != Hexagon::A4_bitspliti) |
2316 | continue; |
2317 | MachineOperand &Op1 = In->getOperand(i: 1); |
2318 | if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR) |
2319 | continue; |
2320 | if (In->getOperand(i: 2).getImm() != ImmOp) |
2321 | continue; |
2322 | // Check if the target register is available here. |
2323 | MachineOperand &Op0 = In->getOperand(i: 0); |
2324 | MachineInstr *DefI = MRI.getVRegDef(Reg: Op0.getReg()); |
2325 | assert(DefI != nullptr); |
2326 | if (!MDT.dominates(A: DefI, B: &*At)) |
2327 | continue; |
2328 | |
2329 | // Found one that can be reused. |
2330 | assert(Op0.getSubReg() == 0); |
2331 | NewR = Op0.getReg(); |
2332 | break; |
2333 | } |
2334 | if (!NewR) { |
2335 | NewR = MRI.createVirtualRegister(RegClass: &Hexagon::DoubleRegsRegClass); |
2336 | auto NewBS = BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A4_bitspliti), DestReg: NewR) |
2337 | .addReg(RegNo: SrcR, flags: 0, SubReg: SrcSR) |
2338 | .addImm(Val: ImmOp); |
2339 | NewMIs.push_back(x: NewBS); |
2340 | } |
2341 | if (Pos <= P) { |
2342 | HBS::replaceRegWithSub(OldR: RD.Reg, NewR, NewSR: Hexagon::isub_lo, MRI); |
2343 | HBS::replaceRegWithSub(OldR: S, NewR, NewSR: Hexagon::isub_hi, MRI); |
2344 | } else { |
2345 | HBS::replaceRegWithSub(OldR: S, NewR, NewSR: Hexagon::isub_lo, MRI); |
2346 | HBS::replaceRegWithSub(OldR: RD.Reg, NewR, NewSR: Hexagon::isub_hi, MRI); |
2347 | } |
2348 | return true; |
2349 | } |
2350 | |
2351 | return false; |
2352 | } |
2353 | |
2354 | // Check for tstbit simplification opportunity, where the bit being checked |
2355 | // can be tracked back to another register. For example: |
2356 | // %2 = S2_lsr_i_r %1, 5 |
2357 | // %3 = S2_tstbit_i %2, 0 |
2358 | // => |
2359 | // %3 = S2_tstbit_i %1, 5 |
2360 | bool BitSimplification::simplifyTstbit(MachineInstr *MI, |
2361 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { |
2362 | unsigned Opc = MI->getOpcode(); |
2363 | if (Opc != Hexagon::S2_tstbit_i) |
2364 | return false; |
2365 | |
2366 | unsigned BN = MI->getOperand(i: 2).getImm(); |
2367 | BitTracker::RegisterRef RS = MI->getOperand(i: 1); |
2368 | unsigned F, W; |
2369 | DebugLoc DL = MI->getDebugLoc(); |
2370 | if (!BT.has(Reg: RS.Reg) || !HBS::getSubregMask(RR: RS, Begin&: F, Width&: W, MRI)) |
2371 | return false; |
2372 | MachineBasicBlock &B = *MI->getParent(); |
2373 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2374 | : MachineBasicBlock::iterator(MI); |
2375 | |
2376 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: RS.Reg); |
2377 | const BitTracker::BitValue &V = SC[F+BN]; |
2378 | if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { |
2379 | const TargetRegisterClass *TC = MRI.getRegClass(Reg: V.RefI.Reg); |
2380 | // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is |
2381 | // a double register, need to use a subregister and adjust bit |
2382 | // number. |
2383 | unsigned P = std::numeric_limits<unsigned>::max(); |
2384 | BitTracker::RegisterRef RR(V.RefI.Reg, 0); |
2385 | if (TC == &Hexagon::DoubleRegsRegClass) { |
2386 | P = V.RefI.Pos; |
2387 | RR.Sub = Hexagon::isub_lo; |
2388 | if (P >= 32) { |
2389 | P -= 32; |
2390 | RR.Sub = Hexagon::isub_hi; |
2391 | } |
2392 | } else if (TC == &Hexagon::IntRegsRegClass) { |
2393 | P = V.RefI.Pos; |
2394 | } |
2395 | if (P != std::numeric_limits<unsigned>::max()) { |
2396 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::PredRegsRegClass); |
2397 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::S2_tstbit_i), DestReg: NewR) |
2398 | .addReg(RegNo: RR.Reg, flags: 0, SubReg: RR.Sub) |
2399 | .addImm(Val: P); |
2400 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2401 | BT.put(RR: NewR, RC); |
2402 | return true; |
2403 | } |
2404 | } else if (V.is(T: 0) || V.is(T: 1)) { |
2405 | Register NewR = MRI.createVirtualRegister(RegClass: &Hexagon::PredRegsRegClass); |
2406 | unsigned NewOpc = V.is(T: 0) ? Hexagon::PS_false : Hexagon::PS_true; |
2407 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: NewOpc), DestReg: NewR); |
2408 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2409 | return true; |
2410 | } |
2411 | |
2412 | return false; |
2413 | } |
2414 | |
2415 | // Detect whether RD is a bitfield extract (sign- or zero-extended) of |
2416 | // some register from the AVs set. Create a new corresponding instruction |
2417 | // at the location of MI. The intent is to recognize situations where |
2418 | // a sequence of instructions performs an operation that is equivalent to |
2419 | // an extract operation, such as a shift left followed by a shift right. |
2420 | bool BitSimplification::(MachineInstr *MI, |
2421 | BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, |
2422 | const RegisterSet &AVs) { |
2423 | if (!GenExtract) |
2424 | return false; |
2425 | if (MaxExtract.getNumOccurrences()) { |
2426 | if (CountExtract >= MaxExtract) |
2427 | return false; |
2428 | CountExtract++; |
2429 | } |
2430 | |
2431 | unsigned W = RC.width(); |
2432 | unsigned RW = W; |
2433 | unsigned Len; |
2434 | bool Signed; |
2435 | |
2436 | // The code is mostly class-independent, except for the part that generates |
2437 | // the extract instruction, and establishes the source register (in case it |
2438 | // needs to use a subregister). |
2439 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RR: RD, MRI); |
2440 | if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass) |
2441 | return false; |
2442 | assert(RD.Sub == 0); |
2443 | |
2444 | // Observation: |
2445 | // If the cell has a form of 00..0xx..x with k zeros and n remaining |
2446 | // bits, this could be an extractu of the n bits, but it could also be |
2447 | // an extractu of a longer field which happens to have 0s in the top |
2448 | // bit positions. |
2449 | // The same logic applies to sign-extended fields. |
2450 | // |
2451 | // Do not check for the extended extracts, since it would expand the |
2452 | // search space quite a bit. The search may be expensive as it is. |
2453 | |
2454 | const BitTracker::BitValue &TopV = RC[W-1]; |
2455 | |
2456 | // Eliminate candidates that have self-referential bits, since they |
2457 | // cannot be extracts from other registers. Also, skip registers that |
2458 | // have compile-time constant values. |
2459 | bool IsConst = true; |
2460 | for (unsigned I = 0; I != W; ++I) { |
2461 | const BitTracker::BitValue &V = RC[I]; |
2462 | if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg) |
2463 | return false; |
2464 | IsConst = IsConst && (V.is(T: 0) || V.is(T: 1)); |
2465 | } |
2466 | if (IsConst) |
2467 | return false; |
2468 | |
2469 | if (TopV.is(T: 0) || TopV.is(T: 1)) { |
2470 | bool S = TopV.is(T: 1); |
2471 | for (--W; W > 0 && RC[W-1].is(T: S); --W) |
2472 | ; |
2473 | Len = W; |
2474 | Signed = S; |
2475 | // The sign bit must be a part of the field being extended. |
2476 | if (Signed) |
2477 | ++Len; |
2478 | } else { |
2479 | // This could still be a sign-extended extract. |
2480 | assert(TopV.Type == BitTracker::BitValue::Ref); |
2481 | if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1) |
2482 | return false; |
2483 | for (--W; W > 0 && RC[W-1] == TopV; --W) |
2484 | ; |
2485 | // The top bits of RC are copies of TopV. One occurrence of TopV will |
2486 | // be a part of the field. |
2487 | Len = W + 1; |
2488 | Signed = true; |
2489 | } |
2490 | |
2491 | // This would be just a copy. It should be handled elsewhere. |
2492 | if (Len == RW) |
2493 | return false; |
2494 | |
2495 | LLVM_DEBUG({ |
2496 | dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub) |
2497 | << ", MI: " << *MI; |
2498 | dbgs() << "Cell: " << RC << '\n'; |
2499 | dbgs() << "Expected bitfield size: " << Len << " bits, " |
2500 | << (Signed ? "sign" : "zero" ) << "-extended\n" ; |
2501 | }); |
2502 | |
2503 | bool Changed = false; |
2504 | |
2505 | for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(Prev: R)) { |
2506 | if (!BT.has(Reg: R)) |
2507 | continue; |
2508 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: R); |
2509 | unsigned SW = SC.width(); |
2510 | |
2511 | // The source can be longer than the destination, as long as its size is |
2512 | // a multiple of the size of the destination. Also, we would need to be |
2513 | // able to refer to the subregister in the source that would be of the |
2514 | // same size as the destination, but only check the sizes here. |
2515 | if (SW < RW || (SW % RW) != 0) |
2516 | continue; |
2517 | |
2518 | // The field can start at any offset in SC as long as it contains Len |
2519 | // bits and does not cross subregister boundary (if the source register |
2520 | // is longer than the destination). |
2521 | unsigned Off = 0; |
2522 | while (Off <= SW-Len) { |
2523 | unsigned OE = (Off+Len)/RW; |
2524 | if (OE != Off/RW) { |
2525 | // The assumption here is that if the source (R) is longer than the |
2526 | // destination, then the destination is a sequence of words of |
2527 | // size RW, and each such word in R can be accessed via a subregister. |
2528 | // |
2529 | // If the beginning and the end of the field cross the subregister |
2530 | // boundary, advance to the next subregister. |
2531 | Off = OE*RW; |
2532 | continue; |
2533 | } |
2534 | if (HBS::isEqual(RC1: RC, B1: 0, RC2: SC, B2: Off, W: Len)) |
2535 | break; |
2536 | ++Off; |
2537 | } |
2538 | |
2539 | if (Off > SW-Len) |
2540 | continue; |
2541 | |
2542 | // Found match. |
2543 | unsigned ExtOpc = 0; |
2544 | if (Off == 0) { |
2545 | if (Len == 8) |
2546 | ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb; |
2547 | else if (Len == 16) |
2548 | ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth; |
2549 | else if (Len < 10 && !Signed) |
2550 | ExtOpc = Hexagon::A2_andir; |
2551 | } |
2552 | if (ExtOpc == 0) { |
2553 | ExtOpc = |
2554 | Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp) |
2555 | : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup); |
2556 | } |
2557 | unsigned SR = 0; |
2558 | // This only recognizes isub_lo and isub_hi. |
2559 | if (RW != SW && RW*2 != SW) |
2560 | continue; |
2561 | if (RW != SW) |
2562 | SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi; |
2563 | Off = Off % RW; |
2564 | |
2565 | if (!validateReg(R: {R,SR}, Opc: ExtOpc, OpNum: 1)) |
2566 | continue; |
2567 | |
2568 | // Don't generate the same instruction as the one being optimized. |
2569 | if (MI->getOpcode() == ExtOpc) { |
2570 | // All possible ExtOpc's have the source in operand(1). |
2571 | const MachineOperand &SrcOp = MI->getOperand(i: 1); |
2572 | if (SrcOp.getReg() == R) |
2573 | continue; |
2574 | } |
2575 | |
2576 | DebugLoc DL = MI->getDebugLoc(); |
2577 | MachineBasicBlock &B = *MI->getParent(); |
2578 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
2579 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2580 | : MachineBasicBlock::iterator(MI); |
2581 | auto MIB = BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: ExtOpc), DestReg: NewR) |
2582 | .addReg(RegNo: R, flags: 0, SubReg: SR); |
2583 | switch (ExtOpc) { |
2584 | case Hexagon::A2_sxtb: |
2585 | case Hexagon::A2_zxtb: |
2586 | case Hexagon::A2_sxth: |
2587 | case Hexagon::A2_zxth: |
2588 | break; |
2589 | case Hexagon::A2_andir: |
2590 | MIB.addImm(Val: (1u << Len) - 1); |
2591 | break; |
2592 | case Hexagon::S4_extract: |
2593 | case Hexagon::S2_extractu: |
2594 | case Hexagon::S4_extractp: |
2595 | case Hexagon::S2_extractup: |
2596 | MIB.addImm(Val: Len) |
2597 | .addImm(Val: Off); |
2598 | break; |
2599 | default: |
2600 | llvm_unreachable("Unexpected opcode" ); |
2601 | } |
2602 | |
2603 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2604 | BT.put(RR: BitTracker::RegisterRef(NewR), RC); |
2605 | Changed = true; |
2606 | break; |
2607 | } |
2608 | |
2609 | return Changed; |
2610 | } |
2611 | |
2612 | bool BitSimplification::simplifyRCmp0(MachineInstr *MI, |
2613 | BitTracker::RegisterRef RD) { |
2614 | unsigned Opc = MI->getOpcode(); |
2615 | if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi) |
2616 | return false; |
2617 | MachineOperand &CmpOp = MI->getOperand(i: 2); |
2618 | if (!CmpOp.isImm() || CmpOp.getImm() != 0) |
2619 | return false; |
2620 | |
2621 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RR: RD, MRI); |
2622 | if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass) |
2623 | return false; |
2624 | assert(RD.Sub == 0); |
2625 | |
2626 | MachineBasicBlock &B = *MI->getParent(); |
2627 | const DebugLoc &DL = MI->getDebugLoc(); |
2628 | auto At = MI->isPHI() ? B.getFirstNonPHI() |
2629 | : MachineBasicBlock::iterator(MI); |
2630 | bool KnownZ = true; |
2631 | bool KnownNZ = false; |
2632 | |
2633 | BitTracker::RegisterRef SR = MI->getOperand(i: 1); |
2634 | if (!BT.has(Reg: SR.Reg)) |
2635 | return false; |
2636 | const BitTracker::RegisterCell &SC = BT.lookup(Reg: SR.Reg); |
2637 | unsigned F, W; |
2638 | if (!HBS::getSubregMask(RR: SR, Begin&: F, Width&: W, MRI)) |
2639 | return false; |
2640 | |
2641 | for (uint16_t I = F; I != F+W; ++I) { |
2642 | const BitTracker::BitValue &V = SC[I]; |
2643 | if (!V.is(T: 0)) |
2644 | KnownZ = false; |
2645 | if (V.is(T: 1)) |
2646 | KnownNZ = true; |
2647 | } |
2648 | |
2649 | auto ReplaceWithConst = [&](int C) { |
2650 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
2651 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::A2_tfrsi), DestReg: NewR) |
2652 | .addImm(Val: C); |
2653 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2654 | BitTracker::RegisterCell NewRC(W); |
2655 | for (uint16_t I = 0; I != W; ++I) { |
2656 | NewRC[I] = BitTracker::BitValue(C & 1); |
2657 | C = unsigned(C) >> 1; |
2658 | } |
2659 | BT.put(RR: BitTracker::RegisterRef(NewR), RC: NewRC); |
2660 | return true; |
2661 | }; |
2662 | |
2663 | auto IsNonZero = [] (const MachineOperand &Op) { |
2664 | if (Op.isGlobal() || Op.isBlockAddress()) |
2665 | return true; |
2666 | if (Op.isImm()) |
2667 | return Op.getImm() != 0; |
2668 | if (Op.isCImm()) |
2669 | return !Op.getCImm()->isZero(); |
2670 | if (Op.isFPImm()) |
2671 | return !Op.getFPImm()->isZero(); |
2672 | return false; |
2673 | }; |
2674 | |
2675 | auto IsZero = [] (const MachineOperand &Op) { |
2676 | if (Op.isGlobal() || Op.isBlockAddress()) |
2677 | return false; |
2678 | if (Op.isImm()) |
2679 | return Op.getImm() == 0; |
2680 | if (Op.isCImm()) |
2681 | return Op.getCImm()->isZero(); |
2682 | if (Op.isFPImm()) |
2683 | return Op.getFPImm()->isZero(); |
2684 | return false; |
2685 | }; |
2686 | |
2687 | // If the source register is known to be 0 or non-0, the comparison can |
2688 | // be folded to a load of a constant. |
2689 | if (KnownZ || KnownNZ) { |
2690 | assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0" ); |
2691 | return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi)); |
2692 | } |
2693 | |
2694 | // Special case: if the compare comes from a C2_muxii, then we know the |
2695 | // two possible constants that can be the source value. |
2696 | MachineInstr *InpDef = MRI.getVRegDef(Reg: SR.Reg); |
2697 | if (!InpDef) |
2698 | return false; |
2699 | if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) { |
2700 | MachineOperand &Src1 = InpDef->getOperand(i: 2); |
2701 | MachineOperand &Src2 = InpDef->getOperand(i: 3); |
2702 | // Check if both are non-zero. |
2703 | bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2); |
2704 | if (KnownNZ1 && KnownNZ2) |
2705 | return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi); |
2706 | // Check if both are zero. |
2707 | bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2); |
2708 | if (KnownZ1 && KnownZ2) |
2709 | return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi); |
2710 | |
2711 | // If for both operands we know that they are either 0 or non-0, |
2712 | // replace the comparison with a C2_muxii, using the same predicate |
2713 | // register, but with operands substituted with 0/1 accordingly. |
2714 | if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) { |
2715 | Register NewR = MRI.createVirtualRegister(RegClass: FRC); |
2716 | BuildMI(BB&: B, I: At, MIMD: DL, MCID: HII.get(Opcode: Hexagon::C2_muxii), DestReg: NewR) |
2717 | .addReg(RegNo: InpDef->getOperand(i: 1).getReg()) |
2718 | .addImm(Val: KnownZ1 == (Opc == Hexagon::A4_rcmpeqi)) |
2719 | .addImm(Val: KnownZ2 == (Opc == Hexagon::A4_rcmpeqi)); |
2720 | HBS::replaceReg(OldR: RD.Reg, NewR, MRI); |
2721 | // Create a new cell with only the least significant bit unknown. |
2722 | BitTracker::RegisterCell NewRC(W); |
2723 | NewRC[0] = BitTracker::BitValue::self(); |
2724 | NewRC.fill(B: 1, E: W, V: BitTracker::BitValue::Zero); |
2725 | BT.put(RR: BitTracker::RegisterRef(NewR), RC: NewRC); |
2726 | return true; |
2727 | } |
2728 | } |
2729 | |
2730 | return false; |
2731 | } |
2732 | |
2733 | bool BitSimplification::processBlock(MachineBasicBlock &B, |
2734 | const RegisterSet &AVs) { |
2735 | if (!BT.reached(B: &B)) |
2736 | return false; |
2737 | bool Changed = false; |
2738 | RegisterSet AVB = AVs; |
2739 | RegisterSet Defs; |
2740 | |
2741 | for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Rs: Defs)) { |
2742 | MachineInstr *MI = &*I; |
2743 | Defs.clear(); |
2744 | HBS::getInstrDefs(MI: *MI, Defs); |
2745 | |
2746 | unsigned Opc = MI->getOpcode(); |
2747 | if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE) |
2748 | continue; |
2749 | |
2750 | if (MI->mayStore()) { |
2751 | bool T = genStoreUpperHalf(MI); |
2752 | T = T || genStoreImmediate(MI); |
2753 | Changed |= T; |
2754 | continue; |
2755 | } |
2756 | |
2757 | if (Defs.count() != 1) |
2758 | continue; |
2759 | const MachineOperand &Op0 = MI->getOperand(i: 0); |
2760 | if (!Op0.isReg() || !Op0.isDef()) |
2761 | continue; |
2762 | BitTracker::RegisterRef RD = Op0; |
2763 | if (!BT.has(Reg: RD.Reg)) |
2764 | continue; |
2765 | const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RR: RD, MRI); |
2766 | const BitTracker::RegisterCell &RC = BT.lookup(Reg: RD.Reg); |
2767 | |
2768 | if (FRC->getID() == Hexagon::DoubleRegsRegClassID) { |
2769 | bool T = genPackhl(MI, RD, RC); |
2770 | T = T || simplifyExtractLow(MI, RD, RC, AVs: AVB); |
2771 | Changed |= T; |
2772 | continue; |
2773 | } |
2774 | |
2775 | if (FRC->getID() == Hexagon::IntRegsRegClassID) { |
2776 | bool T = genBitSplit(MI, RD, RC, AVs: AVB); |
2777 | T = T || simplifyExtractLow(MI, RD, RC, AVs: AVB); |
2778 | T = T || genExtractHalf(MI, RD, RC); |
2779 | T = T || genCombineHalf(MI, RD, RC); |
2780 | T = T || genExtractLow(MI, RD, RC); |
2781 | T = T || simplifyRCmp0(MI, RD); |
2782 | Changed |= T; |
2783 | continue; |
2784 | } |
2785 | |
2786 | if (FRC->getID() == Hexagon::PredRegsRegClassID) { |
2787 | bool T = simplifyTstbit(MI, RD, RC); |
2788 | Changed |= T; |
2789 | continue; |
2790 | } |
2791 | } |
2792 | return Changed; |
2793 | } |
2794 | |
2795 | bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { |
2796 | if (skipFunction(F: MF.getFunction())) |
2797 | return false; |
2798 | |
2799 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
2800 | auto &HRI = *HST.getRegisterInfo(); |
2801 | auto &HII = *HST.getInstrInfo(); |
2802 | |
2803 | MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
2804 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
2805 | bool Changed; |
2806 | |
2807 | Changed = DeadCodeElimination(MF, *MDT).run(); |
2808 | |
2809 | const HexagonEvaluator HE(HRI, MRI, HII, MF); |
2810 | BitTracker BT(HE, MF); |
2811 | LLVM_DEBUG(BT.trace(true)); |
2812 | BT.run(); |
2813 | |
2814 | MachineBasicBlock &Entry = MF.front(); |
2815 | |
2816 | RegisterSet AIG; // Available registers for IG. |
2817 | ConstGeneration ImmG(BT, HII, MRI); |
2818 | Changed |= visitBlock(B&: Entry, T&: ImmG, AVs&: AIG); |
2819 | |
2820 | RegisterSet ARE; // Available registers for RIE. |
2821 | RedundantInstrElimination RIE(BT, HII, HRI, MRI); |
2822 | bool Ried = visitBlock(B&: Entry, T&: RIE, AVs&: ARE); |
2823 | if (Ried) { |
2824 | Changed = true; |
2825 | BT.run(); |
2826 | } |
2827 | |
2828 | RegisterSet ACG; // Available registers for CG. |
2829 | CopyGeneration CopyG(BT, HII, HRI, MRI); |
2830 | Changed |= visitBlock(B&: Entry, T&: CopyG, AVs&: ACG); |
2831 | |
2832 | RegisterSet ACP; // Available registers for CP. |
2833 | CopyPropagation CopyP(HRI, MRI); |
2834 | Changed |= visitBlock(B&: Entry, T&: CopyP, AVs&: ACP); |
2835 | |
2836 | Changed = DeadCodeElimination(MF, *MDT).run() || Changed; |
2837 | |
2838 | BT.run(); |
2839 | RegisterSet ABS; // Available registers for BS. |
2840 | BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF); |
2841 | Changed |= visitBlock(B&: Entry, T&: BitS, AVs&: ABS); |
2842 | |
2843 | Changed = DeadCodeElimination(MF, *MDT).run() || Changed; |
2844 | |
2845 | if (Changed) { |
2846 | for (auto &B : MF) |
2847 | for (auto &I : B) |
2848 | I.clearKillInfo(); |
2849 | DeadCodeElimination(MF, *MDT).run(); |
2850 | } |
2851 | return Changed; |
2852 | } |
2853 | |
2854 | // Recognize loops where the code at the end of the loop matches the code |
2855 | // before the entry of the loop, and the matching code is such that is can |
2856 | // be simplified. This pass relies on the bit simplification above and only |
2857 | // prepares code in a way that can be handled by the bit simplifcation. |
2858 | // |
2859 | // This is the motivating testcase (and explanation): |
2860 | // |
2861 | // { |
2862 | // loop0(.LBB0_2, r1) // %for.body.preheader |
2863 | // r5:4 = memd(r0++#8) |
2864 | // } |
2865 | // { |
2866 | // r3 = lsr(r4, #16) |
2867 | // r7:6 = combine(r5, r5) |
2868 | // } |
2869 | // { |
2870 | // r3 = insert(r5, #16, #16) |
2871 | // r7:6 = vlsrw(r7:6, #16) |
2872 | // } |
2873 | // .LBB0_2: |
2874 | // { |
2875 | // memh(r2+#4) = r5 |
2876 | // memh(r2+#6) = r6 # R6 is really R5.H |
2877 | // } |
2878 | // { |
2879 | // r2 = add(r2, #8) |
2880 | // memh(r2+#0) = r4 |
2881 | // memh(r2+#2) = r3 # R3 is really R4.H |
2882 | // } |
2883 | // { |
2884 | // r5:4 = memd(r0++#8) |
2885 | // } |
2886 | // { # "Shuffling" code that sets up R3 and R6 |
2887 | // r3 = lsr(r4, #16) # so that their halves can be stored in the |
2888 | // r7:6 = combine(r5, r5) # next iteration. This could be folded into |
2889 | // } # the stores if the code was at the beginning |
2890 | // { # of the loop iteration. Since the same code |
2891 | // r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved |
2892 | // r7:6 = vlsrw(r7:6, #16) # there. |
2893 | // }:endloop0 |
2894 | // |
2895 | // |
2896 | // The outcome: |
2897 | // |
2898 | // { |
2899 | // loop0(.LBB0_2, r1) |
2900 | // r5:4 = memd(r0++#8) |
2901 | // } |
2902 | // .LBB0_2: |
2903 | // { |
2904 | // memh(r2+#4) = r5 |
2905 | // memh(r2+#6) = r5.h |
2906 | // } |
2907 | // { |
2908 | // r2 = add(r2, #8) |
2909 | // memh(r2+#0) = r4 |
2910 | // memh(r2+#2) = r4.h |
2911 | // } |
2912 | // { |
2913 | // r5:4 = memd(r0++#8) |
2914 | // }:endloop0 |
2915 | |
2916 | namespace llvm { |
2917 | |
2918 | FunctionPass *createHexagonLoopRescheduling(); |
2919 | void initializeHexagonLoopReschedulingPass(PassRegistry&); |
2920 | |
2921 | } // end namespace llvm |
2922 | |
2923 | namespace { |
2924 | |
2925 | class HexagonLoopRescheduling : public MachineFunctionPass { |
2926 | public: |
2927 | static char ID; |
2928 | |
2929 | HexagonLoopRescheduling() : MachineFunctionPass(ID) { |
2930 | initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry()); |
2931 | } |
2932 | |
2933 | bool runOnMachineFunction(MachineFunction &MF) override; |
2934 | |
2935 | private: |
2936 | const HexagonInstrInfo *HII = nullptr; |
2937 | const HexagonRegisterInfo *HRI = nullptr; |
2938 | MachineRegisterInfo *MRI = nullptr; |
2939 | BitTracker *BTP = nullptr; |
2940 | |
2941 | struct LoopCand { |
2942 | LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb, |
2943 | MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {} |
2944 | |
2945 | MachineBasicBlock *LB, *PB, *EB; |
2946 | }; |
2947 | using InstrList = std::vector<MachineInstr *>; |
2948 | struct InstrGroup { |
2949 | BitTracker::RegisterRef Inp, Out; |
2950 | InstrList Ins; |
2951 | }; |
2952 | struct PhiInfo { |
2953 | PhiInfo(MachineInstr &P, MachineBasicBlock &B); |
2954 | |
2955 | unsigned DefR; |
2956 | BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register |
2957 | MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block |
2958 | }; |
2959 | |
2960 | static unsigned getDefReg(const MachineInstr *MI); |
2961 | bool isConst(unsigned Reg) const; |
2962 | bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const; |
2963 | bool isStoreInput(const MachineInstr *MI, unsigned DefR) const; |
2964 | bool isShuffleOf(unsigned OutR, unsigned InpR) const; |
2965 | bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, |
2966 | unsigned &InpR2) const; |
2967 | void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, |
2968 | MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); |
2969 | bool processLoop(LoopCand &C); |
2970 | }; |
2971 | |
2972 | } // end anonymous namespace |
2973 | |
2974 | char HexagonLoopRescheduling::ID = 0; |
2975 | |
2976 | INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched" , |
2977 | "Hexagon Loop Rescheduling" , false, false) |
2978 | |
2979 | HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, |
2980 | MachineBasicBlock &B) { |
2981 | DefR = HexagonLoopRescheduling::getDefReg(MI: &P); |
2982 | LB = &B; |
2983 | PB = nullptr; |
2984 | for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) { |
2985 | const MachineOperand &OpB = P.getOperand(i: i+1); |
2986 | if (OpB.getMBB() == &B) { |
2987 | LR = P.getOperand(i); |
2988 | continue; |
2989 | } |
2990 | PB = OpB.getMBB(); |
2991 | PR = P.getOperand(i); |
2992 | } |
2993 | } |
2994 | |
2995 | unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { |
2996 | RegisterSet Defs; |
2997 | HBS::getInstrDefs(MI: *MI, Defs); |
2998 | if (Defs.count() != 1) |
2999 | return 0; |
3000 | return Defs.find_first(); |
3001 | } |
3002 | |
3003 | bool HexagonLoopRescheduling::isConst(unsigned Reg) const { |
3004 | if (!BTP->has(Reg)) |
3005 | return false; |
3006 | const BitTracker::RegisterCell &RC = BTP->lookup(Reg); |
3007 | for (unsigned i = 0, w = RC.width(); i < w; ++i) { |
3008 | const BitTracker::BitValue &V = RC[i]; |
3009 | if (!V.is(T: 0) && !V.is(T: 1)) |
3010 | return false; |
3011 | } |
3012 | return true; |
3013 | } |
3014 | |
3015 | bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, |
3016 | unsigned DefR) const { |
3017 | unsigned Opc = MI->getOpcode(); |
3018 | switch (Opc) { |
3019 | case TargetOpcode::COPY: |
3020 | case Hexagon::S2_lsr_i_r: |
3021 | case Hexagon::S2_asr_i_r: |
3022 | case Hexagon::S2_asl_i_r: |
3023 | case Hexagon::S2_lsr_i_p: |
3024 | case Hexagon::S2_asr_i_p: |
3025 | case Hexagon::S2_asl_i_p: |
3026 | case Hexagon::S2_insert: |
3027 | case Hexagon::A2_or: |
3028 | case Hexagon::A2_orp: |
3029 | case Hexagon::A2_and: |
3030 | case Hexagon::A2_andp: |
3031 | case Hexagon::A2_combinew: |
3032 | case Hexagon::A4_combineri: |
3033 | case Hexagon::A4_combineir: |
3034 | case Hexagon::A2_combineii: |
3035 | case Hexagon::A4_combineii: |
3036 | case Hexagon::A2_combine_ll: |
3037 | case Hexagon::A2_combine_lh: |
3038 | case Hexagon::A2_combine_hl: |
3039 | case Hexagon::A2_combine_hh: |
3040 | return true; |
3041 | } |
3042 | return false; |
3043 | } |
3044 | |
3045 | bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, |
3046 | unsigned InpR) const { |
3047 | for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { |
3048 | const MachineOperand &Op = MI->getOperand(i); |
3049 | if (!Op.isReg()) |
3050 | continue; |
3051 | if (Op.getReg() == InpR) |
3052 | return i == n-1; |
3053 | } |
3054 | return false; |
3055 | } |
3056 | |
3057 | bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { |
3058 | if (!BTP->has(Reg: OutR) || !BTP->has(Reg: InpR)) |
3059 | return false; |
3060 | const BitTracker::RegisterCell &OutC = BTP->lookup(Reg: OutR); |
3061 | for (unsigned i = 0, w = OutC.width(); i < w; ++i) { |
3062 | const BitTracker::BitValue &V = OutC[i]; |
3063 | if (V.Type != BitTracker::BitValue::Ref) |
3064 | continue; |
3065 | if (V.RefI.Reg != InpR) |
3066 | return false; |
3067 | } |
3068 | return true; |
3069 | } |
3070 | |
3071 | bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, |
3072 | unsigned OutR2, unsigned &InpR2) const { |
3073 | if (!BTP->has(Reg: OutR1) || !BTP->has(Reg: InpR1) || !BTP->has(Reg: OutR2)) |
3074 | return false; |
3075 | const BitTracker::RegisterCell &OutC1 = BTP->lookup(Reg: OutR1); |
3076 | const BitTracker::RegisterCell &OutC2 = BTP->lookup(Reg: OutR2); |
3077 | unsigned W = OutC1.width(); |
3078 | unsigned MatchR = 0; |
3079 | if (W != OutC2.width()) |
3080 | return false; |
3081 | for (unsigned i = 0; i < W; ++i) { |
3082 | const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; |
3083 | if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One) |
3084 | return false; |
3085 | if (V1.Type != BitTracker::BitValue::Ref) |
3086 | continue; |
3087 | if (V1.RefI.Pos != V2.RefI.Pos) |
3088 | return false; |
3089 | if (V1.RefI.Reg != InpR1) |
3090 | return false; |
3091 | if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2) |
3092 | return false; |
3093 | if (!MatchR) |
3094 | MatchR = V2.RefI.Reg; |
3095 | else if (V2.RefI.Reg != MatchR) |
3096 | return false; |
3097 | } |
3098 | InpR2 = MatchR; |
3099 | return true; |
3100 | } |
3101 | |
3102 | void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, |
3103 | MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, |
3104 | unsigned NewPredR) { |
3105 | DenseMap<unsigned,unsigned> RegMap; |
3106 | |
3107 | const TargetRegisterClass *PhiRC = MRI->getRegClass(Reg: NewPredR); |
3108 | Register PhiR = MRI->createVirtualRegister(RegClass: PhiRC); |
3109 | BuildMI(BB&: LB, I: At, MIMD: At->getDebugLoc(), MCID: HII->get(Opcode: TargetOpcode::PHI), DestReg: PhiR) |
3110 | .addReg(RegNo: NewPredR) |
3111 | .addMBB(MBB: &PB) |
3112 | .addReg(RegNo: G.Inp.Reg) |
3113 | .addMBB(MBB: &LB); |
3114 | RegMap.insert(KV: std::make_pair(x&: G.Inp.Reg, y&: PhiR)); |
3115 | |
3116 | for (const MachineInstr *SI : llvm::reverse(C&: G.Ins)) { |
3117 | unsigned DR = getDefReg(MI: SI); |
3118 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: DR); |
3119 | Register NewDR = MRI->createVirtualRegister(RegClass: RC); |
3120 | DebugLoc DL = SI->getDebugLoc(); |
3121 | |
3122 | auto MIB = BuildMI(BB&: LB, I: At, MIMD: DL, MCID: HII->get(Opcode: SI->getOpcode()), DestReg: NewDR); |
3123 | for (const MachineOperand &Op : SI->operands()) { |
3124 | if (!Op.isReg()) { |
3125 | MIB.add(MO: Op); |
3126 | continue; |
3127 | } |
3128 | if (!Op.isUse()) |
3129 | continue; |
3130 | unsigned UseR = RegMap[Op.getReg()]; |
3131 | MIB.addReg(RegNo: UseR, flags: 0, SubReg: Op.getSubReg()); |
3132 | } |
3133 | RegMap.insert(KV: std::make_pair(x&: DR, y&: NewDR)); |
3134 | } |
3135 | |
3136 | HBS::replaceReg(OldR: OldPhiR, NewR: RegMap[G.Out.Reg], MRI&: *MRI); |
3137 | } |
3138 | |
3139 | bool HexagonLoopRescheduling::processLoop(LoopCand &C) { |
3140 | LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB) |
3141 | << "\n" ); |
3142 | std::vector<PhiInfo> Phis; |
3143 | for (auto &I : *C.LB) { |
3144 | if (!I.isPHI()) |
3145 | break; |
3146 | unsigned PR = getDefReg(MI: &I); |
3147 | if (isConst(Reg: PR)) |
3148 | continue; |
3149 | bool BadUse = false, GoodUse = false; |
3150 | for (const MachineOperand &MO : MRI->use_operands(Reg: PR)) { |
3151 | const MachineInstr *UseI = MO.getParent(); |
3152 | if (UseI->getParent() != C.LB) { |
3153 | BadUse = true; |
3154 | break; |
3155 | } |
3156 | if (isBitShuffle(MI: UseI, DefR: PR) || isStoreInput(MI: UseI, InpR: PR)) |
3157 | GoodUse = true; |
3158 | } |
3159 | if (BadUse || !GoodUse) |
3160 | continue; |
3161 | |
3162 | Phis.push_back(x: PhiInfo(I, *C.LB)); |
3163 | } |
3164 | |
3165 | LLVM_DEBUG({ |
3166 | dbgs() << "Phis: {" ; |
3167 | for (auto &I : Phis) { |
3168 | dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi(" |
3169 | << printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber() |
3170 | << ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b" |
3171 | << I.LB->getNumber() << ')'; |
3172 | } |
3173 | dbgs() << " }\n" ; |
3174 | }); |
3175 | |
3176 | if (Phis.empty()) |
3177 | return false; |
3178 | |
3179 | bool Changed = false; |
3180 | InstrList ShufIns; |
3181 | |
3182 | // Go backwards in the block: for each bit shuffling instruction, check |
3183 | // if that instruction could potentially be moved to the front of the loop: |
3184 | // the output of the loop cannot be used in a non-shuffling instruction |
3185 | // in this loop. |
3186 | for (MachineInstr &MI : llvm::reverse(C&: *C.LB)) { |
3187 | if (MI.isTerminator()) |
3188 | continue; |
3189 | if (MI.isPHI()) |
3190 | break; |
3191 | |
3192 | RegisterSet Defs; |
3193 | HBS::getInstrDefs(MI, Defs); |
3194 | if (Defs.count() != 1) |
3195 | continue; |
3196 | Register DefR = Defs.find_first(); |
3197 | if (!DefR.isVirtual()) |
3198 | continue; |
3199 | if (!isBitShuffle(MI: &MI, DefR)) |
3200 | continue; |
3201 | |
3202 | bool BadUse = false; |
3203 | for (auto UI = MRI->use_begin(RegNo: DefR), UE = MRI->use_end(); UI != UE; ++UI) { |
3204 | MachineInstr *UseI = UI->getParent(); |
3205 | if (UseI->getParent() == C.LB) { |
3206 | if (UseI->isPHI()) { |
3207 | // If the use is in a phi node in this loop, then it should be |
3208 | // the value corresponding to the back edge. |
3209 | unsigned Idx = UI.getOperandNo(); |
3210 | if (UseI->getOperand(i: Idx+1).getMBB() != C.LB) |
3211 | BadUse = true; |
3212 | } else { |
3213 | if (!llvm::is_contained(Range&: ShufIns, Element: UseI)) |
3214 | BadUse = true; |
3215 | } |
3216 | } else { |
3217 | // There is a use outside of the loop, but there is no epilog block |
3218 | // suitable for a copy-out. |
3219 | if (C.EB == nullptr) |
3220 | BadUse = true; |
3221 | } |
3222 | if (BadUse) |
3223 | break; |
3224 | } |
3225 | |
3226 | if (BadUse) |
3227 | continue; |
3228 | ShufIns.push_back(x: &MI); |
3229 | } |
3230 | |
3231 | // Partition the list of shuffling instructions into instruction groups, |
3232 | // where each group has to be moved as a whole (i.e. a group is a chain of |
3233 | // dependent instructions). A group produces a single live output register, |
3234 | // which is meant to be the input of the loop phi node (although this is |
3235 | // not checked here yet). It also uses a single register as its input, |
3236 | // which is some value produced in the loop body. After moving the group |
3237 | // to the beginning of the loop, that input register would need to be |
3238 | // the loop-carried register (through a phi node) instead of the (currently |
3239 | // loop-carried) output register. |
3240 | using InstrGroupList = std::vector<InstrGroup>; |
3241 | InstrGroupList Groups; |
3242 | |
3243 | for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) { |
3244 | MachineInstr *SI = ShufIns[i]; |
3245 | if (SI == nullptr) |
3246 | continue; |
3247 | |
3248 | InstrGroup G; |
3249 | G.Ins.push_back(x: SI); |
3250 | G.Out.Reg = getDefReg(MI: SI); |
3251 | RegisterSet Inputs; |
3252 | HBS::getInstrUses(MI: *SI, Uses&: Inputs); |
3253 | |
3254 | for (unsigned j = i+1; j < n; ++j) { |
3255 | MachineInstr *MI = ShufIns[j]; |
3256 | if (MI == nullptr) |
3257 | continue; |
3258 | RegisterSet Defs; |
3259 | HBS::getInstrDefs(MI: *MI, Defs); |
3260 | // If this instruction does not define any pending inputs, skip it. |
3261 | if (!Defs.intersects(Rs: Inputs)) |
3262 | continue; |
3263 | // Otherwise, add it to the current group and remove the inputs that |
3264 | // are defined by MI. |
3265 | G.Ins.push_back(x: MI); |
3266 | Inputs.remove(Rs: Defs); |
3267 | // Then add all registers used by MI. |
3268 | HBS::getInstrUses(MI: *MI, Uses&: Inputs); |
3269 | ShufIns[j] = nullptr; |
3270 | } |
3271 | |
3272 | // Only add a group if it requires at most one register. |
3273 | if (Inputs.count() > 1) |
3274 | continue; |
3275 | auto LoopInpEq = [G] (const PhiInfo &P) -> bool { |
3276 | return G.Out.Reg == P.LR.Reg; |
3277 | }; |
3278 | if (llvm::none_of(Range&: Phis, P: LoopInpEq)) |
3279 | continue; |
3280 | |
3281 | G.Inp.Reg = Inputs.find_first(); |
3282 | Groups.push_back(x: G); |
3283 | } |
3284 | |
3285 | LLVM_DEBUG({ |
3286 | for (unsigned i = 0, n = Groups.size(); i < n; ++i) { |
3287 | InstrGroup &G = Groups[i]; |
3288 | dbgs() << "Group[" << i << "] inp: " |
3289 | << printReg(G.Inp.Reg, HRI, G.Inp.Sub) |
3290 | << " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n" ; |
3291 | for (const MachineInstr *MI : G.Ins) |
3292 | dbgs() << " " << MI; |
3293 | } |
3294 | }); |
3295 | |
3296 | for (InstrGroup &G : Groups) { |
3297 | if (!isShuffleOf(OutR: G.Out.Reg, InpR: G.Inp.Reg)) |
3298 | continue; |
3299 | auto LoopInpEq = [G] (const PhiInfo &P) -> bool { |
3300 | return G.Out.Reg == P.LR.Reg; |
3301 | }; |
3302 | auto F = llvm::find_if(Range&: Phis, P: LoopInpEq); |
3303 | if (F == Phis.end()) |
3304 | continue; |
3305 | unsigned PrehR = 0; |
3306 | if (!isSameShuffle(OutR1: G.Out.Reg, InpR1: G.Inp.Reg, OutR2: F->PR.Reg, InpR2&: PrehR)) { |
3307 | const MachineInstr *DefPrehR = MRI->getVRegDef(Reg: F->PR.Reg); |
3308 | unsigned Opc = DefPrehR->getOpcode(); |
3309 | if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi) |
3310 | continue; |
3311 | if (!DefPrehR->getOperand(i: 1).isImm()) |
3312 | continue; |
3313 | if (DefPrehR->getOperand(i: 1).getImm() != 0) |
3314 | continue; |
3315 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: G.Inp.Reg); |
3316 | if (RC != MRI->getRegClass(Reg: F->PR.Reg)) { |
3317 | PrehR = MRI->createVirtualRegister(RegClass: RC); |
3318 | unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi |
3319 | : Hexagon::A2_tfrpi; |
3320 | auto T = C.PB->getFirstTerminator(); |
3321 | DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc(); |
3322 | BuildMI(BB&: *C.PB, I: T, MIMD: DL, MCID: HII->get(Opcode: TfrI), DestReg: PrehR) |
3323 | .addImm(Val: 0); |
3324 | } else { |
3325 | PrehR = F->PR.Reg; |
3326 | } |
3327 | } |
3328 | // isSameShuffle could match with PrehR being of a wider class than |
3329 | // G.Inp.Reg, for example if G shuffles the low 32 bits of its input, |
3330 | // it would match for the input being a 32-bit register, and PrehR |
3331 | // being a 64-bit register (where the low 32 bits match). This could |
3332 | // be handled, but for now skip these cases. |
3333 | if (MRI->getRegClass(Reg: PrehR) != MRI->getRegClass(Reg: G.Inp.Reg)) |
3334 | continue; |
3335 | moveGroup(G, LB&: *F->LB, PB&: *F->PB, At: F->LB->getFirstNonPHI(), OldPhiR: F->DefR, NewPredR: PrehR); |
3336 | Changed = true; |
3337 | } |
3338 | |
3339 | return Changed; |
3340 | } |
3341 | |
3342 | bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { |
3343 | if (skipFunction(F: MF.getFunction())) |
3344 | return false; |
3345 | |
3346 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
3347 | HII = HST.getInstrInfo(); |
3348 | HRI = HST.getRegisterInfo(); |
3349 | MRI = &MF.getRegInfo(); |
3350 | const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); |
3351 | BitTracker BT(HE, MF); |
3352 | LLVM_DEBUG(BT.trace(true)); |
3353 | BT.run(); |
3354 | BTP = &BT; |
3355 | |
3356 | std::vector<LoopCand> Cand; |
3357 | |
3358 | for (auto &B : MF) { |
3359 | if (B.pred_size() != 2 || B.succ_size() != 2) |
3360 | continue; |
3361 | MachineBasicBlock *PB = nullptr; |
3362 | bool IsLoop = false; |
3363 | for (MachineBasicBlock *Pred : B.predecessors()) { |
3364 | if (Pred != &B) |
3365 | PB = Pred; |
3366 | else |
3367 | IsLoop = true; |
3368 | } |
3369 | if (!IsLoop) |
3370 | continue; |
3371 | |
3372 | MachineBasicBlock *EB = nullptr; |
3373 | for (MachineBasicBlock *Succ : B.successors()) { |
3374 | if (Succ == &B) |
3375 | continue; |
3376 | // Set EP to the epilog block, if it has only 1 predecessor (i.e. the |
3377 | // edge from B to EP is non-critical. |
3378 | if (Succ->pred_size() == 1) |
3379 | EB = Succ; |
3380 | break; |
3381 | } |
3382 | |
3383 | Cand.push_back(x: LoopCand(&B, PB, EB)); |
3384 | } |
3385 | |
3386 | bool Changed = false; |
3387 | for (auto &C : Cand) |
3388 | Changed |= processLoop(C); |
3389 | |
3390 | return Changed; |
3391 | } |
3392 | |
3393 | //===----------------------------------------------------------------------===// |
3394 | // Public Constructor Functions |
3395 | //===----------------------------------------------------------------------===// |
3396 | |
3397 | FunctionPass *llvm::createHexagonLoopRescheduling() { |
3398 | return new HexagonLoopRescheduling(); |
3399 | } |
3400 | |
3401 | FunctionPass *llvm::createHexagonBitSimplify() { |
3402 | return new HexagonBitSimplify(); |
3403 | } |
3404 | |