1 | //===---HexagonLoadStoreWidening.cpp---------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // HexagonStoreWidening: |
9 | // Replace sequences of "narrow" stores to adjacent memory locations with |
// fewer "wide" stores that have the same effect.
11 | // For example, replace: |
12 | // S4_storeirb_io %100, 0, 0 ; store-immediate-byte |
13 | // S4_storeirb_io %100, 1, 0 ; store-immediate-byte |
14 | // with |
15 | // S4_storeirh_io %100, 0, 0 ; store-immediate-halfword |
16 | // The above is the general idea. The actual cases handled by the code |
17 | // may be a bit more complex. |
18 | // The purpose of this pass is to reduce the number of outstanding stores, |
19 | // or as one could say, "reduce store queue pressure". Also, wide stores |
20 | // mean fewer stores, and since there are only two memory instructions allowed |
21 | // per packet, it also means fewer packets, and ultimately fewer cycles. |
22 | // |
23 | // HexagonLoadWidening does the same thing as HexagonStoreWidening but |
24 | // for Loads. Here, we try to replace 4-byte Loads with register-pair loads. |
25 | // For example: |
26 | // Replace |
27 | // %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8) |
28 | // %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2) |
29 | // with |
30 | // %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1) |
31 | // %2:intregs = COPY %4.isub_lo:doubleregs |
32 | // %3:intregs = COPY %4.isub_hi:doubleregs |
33 | // |
34 | // LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N |
35 | // insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts |
36 | |
37 | //===---------------------------------------------------------------------===// |
38 | |
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
66 | |
using namespace llvm;

// Tag used by LLVM_DEBUG / -debug-only= to select this pass's debug output.
#define DEBUG_TYPE "hexagon-load-store-widening"
70 | |
// Command-line cap on the number of instructions in a basic block that this
// pass will analyze; larger blocks are skipped to bound compile time.
static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
    "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000),
    cl::desc("Limit block size to analyze in load/store widening pass"));
74 | |
75 | namespace { |
76 | |
77 | struct HexagonLoadStoreWidening { |
78 | enum WideningMode { Store, Load }; |
79 | const HexagonInstrInfo *TII; |
80 | const HexagonRegisterInfo *TRI; |
81 | MachineRegisterInfo *MRI; |
82 | AliasAnalysis *AA; |
83 | MachineFunction *MF; |
84 | |
85 | public: |
86 | HexagonLoadStoreWidening(const HexagonInstrInfo *TII, |
87 | const HexagonRegisterInfo *TRI, |
88 | MachineRegisterInfo *MRI, AliasAnalysis *AA, |
89 | MachineFunction *MF, bool StoreMode) |
90 | : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF), |
91 | Mode(StoreMode ? WideningMode::Store : WideningMode::Load), |
92 | HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {} |
93 | |
94 | bool run(); |
95 | |
96 | private: |
97 | const bool Mode; |
98 | const unsigned MaxWideSize = 8; |
99 | const HexagonInstrInfo *HII = nullptr; |
100 | |
101 | using InstrSet = SmallPtrSet<MachineInstr *, 16>; |
102 | using InstrGroup = SmallVector<MachineInstr *, 8>; |
103 | using InstrGroupList = SmallVector<InstrGroup, 8>; |
104 | |
105 | InstrSet ProcessedInsts; |
106 | |
107 | unsigned getBaseAddressRegister(const MachineInstr *MI); |
108 | int64_t getOffset(const MachineInstr *MI); |
109 | int64_t getPostIncrementValue(const MachineInstr *MI); |
110 | bool handledInstType(const MachineInstr *MI); |
111 | |
112 | void createGroup(MachineInstr *BaseInst, InstrGroup &Group); |
113 | void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups); |
114 | bool processBasicBlock(MachineBasicBlock &MBB); |
115 | bool processGroup(InstrGroup &Group); |
116 | bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End, |
117 | InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); |
118 | bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); |
119 | bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); |
120 | bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); |
121 | bool replaceInsts(InstrGroup &OG, InstrGroup &NG); |
122 | bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2); |
123 | bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B); |
124 | }; |
125 | |
126 | struct HexagonStoreWidening : public MachineFunctionPass { |
127 | static char ID; |
128 | |
129 | HexagonStoreWidening() : MachineFunctionPass(ID) {} |
130 | |
131 | StringRef getPassName() const override { return "Hexagon Store Widening" ; } |
132 | |
133 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
134 | AU.addRequired<AAResultsWrapperPass>(); |
135 | AU.addPreserved<AAResultsWrapperPass>(); |
136 | MachineFunctionPass::getAnalysisUsage(AU); |
137 | } |
138 | |
139 | bool runOnMachineFunction(MachineFunction &MFn) override { |
140 | if (skipFunction(F: MFn.getFunction())) |
141 | return false; |
142 | |
143 | auto &ST = MFn.getSubtarget<HexagonSubtarget>(); |
144 | const HexagonInstrInfo *TII = ST.getInstrInfo(); |
145 | const HexagonRegisterInfo *TRI = ST.getRegisterInfo(); |
146 | MachineRegisterInfo *MRI = &MFn.getRegInfo(); |
147 | AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
148 | |
149 | return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run(); |
150 | } |
151 | }; |
152 | |
153 | struct HexagonLoadWidening : public MachineFunctionPass { |
154 | static char ID; |
155 | |
156 | HexagonLoadWidening() : MachineFunctionPass(ID) {} |
157 | |
158 | StringRef getPassName() const override { return "Hexagon Load Widening" ; } |
159 | |
160 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
161 | AU.addRequired<AAResultsWrapperPass>(); |
162 | AU.addPreserved<AAResultsWrapperPass>(); |
163 | MachineFunctionPass::getAnalysisUsage(AU); |
164 | } |
165 | |
166 | bool runOnMachineFunction(MachineFunction &MFn) override { |
167 | if (skipFunction(F: MFn.getFunction())) |
168 | return false; |
169 | |
170 | auto &ST = MFn.getSubtarget<HexagonSubtarget>(); |
171 | const HexagonInstrInfo *TII = ST.getInstrInfo(); |
172 | const HexagonRegisterInfo *TRI = ST.getRegisterInfo(); |
173 | MachineRegisterInfo *MRI = &MFn.getRegInfo(); |
174 | AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
175 | return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run(); |
176 | } |
177 | }; |
178 | |
// Pass identification: the addresses of these statics (not their values)
// uniquely identify the passes to the pass manager.
char HexagonStoreWidening::ID = 0;
char HexagonLoadWidening::ID = 0;
181 | |
182 | } // end anonymous namespace |
183 | |
// Register both pass flavors with the legacy pass registry; each declares a
// dependency on alias analysis results.
INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
                      "Hexagon Store Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
                    "Hexagon Store Widening", false, false)

INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
                      "Hexagon Load Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
                    "Hexagon Load Widening", false, false)
195 | |
196 | static const MachineMemOperand &getMemTarget(const MachineInstr *MI) { |
197 | assert(!MI->memoperands_empty() && "Expecting memory operands" ); |
198 | return **MI->memoperands_begin(); |
199 | } |
200 | |
201 | unsigned |
202 | HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) { |
203 | assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode" ); |
204 | unsigned Base, Offset; |
205 | HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: Offset); |
206 | const MachineOperand &MO = MI->getOperand(i: Base); |
207 | assert(MO.isReg() && "Expecting register operand" ); |
208 | return MO.getReg(); |
209 | } |
210 | |
211 | int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) { |
212 | assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode" ); |
213 | |
214 | // On Hexagon, post-incs always have an offset of 0 |
215 | // There is no Offset operand to post-incs |
216 | if (HII->isPostIncrement(MI: *MI)) |
217 | return 0; |
218 | |
219 | unsigned Base, Offset; |
220 | |
221 | HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: Offset); |
222 | const MachineOperand &MO = MI->getOperand(i: Offset); |
223 | switch (MO.getType()) { |
224 | case MachineOperand::MO_Immediate: |
225 | return MO.getImm(); |
226 | case MachineOperand::MO_GlobalAddress: |
227 | return MO.getOffset(); |
228 | default: |
229 | break; |
230 | } |
231 | llvm_unreachable("Expecting an immediate or global operand" ); |
232 | } |
233 | |
234 | inline int64_t |
235 | HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) { |
236 | unsigned Base, PostIncIdx; |
237 | HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: PostIncIdx); |
238 | const MachineOperand &MO = MI->getOperand(i: PostIncIdx); |
239 | return MO.getImm(); |
240 | } |
241 | |
242 | // Filtering function: any loads/stores whose opcodes are not "approved" of by |
243 | // this function will not be subjected to widening. |
244 | inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) { |
245 | unsigned Opc = MI->getOpcode(); |
246 | if (Mode == WideningMode::Store) { |
247 | switch (Opc) { |
248 | case Hexagon::S4_storeirb_io: |
249 | case Hexagon::S4_storeirh_io: |
250 | case Hexagon::S4_storeiri_io: |
251 | case Hexagon::S2_storeri_io: |
252 | // Base address must be a register. (Implement FI later.) |
253 | return MI->getOperand(i: 0).isReg(); |
254 | case Hexagon::S2_storeri_pi: |
255 | return MI->getOperand(i: 1).isReg(); |
256 | } |
257 | } else { |
258 | // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x |
259 | // loads. So we only widen 32 bit loads as we don't need to select the |
260 | // right bits with AND & SHIFT ops. |
261 | switch (Opc) { |
262 | case Hexagon::L2_loadri_io: |
263 | // Base address must be a register and offset must be immediate. |
264 | return !MI->memoperands_empty() && MI->getOperand(i: 1).isReg() && |
265 | MI->getOperand(i: 2).isImm(); |
266 | case Hexagon::L2_loadri_pi: |
267 | return !MI->memoperands_empty() && MI->getOperand(i: 2).isReg(); |
268 | } |
269 | } |
270 | return false; |
271 | } |
272 | |
273 | static void addDefsUsesToList(const MachineInstr *MI, |
274 | DenseSet<Register> &RegDefs, |
275 | DenseSet<Register> &RegUses) { |
276 | for (const auto &Op : MI->operands()) { |
277 | if (!Op.isReg()) |
278 | continue; |
279 | if (Op.isDef()) |
280 | RegDefs.insert(V: Op.getReg()); |
281 | if (Op.readsReg()) |
282 | RegUses.insert(V: Op.getReg()); |
283 | } |
284 | } |
285 | |
286 | bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A, |
287 | const MachineInstr *B) { |
288 | DenseSet<Register> ARegDefs; |
289 | DenseSet<Register> ARegUses; |
290 | addDefsUsesToList(MI: A, RegDefs&: ARegDefs, RegUses&: ARegUses); |
291 | if (A->mayLoadOrStore() && B->mayLoadOrStore() && |
292 | (A->mayStore() || B->mayStore()) && A->mayAlias(AA, Other: *B, UseTBAA: true)) |
293 | return false; |
294 | for (const auto &BOp : B->operands()) { |
295 | if (!BOp.isReg()) |
296 | continue; |
297 | if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(V: BOp.getReg())) |
298 | return false; |
299 | if (BOp.isDef() && ARegUses.contains(V: BOp.getReg())) |
300 | return false; |
301 | } |
302 | return true; |
303 | } |
304 | |
305 | // Inspect a machine basic block, and generate groups out of loads/stores |
306 | // encountered in the block. |
307 | // |
308 | // A load/store group is a group of loads or stores that use the same base |
309 | // register, and which can be reordered within that group without altering the |
310 | // semantics of the program. A single group could be widened as |
311 | // a whole, if there existed a single load/store instruction with the same |
312 | // semantics as the entire group. In many cases, a single group may need more |
313 | // than one wide load or store. |
314 | void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB, |
315 | InstrGroupList &StoreGroups) { |
316 | // Traverse all instructions and if we encounter |
317 | // a load/store, then try to create a group starting at that instruction |
318 | // i.e. a sequence of independent loads/stores that can be widened. |
319 | for (auto I = MBB.begin(); I != MBB.end(); ++I) { |
320 | MachineInstr *MI = &(*I); |
321 | if (!handledInstType(MI)) |
322 | continue; |
323 | if (ProcessedInsts.count(Ptr: MI)) |
324 | continue; |
325 | |
326 | // Found a store. Try to create a store group. |
327 | InstrGroup G; |
328 | createGroup(BaseInst: MI, Group&: G); |
329 | if (G.size() > 1) |
330 | StoreGroups.push_back(Elt: G); |
331 | } |
332 | } |
333 | |
// Create a single load/store group. The insts need to be independent between
// themselves, and also there cannot be other instructions between them
// that could read or modify storage being read from or stored into.
//
// Starting from BaseInst, walk forward through the block collecting
// loads/stores with the same base register into Group, stopping at the
// first instruction that makes further reordering unprovable.
void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst,
                                           InstrGroup &Group) {
  assert(handledInstType(BaseInst) && "Unexpected instruction");
  unsigned BaseReg = getBaseAddressRegister(BaseInst);
  // Memory instructions seen between group members that are not themselves
  // widening candidates; consulted below for reordering legality.
  InstrGroup Other;

  Group.push_back(BaseInst);
  LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump());
  auto End = BaseInst->getParent()->end();
  auto I = BaseInst->getIterator();

  while (true) {
    I = std::next(I);
    if (I == End)
      break;
    MachineInstr *MI = &(*I);

    // Assume calls are aliased to everything.
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        MI->hasOrderedMemoryRef())
      return;

    if (!handledInstType(MI)) {
      // Not a widening candidate; remember it if it touches memory so we
      // can later prove group members may be moved across it.
      if (MI->mayLoadOrStore())
        Other.push_back(MI);
      continue;
    }

    // We have a handledInstType instruction
    // If this load/store instruction is aliased with anything already in the
    // group, terminate the group now.
    for (auto GI : Group)
      if (GI->mayAlias(AA, *MI, true))
        return;
    if (Mode == WideningMode::Load) {
      // Check if current load MI can be moved to the first load instruction
      // in Group. If any load instruction aliases with memory instructions in
      // Other, terminate the group.
      for (auto MemI : Other)
        if (!canSwapInstructions(MI, MemI))
          return;
    } else {
      // Check if store instructions in the group can be moved to current
      // store MI. If any store instruction aliases with memory instructions
      // in Other, terminate the group.
      for (auto MemI : Other) {
        // Only intervening memory insts that come AFTER the last store
        // already in the group must be crossed; earlier ones are irrelevant.
        if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <=
            0)
          continue;
        for (auto GI : Group)
          if (!canSwapInstructions(MemI, GI))
            return;
      }
    }

    // Same base register => this inst joins the group; mark it processed so
    // createGroups() does not seed a new group from it.
    unsigned BR = getBaseAddressRegister(MI);
    if (BR == BaseReg) {
      LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump());
      Group.push_back(MI);
      ProcessedInsts.insert(MI);
    }
  } // while
}
400 | |
401 | // Check if load/store instructions S1 and S2 are adjacent. More precisely, |
402 | // S2 has to access memory immediately following that accessed by S1. |
403 | bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1, |
404 | const MachineInstr *S2) { |
405 | if (!handledInstType(MI: S1) || !handledInstType(MI: S2)) |
406 | return false; |
407 | |
408 | const MachineMemOperand &S1MO = getMemTarget(MI: S1); |
409 | |
410 | // Currently only handling immediate stores. |
411 | int Off1 = getOffset(MI: S1); |
412 | int Off2 = getOffset(MI: S2); |
413 | |
414 | return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2) |
415 | : int(Off1 + S1MO.getSize().getValue()) == Off2; |
416 | } |
417 | |
/// Given a sequence of adjacent loads/stores, and a maximum size of a single
/// wide inst, pick a group of insts that can be replaced by a single load/store
/// of size not exceeding MaxSize. The selected sequence will be recorded
/// in OG ("old group" of instructions), and its total byte size in TotalSize.
/// OG should be empty on entry, and should be left empty if the function
/// fails.
bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin,
                                           InstrGroup::iterator End,
                                           InstrGroup &OG, unsigned &TotalSize,
                                           unsigned MaxSize) {
  assert(Begin != End && "No instructions to analyze");
  assert(OG.empty() && "Old group not empty on entry");

  // A single instruction can never be widened.
  if (std::distance(Begin, End) <= 1)
    return false;

  MachineInstr *FirstMI = *Begin;
  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
  const MachineMemOperand &FirstMMO = getMemTarget(FirstMI);
  if (!FirstMMO.getType().isValid())
    return false;

  unsigned Alignment = FirstMMO.getAlign().value();
  unsigned SizeAccum = FirstMMO.getSize().getValue();
  unsigned FirstOffset = getOffset(FirstMI);

  // The initial value of SizeAccum should always be a power of 2.
  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");

  // If the size of the first store equals to or exceeds the limit, do nothing.
  if (SizeAccum >= MaxSize)
    return false;

  // If the size of the first load/store is greater than or equal to the
  // alignment of the address, the inst cannot be made any wider.
  if (SizeAccum >= Alignment) {
    LLVM_DEBUG(
        dbgs() << "Size of load/store greater than equal to its alignment\n");
    return false;
  }

  // The offset of a load/store will put restrictions on how wide the inst can
  // be. Offsets in loads/stores of size 2^n bytes need to have the n lowest
  // bits be 0. If the first inst already exhausts the offset limits, quit.
  // Test this by checking if the next wider size would exceed the limit.
  // For post-increment instructions, the increment amount needs to follow the
  // same rule.
  unsigned OffsetOrIncVal = 0;
  if (HII->isPostIncrement(*FirstMI))
    OffsetOrIncVal = getPostIncrementValue(FirstMI);
  else
    OffsetOrIncVal = FirstOffset;
  if ((2 * SizeAccum - 1) & OffsetOrIncVal) {
    LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc"
                      << " value: " << getPostIncrementValue(FirstMI)
                      << " is invalid in the widened version\n");
    return false;
  }

  OG.push_back(FirstMI);
  MachineInstr *S1 = FirstMI;

  // Pow2Num will be the largest number of elements in OG such that the sum
  // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2.
  unsigned Pow2Num = 1;
  unsigned Pow2Size = SizeAccum;
  bool HavePostInc = HII->isPostIncrement(*S1);

  // Be greedy: keep accumulating insts as long as they are to adjacent
  // memory locations, and as long as the total number of bytes stored
  // does not exceed the limit (MaxSize).
  // Keep track of when the total size covered is a power of 2, since
  // this is a size a single load/store can cover.
  for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
    MachineInstr *S2 = *I;
    // Insts are sorted, so if S1 and S2 are not adjacent, there won't be
    // any other store to fill the "hole".
    if (!areAdjacent(S1, S2))
      break;

    // Cannot widen two post increments, need to return two registers
    // with incremented values
    if (HavePostInc && HII->isPostIncrement(*S2))
      break;

    // Respect both the maximum wide size and the base alignment.
    unsigned S2Size = getMemTarget(S2).getSize().getValue();
    if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
      break;

    OG.push_back(S2);
    SizeAccum += S2Size;
    if (isPowerOf2_32(SizeAccum)) {
      Pow2Num = OG.size();
      Pow2Size = SizeAccum;
    }
    // Stop if the current power-of-2 prefix would violate the offset
    // alignment rule for the widened access.
    if ((2 * Pow2Size - 1) & FirstOffset)
      break;

    S1 = S2;
  }

  // The insts don't add up to anything that can be widened. Clean up.
  if (Pow2Num <= 1) {
    OG.clear();
    return false;
  }

  // Only leave the loads/stores being widened.
  OG.resize(Pow2Num);
  TotalSize = Pow2Size;
  return true;
}
530 | |
531 | /// Given an "old group" OG of insts, create a "new group" NG of instructions |
532 | /// to replace them. |
533 | bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG, |
534 | unsigned TotalSize) { |
535 | if (Mode == WideningMode::Store) { |
536 | return createWideStores(OG, NG, TotalSize); |
537 | } |
538 | return createWideLoads(OG, NG, TotalSize); |
539 | } |
540 | |
/// Given an "old group" OG of stores, create a "new group" NG of instructions
/// to replace them. Ideally, NG would only have a single instruction in it,
/// but that may only be possible for store-immediate.
bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
                                                unsigned TotalSize) {
  // XXX Current limitations:
  // - only handle a TotalSize of up to 8

  LLVM_DEBUG(dbgs() << "Creating wide stores\n");
  if (TotalSize > MaxWideSize)
    return false;

  uint64_t Acc = 0; // Value accumulator.
  unsigned Shift = 0;
  bool HaveImm = false;
  bool HaveReg = false;

  // First pass: classify the group. Immediate stores are folded into a
  // single accumulated constant (little-endian: earlier stores occupy the
  // lower bits); register stores are only noted here.
  for (MachineInstr *MI : OG) {
    const MachineMemOperand &MMO = getMemTarget(MI);
    // Post-increment stores carry their source in operand 3, others in 2.
    MachineOperand &SO = HII->isPostIncrement(*MI)
                             ? MI->getOperand(3)
                             : MI->getOperand(2); // Source.
    unsigned NBits;
    uint64_t Mask;
    uint64_t Val;

    switch (SO.getType()) {
    case MachineOperand::MO_Immediate:
      LLVM_DEBUG(dbgs() << "Have store immediate\n");
      HaveImm = true;

      NBits = MMO.getSizeInBits().toRaw();
      Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits));
      Val = (SO.getImm() & Mask) << Shift;
      Acc |= Val;
      Shift += NBits;
      break;
    case MachineOperand::MO_Register:
      HaveReg = true;
      break;
    default:
      LLVM_DEBUG(dbgs() << "Unhandled store\n");
      return false;
    }
  }

  // Mixing register and immediate sources is not supported.
  if (HaveImm && HaveReg) {
    LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n");
    return false;
  }

  MachineInstr *FirstSt = OG.front();
  DebugLoc DL = OG.back()->getDebugLoc();
  const MachineMemOperand &OldM = getMemTarget(FirstSt);
  // Memory operand describing the whole widened access.
  MachineMemOperand *NewM =
      MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
                               TotalSize, OldM.getAlign(), OldM.getAAInfo());
  MachineInstr *StI;
  // Base-address operand: operand 1 for post-increment stores, else 0.
  MachineOperand &MR =
      (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1)
                                      : FirstSt->getOperand(0));
  auto SecondSt = OG.back();
  if (HaveReg) {
    // Register case: combine the two 32-bit sources into a double register
    // (high half from the second store) and emit one 64-bit store.
    MachineOperand FReg =
        (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3)
                                        : FirstSt->getOperand(2));
    // Post increments appear first in the sorted group.
    // Cannot have a post increment for the second instruction
    assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc");
    MachineOperand SReg = SecondSt->getOperand(2);
    assert(FReg.isReg() && SReg.isReg() &&
           "Cannot merge store register and store imm");
    const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew);
    Register VReg =
        MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
    MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg);
    NG.push_back(CombI);

    if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) {
      // Keep the post-increment semantics of the first store.
      const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi);
      auto IncDestMO = FirstSt->getOperand(0);
      auto IncMO = FirstSt->getOperand(2);
      StI =
          BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg);
    } else {
      const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
      auto OffMO = FirstSt->getOperand(1);
      StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg);
    }
    StI->addMemOperand(*MF, NewM);
    NG.push_back(StI);
    return true;
  }

  // Handle store immediates
  // There are no post increment store immediates on Hexagon
  assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc");
  auto Off = FirstSt->getOperand(1).getImm();
  if (TotalSize == 8) {
    // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg
    uint64_t Mask = 0xFFFFFFFFU;
    int LowerAcc = int(Mask & Acc);
    int UpperAcc = Acc >> 32;
    Register DReg =
        MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
    MachineInstr *CombI;
    if (Acc != 0) {
      const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
      const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
      Register VReg = MF->getRegInfo().createVirtualRegister(RC);
      MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
      NG.push_back(TfrI);
      const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir);
      CombI = BuildMI(*MF, DL, CombD, DReg)
                  .addImm(UpperAcc)
                  .addReg(VReg, RegState::Kill);
    }
    // If immediates are 0, we do not need A2_tfrsi
    else {
      const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii);
      CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0);
    }
    NG.push_back(CombI);
    const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
    StI =
        BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill);
  } else if (Acc < 0x10000) {
    // Accumulated value fits a store-immediate.
    // Create mem[hw] = #Acc
    unsigned WOpc = (TotalSize == 2)   ? Hexagon::S4_storeirh_io
                    : (TotalSize == 4) ? Hexagon::S4_storeiri_io
                                       : 0;
    assert(WOpc && "Unexpected size");

    int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
    const MCInstrDesc &StD = TII->get(WOpc);
    StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val);
  } else {
    // Value too large for a store-immediate: materialize it in a register.
    // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
    const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
    const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
    Register VReg = MF->getRegInfo().createVirtualRegister(RC);
    MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
    NG.push_back(TfrI);

    unsigned WOpc = (TotalSize == 2)   ? Hexagon::S2_storerh_io
                    : (TotalSize == 4) ? Hexagon::S2_storeri_io
                                       : 0;
    assert(WOpc && "Unexpected size");

    const MCInstrDesc &StD = TII->get(WOpc);
    StI =
        BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill);
  }
  StI->addMemOperand(*MF, NewM);
  NG.push_back(StI);

  return true;
}
699 | |
/// Given an "old group" OG of loads, create a "new group" NG of instructions
/// to replace them. Ideally, NG would only have a single instruction in it,
/// but that may only be possible for double register loads.
/// The two original 32-bit loads are replaced by one 64-bit load plus COPYs
/// of its low/high subregisters into the original destinations.
bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG,
                                               unsigned TotalSize) {
  LLVM_DEBUG(dbgs() << "Creating wide loads\n");
  // XXX Current limitations:
  // - only expect stores of immediate values in OG,
  // - only handle a TotalSize of up to 8
  if (TotalSize > MaxWideSize)
    return false;
  assert(OG.size() == 2 && "Expecting two elements in Instruction Group.");

  MachineInstr *FirstLd = OG.front();
  const MachineMemOperand &OldM = getMemTarget(FirstLd);
  // Memory operand describing the whole widened access.
  MachineMemOperand *NewM =
      MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
                               TotalSize, OldM.getAlign(), OldM.getAAInfo());

  MachineOperand &MR = FirstLd->getOperand(0);
  // Base-address operand: operand 2 for post-increment loads, else 1.
  MachineOperand &MRBase =
      (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2)
                                      : FirstLd->getOperand(1));
  DebugLoc DL = OG.back()->getDebugLoc();

  // Create the double register Load Instruction.
  Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass);
  MachineInstr *LdI;

  // Post increments appear first in the sorted group
  if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) {
    auto IncDestMO = FirstLd->getOperand(1);
    auto IncMO = FirstLd->getOperand(3);
    LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi))
              .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
              .add(IncDestMO)
              .add(MRBase)
              .add(IncMO);
    LdI->addMemOperand(*MF, NewM);
  } else {
    auto OffMO = FirstLd->getOperand(2);
    LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io))
              .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
              .add(MRBase)
              .add(OffMO);
    LdI->addMemOperand(*MF, NewM);
  }
  NG.push_back(LdI);

  // Build a COPY of one half (SubReg) of the wide result into the
  // destination register of the given original load.
  auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg,
                        MachineInstr *DstReg) {
    Register DestReg = DstReg->getOperand(0).getReg();
    return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg)
        .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg);
  };

  // Low half goes to the first (lower-addressed) load's destination.
  MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd);
  MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back());
  NG.push_back(LdI_lo);
  NG.push_back(LdI_hi);

  return true;
}
763 | |
// Replace instructions from the old group OG with instructions from the
// new group NG. Conceptually, remove all instructions in OG, and then
// insert all instructions in NG, starting at where the first instruction
// from OG was (in the order in which they appeared in the basic block).
// (The ordering in OG does not have to match the order in the basic block.)
// Returns true (the replacement is always performed once this is called).
bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) {
  LLVM_DEBUG({
    dbgs() << "Replacing:\n";
    for (auto I : OG)
      dbgs() << " " << *I;
    dbgs() << "with\n";
    for (auto I : NG)
      dbgs() << " " << *I;
  });

  MachineBasicBlock *MBB = OG.back()->getParent();
  MachineBasicBlock::iterator InsertAt = MBB->end();

  // Need to establish the insertion point.
  // For loads the best one is right before the first load in the OG,
  // but in the order in which the insts occur in the program list.
  // For stores the best point is right after the last store in the OG.
  // Since the ordering in OG does not correspond
  // to the order in the program list, we need to do some work to find
  // the insertion point.

  // Create a set of all instructions in OG (for quick lookup).
  InstrSet OldMemInsts(llvm::from_range, OG);

  if (Mode == WideningMode::Load) {
    // Find the first load instruction in the block that is present in OG.
    for (auto &I : *MBB) {
      if (OldMemInsts.count(Ptr: &I)) {
        InsertAt = I;
        break;
      }
    }

    assert((InsertAt != MBB->end()) && "Cannot locate any load from the group");

    // Inserting each element before InsertAt, in forward order, keeps NG's
    // own ordering in the block.
    for (auto *I : NG)
      MBB->insert(I: InsertAt, MI: I);
  } else {
    // Find the last store instruction in the block that is present in OG.
    // Walk backwards so the first hit is the program-order last store.
    auto I = MBB->rbegin();
    for (; I != MBB->rend(); ++I) {
      if (OldMemInsts.count(Ptr: &(*I))) {
        InsertAt = (*I).getIterator();
        break;
      }
    }

    assert((I != MBB->rend()) && "Cannot locate any store from the group");

    // Insert NG in reverse: each insertAfter(InsertAt, ...) pushes the
    // previously inserted instructions further down, so NG ends up in its
    // original order immediately after InsertAt.
    for (auto I = NG.rbegin(); I != NG.rend(); ++I)
      MBB->insertAfter(I: InsertAt, MI: *I);
  }

  // The narrow instructions are now fully replaced; delete them.
  for (auto *I : OG)
    I->eraseFromParent();

  return true;
}
827 | |
// Break up the group into smaller groups, each of which can be replaced by
// a single wide load/store. Widen each such smaller group and replace the old
// instructions with the widened ones.
// Returns true if any replacement was made.
bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) {
  bool Changed = false;
  InstrGroup::iterator I = Group.begin(), E = Group.end();
  InstrGroup OG, NG; // Old and new groups.
  unsigned CollectedSize; // Combined byte size of OG; filled by selectInsts.

  while (I != E) {
    OG.clear();
    NG.clear();

    // Note: I is post-incremented here, so selectInsts starts the candidate
    // group at the current instruction while I already points past it.
    // All three steps must succeed for a replacement to happen; on failure
    // we simply retry starting from the next instruction.
    bool Succ = selectInsts(Begin: I++, End: E, OG, TotalSize&: CollectedSize, MaxSize: MaxWideSize) &&
                createWideInsts(OG, NG, TotalSize: CollectedSize) && replaceInsts(OG, NG);
    if (!Succ)
      continue;

    assert(OG.size() > 1 && "Created invalid group");
    assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements");
    // I was already advanced once above; skip the remaining OG.size() - 1
    // members so the next iteration starts after the replaced group.
    I += OG.size() - 1;

    Changed = true;
  }

  return Changed;
}
855 | |
856 | // Process a single basic block: create the load/store groups, and replace them |
857 | // with the widened insts, if possible. Processing of each basic block |
858 | // is independent from processing of any other basic block. This transfor- |
859 | // mation could be stopped after having processed any basic block without |
860 | // any ill effects (other than not having performed widening in the unpro- |
861 | // cessed blocks). Also, the basic blocks can be processed in any order. |
862 | bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { |
863 | InstrGroupList SGs; |
864 | bool Changed = false; |
865 | |
866 | // To prevent long compile time check for max BB size. |
867 | if (MBB.size() > MaxMBBSizeForLoadStoreWidening) |
868 | return false; |
869 | |
870 | createGroups(MBB, StoreGroups&: SGs); |
871 | |
872 | auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool { |
873 | return getOffset(MI: A) < getOffset(MI: B); |
874 | }; |
875 | for (auto &G : SGs) { |
876 | assert(G.size() > 1 && "Group with fewer than 2 elements" ); |
877 | llvm::sort(C&: G, Comp: Less); |
878 | |
879 | Changed |= processGroup(Group&: G); |
880 | } |
881 | |
882 | return Changed; |
883 | } |
884 | |
885 | bool HexagonLoadStoreWidening::run() { |
886 | bool Changed = false; |
887 | |
888 | for (auto &B : *MF) |
889 | Changed |= processBasicBlock(MBB&: B); |
890 | |
891 | return Changed; |
892 | } |
893 | |
// Factory: allocate a new instance of the store-widening pass.
FunctionPass *llvm::createHexagonStoreWidening() {
  return new HexagonStoreWidening();
}
897 | |
// Factory: allocate a new instance of the load-widening pass.
FunctionPass *llvm::createHexagonLoadWidening() {
  return new HexagonLoadWidening();
}
901 | |