1//===---HexagonLoadStoreWidening.cpp---------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// HexagonStoreWidening:
// Replace sequences of "narrow" stores to adjacent memory locations with
// fewer "wide" stores that have the same effect.
11// For example, replace:
12// S4_storeirb_io %100, 0, 0 ; store-immediate-byte
13// S4_storeirb_io %100, 1, 0 ; store-immediate-byte
14// with
15// S4_storeirh_io %100, 0, 0 ; store-immediate-halfword
16// The above is the general idea. The actual cases handled by the code
17// may be a bit more complex.
18// The purpose of this pass is to reduce the number of outstanding stores,
19// or as one could say, "reduce store queue pressure". Also, wide stores
20// mean fewer stores, and since there are only two memory instructions allowed
21// per packet, it also means fewer packets, and ultimately fewer cycles.
22//
23// HexagonLoadWidening does the same thing as HexagonStoreWidening but
24// for Loads. Here, we try to replace 4-byte Loads with register-pair loads.
25// For example:
26// Replace
27// %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
28// %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
29// with
30// %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
31// %2:intregs = COPY %4.isub_lo:doubleregs
32// %3:intregs = COPY %4.isub_hi:doubleregs
33//
34// LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N
35// insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts
36
37//===---------------------------------------------------------------------===//
38
39#include "Hexagon.h"
40#include "HexagonInstrInfo.h"
41#include "HexagonRegisterInfo.h"
42#include "HexagonSubtarget.h"
43#include "llvm/ADT/SmallPtrSet.h"
44#include "llvm/Analysis/AliasAnalysis.h"
45#include "llvm/Analysis/MemoryLocation.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineFunction.h"
48#include "llvm/CodeGen/MachineFunctionPass.h"
49#include "llvm/CodeGen/MachineInstr.h"
50#include "llvm/CodeGen/MachineInstrBuilder.h"
51#include "llvm/CodeGen/MachineMemOperand.h"
52#include "llvm/CodeGen/MachineOperand.h"
53#include "llvm/CodeGen/MachineRegisterInfo.h"
54#include "llvm/IR/DebugLoc.h"
55#include "llvm/InitializePasses.h"
56#include "llvm/MC/MCInstrDesc.h"
57#include "llvm/Pass.h"
58#include "llvm/Support/Debug.h"
59#include "llvm/Support/ErrorHandling.h"
60#include "llvm/Support/MathExtras.h"
61#include "llvm/Support/raw_ostream.h"
62#include <algorithm>
63#include <cassert>
64#include <cstdint>
65#include <iterator>
66
67using namespace llvm;
68
69#define DEBUG_TYPE "hexagon-load-store-widening"
70
// Compile-time guard: basic blocks larger than this are skipped entirely by
// processBasicBlock() to keep analysis cost bounded.
static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
    "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(Val: 1000),
    cl::desc("Limit block size to analyze in load/store widening pass"));
74
75namespace {
76
77struct HexagonLoadStoreWidening {
78 enum WideningMode { Store, Load };
79 const HexagonInstrInfo *TII;
80 const HexagonRegisterInfo *TRI;
81 MachineRegisterInfo *MRI;
82 AliasAnalysis *AA;
83 MachineFunction *MF;
84
85public:
86 HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
87 const HexagonRegisterInfo *TRI,
88 MachineRegisterInfo *MRI, AliasAnalysis *AA,
89 MachineFunction *MF, bool StoreMode)
90 : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
91 Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
92 HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
93
94 bool run();
95
96private:
97 const bool Mode;
98 const unsigned MaxWideSize = 8;
99 const HexagonInstrInfo *HII = nullptr;
100
101 using InstrSet = SmallPtrSet<MachineInstr *, 16>;
102 using InstrGroup = SmallVector<MachineInstr *, 8>;
103 using InstrGroupList = SmallVector<InstrGroup, 8>;
104
105 InstrSet ProcessedInsts;
106
107 unsigned getBaseAddressRegister(const MachineInstr *MI);
108 int64_t getOffset(const MachineInstr *MI);
109 int64_t getPostIncrementValue(const MachineInstr *MI);
110 bool handledInstType(const MachineInstr *MI);
111
112 void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
113 void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
114 bool processBasicBlock(MachineBasicBlock &MBB);
115 bool processGroup(InstrGroup &Group);
116 bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
117 InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
118 bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
119 bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
120 bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
121 bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
122 bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
123 bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
124};
125
126struct HexagonStoreWidening : public MachineFunctionPass {
127 static char ID;
128
129 HexagonStoreWidening() : MachineFunctionPass(ID) {}
130
131 StringRef getPassName() const override { return "Hexagon Store Widening"; }
132
133 void getAnalysisUsage(AnalysisUsage &AU) const override {
134 AU.addRequired<AAResultsWrapperPass>();
135 AU.addPreserved<AAResultsWrapperPass>();
136 MachineFunctionPass::getAnalysisUsage(AU);
137 }
138
139 bool runOnMachineFunction(MachineFunction &MFn) override {
140 if (skipFunction(F: MFn.getFunction()))
141 return false;
142
143 auto &ST = MFn.getSubtarget<HexagonSubtarget>();
144 const HexagonInstrInfo *TII = ST.getInstrInfo();
145 const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
146 MachineRegisterInfo *MRI = &MFn.getRegInfo();
147 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
148
149 return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run();
150 }
151};
152
153struct HexagonLoadWidening : public MachineFunctionPass {
154 static char ID;
155
156 HexagonLoadWidening() : MachineFunctionPass(ID) {}
157
158 StringRef getPassName() const override { return "Hexagon Load Widening"; }
159
160 void getAnalysisUsage(AnalysisUsage &AU) const override {
161 AU.addRequired<AAResultsWrapperPass>();
162 AU.addPreserved<AAResultsWrapperPass>();
163 MachineFunctionPass::getAnalysisUsage(AU);
164 }
165
166 bool runOnMachineFunction(MachineFunction &MFn) override {
167 if (skipFunction(F: MFn.getFunction()))
168 return false;
169
170 auto &ST = MFn.getSubtarget<HexagonSubtarget>();
171 const HexagonInstrInfo *TII = ST.getInstrInfo();
172 const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
173 MachineRegisterInfo *MRI = &MFn.getRegInfo();
174 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
175 return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run();
176 }
177};
178
// Legacy pass manager identification anchors for the two wrapper passes.
char HexagonStoreWidening::ID = 0;
char HexagonLoadWidening::ID = 0;
181
182} // end anonymous namespace
183
// Register both wrapper passes with the legacy pass manager; each declares a
// dependency on the alias-analysis results wrapper.
INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
                      "Hexagon Store Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
                    "Hexagon Store Widening", false, false)

INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
                      "Hexagon Load Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
                    "Hexagon Load Widening", false, false)
195
196static const MachineMemOperand &getMemTarget(const MachineInstr *MI) {
197 assert(!MI->memoperands_empty() && "Expecting memory operands");
198 return **MI->memoperands_begin();
199}
200
201unsigned
202HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
203 assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
204 unsigned Base, Offset;
205 HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: Offset);
206 const MachineOperand &MO = MI->getOperand(i: Base);
207 assert(MO.isReg() && "Expecting register operand");
208 return MO.getReg();
209}
210
211int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
212 assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
213
214 // On Hexagon, post-incs always have an offset of 0
215 // There is no Offset operand to post-incs
216 if (HII->isPostIncrement(MI: *MI))
217 return 0;
218
219 unsigned Base, Offset;
220
221 HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: Offset);
222 const MachineOperand &MO = MI->getOperand(i: Offset);
223 switch (MO.getType()) {
224 case MachineOperand::MO_Immediate:
225 return MO.getImm();
226 case MachineOperand::MO_GlobalAddress:
227 return MO.getOffset();
228 default:
229 break;
230 }
231 llvm_unreachable("Expecting an immediate or global operand");
232}
233
234inline int64_t
235HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
236 unsigned Base, PostIncIdx;
237 HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: PostIncIdx);
238 const MachineOperand &MO = MI->getOperand(i: PostIncIdx);
239 return MO.getImm();
240}
241
242// Filtering function: any loads/stores whose opcodes are not "approved" of by
243// this function will not be subjected to widening.
244inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
245 unsigned Opc = MI->getOpcode();
246 if (Mode == WideningMode::Store) {
247 switch (Opc) {
248 case Hexagon::S4_storeirb_io:
249 case Hexagon::S4_storeirh_io:
250 case Hexagon::S4_storeiri_io:
251 case Hexagon::S2_storeri_io:
252 // Base address must be a register. (Implement FI later.)
253 return MI->getOperand(i: 0).isReg();
254 case Hexagon::S2_storeri_pi:
255 return MI->getOperand(i: 1).isReg();
256 }
257 } else {
258 // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x
259 // loads. So we only widen 32 bit loads as we don't need to select the
260 // right bits with AND & SHIFT ops.
261 switch (Opc) {
262 case Hexagon::L2_loadri_io:
263 // Base address must be a register and offset must be immediate.
264 return !MI->memoperands_empty() && MI->getOperand(i: 1).isReg() &&
265 MI->getOperand(i: 2).isImm();
266 case Hexagon::L2_loadri_pi:
267 return !MI->memoperands_empty() && MI->getOperand(i: 2).isReg();
268 }
269 }
270 return false;
271}
272
273static void addDefsUsesToList(const MachineInstr *MI,
274 DenseSet<Register> &RegDefs,
275 DenseSet<Register> &RegUses) {
276 for (const auto &Op : MI->operands()) {
277 if (!Op.isReg())
278 continue;
279 if (Op.isDef())
280 RegDefs.insert(V: Op.getReg());
281 if (Op.readsReg())
282 RegUses.insert(V: Op.getReg());
283 }
284}
285
286bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
287 const MachineInstr *B) {
288 DenseSet<Register> ARegDefs;
289 DenseSet<Register> ARegUses;
290 addDefsUsesToList(MI: A, RegDefs&: ARegDefs, RegUses&: ARegUses);
291 if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
292 (A->mayStore() || B->mayStore()) && A->mayAlias(AA, Other: *B, UseTBAA: true))
293 return false;
294 for (const auto &BOp : B->operands()) {
295 if (!BOp.isReg())
296 continue;
297 if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(V: BOp.getReg()))
298 return false;
299 if (BOp.isDef() && ARegUses.contains(V: BOp.getReg()))
300 return false;
301 }
302 return true;
303}
304
305// Inspect a machine basic block, and generate groups out of loads/stores
306// encountered in the block.
307//
308// A load/store group is a group of loads or stores that use the same base
309// register, and which can be reordered within that group without altering the
310// semantics of the program. A single group could be widened as
311// a whole, if there existed a single load/store instruction with the same
312// semantics as the entire group. In many cases, a single group may need more
313// than one wide load or store.
314void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB,
315 InstrGroupList &StoreGroups) {
316 // Traverse all instructions and if we encounter
317 // a load/store, then try to create a group starting at that instruction
318 // i.e. a sequence of independent loads/stores that can be widened.
319 for (auto I = MBB.begin(); I != MBB.end(); ++I) {
320 MachineInstr *MI = &(*I);
321 if (!handledInstType(MI))
322 continue;
323 if (ProcessedInsts.count(Ptr: MI))
324 continue;
325
326 // Found a store. Try to create a store group.
327 InstrGroup G;
328 createGroup(BaseInst: MI, Group&: G);
329 if (G.size() > 1)
330 StoreGroups.push_back(Elt: G);
331 }
332}
333
// Create a single load/store group. The insts need to be independent between
// themselves, and also there cannot be other instructions between them
// that could read or modify storage being read from or stored into.
void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst,
                                           InstrGroup &Group) {
  assert(handledInstType(BaseInst) && "Unexpected instruction");
  unsigned BaseReg = getBaseAddressRegister(MI: BaseInst);
  // Intervening memory instructions that are not widening candidates; they
  // constrain whether group members can legally be moved past them.
  InstrGroup Other;

  Group.push_back(Elt: BaseInst);
  LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump());
  auto End = BaseInst->getParent()->end();
  auto I = BaseInst->getIterator();

  // Scan forward from BaseInst to the end of the block, accumulating
  // same-base accesses until something unsafe terminates the group.
  while (true) {
    I = std::next(x: I);
    if (I == End)
      break;
    MachineInstr *MI = &(*I);

    // Assume calls are aliased to everything.
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        MI->hasOrderedMemoryRef())
      return;

    if (!handledInstType(MI)) {
      if (MI->mayLoadOrStore())
        Other.push_back(Elt: MI);
      continue;
    }

    // We have a handledInstType instruction
    // If this load/store instruction is aliased with anything already in the
    // group, terminate the group now.
    for (auto GI : Group)
      if (GI->mayAlias(AA, Other: *MI, UseTBAA: true))
        return;
    if (Mode == WideningMode::Load) {
      // Check if current load MI can be moved to the first load instruction
      // in Group. If any load instruction aliases with memory instructions in
      // Other, terminate the group.
      for (auto MemI : Other)
        if (!canSwapInstructions(A: MI, B: MemI))
          return;
    } else {
      // Check if store instructions in the group can be moved to current
      // store MI. If any store instruction aliases with memory instructions
      // in Other, terminate the group.
      for (auto MemI : Other) {
        // Only Other-instructions appearing after the last group member must
        // be crossed when sinking stores; earlier ones are irrelevant.
        if (std::distance(first: Group.back()->getIterator(), last: MemI->getIterator()) <=
            0)
          continue;
        for (auto GI : Group)
          if (!canSwapInstructions(A: MemI, B: GI))
            return;
      }
    }

    unsigned BR = getBaseAddressRegister(MI);
    // Only accesses off the same base register belong in this group.
    if (BR == BaseReg) {
      LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump());
      Group.push_back(Elt: MI);
      ProcessedInsts.insert(Ptr: MI);
    }
  } // while
}
400
// Check if load/store instructions S1 and S2 are adjacent. More precisely,
// S2 has to access memory immediately following that accessed by S1.
bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1,
                                           const MachineInstr *S2) {
  if (!handledInstType(MI: S1) || !handledInstType(MI: S2))
    return false;

  const MachineMemOperand &S1MO = getMemTarget(MI: S1);

  // Currently only handling immediate stores.
  int Off1 = getOffset(MI: S1);
  int Off2 = getOffset(MI: S2);

  // Compare end-of-S1 (Off1 + size) with start-of-S2. The comparison is done
  // in unsigned or signed arithmetic depending on the sign of Off1 so that
  // negative offsets still compare correctly.
  return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2)
                     : int(Off1 + S1MO.getSize().getValue()) == Off2;
}
417
/// Given a sequence of adjacent loads/stores, and a maximum size of a single
/// wide inst, pick a group of insts that can be replaced by a single load/store
/// of size not exceeding MaxSize. The selected sequence will be recorded
/// in OG ("old group" of instructions).
/// OG should be empty on entry, and should be left empty if the function
/// fails.
bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin,
                                           InstrGroup::iterator End,
                                           InstrGroup &OG, unsigned &TotalSize,
                                           unsigned MaxSize) {
  assert(Begin != End && "No instructions to analyze");
  assert(OG.empty() && "Old group not empty on entry");

  // A single instruction cannot be widened with anything.
  if (std::distance(first: Begin, last: End) <= 1)
    return false;

  MachineInstr *FirstMI = *Begin;
  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
  const MachineMemOperand &FirstMMO = getMemTarget(MI: FirstMI);
  if (!FirstMMO.getType().isValid())
    return false;

  unsigned Alignment = FirstMMO.getAlign().value();
  unsigned SizeAccum = FirstMMO.getSize().getValue();
  unsigned FirstOffset = getOffset(MI: FirstMI);

  // The initial value of SizeAccum should always be a power of 2.
  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");

  // If the size of the first store equals to or exceeds the limit, do nothing.
  if (SizeAccum >= MaxSize)
    return false;

  // If the size of the first load/store is greater than or equal to its
  // alignment, the access cannot be widened without becoming misaligned.
  if (SizeAccum >= Alignment) {
    LLVM_DEBUG(
        dbgs() << "Size of load/store greater than equal to its alignment\n");
    return false;
  }

  // The offset of a load/store will put restrictions on how wide the inst can
  // be. Offsets in loads/stores of size 2^n bytes need to have the n lowest
  // bits be 0. If the first inst already exhausts the offset limits, quit.
  // Test this by checking if the next wider size would exceed the limit.
  // For post-increment instructions, the increment amount needs to follow the
  // same rule.
  unsigned OffsetOrIncVal = 0;
  if (HII->isPostIncrement(MI: *FirstMI))
    OffsetOrIncVal = getPostIncrementValue(MI: FirstMI);
  else
    OffsetOrIncVal = FirstOffset;
  if ((2 * SizeAccum - 1) & OffsetOrIncVal) {
    LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc"
                      << " value: " << getPostIncrementValue(FirstMI)
                      << " is invalid in the widened version\n");
    return false;
  }

  OG.push_back(Elt: FirstMI);
  MachineInstr *S1 = FirstMI;

  // Pow2Num will be the largest number of elements in OG such that the sum
  // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2.
  unsigned Pow2Num = 1;
  unsigned Pow2Size = SizeAccum;
  bool HavePostInc = HII->isPostIncrement(MI: *S1);

  // Be greedy: keep accumulating insts as long as they are to adjacent
  // memory locations, and as long as the total number of bytes stored
  // does not exceed the limit (MaxSize).
  // Keep track of when the total size covered is a power of 2, since
  // this is a size a single load/store can cover.
  for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
    MachineInstr *S2 = *I;
    // Insts are sorted, so if S1 and S2 are not adjacent, there won't be
    // any other store to fill the "hole".
    if (!areAdjacent(S1, S2))
      break;

    // Cannot widen two post increments, need to return two registers
    // with incremented values
    if (HavePostInc && HII->isPostIncrement(MI: *S2))
      break;

    unsigned S2Size = getMemTarget(MI: S2).getSize().getValue();
    // Stop before the accumulated size exceeds either the widening limit or
    // the alignment of the first access.
    if (SizeAccum + S2Size > std::min(a: MaxSize, b: Alignment))
      break;

    OG.push_back(Elt: S2);
    SizeAccum += S2Size;
    if (isPowerOf2_32(Value: SizeAccum)) {
      Pow2Num = OG.size();
      Pow2Size = SizeAccum;
    }
    // The next wider access (2*Pow2Size) must still be naturally aligned
    // relative to the starting offset.
    if ((2 * Pow2Size - 1) & FirstOffset)
      break;

    S1 = S2;
  }

  // The insts don't add up to anything that can be widened. Clean up.
  if (Pow2Num <= 1) {
    OG.clear();
    return false;
  }

  // Only leave the loads/stores being widened.
  OG.resize(N: Pow2Num);
  TotalSize = Pow2Size;
  return true;
}
530
531/// Given an "old group" OG of insts, create a "new group" NG of instructions
532/// to replace them.
533bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG,
534 unsigned TotalSize) {
535 if (Mode == WideningMode::Store) {
536 return createWideStores(OG, NG, TotalSize);
537 }
538 return createWideLoads(OG, NG, TotalSize);
539}
540
/// Given an "old group" OG of stores, create a "new group" NG of instructions
/// to replace them. Ideally, NG would only have a single instruction in it,
/// but that may only be possible for store-immediate.
bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
                                                unsigned TotalSize) {
  // XXX Current limitations:
  // - only handle a TotalSize of up to 8

  LLVM_DEBUG(dbgs() << "Creating wide stores\n");
  if (TotalSize > MaxWideSize)
    return false;

  uint64_t Acc = 0; // Value accumulator.
  unsigned Shift = 0;
  bool HaveImm = false;
  bool HaveReg = false;

  // First pass: classify the stored values. Immediates are packed into Acc
  // (little-endian: earlier/lower stores occupy the lower bits); register
  // stores are only noted.
  for (MachineInstr *MI : OG) {
    const MachineMemOperand &MMO = getMemTarget(MI);
    // The stored value is operand 3 for post-increment stores, 2 otherwise.
    MachineOperand &SO = HII->isPostIncrement(MI: *MI)
                             ? MI->getOperand(i: 3)
                             : MI->getOperand(i: 2); // Source.
    unsigned NBits;
    uint64_t Mask;
    uint64_t Val;

    switch (SO.getType()) {
    case MachineOperand::MO_Immediate:
      LLVM_DEBUG(dbgs() << "Have store immediate\n");
      HaveImm = true;

      NBits = MMO.getSizeInBits().toRaw();
      Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits));
      Val = (SO.getImm() & Mask) << Shift;
      Acc |= Val;
      Shift += NBits;
      break;
    case MachineOperand::MO_Register:
      HaveReg = true;
      break;
    default:
      LLVM_DEBUG(dbgs() << "Unhandled store\n");
      return false;
    }
  }

  // Mixing the two forms is not supported: the combined value cannot be
  // materialized by a single combine of either kind.
  if (HaveImm && HaveReg) {
    LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n");
    return false;
  }

  MachineInstr *FirstSt = OG.front();
  DebugLoc DL = OG.back()->getDebugLoc();
  // Build a memory operand for the wide store covering the whole range.
  const MachineMemOperand &OldM = getMemTarget(MI: FirstSt);
  MachineMemOperand *NewM =
      MF->getMachineMemOperand(PtrInfo: OldM.getPointerInfo(), F: OldM.getFlags(),
                               Size: TotalSize, BaseAlignment: OldM.getAlign(), AAInfo: OldM.getAAInfo());
  MachineInstr *StI;
  // Base-address operand of the first (lowest-offset) store.
  MachineOperand &MR =
      (HII->isPostIncrement(MI: *FirstSt) ? FirstSt->getOperand(i: 1)
                                       : FirstSt->getOperand(i: 0));
  auto SecondSt = OG.back();
  if (HaveReg) {
    // Register case: combine the two 32-bit sources into a double register
    // (second store supplies the high half) and emit one 64-bit store.
    MachineOperand FReg =
        (HII->isPostIncrement(MI: *FirstSt) ? FirstSt->getOperand(i: 3)
                                         : FirstSt->getOperand(i: 2));
    // Post increments appear first in the sorted group.
    // Cannot have a post increment for the second instruction
    assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc");
    MachineOperand SReg = SecondSt->getOperand(i: 2);
    assert(FReg.isReg() && SReg.isReg() &&
           "Cannot merge store register and store imm");
    const MCInstrDesc &CombD = TII->get(Opcode: Hexagon::A2_combinew);
    Register VReg =
        MF->getRegInfo().createVirtualRegister(RegClass: &Hexagon::DoubleRegsRegClass);
    MachineInstr *CombI = BuildMI(MF&: *MF, MIMD: DL, MCID: CombD, DestReg: VReg).add(MO: SReg).add(MO: FReg);
    NG.push_back(Elt: CombI);

    if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) {
      // Preserve the post-increment semantics in the wide store.
      const MCInstrDesc &StD = TII->get(Opcode: Hexagon::S2_storerd_pi);
      auto IncDestMO = FirstSt->getOperand(i: 0);
      auto IncMO = FirstSt->getOperand(i: 2);
      StI =
          BuildMI(MF&: *MF, MIMD: DL, MCID: StD).add(MO: IncDestMO).add(MO: MR).add(MO: IncMO).addReg(RegNo: VReg);
    } else {
      const MCInstrDesc &StD = TII->get(Opcode: Hexagon::S2_storerd_io);
      auto OffMO = FirstSt->getOperand(i: 1);
      StI = BuildMI(MF&: *MF, MIMD: DL, MCID: StD).add(MO: MR).add(MO: OffMO).addReg(RegNo: VReg);
    }
    StI->addMemOperand(MF&: *MF, MO: NewM);
    NG.push_back(Elt: StI);
    return true;
  }

  // Handle store immediates
  // There are no post increment store immediates on Hexagon
  assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc");
  auto Off = FirstSt->getOperand(i: 1).getImm();
  if (TotalSize == 8) {
    // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg
    uint64_t Mask = 0xFFFFFFFFU;
    int LowerAcc = int(Mask & Acc);
    int UpperAcc = Acc >> 32;
    Register DReg =
        MF->getRegInfo().createVirtualRegister(RegClass: &Hexagon::DoubleRegsRegClass);
    MachineInstr *CombI;
    if (Acc != 0) {
      const MCInstrDesc &TfrD = TII->get(Opcode: Hexagon::A2_tfrsi);
      const TargetRegisterClass *RC = TII->getRegClass(MCID: TfrD, OpNum: 0, TRI, MF: *MF);
      Register VReg = MF->getRegInfo().createVirtualRegister(RegClass: RC);
      MachineInstr *TfrI = BuildMI(MF&: *MF, MIMD: DL, MCID: TfrD, DestReg: VReg).addImm(Val: LowerAcc);
      NG.push_back(Elt: TfrI);
      const MCInstrDesc &CombD = TII->get(Opcode: Hexagon::A4_combineir);
      CombI = BuildMI(MF&: *MF, MIMD: DL, MCID: CombD, DestReg: DReg)
                  .addImm(Val: UpperAcc)
                  .addReg(RegNo: VReg, flags: RegState::Kill);
    }
    // If immediates are 0, we do not need A2_tfrsi
    else {
      const MCInstrDesc &CombD = TII->get(Opcode: Hexagon::A4_combineii);
      CombI = BuildMI(MF&: *MF, MIMD: DL, MCID: CombD, DestReg: DReg).addImm(Val: 0).addImm(Val: 0);
    }
    NG.push_back(Elt: CombI);
    const MCInstrDesc &StD = TII->get(Opcode: Hexagon::S2_storerd_io);
    StI =
        BuildMI(MF&: *MF, MIMD: DL, MCID: StD).add(MO: MR).addImm(Val: Off).addReg(RegNo: DReg, flags: RegState::Kill);
  } else if (Acc < 0x10000) {
    // Create mem[hw] = #Acc
    unsigned WOpc = (TotalSize == 2)   ? Hexagon::S4_storeirh_io
                    : (TotalSize == 4) ? Hexagon::S4_storeiri_io
                                       : 0;
    assert(WOpc && "Unexpected size");

    int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
    const MCInstrDesc &StD = TII->get(Opcode: WOpc);
    StI = BuildMI(MF&: *MF, MIMD: DL, MCID: StD).add(MO: MR).addImm(Val: Off).addImm(Val);
  } else {
    // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
    const MCInstrDesc &TfrD = TII->get(Opcode: Hexagon::A2_tfrsi);
    const TargetRegisterClass *RC = TII->getRegClass(MCID: TfrD, OpNum: 0, TRI, MF: *MF);
    Register VReg = MF->getRegInfo().createVirtualRegister(RegClass: RC);
    MachineInstr *TfrI = BuildMI(MF&: *MF, MIMD: DL, MCID: TfrD, DestReg: VReg).addImm(Val: int(Acc));
    NG.push_back(Elt: TfrI);

    unsigned WOpc = (TotalSize == 2)   ? Hexagon::S2_storerh_io
                    : (TotalSize == 4) ? Hexagon::S2_storeri_io
                                       : 0;
    assert(WOpc && "Unexpected size");

    const MCInstrDesc &StD = TII->get(Opcode: WOpc);
    StI =
        BuildMI(MF&: *MF, MIMD: DL, MCID: StD).add(MO: MR).addImm(Val: Off).addReg(RegNo: VReg, flags: RegState::Kill);
  }
  StI->addMemOperand(MF&: *MF, MO: NewM);
  NG.push_back(Elt: StI);

  return true;
}
699
/// Given an "old group" OG of loads, create a "new group" NG of instructions
/// to replace them. Ideally, NG would only have a single instruction in it,
/// but that may only be possible for double register loads.
bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG,
                                               unsigned TotalSize) {
  LLVM_DEBUG(dbgs() << "Creating wide loads\n");
  // XXX Current limitations:
  // - only handle pairs of 32-bit loads (OG always has exactly two members),
  // - only handle a TotalSize of up to 8
  if (TotalSize > MaxWideSize)
    return false;
  assert(OG.size() == 2 && "Expecting two elements in Instruction Group.");

  MachineInstr *FirstLd = OG.front();
  // Wide memory operand covering both original accesses.
  const MachineMemOperand &OldM = getMemTarget(MI: FirstLd);
  MachineMemOperand *NewM =
      MF->getMachineMemOperand(PtrInfo: OldM.getPointerInfo(), F: OldM.getFlags(),
                               Size: TotalSize, BaseAlignment: OldM.getAlign(), AAInfo: OldM.getAAInfo());

  // Destination and base-address operands of the first (lowest-offset) load.
  MachineOperand &MR = FirstLd->getOperand(i: 0);
  MachineOperand &MRBase =
      (HII->isPostIncrement(MI: *FirstLd) ? FirstLd->getOperand(i: 2)
                                       : FirstLd->getOperand(i: 1));
  DebugLoc DL = OG.back()->getDebugLoc();

  // Create the double register Load Instruction.
  Register NewMR = MRI->createVirtualRegister(RegClass: &Hexagon::DoubleRegsRegClass);
  MachineInstr *LdI;

  // Post increments appear first in the sorted group
  if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) {
    auto IncDestMO = FirstLd->getOperand(i: 1);
    auto IncMO = FirstLd->getOperand(i: 3);
    LdI = BuildMI(MF&: *MF, MIMD: DL, MCID: TII->get(Opcode: Hexagon::L2_loadrd_pi))
              .addDef(RegNo: NewMR, Flags: getKillRegState(B: MR.isKill()), SubReg: MR.getSubReg())
              .add(MO: IncDestMO)
              .add(MO: MRBase)
              .add(MO: IncMO);
    LdI->addMemOperand(MF&: *MF, MO: NewM);
  } else {
    auto OffMO = FirstLd->getOperand(i: 2);
    LdI = BuildMI(MF&: *MF, MIMD: DL, MCID: TII->get(Opcode: Hexagon::L2_loadrd_io))
              .addDef(RegNo: NewMR, Flags: getKillRegState(B: MR.isKill()), SubReg: MR.getSubReg())
              .add(MO: MRBase)
              .add(MO: OffMO);
    LdI->addMemOperand(MF&: *MF, MO: NewM);
  }
  NG.push_back(Elt: LdI);

  // Copy one half of the wide result into the destination register of one of
  // the original loads.
  auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg,
                        MachineInstr *DstReg) {
    Register DestReg = DstReg->getOperand(i: 0).getReg();
    return BuildMI(MF&: *MF, MIMD: DL, MCID: TII->get(Opcode: Hexagon::COPY), DestReg)
        .addReg(RegNo: NewMR, flags: getKillRegState(B: LdI->isKill()), SubReg);
  };

  // Low half feeds the first (lower-offset) load's dest, high half the second.
  MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd);
  MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back());
  NG.push_back(Elt: LdI_lo);
  NG.push_back(Elt: LdI_hi);

  return true;
}
763
// Replace instructions from the old group OG with instructions from the
// new group NG. Conceptually, remove all instructions in OG, and then
// insert all instructions in NG, starting at where the first instruction
// from OG was (in the order in which they appeared in the basic block).
// (The ordering in OG does not have to match the order in the basic block.)
bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) {
  LLVM_DEBUG({
    dbgs() << "Replacing:\n";
    for (auto I : OG)
      dbgs() << "  " << *I;
    dbgs() << "with\n";
    for (auto I : NG)
      dbgs() << "  " << *I;
  });

  MachineBasicBlock *MBB = OG.back()->getParent();
  MachineBasicBlock::iterator InsertAt = MBB->end();

  // Need to establish the insertion point.
  // For loads the best one is right before the first load in the OG,
  // but in the order in which the insts occur in the program list.
  // For stores the best point is right after the last store in the OG.
  // Since the ordering in OG does not correspond
  // to the order in the program list, we need to do some work to find
  // the insertion point.

  // Create a set of all instructions in OG (for quick lookup).
  InstrSet OldMemInsts(llvm::from_range, OG);

  if (Mode == WideningMode::Load) {
    // Find the first load instruction in the block that is present in OG.
    for (auto &I : *MBB) {
      if (OldMemInsts.count(Ptr: &I)) {
        // Implicit conversion from MachineInstr& to the bundle iterator.
        InsertAt = I;
        break;
      }
    }

    assert((InsertAt != MBB->end()) && "Cannot locate any load from the group");

    for (auto *I : NG)
      MBB->insert(I: InsertAt, MI: I);
  } else {
    // Find the last store instruction in the block that is present in OG.
    auto I = MBB->rbegin();
    for (; I != MBB->rend(); ++I) {
      if (OldMemInsts.count(Ptr: &(*I))) {
        InsertAt = (*I).getIterator();
        break;
      }
    }

    assert((I != MBB->rend()) && "Cannot locate any store from the group");

    // Insert NG in reverse so that, via insertAfter, the new instructions end
    // up in their original (forward) order after InsertAt.
    for (auto I = NG.rbegin(); I != NG.rend(); ++I)
      MBB->insertAfter(I: InsertAt, MI: *I);
  }

  // Now that the replacements are in place, delete the originals.
  for (auto *I : OG)
    I->eraseFromParent();

  return true;
}
827
// Break up the group into smaller groups, each of which can be replaced by
// a single wide load/store. Widen each such smaller group and replace the old
// instructions with the widened ones.
bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) {
  bool Changed = false;
  InstrGroup::iterator I = Group.begin(), E = Group.end();
  InstrGroup OG, NG; // Old and new groups.
  unsigned CollectedSize;

  while (I != E) {
    OG.clear();
    NG.clear();

    // Note: I is post-incremented here, so on failure the loop simply retries
    // starting from the next instruction.
    bool Succ = selectInsts(Begin: I++, End: E, OG, TotalSize&: CollectedSize, MaxSize: MaxWideSize) &&
                createWideInsts(OG, NG, TotalSize: CollectedSize) && replaceInsts(OG, NG);
    if (!Succ)
      continue;

    assert(OG.size() > 1 && "Created invalid group");
    assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements");
    // Skip past the instructions just widened (I already advanced by one).
    I += OG.size() - 1;

    Changed = true;
  }

  return Changed;
}
855
856// Process a single basic block: create the load/store groups, and replace them
857// with the widened insts, if possible. Processing of each basic block
858// is independent from processing of any other basic block. This transfor-
859// mation could be stopped after having processed any basic block without
860// any ill effects (other than not having performed widening in the unpro-
861// cessed blocks). Also, the basic blocks can be processed in any order.
862bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
863 InstrGroupList SGs;
864 bool Changed = false;
865
866 // To prevent long compile time check for max BB size.
867 if (MBB.size() > MaxMBBSizeForLoadStoreWidening)
868 return false;
869
870 createGroups(MBB, StoreGroups&: SGs);
871
872 auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool {
873 return getOffset(MI: A) < getOffset(MI: B);
874 };
875 for (auto &G : SGs) {
876 assert(G.size() > 1 && "Group with fewer than 2 elements");
877 llvm::sort(C&: G, Comp: Less);
878
879 Changed |= processGroup(Group&: G);
880 }
881
882 return Changed;
883}
884
885bool HexagonLoadStoreWidening::run() {
886 bool Changed = false;
887
888 for (auto &B : *MF)
889 Changed |= processBasicBlock(MBB&: B);
890
891 return Changed;
892}
893
// Factory used by the Hexagon target machine to instantiate the
// store-widening pass.
FunctionPass *llvm::createHexagonStoreWidening() {
  return new HexagonStoreWidening();
}
897
// Factory used by the Hexagon target machine to instantiate the
// load-widening pass.
FunctionPass *llvm::createHexagonLoadWidening() {
  return new HexagonLoadWidening();
}
901