//===---HexagonLoadStoreWidening.cpp---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// HexagonStoreWidening:
// Replace sequences of "narrow" stores to adjacent memory locations with
// fewer "wide" stores that have the same effect.
// For example, replace:
//   S4_storeirb_io  %100, 0, 0   ; store-immediate-byte
//   S4_storeirb_io  %100, 1, 0   ; store-immediate-byte
// with
//   S4_storeirh_io  %100, 0, 0   ; store-immediate-halfword
// The above is the general idea. The actual cases handled by the code
// may be a bit more complex.
// The purpose of this pass is to reduce the number of outstanding stores,
// or as one could say, "reduce store queue pressure". Also, wide stores
// mean fewer stores, and since there are only two memory instructions allowed
// per packet, it also means fewer packets, and ultimately fewer cycles.
//
// HexagonLoadWidening does the same thing as HexagonStoreWidening but
// for Loads. Here, we try to replace 4-byte Loads with register-pair loads.
// For example:
// Replace
//   %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
//   %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
// with
//   %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
//   %2:intregs = COPY %4.isub_lo:doubleregs
//   %3:intregs = COPY %4.isub_hi:doubleregs
//
// LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N
// insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts

//===---------------------------------------------------------------------===//
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
66
using namespace llvm;

// Debug type shared by both the load-widening and store-widening passes.
#define DEBUG_TYPE "hexagon-load-store-widening"
70
// Cap on the size of a basic block this pass will analyze; larger blocks are
// skipped to bound compile time.
static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
    "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000),
    cl::desc("Limit block size to analyze in load/store widening pass"));
74
75namespace {
76
77struct HexagonLoadStoreWidening {
78 enum WideningMode { Store, Load };
79 const HexagonInstrInfo *TII;
80 const HexagonRegisterInfo *TRI;
81 MachineRegisterInfo *MRI;
82 AliasAnalysis *AA;
83 MachineFunction *MF;
84
85public:
86 HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
87 const HexagonRegisterInfo *TRI,
88 MachineRegisterInfo *MRI, AliasAnalysis *AA,
89 MachineFunction *MF, bool StoreMode)
90 : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
91 Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
92 HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
93
94 bool run();
95
96private:
97 const bool Mode;
98 const unsigned MaxWideSize = 8;
99 const HexagonInstrInfo *HII = nullptr;
100
101 using InstrSet = SmallPtrSet<MachineInstr *, 16>;
102 using InstrGroup = SmallVector<MachineInstr *, 8>;
103 using InstrGroupList = SmallVector<InstrGroup, 8>;
104
105 InstrSet ProcessedInsts;
106
107 unsigned getBaseAddressRegister(const MachineInstr *MI);
108 int64_t getOffset(const MachineInstr *MI);
109 int64_t getPostIncrementValue(const MachineInstr *MI);
110 bool handledInstType(const MachineInstr *MI);
111
112 void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
113 void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
114 bool processBasicBlock(MachineBasicBlock &MBB);
115 bool processGroup(InstrGroup &Group);
116 bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
117 InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
118 bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
119 bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
120 bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
121 bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
122 bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
123 bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
124 MachineInstr *widenLoadStoreAddAsl(Register NewPairReg,
125 const MachineInstr *FirstMem,
126 const DebugLoc &DL);
127};
128
129struct HexagonStoreWidening : public MachineFunctionPass {
130 static char ID;
131
132 HexagonStoreWidening() : MachineFunctionPass(ID) {}
133
134 StringRef getPassName() const override { return "Hexagon Store Widening"; }
135
136 void getAnalysisUsage(AnalysisUsage &AU) const override {
137 AU.addRequired<AAResultsWrapperPass>();
138 AU.addPreserved<AAResultsWrapperPass>();
139 MachineFunctionPass::getAnalysisUsage(AU);
140 }
141
142 bool runOnMachineFunction(MachineFunction &MFn) override {
143 if (skipFunction(F: MFn.getFunction()))
144 return false;
145
146 auto &ST = MFn.getSubtarget<HexagonSubtarget>();
147 const HexagonInstrInfo *TII = ST.getInstrInfo();
148 const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
149 MachineRegisterInfo *MRI = &MFn.getRegInfo();
150 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
151
152 return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run();
153 }
154};
155
156struct HexagonLoadWidening : public MachineFunctionPass {
157 static char ID;
158
159 HexagonLoadWidening() : MachineFunctionPass(ID) {}
160
161 StringRef getPassName() const override { return "Hexagon Load Widening"; }
162
163 void getAnalysisUsage(AnalysisUsage &AU) const override {
164 AU.addRequired<AAResultsWrapperPass>();
165 AU.addPreserved<AAResultsWrapperPass>();
166 MachineFunctionPass::getAnalysisUsage(AU);
167 }
168
169 bool runOnMachineFunction(MachineFunction &MFn) override {
170 if (skipFunction(F: MFn.getFunction()))
171 return false;
172
173 auto &ST = MFn.getSubtarget<HexagonSubtarget>();
174 const HexagonInstrInfo *TII = ST.getInstrInfo();
175 const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
176 MachineRegisterInfo *MRI = &MFn.getRegInfo();
177 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
178 return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run();
179 }
180};
181
// Pass identification anchors; the addresses of these statics uniquely
// identify each pass to the pass manager.
char HexagonStoreWidening::ID = 0;
char HexagonLoadWidening::ID = 0;
184
185} // end anonymous namespace
186
// Register both passes (and their AliasAnalysis dependency) with the pass
// registry so they can be constructed by name.
INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
                      "Hexagon Store Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
                    "Hexagon Store Widening", false, false)

INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
                      "Hexagon Load Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
                    "Hexagon Load Widening", false, false)
198
199static const MachineMemOperand &getMemTarget(const MachineInstr *MI) {
200 assert(!MI->memoperands_empty() && "Expecting memory operands");
201 return **MI->memoperands_begin();
202}
203
204unsigned
205HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
206 assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
207 unsigned Base, Offset;
208 HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: Offset);
209 const MachineOperand &MO = MI->getOperand(i: Base);
210 assert(MO.isReg() && "Expecting register operand");
211 return MO.getReg();
212}
213
214int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
215 assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
216
217 // On Hexagon, post-incs always have an offset of 0
218 // There is no Offset operand to post-incs
219 if (HII->isPostIncrement(MI: *MI))
220 return 0;
221
222 unsigned Base, Offset;
223
224 HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: Offset);
225 const MachineOperand &MO = MI->getOperand(i: Offset);
226 switch (MO.getType()) {
227 case MachineOperand::MO_Immediate:
228 return MO.getImm();
229 case MachineOperand::MO_GlobalAddress:
230 return MO.getOffset();
231 default:
232 break;
233 }
234 llvm_unreachable("Expecting an immediate or global operand");
235}
236
237inline int64_t
238HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
239 unsigned Base, PostIncIdx;
240 HII->getBaseAndOffsetPosition(MI: *MI, BasePos&: Base, OffsetPos&: PostIncIdx);
241 const MachineOperand &MO = MI->getOperand(i: PostIncIdx);
242 return MO.getImm();
243}
244
245// Filtering function: any loads/stores whose opcodes are not "approved" of by
246// this function will not be subjected to widening.
247inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
248 unsigned Opc = MI->getOpcode();
249 if (Mode == WideningMode::Store) {
250 switch (Opc) {
251 case Hexagon::S4_storeirb_io:
252 case Hexagon::S4_storeirh_io:
253 case Hexagon::S4_storeiri_io:
254 case Hexagon::S2_storeri_io:
255 // Base address must be a register. (Implement FI later.)
256 return MI->getOperand(i: 0).isReg();
257 case Hexagon::S2_storeri_pi:
258 return MI->getOperand(i: 1).isReg();
259 }
260 } else {
261 // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x
262 // loads. So we only widen 32 bit loads as we don't need to select the
263 // right bits with AND & SHIFT ops.
264 switch (Opc) {
265 case Hexagon::L2_loadri_io:
266 // Base address must be a register and offset must be immediate.
267 return !MI->memoperands_empty() && MI->getOperand(i: 1).isReg() &&
268 MI->getOperand(i: 2).isImm();
269 case Hexagon::L2_loadri_pi:
270 return !MI->memoperands_empty() && MI->getOperand(i: 2).isReg();
271 }
272 }
273 return false;
274}
275
276static void addDefsUsesToList(const MachineInstr *MI,
277 DenseSet<Register> &RegDefs,
278 DenseSet<Register> &RegUses) {
279 for (const auto &Op : MI->operands()) {
280 if (!Op.isReg())
281 continue;
282 if (Op.isDef())
283 RegDefs.insert(V: Op.getReg());
284 if (Op.readsReg())
285 RegUses.insert(V: Op.getReg());
286 }
287}
288
289bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
290 const MachineInstr *B) {
291 DenseSet<Register> ARegDefs;
292 DenseSet<Register> ARegUses;
293 addDefsUsesToList(MI: A, RegDefs&: ARegDefs, RegUses&: ARegUses);
294 if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
295 (A->mayStore() || B->mayStore()) && A->mayAlias(AA, Other: *B, UseTBAA: true))
296 return false;
297 for (const auto &BOp : B->operands()) {
298 if (!BOp.isReg())
299 continue;
300 if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(V: BOp.getReg()))
301 return false;
302 if (BOp.isDef() && ARegUses.contains(V: BOp.getReg()))
303 return false;
304 }
305 return true;
306}
307
308// Inspect a machine basic block, and generate groups out of loads/stores
309// encountered in the block.
310//
311// A load/store group is a group of loads or stores that use the same base
312// register, and which can be reordered within that group without altering the
313// semantics of the program. A single group could be widened as
314// a whole, if there existed a single load/store instruction with the same
315// semantics as the entire group. In many cases, a single group may need more
316// than one wide load or store.
317void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB,
318 InstrGroupList &StoreGroups) {
319 // Traverse all instructions and if we encounter
320 // a load/store, then try to create a group starting at that instruction
321 // i.e. a sequence of independent loads/stores that can be widened.
322 for (auto I = MBB.begin(); I != MBB.end(); ++I) {
323 MachineInstr *MI = &(*I);
324 if (!handledInstType(MI))
325 continue;
326 if (ProcessedInsts.count(Ptr: MI))
327 continue;
328
329 // Found a store. Try to create a store group.
330 InstrGroup G;
331 createGroup(BaseInst: MI, Group&: G);
332 if (G.size() > 1)
333 StoreGroups.push_back(Elt: G);
334 }
335}
336
// Create a single load/store group. The insts need to be independent between
// themselves, and also there cannot be other instructions between them
// that could read or modify storage being read from or stored into.
//
// Scans forward from BaseInst, collecting candidates that share BaseInst's
// base register into Group. Stops (leaving Group as-is) at the first
// instruction that makes further reordering unsafe.
void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst,
                                           InstrGroup &Group) {
  assert(handledInstType(BaseInst) && "Unexpected instruction");
  unsigned BaseReg = getBaseAddressRegister(BaseInst);
  // Intervening memory instructions that are not widening candidates; they
  // constrain how far candidates can be moved.
  InstrGroup Other;

  Group.push_back(BaseInst);
  LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump());
  auto End = BaseInst->getParent()->end();
  auto I = BaseInst->getIterator();

  while (true) {
    I = std::next(I);
    if (I == End)
      break;
    MachineInstr *MI = &(*I);

    // Assume calls are aliased to everything.
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        MI->hasOrderedMemoryRef())
      return;

    if (!handledInstType(MI)) {
      if (MI->mayLoadOrStore())
        Other.push_back(MI);
      continue;
    }

    // We have a handledInstType instruction
    // If this load/store instruction is aliased with anything already in the
    // group, terminate the group now.
    for (auto GI : Group)
      if (GI->mayAlias(AA, *MI, true))
        return;
    if (Mode == WideningMode::Load) {
      // Check if current load MI can be moved to the first load instruction
      // in Group. If any load instruction aliases with memory instructions in
      // Other, terminate the group.
      for (auto MemI : Other)
        if (!canSwapInstructions(MI, MemI))
          return;
    } else {
      // Check if store instructions in the group can be moved to current
      // store MI. If any store instruction aliases with memory instructions
      // in Other, terminate the group.
      for (auto MemI : Other) {
        // Only intervening insts that come after the last grouped store can
        // block the move; earlier ones were vetted in prior iterations.
        if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <=
            0)
          continue;
        for (auto GI : Group)
          if (!canSwapInstructions(MemI, GI))
            return;
      }
    }

    unsigned BR = getBaseAddressRegister(MI);
    if (BR == BaseReg) {
      LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump());
      Group.push_back(MI);
      ProcessedInsts.insert(MI);
    }
  } // while
}
403
404// Check if load/store instructions S1 and S2 are adjacent. More precisely,
405// S2 has to access memory immediately following that accessed by S1.
406bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1,
407 const MachineInstr *S2) {
408 if (!handledInstType(MI: S1) || !handledInstType(MI: S2))
409 return false;
410
411 const MachineMemOperand &S1MO = getMemTarget(MI: S1);
412
413 // Currently only handling immediate stores.
414 int Off1 = getOffset(MI: S1);
415 int Off2 = getOffset(MI: S2);
416
417 return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2)
418 : int(Off1 + S1MO.getSize().getValue()) == Off2;
419}
420
/// Given a sequence of adjacent loads/stores, and a maximum size of a single
/// wide inst, pick a group of insts that can be replaced by a single load/store
/// of size not exceeding MaxSize. The selected sequence will be recorded
/// in OG ("old group" of instructions).
/// OG should be empty on entry, and should be left empty if the function
/// fails.
bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin,
                                           InstrGroup::iterator End,
                                           InstrGroup &OG, unsigned &TotalSize,
                                           unsigned MaxSize) {
  assert(Begin != End && "No instructions to analyze");
  assert(OG.empty() && "Old group not empty on entry");

  // A single instruction cannot be widened.
  if (std::distance(Begin, End) <= 1)
    return false;

  MachineInstr *FirstMI = *Begin;
  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
  const MachineMemOperand &FirstMMO = getMemTarget(FirstMI);
  if (!FirstMMO.getType().isValid())
    return false;

  unsigned Alignment = FirstMMO.getAlign().value();
  unsigned SizeAccum = FirstMMO.getSize().getValue();
  unsigned FirstOffset = getOffset(FirstMI);

  // The initial value of SizeAccum should always be a power of 2.
  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");

  // If the size of the first store equals to or exceeds the limit, do nothing.
  if (SizeAccum >= MaxSize)
    return false;

  // If the size of the first load/store is greater than or equal to the
  // address stored to, then the inst cannot be made any wider.
  if (SizeAccum >= Alignment) {
    LLVM_DEBUG(
        dbgs() << "Size of load/store greater than equal to its alignment\n");
    return false;
  }

  // The offset of a load/store will put restrictions on how wide the inst can
  // be. Offsets in loads/stores of size 2^n bytes need to have the n lowest
  // bits be 0. If the first inst already exhausts the offset limits, quit.
  // Test this by checking if the next wider size would exceed the limit.
  // For post-increment instructions, the increment amount needs to follow the
  // same rule.
  unsigned OffsetOrIncVal = 0;
  if (HII->isPostIncrement(*FirstMI))
    OffsetOrIncVal = getPostIncrementValue(FirstMI);
  else
    OffsetOrIncVal = FirstOffset;
  if ((2 * SizeAccum - 1) & OffsetOrIncVal) {
    LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc"
                      << " value: " << getPostIncrementValue(FirstMI)
                      << " is invalid in the widened version\n");
    return false;
  }

  OG.push_back(FirstMI);
  MachineInstr *S1 = FirstMI;

  // Pow2Num will be the largest number of elements in OG such that the sum
  // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2.
  unsigned Pow2Num = 1;
  unsigned Pow2Size = SizeAccum;
  bool HavePostInc = HII->isPostIncrement(*S1);

  // Be greedy: keep accumulating insts as long as they are to adjacent
  // memory locations, and as long as the total number of bytes stored
  // does not exceed the limit (MaxSize).
  // Keep track of when the total size covered is a power of 2, since
  // this is a size a single load/store can cover.
  for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
    MachineInstr *S2 = *I;
    // Insts are sorted, so if S1 and S2 are not adjacent, there won't be
    // any other store to fill the "hole".
    if (!areAdjacent(S1, S2))
      break;

    // Cannot widen two post increments, need to return two registers
    // with incremented values
    if (HavePostInc && HII->isPostIncrement(*S2))
      break;

    unsigned S2Size = getMemTarget(S2).getSize().getValue();
    // Never grow past MaxSize or past the first access's alignment.
    if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
      break;

    OG.push_back(S2);
    SizeAccum += S2Size;
    if (isPowerOf2_32(SizeAccum)) {
      Pow2Num = OG.size();
      Pow2Size = SizeAccum;
    }
    // The wider access's offset-alignment requirement must still hold.
    if ((2 * Pow2Size - 1) & FirstOffset)
      break;

    S1 = S2;
  }

  // The insts don't add up to anything that can be widened. Clean up.
  if (Pow2Num <= 1) {
    OG.clear();
    return false;
  }

  // Only leave the loads/stores being widened.
  OG.resize(Pow2Num);
  TotalSize = Pow2Size;
  return true;
}
533
534/// Given an "old group" OG of insts, create a "new group" NG of instructions
535/// to replace them.
536bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG,
537 unsigned TotalSize) {
538 if (Mode == WideningMode::Store) {
539 return createWideStores(OG, NG, TotalSize);
540 }
541 return createWideLoads(OG, NG, TotalSize);
542}
543
// check for the pattern:
//   %def = S2_addasl_rrri %base, %offset, #imm
//   %other = L2_loadrd_io %def, 0
// to
//   %other = L4_loadrd_rr %base, %offset, #imm
// If addasl does not define the base of the load,
// do a normal widening.
// Same behavior followed for store.
//
// Returns the new (not-yet-inserted) rr-form instruction, or nullptr when
// the pattern does not apply (caller then falls back to the io form).
MachineInstr *HexagonLoadStoreWidening::widenLoadStoreAddAsl(
    Register NewPairReg, const MachineInstr *FirstMem, const DebugLoc &DL) {

  unsigned Base, Offset;
  HII->getBaseAndOffsetPosition(*FirstMem, Base, Offset);
  auto BaseOp = FirstMem->getOperand(Base);
  auto OffMO = FirstMem->getOperand(Offset);
  Register BaseReg = BaseOp.getReg();

  MachineInstr *AddaslDef = nullptr;
  if (Register::isVirtualRegister(BaseReg))
    AddaslDef = MRI->getVRegDef(BaseReg);

  // Skip through trivial copies to the defining addasl.
  while (AddaslDef && AddaslDef->getOpcode() == Hexagon::COPY) {
    const MachineOperand &Src = AddaslDef->getOperand(1);
    if (!Src.isReg() || !Register::isVirtualRegister(Src.getReg()))
      break;
    AddaslDef = MRI->getUniqueVRegDef(Src.getReg());
  }

  // The rr form has no immediate offset, so the io access must be at offset 0.
  if (!AddaslDef || AddaslDef->getOpcode() != Hexagon::S2_addasl_rrri ||
      (OffMO.isImm() && OffMO.getImm() != 0))
    return nullptr;

  // The rr addressing mode only encodes shift amounts 0..3.
  int64_t ImmVal = AddaslDef->getOperand(3).getImm();
  if (ImmVal > 3 || ImmVal < 0)
    return nullptr;

  MachineInstr *WidenedInstr = nullptr;
  auto Reg1 = AddaslDef->getOperand(1).getReg();
  auto Reg2 = AddaslDef->getOperand(2).getReg();
  unsigned Reg2SubReg = AddaslDef->getOperand(2).getSubReg();

  // S4_storerd_rr and L4_loadrd_rr require IntRegs for Reg2.
  const TargetRegisterClass *Reg2RC = MRI->getRegClass(Reg2);
  // Cannot widen if:
  // 1. DoubleReg without subreg, or
  // 2. Neither DoubleReg nor IntReg
  if ((Hexagon::DoubleRegsRegClass.hasSubClassEq(Reg2RC) && Reg2SubReg == 0) ||
      (!Hexagon::DoubleRegsRegClass.hasSubClassEq(Reg2RC) &&
       !Hexagon::IntRegsRegClass.hasSubClassEq(Reg2RC)))
    return nullptr;

  auto Reg3 = AddaslDef->getOperand(3).getImm();
  MachineInstrBuilder MIB;

  if (Mode == WideningMode::Load) {
    auto LdOp = FirstMem->getOperand(0);
    MIB = BuildMI(*MF, DL, TII->get(Hexagon::L4_loadrd_rr))
              .addDef(NewPairReg, getKillRegState(LdOp.isKill()),
                      LdOp.getSubReg())
              .addReg(Reg1)
              .addReg(Reg2, {}, Reg2SubReg)
              .addImm(Reg3);
  } else {
    MIB = BuildMI(*MF, DL, TII->get(Hexagon::S4_storerd_rr))
              .addReg(Reg1)
              .addReg(Reg2, {}, Reg2SubReg)
              .addImm(Reg3)
              .addReg(NewPairReg);
  }

  // Reset kill flags of the addasl instruction, since its operands are now
  // also read by the new memory instruction.
  AddaslDef->getOperand(1).setIsKill(false);
  AddaslDef->getOperand(2).setIsKill(false);

  WidenedInstr = *&MIB;
  return WidenedInstr;
}
622
/// Given an "old group" OG of stores, create a "new group" NG of instructions
/// to replace them. Ideally, NG would only have a single instruction in it,
/// but that may only be possible for store-immediate.
bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
                                                unsigned TotalSize) {
  // XXX Current limitations:
  // - only handle a TotalSize of up to 8

  LLVM_DEBUG(dbgs() << "Creating wide stores\n");
  if (TotalSize > MaxWideSize)
    return false;

  uint64_t Acc = 0;  // Value accumulator.
  unsigned Shift = 0; // Bit position of the next immediate within Acc.
  bool HaveImm = false;
  bool HaveReg = false;

  // First pass: classify the group as all-immediate or all-register, and for
  // immediates fold the little-endian-ordered values into Acc.
  for (MachineInstr *MI : OG) {
    const MachineMemOperand &MMO = getMemTarget(MI);
    MachineOperand &SO = HII->isPostIncrement(*MI)
                             ? MI->getOperand(3)
                             : MI->getOperand(2); // Source.
    unsigned NBits;
    uint64_t Mask;
    uint64_t Val;

    switch (SO.getType()) {
    case MachineOperand::MO_Immediate:
      LLVM_DEBUG(dbgs() << "Have store immediate\n");
      HaveImm = true;

      NBits = MMO.getSizeInBits().toRaw();
      Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits));
      Val = (SO.getImm() & Mask) << Shift;
      Acc |= Val;
      Shift += NBits;
      break;
    case MachineOperand::MO_Register:
      HaveReg = true;
      break;
    default:
      LLVM_DEBUG(dbgs() << "Unhandled store\n");
      return false;
    }
  }

  // Mixing register and immediate sources is not handled.
  if (HaveImm && HaveReg) {
    LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n");
    return false;
  }

  MachineInstr *FirstSt = OG.front();
  DebugLoc DL = OG.back()->getDebugLoc();
  const MachineMemOperand &OldM = getMemTarget(FirstSt);
  // Memory operand for the widened store: same location/flags, wider size.
  MachineMemOperand *NewM =
      MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
                               TotalSize, OldM.getAlign(), OldM.getAAInfo());
  MachineInstr *StI;
  MachineOperand &MR =
      (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1)
                                      : FirstSt->getOperand(0));
  auto SecondSt = OG.back();
  if (HaveReg) {
    // Register case: combine the two 32-bit sources into a double register
    // and emit a single 64-bit store.
    MachineOperand FReg =
        (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3)
                                        : FirstSt->getOperand(2));
    // Post increments appear first in the sorted group.
    // Cannot have a post increment for the second instruction
    assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc");
    MachineOperand SReg = SecondSt->getOperand(2);
    assert(FReg.isReg() && SReg.isReg() &&
           "Cannot merge store register and store imm");
    const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew);
    Register VReg =
        MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
    // A2_combinew places its first operand in the high half.
    MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg);
    NG.push_back(CombI);

    if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) {
      const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi);
      auto IncDestMO = FirstSt->getOperand(0);
      auto IncMO = FirstSt->getOperand(2);
      StI =
          BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg);
    } else {
      // Prefer the rr form when the base comes from an addasl.
      StI = widenLoadStoreAddAsl(VReg, FirstSt, DL);
      if (!StI) {
        const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
        auto OffMO = FirstSt->getOperand(1);
        StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg);
      }
    }
    StI->addMemOperand(*MF, NewM);
    NG.push_back(StI);
    return true;
  }

  // Handle store immediates
  // There are no post increment store immediates on Hexagon
  assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc");
  auto Off = FirstSt->getOperand(1).getImm();
  if (TotalSize == 8) {
    // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg
    uint64_t Mask = 0xFFFFFFFFU;
    int LowerAcc = int(Mask & Acc);
    int UpperAcc = Acc >> 32;
    Register DReg =
        MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
    MachineInstr *CombI;
    if (Acc != 0) {
      const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
      const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0);
      Register VReg = MF->getRegInfo().createVirtualRegister(RC);
      MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
      NG.push_back(TfrI);
      const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir);
      CombI = BuildMI(*MF, DL, CombD, DReg)
                  .addImm(UpperAcc)
                  .addReg(VReg, RegState::Kill);
    }
    // If immediates are 0, we do not need A2_tfrsi
    else {
      const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii);
      CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0);
    }
    NG.push_back(CombI);
    const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
    StI =
        BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill);
  } else if (Acc < 0x10000) {
    // Create mem[hw] = #Acc
    // Value fits the store-immediate encoding; no transfer needed.
    unsigned WOpc = (TotalSize == 2)   ? Hexagon::S4_storeirh_io
                    : (TotalSize == 4) ? Hexagon::S4_storeiri_io
                                       : 0;
    assert(WOpc && "Unexpected size");

    int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
    const MCInstrDesc &StD = TII->get(WOpc);
    StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val);
  } else {
    // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
    const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
    const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0);
    Register VReg = MF->getRegInfo().createVirtualRegister(RC);
    MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
    NG.push_back(TfrI);

    unsigned WOpc = (TotalSize == 2)   ? Hexagon::S2_storerh_io
                    : (TotalSize == 4) ? Hexagon::S2_storeri_io
                                       : 0;
    assert(WOpc && "Unexpected size");

    const MCInstrDesc &StD = TII->get(WOpc);
    StI =
        BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill);
  }
  StI->addMemOperand(*MF, NewM);
  NG.push_back(StI);

  return true;
}
784
/// Given an "old group" OG of loads, create a "new group" NG of instructions
/// to replace them. Ideally, NG would only have a single instruction in it,
/// but that may only be possible for double register loads.
///
/// NG receives: one 64-bit load into a fresh double register, plus two COPYs
/// extracting the low/high halves into the original destination registers.
bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG,
                                               unsigned TotalSize) {
  LLVM_DEBUG(dbgs() << "Creating wide loads\n");
  // XXX Current limitations:
  // - only expect stores of immediate values in OG,
  // - only handle a TotalSize of up to 8
  if (TotalSize > MaxWideSize)
    return false;
  assert(OG.size() == 2 && "Expecting two elements in Instruction Group.");

  MachineInstr *FirstLd = OG.front();
  const MachineMemOperand &OldM = getMemTarget(FirstLd);
  // Memory operand for the widened load: same location/flags, wider size.
  MachineMemOperand *NewM =
      MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
                               TotalSize, OldM.getAlign(), OldM.getAAInfo());

  MachineOperand &MR = FirstLd->getOperand(0);
  MachineOperand &MRBase =
      (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2)
                                      : FirstLd->getOperand(1));
  DebugLoc DL = OG.back()->getDebugLoc();

  // Create the double register Load Instruction.
  Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass);
  MachineInstr *LdI;

  // Post increments appear first in the sorted group
  if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) {
    auto IncDestMO = FirstLd->getOperand(1);
    auto IncMO = FirstLd->getOperand(3);
    LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi))
              .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
              .add(IncDestMO)
              .add(MRBase)
              .add(IncMO);
    LdI->addMemOperand(*MF, NewM);
  } else {
    // Try rr widening with addasl-defined base; otherwise fall back to io.
    MachineInstr *RR = widenLoadStoreAddAsl(NewMR, FirstLd, DL);
    if (RR) {
      RR->addMemOperand(*MF, NewM);
      LdI = RR;
    } else {
      auto OffMO = FirstLd->getOperand(2);
      LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io))
                .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
                .add(MRBase)
                .add(OffMO);
      LdI->addMemOperand(*MF, NewM);
    }
  }
  NG.push_back(LdI);

  // Copy one half of the wide pair into the destination register of DstReg's
  // original narrow load.
  auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg,
                        MachineInstr *DstReg) {
    Register DestReg = DstReg->getOperand(0).getReg();
    return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg)
        .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg);
  };

  // Low half corresponds to the first (lower-addressed) load, high half to
  // the second.
  MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd);
  MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back());
  NG.push_back(LdI_lo);
  NG.push_back(LdI_hi);

  return true;
}
855
// Replace instructions from the old group OG with instructions from the
// new group NG. Conceptually, remove all instructions in OG, and then
// insert all instructions in NG, starting at where the first instruction
// from OG was (in the order in which they appeared in the basic block).
// (The ordering in OG does not have to match the order in the basic block.)
//
// Always returns true: by the time this is called the groups have already
// been validated, so replacement cannot fail.
bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) {
  LLVM_DEBUG({
    dbgs() << "Replacing:\n";
    for (auto I : OG)
      dbgs() << "  " << *I;
    dbgs() << "with\n";
    for (auto I : NG)
      dbgs() << "  " << *I;
  });

  MachineBasicBlock *MBB = OG.back()->getParent();
  MachineBasicBlock::iterator InsertAt = MBB->end();

  // Need to establish the insertion point.
  // For loads the best one is right before the first load in the OG,
  // but in the order in which the insts occur in the program list.
  // For stores the best point is right after the last store in the OG.
  // Since the ordering in OG does not correspond
  // to the order in the program list, we need to do some work to find
  // the insertion point.

  // Create a set of all instructions in OG (for quick lookup).
  InstrSet OldMemInsts(llvm::from_range, OG);

  if (Mode == WideningMode::Load) {
    // Find the first load instruction in the block that is present in OG.
    // Forward scan; the MachineInstr& converts implicitly to an iterator.
    for (auto &I : *MBB) {
      if (OldMemInsts.count(Ptr: &I)) {
        InsertAt = I;
        break;
      }
    }

    assert((InsertAt != MBB->end()) && "Cannot locate any load from the group");

    // Insert the wide load and its COPYs, in order, before that point.
    for (auto *I : NG)
      MBB->insert(I: InsertAt, MI: I);
  } else {
    // Find the last store instruction in the block that is present in OG.
    // Reverse scan from the bottom of the block.
    auto I = MBB->rbegin();
    for (; I != MBB->rend(); ++I) {
      if (OldMemInsts.count(Ptr: &(*I))) {
        InsertAt = (*I).getIterator();
        break;
      }
    }

    assert((I != MBB->rend()) && "Cannot locate any store from the group");

    // Insert NG in reverse so repeated insertAfter yields program order.
    for (auto I = NG.rbegin(); I != NG.rend(); ++I)
      MBB->insertAfter(I: InsertAt, MI: *I);
  }

  // The old narrow accesses are now fully superseded; remove them.
  for (auto *I : OG)
    I->eraseFromParent();

  return true;
}
919
920// Break up the group into smaller groups, each of which can be replaced by
921// a single wide load/store. Widen each such smaller group and replace the old
922// instructions with the widened ones.
923bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) {
924 bool Changed = false;
925 InstrGroup::iterator I = Group.begin(), E = Group.end();
926 InstrGroup OG, NG; // Old and new groups.
927 unsigned CollectedSize;
928
929 while (I != E) {
930 OG.clear();
931 NG.clear();
932
933 bool Succ = selectInsts(Begin: I++, End: E, OG, TotalSize&: CollectedSize, MaxSize: MaxWideSize) &&
934 createWideInsts(OG, NG, TotalSize: CollectedSize) && replaceInsts(OG, NG);
935 if (!Succ)
936 continue;
937
938 assert(OG.size() > 1 && "Created invalid group");
939 assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements");
940 I += OG.size() - 1;
941
942 Changed = true;
943 }
944
945 return Changed;
946}
947
948// Process a single basic block: create the load/store groups, and replace them
949// with the widened insts, if possible. Processing of each basic block
950// is independent from processing of any other basic block. This transfor-
951// mation could be stopped after having processed any basic block without
952// any ill effects (other than not having performed widening in the unpro-
953// cessed blocks). Also, the basic blocks can be processed in any order.
954bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
955 InstrGroupList SGs;
956 bool Changed = false;
957
958 // To prevent long compile time check for max BB size.
959 if (MBB.size() > MaxMBBSizeForLoadStoreWidening)
960 return false;
961
962 createGroups(MBB, StoreGroups&: SGs);
963
964 auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool {
965 return getOffset(MI: A) < getOffset(MI: B);
966 };
967 for (auto &G : SGs) {
968 assert(G.size() > 1 && "Group with fewer than 2 elements");
969 llvm::sort(C&: G, Comp: Less);
970
971 Changed |= processGroup(Group&: G);
972 }
973
974 return Changed;
975}
976
977bool HexagonLoadStoreWidening::run() {
978 bool Changed = false;
979
980 for (auto &B : *MF)
981 Changed |= processBasicBlock(MBB&: B);
982
983 return Changed;
984}
985
986FunctionPass *llvm::createHexagonStoreWidening() {
987 return new HexagonStoreWidening();
988}
989
990FunctionPass *llvm::createHexagonLoadWidening() {
991 return new HexagonLoadWidening();
992}
993