LoadStoreOpt.cpp source code [llvm_projects/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp]

1	//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -- C++ --==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	/// This file implements the LoadStoreOpt optimization pass.
10	//===----------------------------------------------------------------------===//
11
12	#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
13	#include "llvm/ADT/STLExtras.h"
14	#include "llvm/ADT/SmallPtrSet.h"
15	#include "llvm/ADT/Statistic.h"
16	#include "llvm/Analysis/AliasAnalysis.h"
17	#include "llvm/Analysis/MemoryLocation.h"
18	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
19	#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20	#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21	#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
22	#include "llvm/CodeGen/GlobalISel/Utils.h"
23	#include "llvm/CodeGen/LowLevelTypeUtils.h"
24	#include "llvm/CodeGen/MachineBasicBlock.h"
25	#include "llvm/CodeGen/MachineFrameInfo.h"
26	#include "llvm/CodeGen/MachineFunction.h"
27	#include "llvm/CodeGen/MachineInstr.h"
28	#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
29	#include "llvm/CodeGen/MachineRegisterInfo.h"
30	#include "llvm/CodeGen/Register.h"
31	#include "llvm/CodeGen/TargetLowering.h"
32	#include "llvm/CodeGen/TargetOpcodes.h"
33	#include "llvm/IR/DebugInfoMetadata.h"
34	#include "llvm/InitializePasses.h"
35	#include "llvm/Support/AtomicOrdering.h"
36	#include "llvm/Support/Casting.h"
37	#include "llvm/Support/Debug.h"
38	#include "llvm/Support/ErrorHandling.h"
39	#include "llvm/Support/MathExtras.h"
40	#include <algorithm>
41
42	#define DEBUG_TYPE "loadstore-opt"
43
44	using namespace llvm;
45	using namespace ore;
46	using namespace MIPatternMatch;
47
48	STATISTIC(NumStoresMerged, "Number of stores merged");
49
50	const unsigned MaxStoreSizeToForm = `128`;
51
52	char LoadStoreOpt::ID = `0`;
53	INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
54	false, false)
55	INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
56	false, false)
57
58	LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
59	: MachineFunctionPass (ID), DoNotRunPass (F) {}
60
61	LoadStoreOpt::LoadStoreOpt()
62	: LoadStoreOpt ([](const MachineFunction &) { return false; }) {}
63
64	void LoadStoreOpt::init(MachineFunction &MF) {
65	this->MF = &MF;
66	MRI = &MF.getRegInfo();
67	AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
68	TLI = MF.getSubtarget().getTargetLowering();
69	LI = MF.getSubtarget().getLegalizerInfo();
70	Builder.setMF(MF);
71	IsPreLegalizer = !MF.getProperties().hasProperty(
72	P: MachineFunctionProperties::Property::Legalized);
73	InstsToErase.clear();
74	}
75
76	void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
77	AU.addRequired<AAResultsWrapperPass>();
78	AU.setPreservesAll();
79	getSelectionDAGFallbackAnalysisUsage(AU);
80	MachineFunctionPass::getAnalysisUsage(AU);
81	}
82
83	BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
84	MachineRegisterInfo &MRI) {
85	BaseIndexOffset Info;
86	Register PtrAddRHS;
87	Register BaseReg;
88	if (!mi_match(R: Ptr, MRI, P: m_GPtrAdd(L: m_Reg(R&: BaseReg), R: m_Reg(R&: PtrAddRHS)))) {
89	Info.setBase(Ptr);
90	Info.setOffset(`0`);
91	return Info;
92	}
93	Info.setBase(BaseReg);
94	auto RHSCst = getIConstantVRegValWithLookThrough(VReg: PtrAddRHS, MRI);
95	if (RHSCst)
96	Info.setOffset(RHSCst ->Value.getSExtValue());
97
98	// Just recognize a simple case for now. In future we'll need to match
99	// indexing patterns for base + index + constant.
100	Info.setIndex(PtrAddRHS);
101	return Info;
102	}
103
104	bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
105	const MachineInstr &MI2,
106	bool &IsAlias,
107	MachineRegisterInfo &MRI) {
108	auto *LdSt1 = dyn_cast<GLoadStore>(Val: &MI1);
109	auto *LdSt2 = dyn_cast<GLoadStore>(Val: &MI2);
110	if (!LdSt1 \|\| !LdSt2)
111	return false;
112
113	BaseIndexOffset BasePtr0 = getPointerInfo(Ptr: LdSt1->getPointerReg(), MRI);
114	BaseIndexOffset BasePtr1 = getPointerInfo(Ptr: LdSt2->getPointerReg(), MRI);
115
116	if (!BasePtr0.getBase().isValid() \|\| !BasePtr1.getBase().isValid())
117	return false;
118
119	LocationSize Size1 = LdSt1->getMemSize();
120	LocationSize Size2 = LdSt2->getMemSize();
121
122	int64_t PtrDiff;
123	if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
124	BasePtr1.hasValidOffset()) {
125	PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
126	// If the size of memory access is unknown, do not use it to do analysis.
127	// One example of unknown size memory access is to load/store scalable
128	// vector objects on the stack.
129	// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
130	// following situations arise:
131	if (PtrDiff >= `0` && Size1.hasValue() && !Size1.isScalable()) {
132	// [----BasePtr0----]
133	// [---BasePtr1--]
134	// ========PtrDiff========>
135	IsAlias = !((int64_t)Size1.getValue() <= PtrDiff);
136	return true;
137	}
138	if (PtrDiff < `0` && Size2.hasValue() && !Size2.isScalable()) {
139	// [----BasePtr0----]
140	// [---BasePtr1--]
141	// =====(-PtrDiff)====>
142	IsAlias = !((PtrDiff + (int64_t)Size2.getValue()) <= `0`);
143	return true;
144	}
145	return false;
146	}
147
148	// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
149	// able to calculate their relative offset if at least one arises
150	// from an alloca. However, these allocas cannot overlap and we
151	// can infer there is no alias.
152	auto *Base0Def = getDefIgnoringCopies(Reg: BasePtr0.getBase(), MRI);
153	auto *Base1Def = getDefIgnoringCopies(Reg: BasePtr1.getBase(), MRI);
154	if (!Base0Def \|\| !Base1Def)
155	return false; // Couldn't tell anything.
156
157
158	if (Base0Def->getOpcode() != Base1Def->getOpcode())
159	return false;
160
161	if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
162	MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
163	// If the bases have the same frame index but we couldn't find a
164	// constant offset, (indices are different) be conservative.
165	if (Base0Def != Base1Def &&
166	(!MFI.isFixedObjectIndex(ObjectIdx: Base0Def->getOperand(i: `1`).getIndex()) \|\|
167	!MFI.isFixedObjectIndex(ObjectIdx: Base1Def->getOperand(i: `1`).getIndex()))) {
168	IsAlias = false;
169	return true;
170	}
171	}
172
173	// This implementation is a lot more primitive than the SDAG one for now.
174	// FIXME: what about constant pools?
175	if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
176	auto GV0 = Base0Def->getOperand(i: `1`).getGlobal();
177	auto GV1 = Base1Def->getOperand(i: `1`).getGlobal();
178	if (GV0 != GV1) {
179	IsAlias = false;
180	return true;
181	}
182	}
183
184	// Can't tell anything about aliasing.
185	return false;
186	}
187
188	bool GISelAddressing::instMayAlias(const MachineInstr &MI,
189	const MachineInstr &Other,
190	MachineRegisterInfo &MRI,
191	AliasAnalysis *AA) {
192	struct MemUseCharacteristics {
193	bool IsVolatile;
194	bool IsAtomic;
195	Register BasePtr;
196	int64_t Offset;
197	LocationSize NumBytes;
198	MachineMemOperand *MMO;
199	};
200
201	auto getCharacteristics =
202	[&](const MachineInstr *MI) -> MemUseCharacteristics {
203	if (const auto *LS = dyn_cast<GLoadStore>(Val: MI)) {
204	Register BaseReg;
205	int64_t Offset = `0`;
206	// No pre/post-inc addressing modes are considered here, unlike in SDAG.
207	if (!mi_match(R: LS->getPointerReg(), MRI,
208	P: m_GPtrAdd(L: m_Reg(R&: BaseReg), R: m_ICst(Cst&: Offset)))) {
209	BaseReg = LS->getPointerReg();
210	Offset = `0`;
211	}
212
213	LocationSize Size = LS->getMMO().getSize();
214	return {.IsVolatile: LS->isVolatile(), .IsAtomic: LS->isAtomic(), .BasePtr: BaseReg,
215	.Offset: Offset /base offset/, .NumBytes: Size, .MMO: &LS->getMMO()};
216	}
217	// FIXME: support recognizing lifetime instructions.
218	// Default.
219	return {.IsVolatile: false /isvolatile/,
220	/isAtomic/ .IsAtomic: false,
221	.BasePtr: Register (),
222	.Offset: (int64_t)`0` /offset/,
223	.NumBytes: LocationSize::beforeOrAfterPointer() /size/,
224	.MMO: (MachineMemOperand )nullptr*};
225	};
226	MemUseCharacteristics MUC0 = getCharacteristics (&MI),
227	MUC1 = getCharacteristics (&Other);
228
229	// If they are to the same address, then they must be aliases.
230	if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
231	MUC0.Offset == MUC1.Offset)
232	return true;
233
234	// If they are both volatile then they cannot be reordered.
235	if (MUC0.IsVolatile && MUC1.IsVolatile)
236	return true;
237
238	// Be conservative about atomics for the moment
239	// TODO: This is way overconservative for unordered atomics (see D66309)
240	if (MUC0.IsAtomic && MUC1.IsAtomic)
241	return true;
242
243	// If one operation reads from invariant memory, and the other may store, they
244	// cannot alias.
245	if (MUC0.MMO && MUC1.MMO) {
246	if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) \|\|
247	(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
248	return false;
249	}
250
251	// If NumBytes is scalable and offset is not 0, conservatively return may
252	// alias
253	if ((MUC0.NumBytes.isScalable() && MUC0.Offset != `0`) \|\|
254	(MUC1.NumBytes.isScalable() && MUC1.Offset != `0`))
255	return true;
256
257	const bool BothNotScalable =
258	!MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable();
259
260	// Try to prove that there is aliasing, or that there is no aliasing. Either
261	// way, we can return now. If nothing can be proved, proceed with more tests.
262	bool IsAlias;
263	if (BothNotScalable &&
264	GISelAddressing::aliasIsKnownForLoadStore(MI1: MI, MI2: Other, IsAlias, MRI))
265	return IsAlias;
266
267	// The following all rely on MMO0 and MMO1 being valid.
268	if (!MUC0.MMO \|\| !MUC1.MMO)
269	return true;
270
271	// FIXME: port the alignment based alias analysis from SDAG's isAlias().
272	int64_t SrcValOffset0 = MUC0.MMO->getOffset();
273	int64_t SrcValOffset1 = MUC1.MMO->getOffset();
274	LocationSize Size0 = MUC0.NumBytes;
275	LocationSize Size1 = MUC1.NumBytes;
276	if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0.hasValue() &&
277	Size1.hasValue()) {
278	// Use alias analysis information.
279	int64_t MinOffset = std::min(a: SrcValOffset0, b: SrcValOffset1);
280	int64_t Overlap0 =
281	Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
282	int64_t Overlap1 =
283	Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
284	LocationSize Loc0 =
285	Size0.isScalable() ? Size0 : LocationSize::precise(Value: Overlap0);
286	LocationSize Loc1 =
287	Size1.isScalable() ? Size1 : LocationSize::precise(Value: Overlap1);
288
289	if (AA->isNoAlias(
290	LocA: MemoryLocation (MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()),
291	LocB: MemoryLocation (MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo())))
292	return false;
293	}
294
295	// Otherwise we have to assume they alias.
296	return true;
297	}
298
299	/// Returns true if the instruction creates an unavoidable hazard that
300	/// forces a boundary between store merge candidates.
301	static bool isInstHardMergeHazard(MachineInstr &MI) {
302	return MI.hasUnmodeledSideEffects() \|\| MI.hasOrderedMemoryRef();
303	}
304
305	bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
306	// Try to merge all the stores in the vector, splitting into separate segments
307	// as necessary.
308	assert(StoresToMerge.size() > `1` && "Expected multiple stores to merge");
309	LLT OrigTy = MRI->getType(Reg: StoresToMerge [`0`]->getValueReg());
310	LLT PtrTy = MRI->getType(Reg: StoresToMerge [`0`]->getPointerReg());
311	unsigned AS = PtrTy.getAddressSpace();
312	// Ensure the legal store info is computed for this address space.
313	initializeStoreMergeTargetInfo(AddrSpace: AS);
314	const auto &LegalSizes = LegalStoreSizes [AS];
315
316	#ifndef NDEBUG
317	for (auto *StoreMI : StoresToMerge)
318	assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
319	#endif
320
321	const auto &DL = MF->getFunction().getDataLayout();
322	bool AnyMerged = false;
323	do {
324	unsigned NumPow2 = llvm::bit_floor(Value: StoresToMerge.size());
325	unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
326	// Compute the biggest store we can generate to handle the number of stores.
327	unsigned MergeSizeBits;
328	for (MergeSizeBits = MaxSizeBits; MergeSizeBits > `1`; MergeSizeBits /= `2`) {
329	LLT StoreTy = LLT::scalar(SizeInBits: MergeSizeBits);
330	EVT StoreEVT =
331	getApproximateEVTForLLT(Ty: StoreTy, DL, Ctx&: MF->getFunction().getContext());
332	if (LegalSizes.size() > MergeSizeBits && LegalSizes [MergeSizeBits] &&
333	TLI->canMergeStoresTo(AS, MemVT: StoreEVT, MF: *MF) &&
334	(TLI->isTypeLegal(VT: StoreEVT)))
335	break; // We can generate a MergeSize bits store.
336	}
337	if (MergeSizeBits <= OrigTy.getSizeInBits())
338	return AnyMerged; // No greater merge.
339
340	unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
341	// Perform the actual merging.
342	SmallVector<GStore *, `8`> SingleMergeStores(
343	StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
344	AnyMerged \|= doSingleStoreMerge(Stores&: SingleMergeStores);
345	StoresToMerge.erase(CS: StoresToMerge.begin(),
346	CE: StoresToMerge.begin() + NumStoresToMerge);
347	} while (StoresToMerge.size() > `1`);
348	return AnyMerged;
349	}
350
351	bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
352	MachineFunction &MF) const {
353	auto Action = LI->getAction(Query).Action;
354	// If the instruction is unsupported, it can't be legalized at all.
355	if (Action == LegalizeActions::Unsupported)
356	return false;
357	return IsPreLegalizer \|\| Action == LegalizeAction::Legal;
358	}
359
360	bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
361	assert(Stores.size() > `1`);
362	// We know that all the stores are consecutive and there are no aliasing
363	// operations in the range. However, the values that are being stored may be
364	// generated anywhere before each store. To ensure we have the values
365	// available, we materialize the wide value and new store at the place of the
366	// final store in the merge sequence.
367	GStore *FirstStore = Stores [`0`];
368	const unsigned NumStores = Stores.size();
369	LLT SmallTy = MRI->getType(Reg: FirstStore->getValueReg());
370	LLT WideValueTy =
371	LLT::scalar(SizeInBits: NumStores * SmallTy.getSizeInBits().getFixedValue());
372
373	// For each store, compute pairwise merged debug locs.
374	DebugLoc MergedLoc = Stores.front()->getDebugLoc();
375	for (auto *Store : drop_begin(RangeOrContainer&: Stores))
376	MergedLoc = DILocation::getMergedLocation(LocA: MergedLoc, LocB: Store->getDebugLoc());
377
378	Builder.setInstr(*Stores.back());
379	Builder.setDebugLoc(MergedLoc);
380
381	// If all of the store values are constants, then create a wide constant
382	// directly. Otherwise, we need to generate some instructions to merge the
383	// existing values together into a wider type.
384	SmallVector<APInt, `8`> ConstantVals;
385	for (auto *Store : Stores) {
386	auto MaybeCst =
387	getIConstantVRegValWithLookThrough(VReg: Store->getValueReg(), MRI: *MRI);
388	if (!MaybeCst) {
389	ConstantVals.clear();
390	break;
391	}
392	ConstantVals.emplace_back(Args&: MaybeCst ->Value);
393	}
394
395	Register WideReg;
396	auto *WideMMO =
397	MF->getMachineMemOperand(MMO: &FirstStore->getMMO(), Offset: `0`, Ty: WideValueTy);
398	if (ConstantVals.empty()) {
399	// Mimic the SDAG behaviour here and don't try to do anything for unknown
400	// values. In future, we should also support the cases of loads and
401	// extracted vector elements.
402	return false;
403	}
404
405	assert(ConstantVals.size() == NumStores);
406	// Check if our wide constant is legal.
407	if (!isLegalOrBeforeLegalizer(Query: {TargetOpcode::G_CONSTANT, {WideValueTy}}, MF&: *MF))
408	return false;
409	APInt WideConst(WideValueTy.getSizeInBits(), `0`);
410	for (unsigned Idx = `0`; Idx < ConstantVals.size(); ++Idx) {
411	// Insert the smaller constant into the corresponding position in the
412	// wider one.
413	WideConst.insertBits(SubBits: ConstantVals [Idx], bitPosition: Idx * SmallTy.getSizeInBits());
414	}
415	WideReg = Builder.buildConstant(Res: WideValueTy, Val: WideConst).getReg(Idx: `0`);
416	auto NewStore =
417	Builder.buildStore(Val: WideReg, Addr: FirstStore->getPointerReg(), MMO&: *WideMMO);
418	(void) NewStore;
419	LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
420	<< " stores into merged store: " << *NewStore);
421	LLVM_DEBUG(for (auto MI : Stores) dbgs() << " " << MI;);
422	NumStoresMerged += Stores.size();
423
424	MachineOptimizationRemarkEmitter MORE(MF, nullptr*);
425	MORE.emit(RemarkBuilder: [&]() {
426	MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
427	FirstStore->getDebugLoc(),
428	FirstStore->getParent());
429	R << "Merged " << NV ("NumMerged", Stores.size()) << " stores of "
430	<< NV ("OrigWidth", SmallTy.getSizeInBytes())
431	<< " bytes into a single store of "
432	<< NV ("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
433	return R;
434	});
435
436	for (auto *MI : Stores)
437	InstsToErase.insert(Ptr: MI);
438	return true;
439	}
440
441	bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
442	if (C.Stores.size() < `2`) {
443	C.reset();
444	return false;
445	}
446
447	LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
448	<< " stores, starting with " << *C.Stores[`0`]);
449	// We know that the stores in the candidate are adjacent.
450	// Now we need to check if any potential aliasing instructions recorded
451	// during the search alias with load/stores added to the candidate after.
452	// For example, if we have the candidate:
453	// C.Stores = [ST1, ST2, ST3, ST4]
454	// and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
455	// ST2, then we would have recorded it into the PotentialAliases structure
456	// with the associated index value of "1". Then we see ST3 and ST4 and add
457	// them to the candidate group. We know that LD1 does not alias with ST1 or
458	// ST2, since we already did that check. However we don't yet know if it
459	// may alias ST3 and ST4, so we perform those checks now.
460	SmallVector<GStore *> StoresToMerge;
461
462	auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
463	for (auto AliasInfo : reverse(C&: C.PotentialAliases)) {
464	MachineInstr *PotentialAliasOp = AliasInfo.first;
465	unsigned PreCheckedIdx = AliasInfo.second;
466	if (static_cast<unsigned>(Idx) < PreCheckedIdx) {
467	// Once our store index is lower than the index associated with the
468	// potential alias, we know that we've already checked for this alias
469	// and all of the earlier potential aliases too.
470	return false;
471	}
472	// Need to check this alias.
473	if (GISelAddressing::instMayAlias(MI: CheckStore, Other: PotentialAliasOp, MRI&: MRI,
474	AA)) {
475	LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
476	<< " detected\n");
477	return true;
478	}
479	}
480	return false;
481	};
482	// Start from the last store in the group, and check if it aliases with any
483	// of the potential aliasing operations in the list.
484	for (int StoreIdx = C.Stores.size() - `1`; StoreIdx >= `0`; --StoreIdx) {
485	auto *CheckStore = C.Stores [StoreIdx];
486	if (DoesStoreAliasWithPotential (StoreIdx, *CheckStore))
487	continue;
488	StoresToMerge.emplace_back(Args&: CheckStore);
489	}
490
491	LLVM_DEBUG(dbgs() << StoresToMerge.size()
492	<< " stores remaining after alias checks. Merging...\n");
493
494	// Now we've checked for aliasing hazards, merge any stores left.
495	C.reset();
496	if (StoresToMerge.size() < `2`)
497	return false;
498	return mergeStores(StoresToMerge);
499	}
500
501	bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
502	StoreMergeCandidate &C) {
503	if (C.Stores.empty())
504	return false;
505	return llvm::any_of(Range&: C.Stores, P: [&](MachineInstr *OtherMI) {
506	return instMayAlias(MI, Other: OtherMI, MRI&: MRI, AA);
507	});
508	}
509
510	void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
511	PotentialAliases.emplace_back(Args: std::make_pair(x: &MI, y: Stores.size() - `1`));
512	}
513
514	bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
515	StoreMergeCandidate &C) {
516	// Check if the given store writes to an adjacent address, and other
517	// requirements.
518	LLT ValueTy = MRI->getType(Reg: StoreMI.getValueReg());
519	LLT PtrTy = MRI->getType(Reg: StoreMI.getPointerReg());
520
521	// Only handle scalars.
522	if (!ValueTy.isScalar())
523	return false;
524
525	// Don't allow truncating stores for now.
526	if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
527	return false;
528
529	// Avoid adding volatile or ordered stores to the candidate. We already have a
530	// check for this in instMayAlias() but that only get's called later between
531	// potential aliasing hazards.
532	if (!StoreMI.isSimple())
533	return false;
534
535	Register StoreAddr = StoreMI.getPointerReg();
536	auto BIO = getPointerInfo(Ptr: StoreAddr, MRI&: *MRI);
537	Register StoreBase = BIO.getBase();
538	if (C.Stores.empty()) {
539	C.BasePtr = StoreBase;
540	if (!BIO.hasValidOffset()) {
541	C.CurrentLowestOffset = `0`;
542	} else {
543	C.CurrentLowestOffset = BIO.getOffset();
544	}
545	// This is the first store of the candidate.
546	// If the offset can't possibly allow for a lower addressed store with the
547	// same base, don't bother adding it.
548	if (BIO.hasValidOffset() &&
549	BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
550	return false;
551	C.Stores.emplace_back(Args: &StoreMI);
552	LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
553	<< StoreMI);
554	return true;
555	}
556
557	// Check the store is the same size as the existing ones in the candidate.
558	if (MRI->getType(Reg: C.Stores [`0`]->getValueReg()).getSizeInBits() !=
559	ValueTy.getSizeInBits())
560	return false;
561
562	if (MRI->getType(Reg: C.Stores [`0`]->getPointerReg()).getAddressSpace() !=
563	PtrTy.getAddressSpace())
564	return false;
565
566	// There are other stores in the candidate. Check that the store address
567	// writes to the next lowest adjacent address.
568	if (C.BasePtr != StoreBase)
569	return false;
570	// If we don't have a valid offset, we can't guarantee to be an adjacent
571	// offset.
572	if (!BIO.hasValidOffset())
573	return false;
574	if ((C.CurrentLowestOffset -
575	static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
576	return false;
577
578	// This writes to an adjacent address. Allow it.
579	C.Stores.emplace_back(Args: &StoreMI);
580	C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
581	LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
582	return true;
583	}
584
585	bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
586	bool Changed = false;
587	// Walk through the block bottom-up, looking for merging candidates.
588	StoreMergeCandidate Candidate;
589	for (MachineInstr &MI : llvm::reverse(C&: MBB)) {
590	if (InstsToErase.contains(Ptr: &MI))
591	continue;
592
593	if (auto *StoreMI = dyn_cast<GStore>(Val: &MI)) {
594	// We have a G_STORE. Add it to the candidate if it writes to an adjacent
595	// address.
596	if (!addStoreToCandidate(StoreMI&: *StoreMI, C&: Candidate)) {
597	// Store wasn't eligible to be added. May need to record it as a
598	// potential alias.
599	if (operationAliasesWithCandidate(MI&: *StoreMI, C&: Candidate)) {
600	Changed \|= processMergeCandidate(C&: Candidate);
601	continue;
602	}
603	Candidate.addPotentialAlias(MI&: *StoreMI);
604	}
605	continue;
606	}
607
608	// If we don't have any stores yet, this instruction can't pose a problem.
609	if (Candidate.Stores.empty())
610	continue;
611
612	// We're dealing with some other kind of instruction.
613	if (isInstHardMergeHazard(MI)) {
614	Changed \|= processMergeCandidate(C&: Candidate);
615	Candidate.Stores.clear();
616	continue;
617	}
618
619	if (!MI.mayLoadOrStore())
620	continue;
621
622	if (operationAliasesWithCandidate(MI, C&: Candidate)) {
623	// We have a potential alias, so process the current candidate if we can
624	// and then continue looking for a new candidate.
625	Changed \|= processMergeCandidate(C&: Candidate);
626	continue;
627	}
628
629	// Record this instruction as a potential alias for future stores that are
630	// added to the candidate.
631	Candidate.addPotentialAlias(MI);
632	}
633
634	// Process any candidate left after finishing searching the entire block.
635	Changed \|= processMergeCandidate(C&: Candidate);
636
637	// Erase instructions now that we're no longer iterating over the block.
638	for (auto *MI : InstsToErase)
639	MI->eraseFromParent();
640	InstsToErase.clear();
641	return Changed;
642	}
643
644	/// Check if the store \p Store is a truncstore that can be merged. That is,
645	/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
646	/// Register then it does not need to match and SrcVal is set to the source
647	/// value found.
648	/// On match, returns the start byte offset of the \p SrcVal that is being
649	/// stored.
650	static std::optional<int64_t>
651	getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
652	MachineRegisterInfo &MRI) {
653	Register TruncVal;
654	if (!mi_match(R: Store.getValueReg(), MRI, P: m_GTrunc(Src: m_Reg(R&: TruncVal))))
655	return std::nullopt;
656
657	// The shift amount must be a constant multiple of the narrow type.
658	// It is translated to the offset address in the wide source value "y".
659	//
660	// x = G_LSHR y, ShiftAmtC
661	// s8 z = G_TRUNC x
662	// store z, ...
663	Register FoundSrcVal;
664	int64_t ShiftAmt;
665	if (!mi_match(R: TruncVal, MRI,
666	P: m_any_of(preds: m_GLShr(L: m_Reg(R&: FoundSrcVal), R: m_ICst(Cst&: ShiftAmt)),
667	preds: m_GAShr(L: m_Reg(R&: FoundSrcVal), R: m_ICst(Cst&: ShiftAmt))))) {
668	if (!SrcVal.isValid() \|\| TruncVal == SrcVal) {
669	if (!SrcVal.isValid())
670	SrcVal = TruncVal;
671	return `0`; // If it's the lowest index store.
672	}
673	return std::nullopt;
674	}
675
676	unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
677	if (ShiftAmt % NarrowBits != `0`)
678	return std::nullopt;
679	const unsigned Offset = ShiftAmt / NarrowBits;
680
681	if (SrcVal.isValid() && FoundSrcVal != SrcVal)
682	return std::nullopt;
683
684	if (!SrcVal.isValid())
685	SrcVal = FoundSrcVal;
686	else if (MRI.getType(Reg: SrcVal) != MRI.getType(Reg: FoundSrcVal))
687	return std::nullopt;
688	return Offset;
689	}
690
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
713	bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
714	SmallPtrSetImpl<GStore *> &DeletedStores) {
715	LLT MemTy = StoreMI.getMMO().getMemoryType();
716
717	// We only handle merging simple stores of 1-4 bytes.
718	if (!MemTy.isScalar())
719	return false;
720	switch (MemTy.getSizeInBits()) {
721	case `8`:
722	case `16`:
723	case `32`:
724	break;
725	default:
726	return false;
727	}
728	if (!StoreMI.isSimple())
729	return false;
730
731	// We do a simple search for mergeable stores prior to this one.
732	// Any potential alias hazard along the way terminates the search.
733	SmallVector<GStore *> FoundStores;
734
735	// We're looking for:
736	// 1) a (store(trunc(...)))
737	// 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
738	// the partial value stored.
739	// 3) where the offsets form either a little or big-endian sequence.
740
741	auto &LastStore = StoreMI;
742
743	// The single base pointer that all stores must use.
744	Register BaseReg;
745	int64_t LastOffset;
746	if (!mi_match(R: LastStore.getPointerReg(), MRI: *MRI,
747	P: m_GPtrAdd(L: m_Reg(R&: BaseReg), R: m_ICst(Cst&: LastOffset)))) {
748	BaseReg = LastStore.getPointerReg();
749	LastOffset = `0`;
750	}
751
752	GStore *LowestIdxStore = &LastStore;
753	int64_t LowestIdxOffset = LastOffset;
754
755	Register WideSrcVal;
756	auto LowestShiftAmt = getTruncStoreByteOffset(Store&: LastStore, SrcVal&: WideSrcVal, MRI&: *MRI);
757	if (!LowestShiftAmt)
758	return false; // Didn't match a trunc.
759	assert(WideSrcVal.isValid());
760
761	LLT WideStoreTy = MRI->getType(Reg: WideSrcVal);
762	// The wide type might not be a multiple of the memory type, e.g. s48 and s32.
763	if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != `0`)
764	return false;
765	const unsigned NumStoresRequired =
766	WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
767
768	SmallVector<int64_t, `8`> OffsetMap(NumStoresRequired, INT64_MAX);
769	OffsetMap [*LowestShiftAmt] = LastOffset;
770	FoundStores.emplace_back(Args: &LastStore);
771
772	const int MaxInstsToCheck = `10`;
773	int NumInstsChecked = `0`;
774	for (auto II = ++LastStore.getReverseIterator();
775	II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
776	++II) {
777	NumInstsChecked++;
778	GStore *NewStore;
779	if ((NewStore = dyn_cast<GStore>(Val: &*II))) {
780	if (NewStore->getMMO().getMemoryType() != MemTy \|\| !NewStore->isSimple())
781	break;
782	} else if (II ->isLoadFoldBarrier() \|\| II ->mayLoad()) {
783	break;
784	} else {
785	continue; // This is a safe instruction we can look past.
786	}
787
788	Register NewBaseReg;
789	int64_t MemOffset;
790	// Check we're storing to the same base + some offset.
791	if (!mi_match(R: NewStore->getPointerReg(), MRI: *MRI,
792	P: m_GPtrAdd(L: m_Reg(R&: NewBaseReg), R: m_ICst(Cst&: MemOffset)))) {
793	NewBaseReg = NewStore->getPointerReg();
794	MemOffset = `0`;
795	}
796	if (BaseReg != NewBaseReg)
797	break;
798
799	auto ShiftByteOffset = getTruncStoreByteOffset(Store&: NewStore, SrcVal&: WideSrcVal, MRI&: MRI);
800	if (!ShiftByteOffset)
801	break;
802	if (MemOffset < LowestIdxOffset) {
803	LowestIdxOffset = MemOffset;
804	LowestIdxStore = NewStore;
805	}
806
807	// Map the offset in the store and the offset in the combined value, and
808	// early return if it has been set before.
809	if (ShiftByteOffset < `0` \|\| ShiftByteOffset >= NumStoresRequired \|\|
810	OffsetMap [*ShiftByteOffset] != INT64_MAX)
811	break;
812	OffsetMap [*ShiftByteOffset] = MemOffset;
813
814	FoundStores.emplace_back(Args&: NewStore);
815	// Reset counter since we've found a matching inst.
816	NumInstsChecked = `0`;
817	if (FoundStores.size() == NumStoresRequired)
818	break;
819	}
820
821	if (FoundStores.size() != NumStoresRequired) {
822	if (FoundStores.size() == `1`)
823	return false;
824	// We didn't find enough stores to merge into the size of the original
825	// source value, but we may be able to generate a smaller store if we
826	// truncate the source value.
827	WideStoreTy = LLT::scalar(SizeInBits: FoundStores.size() * MemTy.getScalarSizeInBits());
828	}
829
830	unsigned NumStoresFound = FoundStores.size();
831
832	const auto &DL = LastStore.getMF()->getDataLayout();
833	auto &C = LastStore.getMF()->getFunction().getContext();
834	// Check that a store of the wide type is both allowed and fast on the target
835	unsigned Fast = `0`;
836	bool Allowed = TLI->allowsMemoryAccess(
837	Context&: C, DL, Ty: WideStoreTy, MMO: LowestIdxStore->getMMO(), Fast: &Fast);
838	if (!Allowed \|\| !Fast)
839	return false;
840
841	// Check if the pieces of the value are going to the expected places in memory
842	// to merge the stores.
843	unsigned NarrowBits = MemTy.getScalarSizeInBits();
844	auto checkOffsets = [&](bool MatchLittleEndian) {
845	if (MatchLittleEndian) {
846	for (unsigned i = `0`; i != NumStoresFound; ++i)
847	if (OffsetMap [i] != i * (NarrowBits / `8`) + LowestIdxOffset)
848	return false;
849	} else { // MatchBigEndian by reversing loop counter.
850	for (unsigned i = `0`, j = NumStoresFound - `1`; i != NumStoresFound;
851	++i, --j)
852	if (OffsetMap [j] != i * (NarrowBits / `8`) + LowestIdxOffset)
853	return false;
854	}
855	return true;
856	};
857
858	// Check if the offsets line up for the native data layout of this target.
859	bool NeedBswap = false;
860	bool NeedRotate = false;
861	if (!checkOffsets (DL.isLittleEndian())) {
862	// Special-case: check if byte offsets line up for the opposite endian.
863	if (NarrowBits == `8` && checkOffsets (DL.isBigEndian()))
864	NeedBswap = true;
865	else if (NumStoresFound == `2` && checkOffsets (DL.isBigEndian()))
866	NeedRotate = true;
867	else
868	return false;
869	}
870
871	if (NeedBswap &&
872	!isLegalOrBeforeLegalizer(Query: {TargetOpcode::G_BSWAP, {WideStoreTy}}, MF&: *MF))
873	return false;
874	if (NeedRotate &&
875	!isLegalOrBeforeLegalizer(
876	Query: {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, MF&: *MF))
877	return false;
878
879	Builder.setInstrAndDebugLoc(StoreMI);
880
881	if (WideStoreTy != MRI->getType(Reg: WideSrcVal))
882	WideSrcVal = Builder.buildTrunc(Res: WideStoreTy, Op: WideSrcVal).getReg(Idx: `0`);
883
884	if (NeedBswap) {
885	WideSrcVal = Builder.buildBSwap(Dst: WideStoreTy, Src0: WideSrcVal).getReg(Idx: `0`);
886	} else if (NeedRotate) {
887	assert(WideStoreTy.getSizeInBits() % `2` == `0` &&
888	"Unexpected type for rotate");
889	auto RotAmt =
890	Builder.buildConstant(Res: WideStoreTy, Val: WideStoreTy.getSizeInBits() / `2`);
891	WideSrcVal =
892	Builder.buildRotateRight(Dst: WideStoreTy, Src: WideSrcVal, Amt: RotAmt).getReg(Idx: `0`);
893	}
894
895	Builder.buildStore(Val: WideSrcVal, Addr: LowestIdxStore->getPointerReg(),
896	PtrInfo: LowestIdxStore->getMMO().getPointerInfo(),
897	Alignment: LowestIdxStore->getMMO().getAlign());
898
899	// Erase the old stores.
900	for (auto *ST : FoundStores) {
901	ST->eraseFromParent();
902	DeletedStores.insert(Ptr: ST);
903	}
904	return true;
905	}
906
907	bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
908	bool Changed = false;
909	SmallVector<GStore *, `16`> Stores;
910	SmallPtrSet<GStore *, `8`> DeletedStores;
911	// Walk up the block so we can see the most eligible stores.
912	for (MachineInstr &MI : llvm::reverse(C&: BB))
913	if (auto *StoreMI = dyn_cast<GStore>(Val: &MI))
914	Stores.emplace_back(Args&: StoreMI);
915
916	for (auto *StoreMI : Stores) {
917	if (DeletedStores.count(Ptr: StoreMI))
918	continue;
919	if (mergeTruncStore(StoreMI&: *StoreMI, DeletedStores))
920	Changed = true;
921	}
922	return Changed;
923	}
924
925	bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
926	bool Changed = false;
927	for (auto &BB : MF){
928	Changed \|= mergeBlockStores(MBB&: BB);
929	Changed \|= mergeTruncStoresBlock(BB);
930	}
931
932	// Erase all dead instructions left over by the merging.
933	if (Changed) {
934	for (auto &BB : MF) {
935	for (auto &I : make_early_inc_range(Range: make_range(x: BB.rbegin(), y: BB.rend()))) {
936	if (isTriviallyDead(MI: I, MRI: *MRI))
937	I.eraseFromParent();
938	}
939	}
940	}
941
942	return Changed;
943	}
944
945	void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
946	// Query the legalizer info to record what store types are legal.
947	// We record this because we don't want to bother trying to merge stores into
948	// illegal ones, which would just result in being split again.
949
950	if (LegalStoreSizes.count(Val: AddrSpace)) {
951	assert(LegalStoreSizes[AddrSpace].any());
952	return; // Already cached sizes for this address space.
953	}
954
955	// Need to reserve at least MaxStoreSizeToForm + 1 bits.
956	BitVector LegalSizes(MaxStoreSizeToForm * `2`);
957	const auto &LI = *MF->getSubtarget().getLegalizerInfo();
958	const auto &DL = MF->getFunction().getDataLayout();
959	Type *IRPtrTy = PointerType::get(C&: MF->getFunction().getContext(), AddressSpace: AddrSpace);
960	LLT PtrTy = getLLTForType(Ty&: *IRPtrTy, DL);
961	// We assume that we're not going to be generating any stores wider than
962	// MaxStoreSizeToForm bits for now.
963	for (unsigned Size = `2`; Size <= MaxStoreSizeToForm; Size *= `2`) {
964	LLT Ty = LLT::scalar(SizeInBits: Size);
965	SmallVector<LegalityQuery::MemDesc, `2`> MemDescrs(
966	{{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
967	SmallVector<LLT> StoreTys({Ty, PtrTy});
968	LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
969	LegalizeActionStep ActionStep = LI.getAction(Query: Q);
970	if (ActionStep.Action == LegalizeActions::Legal)
971	LegalSizes.set(Size);
972	}
973	assert(LegalSizes.any() && "Expected some store sizes to be legal!");
974	LegalStoreSizes [AddrSpace] = LegalSizes;
975	}
976
977	bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
978	// If the ISel pipeline failed, do not bother running that pass.
979	if (MF.getProperties().hasProperty(
980	P: MachineFunctionProperties::Property::FailedISel))
981	return false;
982
983	LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
984	<< `'\n'`);
985
986	init(MF);
987	bool Changed = false;
988	Changed \|= mergeFunctionStores(MF);
989
990	LegalStoreSizes.clear();
991	return Changed;
992	}
993

Browse the source code of llvm_projects/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp