//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the LoadStoreOpt optimization pass.
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>

#define DEBUG_TYPE "loadstore-opt"

using namespace llvm;
using namespace ore;
using namespace MIPatternMatch;

STATISTIC(NumStoresMerged, "Number of stores merged");

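// The widest store, in bits, that this pass will try to form when merging
// (see initializeStoreMergeTargetInfo below).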
const unsigned MaxStoreSizeToForm = 128;

char LoadStoreOpt::ID = 0;
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                      false, false)
INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                    false, false)

LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
    : MachineFunctionPass(ID), DoNotRunPass(F) {}

LoadStoreOpt::LoadStoreOpt()
    : LoadStoreOpt([](const MachineFunction &) { return false; }) {}

void LoadStoreOpt::init(MachineFunction &MF) {
  this->MF = &MF;
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  TLI = MF.getSubtarget().getTargetLowering();
  LI = MF.getSubtarget().getLegalizerInfo();
  Builder.setMF(MF);
  IsPreLegalizer = !MF.getProperties().hasLegalized();
  InstsToErase.clear();
}

void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AAResultsWrapperPass>();
  AU.setPreservesAll();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
                                                MachineRegisterInfo &MRI) {
  BaseIndexOffset Info;
  Register PtrAddRHS;
  Register BaseReg;
  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
    Info.setBase(Ptr);
    Info.setOffset(0);
    return Info;
  }
  Info.setBase(BaseReg);
  auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
  if (RHSCst)
    Info.setOffset(RHSCst->Value.getSExtValue());

  // Just recognize a simple case for now. In future we'll need to match
  // indexing patterns for base + index + constant.
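  // For illustration (hypothetical virtual register names), a pointer built as
  //   %ptr:_(p0) = G_PTR_ADD %base, %idx
  // decomposes to Base = %base and Index = %idx, with Offset additionally set
  // when %idx is (a copy of) a G_CONSTANT.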
  Info.setIndex(PtrAddRHS);
  return Info;
}

bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
                                               const MachineInstr &MI2,
                                               bool &IsAlias,
                                               MachineRegisterInfo &MRI) {
  auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
  auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
  if (!LdSt1 || !LdSt2)
    return false;

  BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
  BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);

  if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
    return false;

  LocationSize Size1 = LdSt1->getMemSize();
  LocationSize Size2 = LdSt2->getMemSize();

  int64_t PtrDiff;
  if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
      BasePtr1.hasValidOffset()) {
    PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
    // If the size of the memory access is unknown, do not use it in the
    // analysis. One example of an unknown-size memory access is a load/store
    // of a scalable vector object on the stack.
    // BasePtr1 is PtrDiff away from BasePtr0. We can only decide whether the
    // two accesses alias in the following situations:
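    // For example, PtrDiff = 8 with a known 4-byte first access proves the
    // accesses are disjoint (4 <= 8), so IsAlias is set to false below.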
    if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) {
      // [----BasePtr0----]
      //                         [---BasePtr1--]
      // ========PtrDiff========>
      IsAlias = !((int64_t)Size1.getValue() <= PtrDiff);
      return true;
    }
    if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) {
      //                     [----BasePtr0----]
      // [---BasePtr1--]
      // =====(-PtrDiff)====>
      IsAlias = !((PtrDiff + (int64_t)Size2.getValue()) <= 0);
      return true;
    }
    return false;
  }

  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
  // able to calculate their relative offset if at least one arises
  // from an alloca. However, these allocas cannot overlap and we
  // can infer there is no alias.
  auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
  auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
  if (!Base0Def || !Base1Def)
    return false; // Couldn't tell anything.

  if (Base0Def->getOpcode() != Base1Def->getOpcode())
    return false;

  if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
    // Distinct frame indexes refer to distinct stack objects. Unless both are
    // fixed objects (whose slots may be assigned overlapping offsets), the
    // objects cannot overlap, so there is no alias.
    if (Base0Def != Base1Def &&
        (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
         !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
      IsAlias = false;
      return true;
    }
  }


  // This implementation is a lot more primitive than the SDAG one for now.
  // FIXME: what about constant pools?
  if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
    auto GV0 = Base0Def->getOperand(1).getGlobal();
    auto GV1 = Base1Def->getOperand(1).getGlobal();
    if (GV0 != GV1) {
      IsAlias = false;
      return true;
    }
  }

  // Can't tell anything about aliasing.
  return false;
}

bool GISelAddressing::instMayAlias(const MachineInstr &MI,
                                   const MachineInstr &Other,
                                   MachineRegisterInfo &MRI,
                                   AliasAnalysis *AA) {
  struct MemUseCharacteristics {
    bool IsVolatile;
    bool IsAtomic;
    Register BasePtr;
    int64_t Offset;
    LocationSize NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics =
      [&](const MachineInstr *MI) -> MemUseCharacteristics {
    if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
      Register BaseReg;
      int64_t Offset = 0;
      // No pre/post-inc addressing modes are considered here, unlike in SDAG.
      if (!mi_match(LS->getPointerReg(), MRI,
                    m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
        BaseReg = LS->getPointerReg();
        Offset = 0;
      }

      LocationSize Size = LS->getMMO().getSize();
      return {LS->isVolatile(),       LS->isAtomic(), BaseReg,
              Offset /*base offset*/, Size,           &LS->getMMO()};
    }
    // FIXME: support recognizing lifetime instructions.
    // Default.
    return {false /*isvolatile*/,
            /*isAtomic*/ false,
            Register(),
            (int64_t)0 /*offset*/,
            LocationSize::beforeOrAfterPointer() /*size*/,
            (MachineMemOperand *)nullptr};
  };
  MemUseCharacteristics MUC0 = getCharacteristics(&MI),
                        MUC1 = getCharacteristics(&Other);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  // Be conservative about atomics for the moment
  // TODO: This is way overconservative for unordered atomics (see D66309)
  if (MUC0.IsAtomic && MUC1.IsAtomic)
    return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias.
  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  // If NumBytes is scalable and offset is not 0, conservatively return may
  // alias
  if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) ||
      (MUC1.NumBytes.isScalable() && MUC1.Offset != 0))
    return true;

  const bool BothNotScalable =
      !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable();

  // Try to prove that there is aliasing, or that there is no aliasing. Either
  // way, we can return now. If nothing can be proved, proceed with more tests.
  bool IsAlias;
  if (BothNotScalable &&
      GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // FIXME: port the alignment based alias analysis from SDAG's isAlias().
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  LocationSize Size0 = MUC0.NumBytes;
  LocationSize Size1 = MUC1.NumBytes;
  if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0.hasValue() &&
      Size1.hasValue()) {
    // Use alias analysis information.
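    // Worked example (hypothetical numbers): two fixed 8-byte accesses at MMO
    // offsets 16 and 24 from the same IR value give MinOffset = 16,
    // Overlap0 = 8 and Overlap1 = 16, i.e. each location is extended back to
    // the lowest offset before being handed to alias analysis.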
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 =
        Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
    int64_t Overlap1 =
        Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
    LocationSize Loc0 =
        Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
    LocationSize Loc1 =
        Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);

    if (AA->isNoAlias(
            MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()),
            MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo())))
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// Returns true if the instruction creates an unavoidable hazard that
/// forces a boundary between store merge candidates.
static bool isInstHardMergeHazard(MachineInstr &MI) {
  return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
}

bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
  // Try to merge all the stores in the vector, splitting into separate segments
  // as necessary.
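  // For example, given seven adjacent s8 stores on a target where s32 and s16
  // stores are legal, the first pass of the loop below merges four of them
  // into one s32 store and the next pass merges two more into an s16 store,
  // leaving the last s8 store untouched. (Illustrative only; the actual widths
  // depend on the legalizer rules queried below.)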
  assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
  LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
  LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
  unsigned AS = PtrTy.getAddressSpace();
  // Ensure the legal store info is computed for this address space.
  initializeStoreMergeTargetInfo(AS);
  const auto &LegalSizes = LegalStoreSizes[AS];

#ifndef NDEBUG
  for (auto *StoreMI : StoresToMerge)
    assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
#endif

  bool AnyMerged = false;
  do {
    unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
    unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
    // Compute the biggest store we can generate to handle the number of stores.
    unsigned MergeSizeBits;
    for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
      LLT StoreTy = LLT::scalar(MergeSizeBits);
      EVT StoreEVT =
          getApproximateEVTForLLT(StoreTy, MF->getFunction().getContext());
      if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
          TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
          (TLI->isTypeLegal(StoreEVT)))
        break; // We can generate a MergeSize bits store.
    }
    if (MergeSizeBits <= OrigTy.getSizeInBits())
      return AnyMerged; // No greater merge.

    unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
    // Perform the actual merging.
    SmallVector<GStore *, 8> SingleMergeStores(
        StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
    AnyMerged |= doSingleStoreMerge(SingleMergeStores);
    StoresToMerge.erase(StoresToMerge.begin(),
                        StoresToMerge.begin() + NumStoresToMerge);
  } while (StoresToMerge.size() > 1);
  return AnyMerged;
}

bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
                                            MachineFunction &MF) const {
  auto Action = LI->getAction(Query).Action;
  // If the instruction is unsupported, it can't be legalized at all.
  if (Action == LegalizeActions::Unsupported)
    return false;
  return IsPreLegalizer || Action == LegalizeAction::Legal;
}

bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
  assert(Stores.size() > 1);
  // We know that all the stores are consecutive and there are no aliasing
  // operations in the range. However, the values that are being stored may be
  // generated anywhere before each store. To ensure we have the values
  // available, we materialize the wide value and new store at the place of the
  // final store in the merge sequence.
  GStore *FirstStore = Stores[0];
  const unsigned NumStores = Stores.size();
  LLT SmallTy = MRI->getType(FirstStore->getValueReg());
  LLT WideValueTy =
      LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedValue());

  // For each store, compute pairwise merged debug locs.
  DebugLoc MergedLoc = Stores.front()->getDebugLoc();
  for (auto *Store : drop_begin(Stores))
    MergedLoc = DebugLoc::getMergedLocation(MergedLoc, Store->getDebugLoc());

  Builder.setInstr(*Stores.back());
  Builder.setDebugLoc(MergedLoc);

  // If all of the store values are constants, then create a wide constant
  // directly. Otherwise, we need to generate some instructions to merge the
  // existing values together into a wider type.
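  // For example (illustrative MIR on a little-endian target), two constant s8
  // stores to adjacent addresses:
  //   G_STORE %c0(s8), %p     ; %c0 = G_CONSTANT i8 0x11, lowest address
  //   G_STORE %c1(s8), %p1    ; %c1 = G_CONSTANT i8 0x22, %p1 = %p + 1
  // are rebuilt at the position of the final store as:
  //   %wide:_(s16) = G_CONSTANT i16 0x2211
  //   G_STORE %wide(s16), %p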
  SmallVector<APInt, 8> ConstantVals;
  for (auto *Store : Stores) {
    auto MaybeCst =
        getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
    if (!MaybeCst) {
      ConstantVals.clear();
      break;
    }
    ConstantVals.emplace_back(MaybeCst->Value);
  }

  Register WideReg;
  auto *WideMMO =
      MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
  if (ConstantVals.empty()) {
    // Mimic the SDAG behaviour here and don't try to do anything for unknown
    // values. In future, we should also support the cases of loads and
    // extracted vector elements.
    return false;
  }

  assert(ConstantVals.size() == NumStores);
  // Check if our wide constant is legal.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}},
                                *MF))
    return false;
  APInt WideConst(WideValueTy.getSizeInBits(), 0);
  for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
    // Insert the smaller constant into the corresponding position in the
    // wider one.
    WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
  }
  WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
  auto NewStore =
      Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
  (void)NewStore;
  LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
                    << " stores into merged store: " << *NewStore);
  LLVM_DEBUG(for (auto *MI : Stores) dbgs() << "  " << *MI;);
  NumStoresMerged += Stores.size();

  MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
  MORE.emit([&]() {
    MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
                                FirstStore->getDebugLoc(),
                                FirstStore->getParent());
    R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
      << NV("OrigWidth", SmallTy.getSizeInBytes())
      << " bytes into a single store of "
      << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
    return R;
  });

  InstsToErase.insert_range(Stores);
  return true;
}

bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
  if (C.Stores.size() < 2) {
    C.reset();
    return false;
  }

  LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
                    << " stores, starting with " << *C.Stores[0]);
  // We know that the stores in the candidate are adjacent.
  // Now we need to check if any potential aliasing instructions recorded
  // during the search alias with load/stores added to the candidate after.
  // For example, if we have the candidate:
  //   C.Stores = [ST1, ST2, ST3, ST4]
  // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
  // ST2, then we would have recorded it into the PotentialAliases structure
  // with the associated index value of "1". Then we see ST3 and ST4 and add
  // them to the candidate group. We know that LD1 does not alias with ST1 or
  // ST2, since we already did that check. However we don't yet know if it
  // may alias ST3 and ST4, so we perform those checks now.
  SmallVector<GStore *> StoresToMerge;

  auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
    for (auto AliasInfo : reverse(C.PotentialAliases)) {
      MachineInstr *PotentialAliasOp = AliasInfo.first;
      unsigned PreCheckedIdx = AliasInfo.second;
      if (Idx < PreCheckedIdx) {
        // Once our store index is lower than the index associated with the
        // potential alias, we know that we've already checked for this alias
        // and all of the earlier potential aliases too.
        return false;
      }
      // Need to check this alias.
      if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
                                        AA)) {
        LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
                          << " detected\n");
        return true;
      }
    }
    return false;
  };
  // Start from the last store in the group, and check if it aliases with any
  // of the potential aliasing operations in the list.
  for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
    auto *CheckStore = C.Stores[StoreIdx];
    if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
      continue;
    StoresToMerge.emplace_back(CheckStore);
  }

  LLVM_DEBUG(dbgs() << StoresToMerge.size()
                    << " stores remaining after alias checks. Merging...\n");

  // Now we've checked for aliasing hazards, merge any stores left.
  C.reset();
  if (StoresToMerge.size() < 2)
    return false;
  return mergeStores(StoresToMerge);
}

bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
                                                 StoreMergeCandidate &C) {
  if (C.Stores.empty())
    return false;
  return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
    return instMayAlias(MI, *OtherMI, *MRI, AA);
  });
}

void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
  PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
}

bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
                                       StoreMergeCandidate &C) {
  // Check if the given store writes to an adjacent address, and other
  // requirements.
  LLT ValueTy = MRI->getType(StoreMI.getValueReg());
  LLT PtrTy = MRI->getType(StoreMI.getPointerReg());

  // Only handle scalars.
  if (!ValueTy.isScalar())
    return false;

  // Don't allow truncating stores for now.
  if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
    return false;

  // Avoid adding volatile or ordered stores to the candidate. We already have
  // a check for this in instMayAlias(), but that only gets called later,
  // between potential aliasing hazards.
  if (!StoreMI.isSimple())
    return false;

  Register StoreAddr = StoreMI.getPointerReg();
  auto BIO = getPointerInfo(StoreAddr, *MRI);
  Register StoreBase = BIO.getBase();
  if (C.Stores.empty()) {
    C.BasePtr = StoreBase;
    if (!BIO.hasValidOffset()) {
      C.CurrentLowestOffset = 0;
    } else {
      C.CurrentLowestOffset = BIO.getOffset();
    }
    // This is the first store of the candidate.
    // If the offset can't possibly allow for a lower addressed store with the
    // same base, don't bother adding it.
    if (BIO.hasValidOffset() &&
        BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
      return false;
    C.Stores.emplace_back(&StoreMI);
    LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
                      << StoreMI);
    return true;
  }

  // Check the store is the same size as the existing ones in the candidate.
  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
      ValueTy.getSizeInBits())
    return false;

  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
      PtrTy.getAddressSpace())
    return false;

  // There are other stores in the candidate. Check that the store address
  // writes to the next lowest adjacent address.
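  // For example, with 4-byte stores and CurrentLowestOffset == 8, only a store
  // to the same base at offset 4 extends the candidate; any other offset (or
  // an unknown offset) is rejected below.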
  if (C.BasePtr != StoreBase)
    return false;
  // If we don't have a valid offset, we can't guarantee that the store is
  // adjacent to the others.
  if (!BIO.hasValidOffset())
    return false;
  if ((C.CurrentLowestOffset -
       static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
    return false;

  // This writes to an adjacent address. Allow it.
  C.Stores.emplace_back(&StoreMI);
  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
  return true;
}

bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
  bool Changed = false;
  // Walk through the block bottom-up, looking for merging candidates.
  StoreMergeCandidate Candidate;
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    if (InstsToErase.contains(&MI))
      continue;

    if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
      // We have a G_STORE. Add it to the candidate if it writes to an adjacent
      // address.
      if (!addStoreToCandidate(*StoreMI, Candidate)) {
        // Store wasn't eligible to be added. May need to record it as a
        // potential alias.
        if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
          Changed |= processMergeCandidate(Candidate);
          continue;
        }
        Candidate.addPotentialAlias(*StoreMI);
      }
      continue;
    }

    // If we don't have any stores yet, this instruction can't pose a problem.
    if (Candidate.Stores.empty())
      continue;

    // We're dealing with some other kind of instruction.
    if (isInstHardMergeHazard(MI)) {
      Changed |= processMergeCandidate(Candidate);
      Candidate.Stores.clear();
      continue;
    }

    if (!MI.mayLoadOrStore())
      continue;

    if (operationAliasesWithCandidate(MI, Candidate)) {
      // We have a potential alias, so process the current candidate if we can
      // and then continue looking for a new candidate.
      Changed |= processMergeCandidate(Candidate);
      continue;
    }

    // Record this instruction as a potential alias for future stores that are
    // added to the candidate.
    Candidate.addPotentialAlias(MI);
  }

  // Process any candidate left after finishing searching the entire block.
  Changed |= processMergeCandidate(Candidate);

  // Erase instructions now that we're no longer iterating over the block.
  for (auto *MI : InstsToErase)
    MI->eraseFromParent();
  InstsToErase.clear();
  return Changed;
}

/// Check if the store \p Store is a truncstore that can be merged. That is,
/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
/// Register then it does not need to match, and SrcVal is set to the source
/// value that was found.
/// On a match, returns the offset (in units of the narrow memory type) at
/// which the stored piece lives within \p SrcVal.
static std::optional<int64_t>
getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
                        MachineRegisterInfo &MRI) {
  Register TruncVal;
  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
    return std::nullopt;

  // The shift amount must be a constant multiple of the narrow type's width;
  // it gives the offset of this piece within the wide source value "y":
  //
  // x = G_LSHR y, ShiftAmtC
  // s8 z = G_TRUNC x
  // store z, ...
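  //
  // For example, assuming an s32 "y" stored as four s8 pieces, ShiftAmtC == 16
  // corresponds to the piece at byte offset 2 of "y".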
  Register FoundSrcVal;
  int64_t ShiftAmt;
  if (!mi_match(TruncVal, MRI,
                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
    if (!SrcVal.isValid() || TruncVal == SrcVal) {
      if (!SrcVal.isValid())
        SrcVal = TruncVal;
      return 0; // If it's the lowest index store.
    }
    return std::nullopt;
  }

  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
  if (ShiftAmt % NarrowBits != 0)
    return std::nullopt;
  const unsigned Offset = ShiftAmt / NarrowBits;

  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
    return std::nullopt;

  if (!SrcVal.isValid())
    SrcVal = FoundSrcVal;
  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
    return std::nullopt;
  return Offset;
}

/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
///
/// Assuming little endian target:
/// i8 *p = ...
/// i32 val = ...
/// p[0] = (val >> 0) & 0xFF;
/// p[1] = (val >> 8) & 0xFF;
/// p[2] = (val >> 16) & 0xFF;
/// p[3] = (val >> 24) & 0xFF;
/// =>
/// *((i32)p) = val;
///
/// i8 *p = ...
/// i32 val = ...
/// p[0] = (val >> 24) & 0xFF;
/// p[1] = (val >> 16) & 0xFF;
/// p[2] = (val >> 8) & 0xFF;
/// p[3] = (val >> 0) & 0xFF;
/// =>
/// *((i32)p) = BSWAP(val);
bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
                                   SmallPtrSetImpl<GStore *> &DeletedStores) {
  LLT MemTy = StoreMI.getMMO().getMemoryType();

  // We only handle merging simple stores of 1-4 bytes.
  if (!MemTy.isScalar())
    return false;
  switch (MemTy.getSizeInBits()) {
  case 8:
  case 16:
  case 32:
    break;
  default:
    return false;
  }
  if (!StoreMI.isSimple())
    return false;

  // We do a simple search for mergeable stores prior to this one.
  // Any potential alias hazard along the way terminates the search.
  SmallVector<GStore *> FoundStores;

  // We're looking for:
  // 1) a (store(trunc(...)))
  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
  //    the partial value stored.
  // 3) where the offsets form either a little or big-endian sequence.

  auto &LastStore = StoreMI;

  // The single base pointer that all stores must use.
  Register BaseReg;
  int64_t LastOffset;
  if (!mi_match(LastStore.getPointerReg(), *MRI,
                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
    BaseReg = LastStore.getPointerReg();
    LastOffset = 0;
  }

  GStore *LowestIdxStore = &LastStore;
  int64_t LowestIdxOffset = LastOffset;

  Register WideSrcVal;
  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
  if (!LowestShiftAmt)
    return false; // Didn't match a trunc.
  assert(WideSrcVal.isValid());

  LLT WideStoreTy = MRI->getType(WideSrcVal);
  // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
    return false;
  const unsigned NumStoresRequired =
      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();

  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
  OffsetMap[*LowestShiftAmt] = LastOffset;
  FoundStores.emplace_back(&LastStore);

  const int MaxInstsToCheck = 10;
  int NumInstsChecked = 0;
  for (auto II = ++LastStore.getReverseIterator();
       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
       ++II) {
    NumInstsChecked++;
    GStore *NewStore;
    if ((NewStore = dyn_cast<GStore>(&*II))) {
      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
        break;
    } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
      break;
    } else {
      continue; // This is a safe instruction we can look past.
    }

    Register NewBaseReg;
    int64_t MemOffset;
    // Check we're storing to the same base + some offset.
    if (!mi_match(NewStore->getPointerReg(), *MRI,
                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
      NewBaseReg = NewStore->getPointerReg();
      MemOffset = 0;
    }
    if (BaseReg != NewBaseReg)
      break;

    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
    if (!ShiftByteOffset)
      break;
    if (MemOffset < LowestIdxOffset) {
      LowestIdxOffset = MemOffset;
      LowestIdxStore = NewStore;
    }

    // Map this store's memory offset to the position of its piece within the
    // wide value, and stop the search if that position has already been seen.
    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
        OffsetMap[*ShiftByteOffset] != INT64_MAX)
      break;
    OffsetMap[*ShiftByteOffset] = MemOffset;

    FoundStores.emplace_back(NewStore);
    // Reset counter since we've found a matching inst.
    NumInstsChecked = 0;
    if (FoundStores.size() == NumStoresRequired)
      break;
  }

  if (FoundStores.size() != NumStoresRequired) {
    if (FoundStores.size() == 1)
      return false;
    // We didn't find enough stores to merge into the size of the original
    // source value, but we may be able to generate a smaller store if we
    // truncate the source value.
    WideStoreTy =
        LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
  }

  unsigned NumStoresFound = FoundStores.size();

  const auto &DL = LastStore.getMF()->getDataLayout();
  auto &C = LastStore.getMF()->getFunction().getContext();
  // Check that a store of the wide type is both allowed and fast on the target.
  unsigned Fast = 0;
  bool Allowed = TLI->allowsMemoryAccess(
      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
  if (!Allowed || !Fast)
    return false;

  // Check if the pieces of the value are going to the expected places in
  // memory to merge the stores.
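  // For example, four s8 pieces of an s32 value with lowest offset L merge
  // directly when OffsetMap reads {L, L+1, L+2, L+3} on a little-endian
  // layout; the exact reverse order instead requires a G_BSWAP of the source.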
  unsigned NarrowBits = MemTy.getScalarSizeInBits();
  auto checkOffsets = [&](bool MatchLittleEndian) {
    if (MatchLittleEndian) {
      for (unsigned i = 0; i != NumStoresFound; ++i)
        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
          return false;
    } else { // MatchBigEndian by reversing loop counter.
      for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
           ++i, --j)
        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
          return false;
    }
    return true;
  };

  // Check if the offsets line up for the native data layout of this target.
  bool NeedBswap = false;
  bool NeedRotate = false;
  if (!checkOffsets(DL.isLittleEndian())) {
    // Special-case: check if byte offsets line up for the opposite endian.
    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
      NeedBswap = true;
    else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
      NeedRotate = true;
    else
      return false;
  }

  if (NeedBswap &&
      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
    return false;
  if (NeedRotate &&
      !isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
    return false;

  Builder.setInstrAndDebugLoc(StoreMI);

  if (WideStoreTy != MRI->getType(WideSrcVal))
    WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);

  if (NeedBswap) {
    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
  } else if (NeedRotate) {
    assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
           "Unexpected type for rotate");
    auto RotAmt =
        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
    WideSrcVal =
        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
  }

  Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
                     LowestIdxStore->getMMO().getPointerInfo(),
                     LowestIdxStore->getMMO().getAlign());

  // Erase the old stores.
  for (auto *ST : FoundStores) {
    ST->eraseFromParent();
    DeletedStores.insert(ST);
  }
  return true;
}

bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
  bool Changed = false;
  SmallVector<GStore *, 16> Stores;
  SmallPtrSet<GStore *, 8> DeletedStores;
  // Walk up the block so we can see the most eligible stores.
  for (MachineInstr &MI : llvm::reverse(BB))
    if (auto *StoreMI = dyn_cast<GStore>(&MI))
      Stores.emplace_back(StoreMI);

  for (auto *StoreMI : Stores) {
    if (DeletedStores.count(StoreMI))
      continue;
    if (mergeTruncStore(*StoreMI, DeletedStores))
      Changed = true;
  }
  return Changed;
}

bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
  bool Changed = false;
  for (auto &BB : MF) {
    Changed |= mergeBlockStores(BB);
    Changed |= mergeTruncStoresBlock(BB);
  }

  // Erase all dead instructions left over by the merging.
  if (Changed) {
    for (auto &BB : MF) {
      for (auto &I : make_early_inc_range(reverse(BB))) {
        if (isTriviallyDead(I, *MRI))
          I.eraseFromParent();
      }
    }
  }

  return Changed;
}

void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
  // Query the legalizer info to record what store types are legal.
  // We record this because we don't want to bother trying to merge stores into
  // illegal ones, which would just result in being split again.
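  //
  // For example, on a hypothetical target where only s8, s16, s32 and s64
  // stores are legal in address space 0, LegalStoreSizes[0] ends up with
  // exactly bits 8, 16, 32 and 64 set.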

  if (LegalStoreSizes.count(AddrSpace)) {
    assert(LegalStoreSizes[AddrSpace].any());
    return; // Already cached sizes for this address space.
  }

  // Need to reserve at least MaxStoreSizeToForm + 1 bits.
  BitVector LegalSizes(MaxStoreSizeToForm * 2);
  const auto &LI = *MF->getSubtarget().getLegalizerInfo();
  const auto &DL = MF->getFunction().getDataLayout();
  Type *IRPtrTy = PointerType::get(MF->getFunction().getContext(), AddrSpace);
  LLT PtrTy = getLLTForType(*IRPtrTy, DL);
  // We assume that we're not going to be generating any stores wider than
  // MaxStoreSizeToForm bits for now.
  for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
    LLT Ty = LLT::scalar(Size);
    SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
        {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
    SmallVector<LLT> StoreTys({Ty, PtrTy});
    LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
    LegalizeActionStep ActionStep = LI.getAction(Q);
    if (ActionStep.Action == LegalizeActions::Legal)
      LegalSizes.set(Size);
  }
  assert(LegalSizes.any() && "Expected some store sizes to be legal!");
  LegalStoreSizes[AddrSpace] = LegalSizes;
}

bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
  // If the ISel pipeline failed, do not bother running this pass.
  if (MF.getProperties().hasFailedISel())
    return false;

  LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
                    << '\n');

  init(MF);
  bool Changed = false;
  Changed |= mergeFunctionStores(MF);

  LegalStoreSizes.clear();
  return Changed;
}