1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
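// As a small illustration (hypothetical IR, not tied to a specific test), in
//
//   store i32 0, ptr %p        ; found as MaybeDeadAccess when walking upwards
//   store i32 1, ptr %p        ; StartDef, the killing MemoryDef
//   %v = load i32, ptr %p      ; reads only the killing store's value
//
// there is no MemoryUse of %p between the two stores and the second store
// completely overwrites the first, so the first store is erased.
//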
28//===----------------------------------------------------------------------===//
29
30#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
34#include "llvm/ADT/PostOrderIterator.h"
35#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/SmallPtrSet.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
40#include "llvm/Analysis/AliasAnalysis.h"
41#include "llvm/Analysis/AssumptionCache.h"
42#include "llvm/Analysis/CaptureTracking.h"
43#include "llvm/Analysis/GlobalsModRef.h"
44#include "llvm/Analysis/LoopInfo.h"
45#include "llvm/Analysis/MemoryBuiltins.h"
46#include "llvm/Analysis/MemoryLocation.h"
47#include "llvm/Analysis/MemorySSA.h"
48#include "llvm/Analysis/MemorySSAUpdater.h"
49#include "llvm/Analysis/MustExecute.h"
50#include "llvm/Analysis/PostDominators.h"
51#include "llvm/Analysis/TargetLibraryInfo.h"
52#include "llvm/Analysis/ValueTracking.h"
53#include "llvm/IR/Argument.h"
54#include "llvm/IR/AttributeMask.h"
55#include "llvm/IR/BasicBlock.h"
56#include "llvm/IR/Constant.h"
57#include "llvm/IR/ConstantRangeList.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/DataLayout.h"
60#include "llvm/IR/DebugInfo.h"
61#include "llvm/IR/Dominators.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/IRBuilder.h"
64#include "llvm/IR/InstIterator.h"
65#include "llvm/IR/InstrTypes.h"
66#include "llvm/IR/Instruction.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/IntrinsicInst.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/PassManager.h"
71#include "llvm/IR/PatternMatch.h"
72#include "llvm/IR/Value.h"
73#include "llvm/InitializePasses.h"
74#include "llvm/Support/Casting.h"
75#include "llvm/Support/CommandLine.h"
76#include "llvm/Support/Debug.h"
77#include "llvm/Support/DebugCounter.h"
78#include "llvm/Support/ErrorHandling.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Transforms/Scalar.h"
81#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82#include "llvm/Transforms/Utils/BuildLibCalls.h"
83#include "llvm/Transforms/Utils/Local.h"
84#include <algorithm>
85#include <cassert>
86#include <cstdint>
87#include <map>
88#include <optional>
89#include <utility>
90
91using namespace llvm;
92using namespace PatternMatch;
93
94#define DEBUG_TYPE "dse"
95
96STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
97STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
98STATISTIC(NumFastStores, "Number of stores deleted");
99STATISTIC(NumFastOther, "Number of other instrs removed");
100STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
101STATISTIC(NumModifiedStores, "Number of stores modified");
STATISTIC(NumCFGChecks, "Number of blocks checked in CFG-based path checks");
STATISTIC(NumCFGTries, "Number of attempted CFG-based path checks");
STATISTIC(NumCFGSuccess, "Number of successful CFG-based path checks");
105STATISTIC(NumGetDomMemoryDefPassed,
106 "Number of times a valid candidate is returned from getDomMemoryDef");
STATISTIC(NumDomMemDefChecks,
          "Number of iterations checking for reads in getDomMemoryDef");
109
110DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
111 "Controls which MemoryDefs are eliminated.");
112
113static cl::opt<bool>
114EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
115 cl::init(Val: true), cl::Hidden,
116 cl::desc("Enable partial-overwrite tracking in DSE"));
117
118static cl::opt<bool>
119EnablePartialStoreMerging("enable-dse-partial-store-merging",
120 cl::init(Val: true), cl::Hidden,
121 cl::desc("Enable partial store merging in DSE"));
122
123static cl::opt<unsigned>
124 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(Val: 150), cl::Hidden,
125 cl::desc("The number of memory instructions to scan for "
126 "dead store elimination (default = 150)"));
127static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
128 "dse-memoryssa-walklimit", cl::init(Val: 90), cl::Hidden,
129 cl::desc("The maximum number of steps while walking upwards to find "
130 "MemoryDefs that may be killed (default = 90)"));
131
132static cl::opt<unsigned> MemorySSAPartialStoreLimit(
133 "dse-memoryssa-partial-store-limit", cl::init(Val: 5), cl::Hidden,
    cl::desc("The maximum number of candidates that only partially overwrite "
             "the killing MemoryDef to consider (default = 5)"));
137
138static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
139 "dse-memoryssa-defs-per-block-limit", cl::init(Val: 5000), cl::Hidden,
    cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
             "other stores per basic block (default = 5000)"));
142
143static cl::opt<unsigned> MemorySSASameBBStepCost(
144 "dse-memoryssa-samebb-cost", cl::init(Val: 1), cl::Hidden,
145 cl::desc(
        "The cost of a step in the same basic block as the killing MemoryDef "
        "(default = 1)"));
148
149static cl::opt<unsigned>
150 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(Val: 5),
151 cl::Hidden,
152 cl::desc("The cost of a step in a different basic "
                                     "block than the killing MemoryDef "
                                     "(default = 5)"));
155
156static cl::opt<unsigned> MemorySSAPathCheckLimit(
157 "dse-memoryssa-path-check-limit", cl::init(Val: 50), cl::Hidden,
158 cl::desc("The maximum number of blocks to check when trying to prove that "
159 "all paths to an exit go through a killing block (default = 50)"));
160
// This flag allows or disallows DSE to optimize MemorySSA during its
162// traversal. Note that DSE optimizing MemorySSA may impact other passes
163// downstream of the DSE invocation and can lead to issues not being
164// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
165// those cases, the flag can be used to check if DSE's MemorySSA optimizations
166// impact follow-up passes.
167static cl::opt<bool>
168 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(Val: true), cl::Hidden,
169 cl::desc("Allow DSE to optimize memory accesses."));
170
171// TODO: remove this flag.
172static cl::opt<bool> EnableInitializesImprovement(
173 "enable-dse-initializes-attr-improvement", cl::init(Val: true), cl::Hidden,
174 cl::desc("Enable the initializes attr improvement in DSE"));
175
176//===----------------------------------------------------------------------===//
177// Helper functions
178//===----------------------------------------------------------------------===//
179using OverlapIntervalsTy = std::map<int64_t, int64_t>;
180using InstOverlapIntervalsTy = MapVector<Instruction *, OverlapIntervalsTy>;
181
182/// Returns true if the end of this instruction can be safely shortened in
183/// length.
184static bool isShortenableAtTheEnd(Instruction *I) {
185 // Don't shorten stores for now
186 if (isa<StoreInst>(Val: I))
187 return false;
188
189 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
190 switch (II->getIntrinsicID()) {
191 default: return false;
192 case Intrinsic::memset:
193 case Intrinsic::memcpy:
194 case Intrinsic::memcpy_element_unordered_atomic:
195 case Intrinsic::memset_element_unordered_atomic:
196 // Do shorten memory intrinsics.
197 // FIXME: Add memmove if it's also safe to transform.
198 return true;
199 }
200 }
201
  // Don't shorten libcalls for now.
203
204 return false;
205}
206
207/// Returns true if the beginning of this instruction can be safely shortened
208/// in length.
209static bool isShortenableAtTheBeginning(Instruction *I) {
210 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
211 // easily done by offsetting the source address.
212 return isa<AnyMemSetInst>(Val: I);
213}
214
215static std::optional<TypeSize> getPointerSize(const Value *V,
216 const DataLayout &DL,
217 const TargetLibraryInfo &TLI,
218 const Function *F) {
219 uint64_t Size;
220 ObjectSizeOpts Opts;
221 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
222
223 if (getObjectSize(Ptr: V, Size, DL, TLI: &TLI, Opts))
224 return TypeSize::getFixed(ExactSize: Size);
225 return std::nullopt;
226}
227
228namespace {
229
230enum OverwriteResult {
231 OW_Begin,
232 OW_Complete,
233 OW_End,
234 OW_PartialEarlierWithFullLater,
235 OW_MaybePartial,
236 OW_None,
237 OW_Unknown
238};
239
240} // end anonymous namespace
241
/// Check if two instructions are masked stores that completely
243/// overwrite one another. More specifically, \p KillingI has to
244/// overwrite \p DeadI.
245static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
246 const Instruction *DeadI,
247 BatchAAResults &AA) {
248 const auto *KillingII = dyn_cast<IntrinsicInst>(Val: KillingI);
249 const auto *DeadII = dyn_cast<IntrinsicInst>(Val: DeadI);
250 if (KillingII == nullptr || DeadII == nullptr)
251 return OW_Unknown;
252 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
253 return OW_Unknown;
254
255 switch (KillingII->getIntrinsicID()) {
256 case Intrinsic::masked_store:
257 case Intrinsic::vp_store: {
258 const DataLayout &DL = KillingII->getDataLayout();
259 auto *KillingTy = KillingII->getArgOperand(i: 0)->getType();
260 auto *DeadTy = DeadII->getArgOperand(i: 0)->getType();
261 if (DL.getTypeSizeInBits(Ty: KillingTy) != DL.getTypeSizeInBits(Ty: DeadTy))
262 return OW_Unknown;
263 // Element count.
264 if (cast<VectorType>(Val: KillingTy)->getElementCount() !=
265 cast<VectorType>(Val: DeadTy)->getElementCount())
266 return OW_Unknown;
267 // Pointers.
268 Value *KillingPtr = KillingII->getArgOperand(i: 1);
269 Value *DeadPtr = DeadII->getArgOperand(i: 1);
270 if (KillingPtr != DeadPtr && !AA.isMustAlias(V1: KillingPtr, V2: DeadPtr))
271 return OW_Unknown;
272 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
273 // Masks.
274 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
275 if (KillingII->getArgOperand(i: 2) != DeadII->getArgOperand(i: 2))
276 return OW_Unknown;
277 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
278 // Masks.
279 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
280 if (KillingII->getArgOperand(i: 2) != DeadII->getArgOperand(i: 2))
281 return OW_Unknown;
282 // Lengths.
283 if (KillingII->getArgOperand(i: 3) != DeadII->getArgOperand(i: 3))
284 return OW_Unknown;
285 }
286 return OW_Complete;
287 }
288 default:
289 return OW_Unknown;
290 }
291}
292
293/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
294/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
295/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
296/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
297/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
298/// overwritten by a killing (smaller) store which doesn't write outside the big
299/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
300/// NOTE: This function must only be called if both \p KillingLoc and \p
301/// DeadLoc belong to the same underlying object with valid \p KillingOff and
302/// \p DeadOff.
303static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
304 const MemoryLocation &DeadLoc,
305 int64_t KillingOff, int64_t DeadOff,
306 Instruction *DeadI,
307 InstOverlapIntervalsTy &IOL) {
308 const uint64_t KillingSize = KillingLoc.Size.getValue();
309 const uint64_t DeadSize = DeadLoc.Size.getValue();
310 // We may now overlap, although the overlap is not complete. There might also
311 // be other incomplete overlaps, and together, they might cover the complete
312 // dead store.
  // Note: The correctness of this logic depends on the fact that this function
  // is only called when there are no intervening reads of the dead store.
315 if (EnablePartialOverwriteTracking &&
316 KillingOff < int64_t(DeadOff + DeadSize) &&
317 int64_t(KillingOff + KillingSize) >= DeadOff) {
318
319 // Insert our part of the overlap into the map.
320 auto &IM = IOL[DeadI];
321 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
322 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
323 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
324 << ")\n");
325
326 // Make sure that we only insert non-overlapping intervals and combine
327 // adjacent intervals. The intervals are stored in the map with the ending
328 // offset as the key (in the half-open sense) and the starting offset as
329 // the value.
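    // For example, a killing write covering bytes [4, 12) is recorded as
    // IM[12] = 4.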
330 int64_t KillingIntStart = KillingOff;
331 int64_t KillingIntEnd = KillingOff + KillingSize;
332
333 // Find any intervals ending at, or after, KillingIntStart which start
334 // before KillingIntEnd.
335 auto ILI = IM.lower_bound(x: KillingIntStart);
336 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
337 // This existing interval is overlapped with the current store somewhere
338 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
339 // intervals and adjusting our start and end.
340 KillingIntStart = std::min(a: KillingIntStart, b: ILI->second);
341 KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first);
342 ILI = IM.erase(position: ILI);
343
344 // Continue erasing and adjusting our end in case other previous
345 // intervals are also overlapped with the current store.
346 //
347 // |--- dead 1 ---| |--- dead 2 ---|
348 // |------- killing---------|
349 //
350 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
351 assert(ILI->second > KillingIntStart && "Unexpected interval");
352 KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first);
353 ILI = IM.erase(position: ILI);
354 }
355 }
356
357 IM[KillingIntEnd] = KillingIntStart;
358
359 ILI = IM.begin();
360 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
361 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
362 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
363 << ") Composite KillingLoc [" << ILI->second << ", "
364 << ILI->first << ")\n");
365 ++NumCompletePartials;
366 return OW_Complete;
367 }
368 }
369
370 // Check for a dead store which writes to all the memory locations that
371 // the killing store writes to.
372 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
373 int64_t(DeadOff + DeadSize) > KillingOff &&
374 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead store [" << DeadOff
376 << ", " << int64_t(DeadOff + DeadSize)
377 << ") by a killing store [" << KillingOff << ", "
378 << int64_t(KillingOff + KillingSize) << ")\n");
379 // TODO: Maybe come up with a better name?
380 return OW_PartialEarlierWithFullLater;
381 }
382
383 // Another interesting case is if the killing store overwrites the end of the
384 // dead store.
385 //
386 // |--dead--|
387 // |-- killing --|
388 //
  // In this case we may want to trim the size of the dead store to avoid
  // generating stores to addresses which will definitely be overwritten by the
  // killing store.
392 if (!EnablePartialOverwriteTracking &&
393 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
394 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
395 return OW_End;
396
397 // Finally, we also need to check if the killing store overwrites the
398 // beginning of the dead store.
399 //
400 // |--dead--|
401 // |-- killing --|
402 //
  // In this case we may want to move the destination address and trim the size
  // of the dead store to avoid generating stores to addresses which will
  // definitely be overwritten by the killing store.
406 if (!EnablePartialOverwriteTracking &&
407 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
408 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
409 "Expect to be handled as OW_Complete");
410 return OW_Begin;
411 }
412 // Otherwise, they don't completely overlap.
413 return OW_Unknown;
414}
415
416/// Returns true if the memory which is accessed by the second instruction is not
417/// modified between the first and the second instruction.
418/// Precondition: Second instruction must be dominated by the first
419/// instruction.
420static bool
421memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
422 BatchAAResults &AA, const DataLayout &DL,
423 DominatorTree *DT) {
424 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
425 // instructions which can modify the memory location accessed by SecondI.
426 //
427 // While doing the walk keep track of the address to check. It might be
428 // different in different basic blocks due to PHI translation.
429 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
430 SmallVector<BlockAddressPair, 16> WorkList;
431 // Keep track of the address we visited each block with. Bail out if we
432 // visit a block with different addresses.
433 DenseMap<BasicBlock *, Value *> Visited;
434
435 BasicBlock::iterator FirstBBI(FirstI);
436 ++FirstBBI;
437 BasicBlock::iterator SecondBBI(SecondI);
438 BasicBlock *FirstBB = FirstI->getParent();
439 BasicBlock *SecondBB = SecondI->getParent();
440 MemoryLocation MemLoc;
441 if (auto *MemSet = dyn_cast<MemSetInst>(Val: SecondI))
442 MemLoc = MemoryLocation::getForDest(MI: MemSet);
443 else
444 MemLoc = MemoryLocation::get(Inst: SecondI);
445
446 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
447
448 // Start checking the SecondBB.
449 WorkList.push_back(
450 Elt: std::make_pair(x&: SecondBB, y: PHITransAddr(MemLocPtr, DL, nullptr)));
451 bool isFirstBlock = true;
452
453 // Check all blocks going backward until we reach the FirstBB.
454 while (!WorkList.empty()) {
455 BlockAddressPair Current = WorkList.pop_back_val();
456 BasicBlock *B = Current.first;
457 PHITransAddr &Addr = Current.second;
458 Value *Ptr = Addr.getAddr();
459
460 // Ignore instructions before FirstI if this is the FirstBB.
461 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
462
463 BasicBlock::iterator EI;
464 if (isFirstBlock) {
465 // Ignore instructions after SecondI if this is the first visit of SecondBB.
466 assert(B == SecondBB && "first block is not the store block");
467 EI = SecondBBI;
468 isFirstBlock = false;
469 } else {
470 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
471 // In this case we also have to look at instructions after SecondI.
472 EI = B->end();
473 }
474 for (; BI != EI; ++BI) {
475 Instruction *I = &*BI;
476 if (I->mayWriteToMemory() && I != SecondI)
477 if (isModSet(MRI: AA.getModRefInfo(I, OptLoc: MemLoc.getWithNewPtr(NewPtr: Ptr))))
478 return false;
479 }
480 if (B != FirstBB) {
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
             "Should not hit the entry block because SecondI must be dominated "
             "by FirstI");
483 for (BasicBlock *Pred : predecessors(BB: B)) {
484 PHITransAddr PredAddr = Addr;
485 if (PredAddr.needsPHITranslationFromBlock(BB: B)) {
486 if (!PredAddr.isPotentiallyPHITranslatable())
487 return false;
488 if (!PredAddr.translateValue(CurBB: B, PredBB: Pred, DT, MustDominate: false))
489 return false;
490 }
491 Value *TranslatedPtr = PredAddr.getAddr();
492 auto Inserted = Visited.insert(KV: std::make_pair(x&: Pred, y&: TranslatedPtr));
493 if (!Inserted.second) {
494 // We already visited this block before. If it was with a different
495 // address - bail out!
496 if (TranslatedPtr != Inserted.first->second)
497 return false;
498 // ... otherwise just skip it.
499 continue;
500 }
501 WorkList.push_back(Elt: std::make_pair(x&: Pred, y&: PredAddr));
502 }
503 }
504 }
505 return true;
506}
507
508static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
509 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
510 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
511 const DataLayout &DL = Inst->getDataLayout();
512 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
513 uint64_t DeadSliceOffsetInBits =
514 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
515 auto SetDeadFragExpr = [](auto *Assign,
516 DIExpression::FragmentInfo DeadFragment) {
517 // createFragmentExpression expects an offset relative to the existing
518 // fragment offset if there is one.
519 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
520 Assign->getExpression()
521 ->getFragmentInfo()
522 .value_or(DIExpression::FragmentInfo(0, 0))
523 .OffsetInBits;
524 if (auto NewExpr = DIExpression::createFragmentExpression(
525 Expr: Assign->getExpression(), OffsetInBits: RelativeOffset, SizeInBits: DeadFragment.SizeInBits)) {
526 Assign->setExpression(*NewExpr);
527 return;
528 }
529 // Failed to create a fragment expression for this so discard the value,
530 // making this a kill location.
531 auto *Expr = *DIExpression::createFragmentExpression(
532 Expr: DIExpression::get(Context&: Assign->getContext(), Elements: {}), OffsetInBits: DeadFragment.OffsetInBits,
533 SizeInBits: DeadFragment.SizeInBits);
534 Assign->setExpression(Expr);
535 Assign->setKillLocation();
536 };
537
538 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
539 // link to any instructions. Created in the loop below (once).
540 DIAssignID *LinkToNothing = nullptr;
541 LLVMContext &Ctx = Inst->getContext();
542 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
543 if (!LinkToNothing)
544 LinkToNothing = DIAssignID::getDistinct(Context&: Ctx);
545 return LinkToNothing;
546 };
547
548 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
549 // overlapping dbg.assign intrinsic.
550 for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
551 std::optional<DIExpression::FragmentInfo> NewFragment;
552 if (!at::calculateFragmentIntersect(DL, Dest: OriginalDest, SliceOffsetInBits: DeadSliceOffsetInBits,
553 SliceSizeInBits: DeadSliceSizeInBits, DVRAssign: Assign,
554 Result&: NewFragment) ||
555 !NewFragment) {
556 // We couldn't calculate the intersecting fragment for some reason. Be
557 // cautious and unlink the whole assignment from the store.
558 Assign->setKillAddress();
559 Assign->setAssignId(GetDeadLink());
560 continue;
561 }
562 // No intersect.
563 if (NewFragment->SizeInBits == 0)
564 continue;
565
566 // Fragments overlap: insert a new dbg.assign for this dead part.
567 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
568 NewAssign->insertAfter(InsertAfter: Assign->getIterator());
569 NewAssign->setAssignId(GetDeadLink());
570 if (NewFragment)
571 SetDeadFragExpr(NewAssign, *NewFragment);
572 NewAssign->setKillAddress();
573 }
574}
575
576/// Update the attributes given that a memory access is updated (the
577/// dereferenced pointer could be moved forward when shortening a
578/// mem intrinsic).
579static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
580 uint64_t PtrOffset) {
581 // Remember old attributes.
582 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
583
584 // Find attributes that should be kept, and remove the rest.
585 AttributeMask AttrsToRemove;
586 for (auto &Attr : OldAttrs) {
587 if (Attr.hasKindAsEnum()) {
588 switch (Attr.getKindAsEnum()) {
589 default:
590 break;
591 case Attribute::Alignment:
        // Only keep alignment if PtrOffset satisfies the alignment.
593 if (isAligned(Lhs: Attr.getAlignment().valueOrOne(), SizeInBytes: PtrOffset))
594 continue;
595 break;
596 case Attribute::Dereferenceable:
597 case Attribute::DereferenceableOrNull:
598 // We could reduce the size of these attributes according to
599 // PtrOffset. But we simply drop these for now.
600 break;
601 case Attribute::NonNull:
602 case Attribute::NoUndef:
603 continue;
604 }
605 }
606 AttrsToRemove.addAttribute(A: Attr);
607 }
608
609 // Remove the attributes that should be dropped.
610 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
611}
612
613static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
614 uint64_t &DeadSize, int64_t KillingStart,
615 uint64_t KillingSize, bool IsOverwriteEnd) {
616 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(Val: DeadI);
617 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
618
  // We assume that memset/memcpy operates in chunks of the "largest" native
  // type size, aligned on the same value. That means the optimal start and size
  // of a memset/memcpy should be a multiple of the preferred alignment of that
  // type, i.e. there is no sense in trying to reduce the store size any further
  // since any "extra" stores come for free anyway.
  // On the other hand, the maximum alignment we can achieve is limited by the
  // alignment of the initial store.
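  // For example (hypothetical sizes), with a preferred destination alignment
  // of 8, a 32-byte memset is only worth shortening in multiples of 8 bytes;
  // trimming, say, 3 bytes would not eliminate any of the wide stores the
  // lowering is assumed to emit.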
626
627 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
628 // "largest" native type.
629 // Note: What is the proper way to get that value?
630 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
631 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
632
633 int64_t ToRemoveStart = 0;
634 uint64_t ToRemoveSize = 0;
635 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
636 // maintained on the remaining store.
637 if (IsOverwriteEnd) {
    // Calculate required adjustment for 'KillingStart' in order to keep the
    // remaining store size aligned on 'PrefAlign'.
640 uint64_t Off =
641 offsetToAlignment(Value: uint64_t(KillingStart - DeadStart), Alignment: PrefAlign);
642 ToRemoveStart = KillingStart + Off;
643 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
644 return false;
645 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
646 } else {
647 ToRemoveStart = DeadStart;
648 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
649 "Not overlapping accesses?");
650 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
    // Calculate required adjustment for 'ToRemoveSize' in order to keep the
    // start of the remaining store aligned on 'PrefAlign'.
653 uint64_t Off = offsetToAlignment(Value: ToRemoveSize, Alignment: PrefAlign);
654 if (Off != 0) {
655 if (ToRemoveSize <= (PrefAlign.value() - Off))
656 return false;
657 ToRemoveSize -= PrefAlign.value() - Off;
658 }
659 assert(isAligned(PrefAlign, ToRemoveSize) &&
660 "Should preserve selected alignment");
661 }
662
663 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
664 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
665
666 uint64_t NewSize = DeadSize - ToRemoveSize;
667 if (DeadIntrinsic->isAtomic()) {
668 // When shortening an atomic memory intrinsic, the newly shortened
669 // length must remain an integer multiple of the element size.
670 const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
671 if (0 != NewSize % ElementSize)
672 return false;
673 }
674
675 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
676 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
677 << "\n KILLER [" << ToRemoveStart << ", "
678 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
679
680 DeadIntrinsic->setLength(NewSize);
681 DeadIntrinsic->setDestAlignment(PrefAlign);
682
683 Value *OrigDest = DeadIntrinsic->getRawDest();
684 if (!IsOverwriteEnd) {
685 Value *Indices[1] = {
686 ConstantInt::get(Ty: DeadIntrinsic->getLength()->getType(), V: ToRemoveSize)};
687 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
688 PointeeType: Type::getInt8Ty(C&: DeadIntrinsic->getContext()), Ptr: OrigDest, IdxList: Indices, NameStr: "",
689 InsertBefore: DeadI->getIterator());
690 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
691 DeadIntrinsic->setDest(NewDestGEP);
692 adjustArgAttributes(Intrinsic: DeadIntrinsic, ArgNo: 0, PtrOffset: ToRemoveSize);
693 }
694
695 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
696 shortenAssignment(Inst: DeadI, OriginalDest: OrigDest, OldOffsetInBits: DeadStart * 8, OldSizeInBits: DeadSize * 8, NewSizeInBits: NewSize * 8,
697 IsOverwriteEnd);
698
699 // Finally update start and size of dead access.
700 if (!IsOverwriteEnd)
701 DeadStart += ToRemoveSize;
702 DeadSize = NewSize;
703
704 return true;
705}
706
707static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
708 int64_t &DeadStart, uint64_t &DeadSize) {
709 if (IntervalMap.empty() || !isShortenableAtTheEnd(I: DeadI))
710 return false;
711
712 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
713 int64_t KillingStart = OII->second;
714 uint64_t KillingSize = OII->first - KillingStart;
715
716 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
717
718 if (KillingStart > DeadStart &&
      // Note: "KillingStart - DeadStart" is known to be positive due to
      // preceding check.
721 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
722 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
723 // be non negative due to preceding checks.
724 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
725 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
726 IsOverwriteEnd: true)) {
727 IntervalMap.erase(position: OII);
728 return true;
729 }
730 }
731 return false;
732}
733
734static bool tryToShortenBegin(Instruction *DeadI,
735 OverlapIntervalsTy &IntervalMap,
736 int64_t &DeadStart, uint64_t &DeadSize) {
737 if (IntervalMap.empty() || !isShortenableAtTheBeginning(I: DeadI))
738 return false;
739
740 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
741 int64_t KillingStart = OII->second;
742 uint64_t KillingSize = OII->first - KillingStart;
743
744 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
745
746 if (KillingStart <= DeadStart &&
747 // Note: "DeadStart - KillingStart" is known to be non negative due to
748 // preceding check.
749 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
    // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
    // be positive due to preceding checks.
752 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
753 "Should have been handled as OW_Complete");
754 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
755 IsOverwriteEnd: false)) {
756 IntervalMap.erase(position: OII);
757 return true;
758 }
759 }
760 return false;
761}
762
763static Constant *
764tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
765 int64_t KillingOffset, int64_t DeadOffset,
766 const DataLayout &DL, BatchAAResults &AA,
767 DominatorTree *DT) {
768
769 if (DeadI && isa<ConstantInt>(Val: DeadI->getValueOperand()) &&
770 DL.typeSizeEqualsStoreSize(Ty: DeadI->getValueOperand()->getType()) &&
771 KillingI && isa<ConstantInt>(Val: KillingI->getValueOperand()) &&
772 DL.typeSizeEqualsStoreSize(Ty: KillingI->getValueOperand()->getType()) &&
773 memoryIsNotModifiedBetween(FirstI: DeadI, SecondI: KillingI, AA, DL, DT)) {
774 // If the store we find is:
775 // a) partially overwritten by the store to 'Loc'
776 // b) the killing store is fully contained in the dead one and
777 // c) they both have a constant value
778 // d) none of the two stores need padding
779 // Merge the two stores, replacing the dead store's value with a
780 // merge of both values.
781 // TODO: Deal with other constant types (vectors, etc), and probably
782 // some mem intrinsics (if needed)
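    // As a worked example (little-endian, hypothetical values): a dead i32
    // store of 0xAABBCCDD at offset 0 partially overwritten by a killing i16
    // store of 0x1234 at offset 2 is merged into a single i32 store of
    // 0x1234CCDD.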
783
784 APInt DeadValue = cast<ConstantInt>(Val: DeadI->getValueOperand())->getValue();
785 APInt KillingValue =
786 cast<ConstantInt>(Val: KillingI->getValueOperand())->getValue();
787 unsigned KillingBits = KillingValue.getBitWidth();
788 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
789 KillingValue = KillingValue.zext(width: DeadValue.getBitWidth());
790
791 // Offset of the smaller store inside the larger store
792 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
793 unsigned LShiftAmount =
794 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
795 : BitOffsetDiff;
796 APInt Mask = APInt::getBitsSet(numBits: DeadValue.getBitWidth(), loBit: LShiftAmount,
797 hiBit: LShiftAmount + KillingBits);
798 // Clear the bits we'll be replacing, then OR with the smaller
799 // store, shifted appropriately.
800 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
801 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
802 << "\n Killing: " << *KillingI
803 << "\n Merged Value: " << Merged << '\n');
804 return ConstantInt::get(Ty: DeadI->getValueOperand()->getType(), V: Merged);
805 }
806 return nullptr;
807}
808
809// Returns true if \p I is an intrinsic that does not read or write memory.
810static bool isNoopIntrinsic(Instruction *I) {
811 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
812 switch (II->getIntrinsicID()) {
813 case Intrinsic::lifetime_start:
814 case Intrinsic::lifetime_end:
815 case Intrinsic::invariant_end:
816 case Intrinsic::launder_invariant_group:
817 case Intrinsic::assume:
818 return true;
819 case Intrinsic::dbg_declare:
820 case Intrinsic::dbg_label:
821 case Intrinsic::dbg_value:
822 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
823 default:
824 return false;
825 }
826 }
827 return false;
828}
829
830// Check if we can ignore \p D for DSE.
831static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
832 Instruction *DI = D->getMemoryInst();
833 // Calls that only access inaccessible memory cannot read or write any memory
834 // locations we consider for elimination.
835 if (auto *CB = dyn_cast<CallBase>(Val: DI))
836 if (CB->onlyAccessesInaccessibleMemory())
837 return true;
838
839 // We can eliminate stores to locations not visible to the caller across
840 // throwing instructions.
841 if (DI->mayThrow() && !DefVisibleToCaller)
842 return true;
843
  // We can remove the dead stores, irrespective of the fence and its ordering
  // (release/acquire/seq_cst). Fences only constrain the ordering of already
  // visible stores; they do not make a store visible to other threads. So,
  // skipping over a fence does not change a store from being dead.
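  // For example (illustrative), in the sequence
  //   store i32 0, ptr %p ; fence seq_cst ; store i32 1, ptr %p
  // the first store can still be removed if nothing reads %p in between,
  // because the fence does not by itself publish the stored value.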
849 if (isa<FenceInst>(Val: DI))
850 return true;
851
852 // Skip intrinsics that do not really read or modify memory.
853 if (isNoopIntrinsic(I: DI))
854 return true;
855
856 return false;
857}
858
859namespace {
860
861// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
862// defined by `MemDef`.
863struct MemoryLocationWrapper {
864 MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
865 bool DefByInitializesAttr)
866 : MemLoc(MemLoc), MemDef(MemDef),
867 DefByInitializesAttr(DefByInitializesAttr) {
    assert(MemLoc.Ptr && "MemLoc should not be null");
869 UnderlyingObject = getUnderlyingObject(V: MemLoc.Ptr);
870 DefInst = MemDef->getMemoryInst();
871 }
872
873 MemoryLocation MemLoc;
874 const Value *UnderlyingObject;
875 MemoryDef *MemDef;
876 Instruction *DefInst;
877 bool DefByInitializesAttr = false;
878};
879
880// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
881// defined by this MemoryDef.
882struct MemoryDefWrapper {
883 MemoryDefWrapper(MemoryDef *MemDef,
884 ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
885 DefInst = MemDef->getMemoryInst();
886 for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
887 DefinedLocations.push_back(
888 Elt: MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
889 }
890 Instruction *DefInst;
891 SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
892};
893
894struct ArgumentInitInfo {
895 unsigned Idx;
896 bool IsDeadOrInvisibleOnUnwind;
897 ConstantRangeList Inits;
898};
899} // namespace
900
901static bool hasInitializesAttr(Instruction *I) {
902 CallBase *CB = dyn_cast<CallBase>(Val: I);
903 return CB && CB->getArgOperandWithAttribute(Kind: Attribute::Initializes);
904}
905
906// Return the intersected range list of the initializes attributes of "Args".
907// "Args" are call arguments that alias to each other.
908// If any argument in "Args" doesn't have dead_on_unwind attr and
909// "CallHasNoUnwindAttr" is false, return empty.
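// For example (hypothetical attributes), intersecting arguments carrying
// initializes((0, 8)) and initializes((4, 12)) yields the range list [(4, 8)].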
910static ConstantRangeList
911getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
912 bool CallHasNoUnwindAttr) {
913 if (Args.empty())
914 return {};
915
916 // To address unwind, the function should have nounwind attribute or the
917 // arguments have dead or invisible on unwind. Otherwise, return empty.
918 for (const auto &Arg : Args) {
919 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
920 return {};
921 if (Arg.Inits.empty())
922 return {};
923 }
924
925 ConstantRangeList IntersectedIntervals = Args.front().Inits;
926 for (auto &Arg : Args.drop_front())
927 IntersectedIntervals = IntersectedIntervals.intersectWith(CRL: Arg.Inits);
928
929 return IntersectedIntervals;
930}
931
932namespace {
933
934struct DSEState {
935 Function &F;
936 AliasAnalysis &AA;
937 EarliestEscapeAnalysis EA;
938
939 /// The single BatchAA instance that is used to cache AA queries. It will
940 /// not be invalidated over the whole run. This is safe, because:
941 /// 1. Only memory writes are removed, so the alias cache for memory
942 /// locations remains valid.
943 /// 2. No new instructions are added (only instructions removed), so cached
944 /// information for a deleted value cannot be accessed by a re-used new
945 /// value pointer.
946 BatchAAResults BatchAA;
947
948 MemorySSA &MSSA;
949 DominatorTree &DT;
950 PostDominatorTree &PDT;
951 const TargetLibraryInfo &TLI;
952 const DataLayout &DL;
953 const LoopInfo &LI;
954
  // Whether the function contains any irreducible control flow, useful for
  // accurately detecting loops.
957 bool ContainsIrreducibleLoops;
958
959 // All MemoryDefs that potentially could kill other MemDefs.
960 SmallVector<MemoryDef *, 64> MemDefs;
961 // Any that should be skipped as they are already deleted
962 SmallPtrSet<MemoryAccess *, 4> SkipStores;
963 // Keep track whether a given object is captured before return or not.
964 DenseMap<const Value *, bool> CapturedBeforeReturn;
965 // Keep track of all of the objects that are invisible to the caller after
966 // the function returns.
967 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
968 DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
969 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
970 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
971 // Post-order numbers for each basic block. Used to figure out if memory
972 // accesses are executed before another access.
973 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
974
975 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
976 /// basic block.
977 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
978 // Check if there are root nodes that are terminated by UnreachableInst.
979 // Those roots pessimize post-dominance queries. If there are such roots,
980 // fall back to CFG scan starting from all non-unreachable roots.
981 bool AnyUnreachableExit;
982
983 // Whether or not we should iterate on removing dead stores at the end of the
984 // function due to removing a store causing a previously captured pointer to
985 // no longer be captured.
986 bool ShouldIterateEndOfFunctionDSE;
987
988 /// Dead instructions to be removed at the end of DSE.
989 SmallVector<Instruction *> ToRemove;
990
991 // Class contains self-reference, make sure it's not copied/moved.
992 DSEState(const DSEState &) = delete;
993 DSEState &operator=(const DSEState &) = delete;
994
995 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
996 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
997 const LoopInfo &LI)
998 : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
999 PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
1000 // Collect blocks with throwing instructions not modeled in MemorySSA and
1001 // alloc-like objects.
1002 unsigned PO = 0;
1003 for (BasicBlock *BB : post_order(G: &F)) {
1004 PostOrderNumbers[BB] = PO++;
1005 for (Instruction &I : *BB) {
1006 MemoryAccess *MA = MSSA.getMemoryAccess(I: &I);
1007 if (I.mayThrow() && !MA)
1008 ThrowingBlocks.insert(Ptr: I.getParent());
1009
1010 auto *MD = dyn_cast_or_null<MemoryDef>(Val: MA);
1011 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
1012 (getLocForWrite(I: &I) || isMemTerminatorInst(I: &I) ||
1013 (EnableInitializesImprovement && hasInitializesAttr(I: &I))))
1014 MemDefs.push_back(Elt: MD);
1015 }
1016 }
1017
    // Treat byval, inalloca or dead-on-return arguments the same as allocas:
    // stores to them are dead at the end of the function.
1020 for (Argument &AI : F.args()) {
1021 if (AI.hasPassPointeeByValueCopyAttr()) {
1022 InvisibleToCallerAfterRet.insert(KV: {&AI, true});
1023 continue;
1024 }
1025
1026 if (!AI.getType()->isPointerTy())
1027 continue;
1028
1029 const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
1030 if (Info.coversAllReachableMemory())
1031 InvisibleToCallerAfterRet.insert(KV: {&AI, true});
1032 else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
1033 InvisibleToCallerAfterRetBounded.insert(KV: {&AI, DeadBytes});
1034 }
1035
1036 // Collect whether there is any irreducible control flow in the function.
1037 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, LI: &LI);
1038
1039 AnyUnreachableExit = any_of(Range: PDT.roots(), P: [](const BasicBlock *E) {
1040 return isa<UnreachableInst>(Val: E->getTerminator());
1041 });
1042 }
1043
1044 static void pushMemUses(MemoryAccess *Acc,
1045 SmallVectorImpl<MemoryAccess *> &WorkList,
1046 SmallPtrSetImpl<MemoryAccess *> &Visited) {
1047 for (Use &U : Acc->uses()) {
1048 auto *MA = cast<MemoryAccess>(Val: U.getUser());
1049 if (Visited.insert(Ptr: MA).second)
1050 WorkList.push_back(Elt: MA);
1051 }
1052 };
1053
1054 LocationSize strengthenLocationSize(const Instruction *I,
1055 LocationSize Size) const {
1056 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1057 LibFunc F;
1058 if (TLI.getLibFunc(CB: *CB, F) && TLI.has(F) &&
1059 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
        // Use the precise location size specified by the 3rd argument
        // for determining whether KillingI overwrites DeadLoc if it is a
        // memset_chk instruction. memset_chk will either write the amount
        // specified as the 3rd argument or immediately abort the program.
1064 // NOTE: AA may determine NoAlias if it can prove that the access size
1065 // is larger than the allocation size due to that being UB. To avoid
1066 // returning potentially invalid NoAlias results by AA, limit the use of
1067 // the precise location size to isOverwrite.
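        // For example (hypothetical call), for memset_chk(p, 0, 16, n) the
        // location size used here is the precise value 16: the call either
        // writes exactly 16 bytes or aborts before writing.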
1068 if (const auto *Len = dyn_cast<ConstantInt>(Val: CB->getArgOperand(i: 2)))
1069 return LocationSize::precise(Value: Len->getZExtValue());
1070 }
1071 }
1072 return Size;
1073 }
1074
1075 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1076 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1077 /// location (by \p DeadI instruction).
1078 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1079 /// \p DeadI, but they both write to the same underlying object. In that
1080 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite
  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1083 OverwriteResult isOverwrite(const Instruction *KillingI,
1084 const Instruction *DeadI,
1085 const MemoryLocation &KillingLoc,
1086 const MemoryLocation &DeadLoc,
1087 int64_t &KillingOff, int64_t &DeadOff) {
1088 // AliasAnalysis does not always account for loops. Limit overwrite checks
1089 // to dependencies for which we can guarantee they are independent of any
1090 // loops they are in.
1091 if (!isGuaranteedLoopIndependent(Current: DeadI, KillingDef: KillingI, CurrentLoc: DeadLoc))
1092 return OW_Unknown;
1093
1094 LocationSize KillingLocSize =
1095 strengthenLocationSize(I: KillingI, Size: KillingLoc.Size);
1096 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1097 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1098 const Value *DeadUndObj = getUnderlyingObject(V: DeadPtr);
1099 const Value *KillingUndObj = getUnderlyingObject(V: KillingPtr);
1100
1101 // Check whether the killing store overwrites the whole object, in which
1102 // case the size/offset of the dead store does not matter.
1103 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
1104 isIdentifiedObject(V: KillingUndObj)) {
1105 std::optional<TypeSize> KillingUndObjSize =
1106 getPointerSize(V: KillingUndObj, DL, TLI, F: &F);
1107 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
1108 return OW_Complete;
1109 }
1110
1111 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
1112 // get imprecise values here, though (except for unknown sizes).
1113 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
      // In case no constant size is known, try to use the IR values for the
      // number of bytes written and check if they match.
1116 const auto *KillingMemI = dyn_cast<MemIntrinsic>(Val: KillingI);
1117 const auto *DeadMemI = dyn_cast<MemIntrinsic>(Val: DeadI);
1118 if (KillingMemI && DeadMemI) {
1119 const Value *KillingV = KillingMemI->getLength();
1120 const Value *DeadV = DeadMemI->getLength();
1121 if (KillingV == DeadV && BatchAA.isMustAlias(LocA: DeadLoc, LocB: KillingLoc))
1122 return OW_Complete;
1123 }
1124
1125 // Masked stores have imprecise locations, but we can reason about them
1126 // to some extent.
1127 return isMaskedStoreOverwrite(KillingI, DeadI, AA&: BatchAA);
1128 }
1129
1130 const TypeSize KillingSize = KillingLocSize.getValue();
1131 const TypeSize DeadSize = DeadLoc.Size.getValue();
    // Bail out on doing size comparisons which depend on AA for now.
    // TODO: Remove AnyScalable once Alias Analysis deals with scalable vectors.
1134 const bool AnyScalable =
1135 DeadSize.isScalable() || KillingLocSize.isScalable();
1136
1137 if (AnyScalable)
1138 return OW_Unknown;
1139 // Query the alias information
1140 AliasResult AAR = BatchAA.alias(LocA: KillingLoc, LocB: DeadLoc);
1141
1142 // If the start pointers are the same, we just have to compare sizes to see if
1143 // the killing store was larger than the dead store.
1144 if (AAR == AliasResult::MustAlias) {
1145 // Make sure that the KillingSize size is >= the DeadSize size.
1146 if (KillingSize >= DeadSize)
1147 return OW_Complete;
1148 }
1149
1150 // If we hit a partial alias we may have a full overwrite
1151 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1152 int32_t Off = AAR.getOffset();
1153 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1154 return OW_Complete;
1155 }
1156
1157 // If we can't resolve the same pointers to the same object, then we can't
1158 // analyze them at all.
1159 if (DeadUndObj != KillingUndObj) {
1160 // Non aliasing stores to different objects don't overlap. Note that
1161 // if the killing store is known to overwrite whole object (out of
1162 // bounds access overwrites whole object as well) then it is assumed to
1163 // completely overwrite any store to the same object even if they don't
1164 // actually alias (see next check).
1165 if (AAR == AliasResult::NoAlias)
1166 return OW_None;
1167 return OW_Unknown;
1168 }
1169
1170 // Okay, we have stores to two completely different pointers. Try to
1171 // decompose the pointer into a "base + constant_offset" form. If the base
1172 // pointers are equal, then we can reason about the two stores.
1173 DeadOff = 0;
1174 KillingOff = 0;
1175 const Value *DeadBasePtr =
1176 GetPointerBaseWithConstantOffset(Ptr: DeadPtr, Offset&: DeadOff, DL);
1177 const Value *KillingBasePtr =
1178 GetPointerBaseWithConstantOffset(Ptr: KillingPtr, Offset&: KillingOff, DL);
1179
1180 // If the base pointers still differ, we have two completely different
1181 // stores.
1182 if (DeadBasePtr != KillingBasePtr)
1183 return OW_Unknown;
1184
1185 // The killing access completely overlaps the dead store if and only if
1186 // both start and end of the dead one is "inside" the killing one:
1187 // |<->|--dead--|<->|
1188 // |-----killing------|
1189 // Accesses may overlap if and only if start of one of them is "inside"
1190 // another one:
1191 // |<->|--dead--|<-------->|
1192 // |-------killing--------|
1193 // OR
1194 // |-------dead-------|
1195 // |<->|---killing---|<----->|
1196 //
1197 // We have to be careful here as *Off is signed while *.Size is unsigned.
1198
1199 // Check if the dead access starts "not before" the killing one.
1200 if (DeadOff >= KillingOff) {
1201 // If the dead access ends "not after" the killing access then the
1202 // dead one is completely overwritten by the killing one.
1203 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1204 return OW_Complete;
1205 // If start of the dead access is "before" end of the killing access
1206 // then accesses overlap.
1207 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1208 return OW_MaybePartial;
1209 }
1210 // If start of the killing access is "before" end of the dead access then
1211 // accesses overlap.
1212 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1213 return OW_MaybePartial;
1214 }
1215
1216 // Can reach here only if accesses are known not to overlap.
1217 return OW_None;
1218 }
1219
1220 bool isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1221 const LocationSize StoreSize) {
1222 if (isa<AllocaInst>(Val: V))
1223 return true;
1224
1225 auto IBounded = InvisibleToCallerAfterRetBounded.find(Val: V);
1226 if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
1227 int64_t ValueOffset;
1228 [[maybe_unused]] const Value *BaseValue =
1229 GetPointerBaseWithConstantOffset(Ptr, Offset&: ValueOffset, DL);
1230 assert(BaseValue == V);
1231 // This store is only invisible after return if we are in bounds of the
1232 // range marked dead.
1233 if (StoreSize.hasValue() &&
1234 ValueOffset + StoreSize.getValue() <= IBounded->second &&
1235 ValueOffset >= 0)
1236 return true;
1237 }
1238 auto I = InvisibleToCallerAfterRet.insert(KV: {V, false});
1239 if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
1240 I.first->second = capturesNothing(CC: PointerMayBeCaptured(
1241 V, /*ReturnCaptures=*/true, Mask: CaptureComponents::Provenance));
1242 return I.first->second;
1243 }
1244
1245 bool isInvisibleToCallerOnUnwind(const Value *V) {
1246 bool RequiresNoCaptureBeforeUnwind;
1247 if (!isNotVisibleOnUnwind(Object: V, RequiresNoCaptureBeforeUnwind))
1248 return false;
1249 if (!RequiresNoCaptureBeforeUnwind)
1250 return true;
1251
1252 auto I = CapturedBeforeReturn.insert(KV: {V, true});
1253 if (I.second)
1254 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1255 // with the killing MemoryDef. But we refrain from doing so for now to
1256 // limit compile-time and this does not cause any changes to the number
1257 // of stores removed on a large test set in practice.
1258 I.first->second = capturesAnything(CC: PointerMayBeCaptured(
1259 V, /*ReturnCaptures=*/false, Mask: CaptureComponents::Provenance));
1260 return !I.first->second;
1261 }
1262
1263 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1264 if (!I->mayWriteToMemory())
1265 return std::nullopt;
1266
1267 if (auto *CB = dyn_cast<CallBase>(Val: I))
1268 return MemoryLocation::getForDest(CI: CB, TLI);
1269
1270 return MemoryLocation::getOrNone(Inst: I);
1271 }
1272
1273 // Returns a list of <MemoryLocation, bool> pairs written by I.
1274 // The bool means whether the write is from Initializes attr.
1275 SmallVector<std::pair<MemoryLocation, bool>, 1>
1276 getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1277 SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
1278 if (isMemTerminatorInst(I)) {
1279 if (auto Loc = getLocForTerminator(I))
1280 Locations.push_back(Elt: std::make_pair(x&: Loc->first, y: false));
1281 return Locations;
1282 }
1283
1284 if (auto Loc = getLocForWrite(I))
1285 Locations.push_back(Elt: std::make_pair(x&: *Loc, y: false));
1286
1287 if (ConsiderInitializesAttr) {
1288 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1289 Locations.push_back(Elt: std::make_pair(x&: MemLoc, y: true));
1290 }
1291 }
1292 return Locations;
1293 }
1294
1295 /// Assuming this instruction has a dead analyzable write, can we delete
1296 /// this instruction?
1297 bool isRemovable(Instruction *I) {
1298 assert(getLocForWrite(I) && "Must have analyzable write");
1299
1300 // Don't remove volatile/atomic stores.
1301 if (StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1302 return SI->isUnordered();
1303
1304 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1305 // Don't remove volatile memory intrinsics.
1306 if (auto *MI = dyn_cast<MemIntrinsic>(Val: CB))
1307 return !MI->isVolatile();
1308
1309 // Never remove dead lifetime intrinsics, e.g. because they are followed
1310 // by a free.
1311 if (CB->isLifetimeStartOrEnd())
1312 return false;
1313
1314 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1315 !CB->isTerminator();
1316 }
1317
1318 return false;
1319 }
1320
1321 /// Returns true if \p UseInst completely overwrites \p DefLoc
1322 /// (stored by \p DefInst).
1323 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1324 Instruction *UseInst) {
1325 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1326 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1327 // MemoryDef.
1328 if (!UseInst->mayWriteToMemory())
1329 return false;
1330
1331 if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1332 if (CB->onlyAccessesInaccessibleMemory())
1333 return false;
1334
1335 int64_t InstWriteOffset, DepWriteOffset;
1336 if (auto CC = getLocForWrite(I: UseInst))
1337 return isOverwrite(KillingI: UseInst, DeadI: DefInst, KillingLoc: *CC, DeadLoc: DefLoc, KillingOff&: InstWriteOffset,
1338 DeadOff&: DepWriteOffset) == OW_Complete;
1339 return false;
1340 }
1341
1342 /// Returns true if \p Def is not read before returning from the function.
1343 bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
1344 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1345 << *Def->getMemoryInst()
                      << ") is at the end of the function\n");
1347 SmallVector<MemoryAccess *, 4> WorkList;
1348 SmallPtrSet<MemoryAccess *, 8> Visited;
1349
1350 pushMemUses(Acc: Def, WorkList, Visited);
1351 for (unsigned I = 0; I < WorkList.size(); I++) {
1352 if (WorkList.size() >= MemorySSAScanLimit) {
1353 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1354 return false;
1355 }
1356
1357 MemoryAccess *UseAccess = WorkList[I];
1358 if (isa<MemoryPhi>(Val: UseAccess)) {
1359 // AliasAnalysis does not account for loops. Limit elimination to
1360 // candidates for which we can guarantee they always store to the same
1361 // memory location.
1362 if (!isGuaranteedLoopInvariant(Ptr: DefLoc.Ptr))
1363 return false;
1364
1365 pushMemUses(Acc: cast<MemoryPhi>(Val: UseAccess), WorkList, Visited);
1366 continue;
1367 }
1368 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1369 // of times this is called and/or caching it.
1370 Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1371 if (isReadClobber(DefLoc, UseInst)) {
1372 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1373 return false;
1374 }
1375
1376 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess))
1377 pushMemUses(Acc: UseDef, WorkList, Visited);
1378 }
1379 return true;
1380 }
1381
1382 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1383 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1384 /// indicating whether \p I is a free-like call.
1385 std::optional<std::pair<MemoryLocation, bool>>
1386 getLocForTerminator(Instruction *I) const {
1387 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1388 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1389 return {
1390 std::make_pair(x: MemoryLocation::getForArgument(Call: CB, ArgIdx: 0, TLI: &TLI), y: false)};
1391 if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI))
1392 return {std::make_pair(x: MemoryLocation::getAfter(Ptr: FreedOp), y: true)};
1393 }
1394
1395 return std::nullopt;
1396 }
1397
1398 /// Returns true if \p I is a memory terminator instruction like
1399 /// llvm.lifetime.end or free.
1400 bool isMemTerminatorInst(Instruction *I) const {
1401 auto *CB = dyn_cast<CallBase>(Val: I);
1402 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1403 getFreedOperand(CB, TLI: &TLI) != nullptr);
1404 }
1405
1406 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1407 /// instruction \p AccessI.
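/// E.g. `free(%p)` terminates every location based on the same underlying
/// object as %p, whereas `llvm.lifetime.end` only terminates locations it
/// completely overwrites (see the isOverwrite check below).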
1408 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1409 Instruction *MaybeTerm) {
1410 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1411 getLocForTerminator(I: MaybeTerm);
1412
1413 if (!MaybeTermLoc)
1414 return false;
1415
1416 // If the terminator is a free-like call, all accesses to the underlying
1417 // object can be considered terminated.
1418 if (getUnderlyingObject(V: Loc.Ptr) !=
1419 getUnderlyingObject(V: MaybeTermLoc->first.Ptr))
1420 return false;
1421
1422 auto TermLoc = MaybeTermLoc->first;
1423 if (MaybeTermLoc->second) {
1424 const Value *LocUO = getUnderlyingObject(V: Loc.Ptr);
1425 return BatchAA.isMustAlias(V1: TermLoc.Ptr, V2: LocUO);
1426 }
1427 int64_t InstWriteOffset = 0;
1428 int64_t DepWriteOffset = 0;
1429 return isOverwrite(KillingI: MaybeTerm, DeadI: AccessI, KillingLoc: TermLoc, DeadLoc: Loc, KillingOff&: InstWriteOffset,
1430 DeadOff&: DepWriteOffset) == OW_Complete;
1431 }
1432
1433 // Returns true if \p UseInst may read from \p DefLoc.
1434 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1435 if (isNoopIntrinsic(I: UseInst))
1436 return false;
1437
1438 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1439 // treated as read clobbers.
1440 if (auto SI = dyn_cast<StoreInst>(Val: UseInst))
1441 return isStrongerThan(AO: SI->getOrdering(), Other: AtomicOrdering::Monotonic);
1442
1443 if (!UseInst->mayReadFromMemory())
1444 return false;
1445
1446 if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1447 if (CB->onlyAccessesInaccessibleMemory())
1448 return false;
1449
1450 return isRefSet(MRI: BatchAA.getModRefInfo(I: UseInst, OptLoc: DefLoc));
1451 }
1452
1453 /// Returns true if a dependency between \p Current and \p KillingDef is
1454 /// guaranteed to be loop invariant for the loops that they are in. Either
1455 /// because they are known to be in the same block, in the same loop level or
1456 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1457 /// during execution of the containing function.
1458 bool isGuaranteedLoopIndependent(const Instruction *Current,
1459 const Instruction *KillingDef,
1460 const MemoryLocation &CurrentLoc) {
1461 // If the dependency is within the same block or loop level (being careful
1462 // of irreducible loops), we know that AA will return a valid result for the
1463 // memory dependency. (Both at the function level, outside of any loop,
1464 // would also be valid but we currently disable that to limit compile time).
1465 if (Current->getParent() == KillingDef->getParent())
1466 return true;
1467 const Loop *CurrentLI = LI.getLoopFor(BB: Current->getParent());
1468 if (!ContainsIrreducibleLoops && CurrentLI &&
1469 CurrentLI == LI.getLoopFor(BB: KillingDef->getParent()))
1470 return true;
1471 // Otherwise check the memory location is invariant to any loops.
1472 return isGuaranteedLoopInvariant(Ptr: CurrentLoc.Ptr);
1473 }
1474
1475 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1476 /// loop. In particular, this guarantees that it only references a single
1477 /// MemoryLocation during execution of the containing function.
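/// E.g. an `alloca` in the entry block, or a GEP of it with all-constant
/// indices, qualifies; `getelementptr i32, ptr %A, i64 %i` with a
/// loop-varying %i does not.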
1478 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1479 Ptr = Ptr->stripPointerCasts();
1480 if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr))
1481 if (GEP->hasAllConstantIndices())
1482 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1483
1484 if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
1485 return I->getParent()->isEntryBlock() ||
1486 (!ContainsIrreducibleLoops && !LI.getLoopFor(BB: I->getParent()));
1487 }
1488 return true;
1489 }
1490
1491 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1492 // with no read access between them or on any other path to a function exit
1493 // block if \p KillingLoc is not accessible after the function returns. If
1494 // there is no such MemoryDef, return std::nullopt. The returned value may not
1495 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1496 // encounter an aliasing MemoryUse (read).
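// For example (illustrative), starting the walk at the defining access of the
// killing store in
//   store i32 1, ptr %p   ; candidate dead store
//   store i32 2, ptr %q   ; NoAlias def, stepped over
//   store i32 3, ptr %p   ; KillingDef
// the walk skips the store to %q and returns the MemoryDef of the first store.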
1497 std::optional<MemoryAccess *>
1498 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1499 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1500 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1501 bool IsMemTerm, unsigned &PartialLimit,
1502 bool IsInitializesAttrMemLoc) {
1503 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1504 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1505 return std::nullopt;
1506 }
1507
1508 MemoryAccess *Current = StartAccess;
1509 Instruction *KillingI = KillingDef->getMemoryInst();
1510 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1511
1512 // Only optimize defining access of KillingDef when directly starting at its
1513 // defining access. The defining access also must only access KillingLoc. At
1514 // the moment we only support instructions with a single write location, so
1515 // it should be sufficient to disable optimizations for instructions that
1516 // also read from memory.
1517 bool CanOptimize = OptimizeMemorySSA &&
1518 KillingDef->getDefiningAccess() == StartAccess &&
1519 !KillingI->mayReadFromMemory();
1520
1521 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1522 std::optional<MemoryLocation> CurrentLoc;
1523 for (;; Current = cast<MemoryDef>(Val: Current)->getDefiningAccess()) {
1524 LLVM_DEBUG({
1525 dbgs() << " visiting " << *Current;
1526 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1527 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1528 << ")";
1529 dbgs() << "\n";
1530 });
1531
1532 // Reached TOP.
1533 if (MSSA.isLiveOnEntryDef(MA: Current)) {
1534 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1535 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1536 // The first clobbering def is... none.
1537 KillingDef->setOptimized(Current);
1538 return std::nullopt;
1539 }
1540
1541 // Cost of a step. Accesses in the same block are more likely to be valid
1542 // candidates for elimination, hence consider them cheaper.
1543 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1544 ? MemorySSASameBBStepCost
1545 : MemorySSAOtherBBStepCost;
1546 if (WalkerStepLimit <= StepCost) {
1547 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1548 return std::nullopt;
1549 }
1550 WalkerStepLimit -= StepCost;
1551
1552 // Return for MemoryPhis. They cannot be eliminated directly and the
1553 // caller is responsible for traversing them.
1554 if (isa<MemoryPhi>(Val: Current)) {
1555 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1556 return Current;
1557 }
1558
1559 // Below, check if CurrentDef is a valid candidate to be eliminated by
1560 // KillingDef. If it is not, check the next candidate.
1561 MemoryDef *CurrentDef = cast<MemoryDef>(Val: Current);
1562 Instruction *CurrentI = CurrentDef->getMemoryInst();
1563
1564 if (canSkipDef(D: CurrentDef, DefVisibleToCaller: !isInvisibleToCallerOnUnwind(V: KillingUndObj))) {
1565 CanOptimize = false;
1566 continue;
1567 }
1568
1569 // Before we try to remove anything, check for any extra throwing
1570 // instructions that block us from DSEing
1571 if (mayThrowBetween(KillingI, DeadI: CurrentI, KillingUndObj)) {
1572 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1573 return std::nullopt;
1574 }
1575
1576 // Check for anything that looks like it will be a barrier to further
1577 // removal
1578 if (isDSEBarrier(KillingUndObj, DeadI: CurrentI)) {
1579 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1580 return std::nullopt;
1581 }
1582
1583 // If Current is known to be on a path that reads DefLoc or is a read
1584 // clobber, bail out, as the path is not profitable. We skip this check
1585 // for intrinsic calls, because the code knows how to handle memcpy
1586 // intrinsics.
1587 if (!isa<IntrinsicInst>(Val: CurrentI) && isReadClobber(DefLoc: KillingLoc, UseInst: CurrentI))
1588 return std::nullopt;
1589
1590 // Quick check if there are direct uses that are read-clobbers.
1591 if (any_of(Range: Current->uses(), P: [this, &KillingLoc, StartAccess](Use &U) {
1592 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U.getUser()))
1593 return !MSSA.dominates(A: StartAccess, B: UseOrDef) &&
1594 isReadClobber(DefLoc: KillingLoc, UseInst: UseOrDef->getMemoryInst());
1595 return false;
1596 })) {
1597 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1598 return std::nullopt;
1599 }
1600
1601 // If Current does not have an analyzable write location or is not
1602 // removable, skip it.
1603 CurrentLoc = getLocForWrite(I: CurrentI);
1604 if (!CurrentLoc || !isRemovable(I: CurrentI)) {
1605 CanOptimize = false;
1606 continue;
1607 }
1608
1609 // AliasAnalysis does not account for loops. Limit elimination to
1610 // candidates for which we can guarantee they always store to the same
1611 // memory location and are not located in different loops.
1612 if (!isGuaranteedLoopIndependent(Current: CurrentI, KillingDef: KillingI, CurrentLoc: *CurrentLoc)) {
1613 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1614 CanOptimize = false;
1615 continue;
1616 }
1617
1618 if (IsMemTerm) {
1619 // If the killing def is a memory terminator (e.g. lifetime.end), check
1620 // the next candidate if Current does not write the same
1621 // underlying object as the terminator.
1622 if (!isMemTerminator(Loc: *CurrentLoc, AccessI: CurrentI, MaybeTerm: KillingI)) {
1623 CanOptimize = false;
1624 continue;
1625 }
1626 } else {
1627 int64_t KillingOffset = 0;
1628 int64_t DeadOffset = 0;
1629 auto OR = isOverwrite(KillingI, DeadI: CurrentI, KillingLoc, DeadLoc: *CurrentLoc,
1630 KillingOff&: KillingOffset, DeadOff&: DeadOffset);
1631 if (CanOptimize) {
1632 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1633 // optimized access. Do not optimize if CurrentDef is already the
1634 // defining access of KillingDef.
1635 if (CurrentDef != KillingDef->getDefiningAccess() &&
1636 (OR == OW_Complete || OR == OW_MaybePartial))
1637 KillingDef->setOptimized(CurrentDef);
1638
1639 // Once a may-aliasing def is encountered do not set an optimized
1640 // access.
1641 if (OR != OW_None)
1642 CanOptimize = false;
1643 }
1644
1645 // If Current does not write to the same object as KillingDef, check
1646 // the next candidate.
1647 if (OR == OW_Unknown || OR == OW_None)
1648 continue;
1649 else if (OR == OW_MaybePartial) {
1650 // If KillingDef only partially overwrites Current, check the next
1651 // candidate if the partial step limit is exceeded. This aggressively
1652 // limits the number of candidates for partial store elimination,
1653 // which are less likely to be removable in the end.
1654 if (PartialLimit <= 1) {
1655 WalkerStepLimit -= 1;
1656 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1657 continue;
1658 }
1659 PartialLimit -= 1;
1660 }
1661 }
1662 break;
1663 };
1664
1665 // Accesses to objects accessible after the function returns can only be
1666 // eliminated if the access is dead along all paths to the exit. Collect
1667 // the blocks with killing (=completely overwriting MemoryDefs) and check if
1668 // they cover all paths from MaybeDeadAccess to any function exit.
1669 SmallPtrSet<Instruction *, 16> KillingDefs;
1670 KillingDefs.insert(Ptr: KillingDef->getMemoryInst());
1671 MemoryAccess *MaybeDeadAccess = Current;
1672 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1673 Instruction *MaybeDeadI = cast<MemoryDef>(Val: MaybeDeadAccess)->getMemoryInst();
1674 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1675 << *MaybeDeadI << ")\n");
1676
1677 SmallVector<MemoryAccess *, 32> WorkList;
1678 SmallPtrSet<MemoryAccess *, 32> Visited;
1679 pushMemUses(Acc: MaybeDeadAccess, WorkList, Visited);
1680
1681 // Check if DeadDef may be read.
1682 for (unsigned I = 0; I < WorkList.size(); I++) {
1683 MemoryAccess *UseAccess = WorkList[I];
1684
1685 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1686 // Bail out if the number of accesses to check exceeds the scan limit.
1687 if (ScanLimit < (WorkList.size() - I)) {
1688 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1689 return std::nullopt;
1690 }
1691 --ScanLimit;
1692 NumDomMemDefChecks++;
1693
1694 if (isa<MemoryPhi>(Val: UseAccess)) {
1695 if (any_of(Range&: KillingDefs, P: [this, UseAccess](Instruction *KI) {
1696 return DT.properlyDominates(A: KI->getParent(),
1697 B: UseAccess->getBlock());
1698 })) {
1699 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1700 continue;
1701 }
1702 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1703 pushMemUses(Acc: UseAccess, WorkList, Visited);
1704 continue;
1705 }
1706
1707 Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1708 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1709
1710 if (any_of(Range&: KillingDefs, P: [this, UseInst](Instruction *KI) {
1711 return DT.dominates(Def: KI, User: UseInst);
1712 })) {
1713 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1714 continue;
1715 }
1716
1717 // A memory terminator kills all preceding MemoryDefs and all succeeding
1718 // MemoryAccesses. We do not have to check its uses.
1719 if (isMemTerminator(Loc: MaybeDeadLoc, AccessI: MaybeDeadI, MaybeTerm: UseInst)) {
1720 LLVM_DEBUG(
1721 dbgs()
1722 << " ... skipping, memterminator invalidates following accesses\n");
1723 continue;
1724 }
1725
1726 if (isNoopIntrinsic(I: cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst())) {
1727 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1728 pushMemUses(Acc: UseAccess, WorkList, Visited);
1729 continue;
1730 }
1731
1732 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) {
1733 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1734 return std::nullopt;
1735 }
1736
1737 // Uses that may read the original MemoryDef mean we cannot eliminate the
1738 // original MemoryDef. Stop the walk.
1739 // If KillingDef is a CallInst with "initializes" attribute, the reads in
1740 // the callee would be dominated by initializations, so it should be safe.
1741 bool IsKillingDefFromInitAttr = false;
1742 if (IsInitializesAttrMemLoc) {
1743 if (KillingI == UseInst &&
1744 KillingUndObj == getUnderlyingObject(V: MaybeDeadLoc.Ptr))
1745 IsKillingDefFromInitAttr = true;
1746 }
1747
1748 if (isReadClobber(DefLoc: MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1749 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1750 return std::nullopt;
1751 }
1752
1753 // If this worklist walks back to the original memory access (and the
1754 // pointer is not guaranteed loop invariant) then we cannot assume that a
1755 // store kills itself.
1756 if (MaybeDeadAccess == UseAccess &&
1757 !isGuaranteedLoopInvariant(Ptr: MaybeDeadLoc.Ptr)) {
1758 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1759 return std::nullopt;
1760 }
1761 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1762 // if it reads the memory location.
1763 // TODO: It would probably be better to check for self-reads before
1764 // calling the function.
1765 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1766 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1767 continue;
1768 }
1769
1770 // Check all uses for MemoryDefs, except for defs completely overwriting
1771 // the original location. Otherwise we have to check uses of *all*
1772 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1773 // miss cases like the following
1774 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1775 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1776 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1777 // (The Use points to the *first* Def it may alias)
1778 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1779 // stores [0,1]
1780 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) {
1781 if (isCompleteOverwrite(DefLoc: MaybeDeadLoc, DefInst: MaybeDeadI, UseInst)) {
1782 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1783 if (PostOrderNumbers.find(Val: MaybeKillingBlock)->second <
1784 PostOrderNumbers.find(Val: MaybeDeadAccess->getBlock())->second) {
1785 if (!isInvisibleToCallerAfterRet(V: KillingUndObj, Ptr: KillingLoc.Ptr,
1786 StoreSize: KillingLoc.Size)) {
1787 LLVM_DEBUG(dbgs()
1788 << " ... found killing def " << *UseInst << "\n");
1789 KillingDefs.insert(Ptr: UseInst);
1790 }
1791 } else {
1792 LLVM_DEBUG(dbgs()
1793 << " ... found preceding def " << *UseInst << "\n");
1794 return std::nullopt;
1795 }
1796 } else
1797 pushMemUses(Acc: UseDef, WorkList, Visited);
1798 }
1799 }
1800
1801 // For accesses to locations visible after the function returns, make sure
1802 // that the location is dead (=overwritten) along all paths from
1803 // MaybeDeadAccess to the exit.
1804 if (!isInvisibleToCallerAfterRet(V: KillingUndObj, Ptr: KillingLoc.Ptr,
1805 StoreSize: KillingLoc.Size)) {
1806 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1807 for (Instruction *KD : KillingDefs)
1808 KillingBlocks.insert(Ptr: KD->getParent());
1809 assert(!KillingBlocks.empty() &&
1810 "Expected at least a single killing block");
1811
1812 // Find the common post-dominator of all killing blocks.
1813 BasicBlock *CommonPred = *KillingBlocks.begin();
1814 for (BasicBlock *BB : llvm::drop_begin(RangeOrContainer&: KillingBlocks)) {
1815 if (!CommonPred)
1816 break;
1817 CommonPred = PDT.findNearestCommonDominator(A: CommonPred, B: BB);
1818 }
1819
1820 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1821 // there is a path from MaybeDeadAccess to an exit not going through a
1822 // killing block.
1823 if (!PDT.dominates(A: CommonPred, B: MaybeDeadAccess->getBlock())) {
1824 if (!AnyUnreachableExit)
1825 return std::nullopt;
1826
1827 // Fall back to CFG scan starting at all non-unreachable roots if not
1828 // all paths to the exit go through CommonPred.
1829 CommonPred = nullptr;
1830 }
1831
1832 // If CommonPred itself is in the set of killing blocks, we're done.
1833 if (KillingBlocks.count(Ptr: CommonPred))
1834 return {MaybeDeadAccess};
1835
1836 SetVector<BasicBlock *> WorkList;
1837 // If CommonPred is null, there are multiple exits from the function.
1838 // They all have to be added to the worklist.
1839 if (CommonPred)
1840 WorkList.insert(X: CommonPred);
1841 else
1842 for (BasicBlock *R : PDT.roots()) {
1843 if (!isa<UnreachableInst>(Val: R->getTerminator()))
1844 WorkList.insert(X: R);
1845 }
1846
1847 NumCFGTries++;
1848 // Check if all paths starting from an exit node go through one of the
1849 // killing blocks before reaching MaybeDeadAccess.
1850 for (unsigned I = 0; I < WorkList.size(); I++) {
1851 NumCFGChecks++;
1852 BasicBlock *Current = WorkList[I];
1853 if (KillingBlocks.count(Ptr: Current))
1854 continue;
1855 if (Current == MaybeDeadAccess->getBlock())
1856 return std::nullopt;
1857
1858 // MaybeDeadAccess is reachable from the entry, so we don't have to
1859 // explore unreachable blocks further.
1860 if (!DT.isReachableFromEntry(A: Current))
1861 continue;
1862
1863 WorkList.insert_range(R: predecessors(BB: Current));
1864
1865 if (WorkList.size() >= MemorySSAPathCheckLimit)
1866 return std::nullopt;
1867 }
1868 NumCFGSuccess++;
1869 }
1870
1871 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1872 // potentially dead.
1873 return {MaybeDeadAccess};
1874 }
1875
1876 /// Delete dead memory defs and recursively add their operands to ToRemove if
1877 /// they became dead.
1878 void
1879 deleteDeadInstruction(Instruction *SI,
1880 SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
1881 MemorySSAUpdater Updater(&MSSA);
1882 SmallVector<Instruction *, 32> NowDeadInsts;
1883 NowDeadInsts.push_back(Elt: SI);
1884 --NumFastOther;
1885
1886 while (!NowDeadInsts.empty()) {
1887 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1888 ++NumFastOther;
1889
1890 // Try to preserve debug information attached to the dead instruction.
1891 salvageDebugInfo(I&: *DeadInst);
1892 salvageKnowledge(I: DeadInst);
1893
1894 // Remove the Instruction from MSSA.
1895 MemoryAccess *MA = MSSA.getMemoryAccess(I: DeadInst);
1896 bool IsMemDef = MA && isa<MemoryDef>(Val: MA);
1897 if (MA) {
1898 if (IsMemDef) {
1899 auto *MD = cast<MemoryDef>(Val: MA);
1900 SkipStores.insert(Ptr: MD);
1901 if (Deleted)
1902 Deleted->insert(Ptr: MD);
1903 if (auto *SI = dyn_cast<StoreInst>(Val: MD->getMemoryInst())) {
1904 if (SI->getValueOperand()->getType()->isPointerTy()) {
1905 const Value *UO = getUnderlyingObject(V: SI->getValueOperand());
1906 if (CapturedBeforeReturn.erase(Val: UO))
1907 ShouldIterateEndOfFunctionDSE = true;
1908 InvisibleToCallerAfterRet.erase(Val: UO);
1909 InvisibleToCallerAfterRetBounded.erase(Val: UO);
1910 }
1911 }
1912 }
1913
1914 Updater.removeMemoryAccess(MA);
1915 }
1916
1917 auto I = IOLs.find(Key: DeadInst->getParent());
1918 if (I != IOLs.end())
1919 I->second.erase(Key: DeadInst);
1920 // Remove its operands
1921 for (Use &O : DeadInst->operands())
1922 if (Instruction *OpI = dyn_cast<Instruction>(Val&: O)) {
1923 O.set(PoisonValue::get(T: O->getType()));
1924 if (isInstructionTriviallyDead(I: OpI, TLI: &TLI))
1925 NowDeadInsts.push_back(Elt: OpI);
1926 }
1927
1928 EA.removeInstruction(I: DeadInst);
1929 // Remove memory defs directly if they don't produce results, but only
1930 // queue other dead instructions for later removal. They may have been
1931 // used as memory locations that have been cached by BatchAA. Removing
1932 // them here may lead to newly created instructions to be allocated at the
1933 // same address, yielding stale cache entries.
1934 if (IsMemDef && DeadInst->getType()->isVoidTy())
1935 DeadInst->eraseFromParent();
1936 else
1937 ToRemove.push_back(Elt: DeadInst);
1938 }
1939 }
1940
1941 // Check for any extra throws between \p KillingI and \p DeadI that block
1942 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1943 // MemoryDef that may throw are handled during the walk from one def to the
1944 // next.
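// For example (illustrative, @may_throw is a placeholder):
//   store i32 0, ptr @g      ; candidate dead store
//   call void @may_throw()
//   store i32 1, ptr @g      ; killing store
// The call may unwind with the first store still visible, so it blocks DSE
// unless the underlying object is invisible to the caller on unwind.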
1945 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1946 const Value *KillingUndObj) {
1947 // First see if we can ignore it by using the fact that the killing store's
1948 // underlying object (KillingUndObj) is an alloca/alloca-like object that is
1949 // not visible to the caller during execution of the function.
1950 if (KillingUndObj && isInvisibleToCallerOnUnwind(V: KillingUndObj))
1951 return false;
1952
1953 if (KillingI->getParent() == DeadI->getParent())
1954 return ThrowingBlocks.count(Ptr: KillingI->getParent());
1955 return !ThrowingBlocks.empty();
1956 }
1957
1958 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1959 // instructions act as barriers:
1960 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1961 // object.
1962 // * Atomic stores stronger than monotonic.
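// E.g. a `store atomic i32 0, ptr %p release, align 4` between the dead and
// the killing store acts as a barrier and stops the walk.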
1963 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1964 // If DeadI may throw, it acts as a barrier, unless the killing access is to
1965 // an alloca/alloca-like object that does not escape.
1966 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj))
1967 return true;
1968
1969 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1970 // eliminate/reorder it.
1971 if (DeadI->isAtomic()) {
1972 if (auto *LI = dyn_cast<LoadInst>(Val: DeadI))
1973 return isStrongerThanMonotonic(AO: LI->getOrdering());
1974 if (auto *SI = dyn_cast<StoreInst>(Val: DeadI))
1975 return isStrongerThanMonotonic(AO: SI->getOrdering());
1976 if (auto *ARMW = dyn_cast<AtomicRMWInst>(Val: DeadI))
1977 return isStrongerThanMonotonic(AO: ARMW->getOrdering());
1978 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Val: DeadI))
1979 return isStrongerThanMonotonic(AO: CmpXchg->getSuccessOrdering()) ||
1980 isStrongerThanMonotonic(AO: CmpXchg->getFailureOrdering());
1981 llvm_unreachable("other instructions should be skipped in MemorySSA");
1982 }
1983 return false;
1984 }
1985
1986 /// Eliminate writes to objects that are not visible in the caller and are not
1987 /// accessed before returning from the function.
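/// E.g. in
///   %a = alloca i32
///   store i32 1, ptr %a
///   ret void
/// the store is never read before returning and %a is not visible to the
/// caller, so the store can be removed.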
1988 bool eliminateDeadWritesAtEndOfFunction() {
1989 bool MadeChange = false;
1990 LLVM_DEBUG(
1991 dbgs()
1992 << "Trying to eliminate MemoryDefs at the end of the function\n");
1993 do {
1994 ShouldIterateEndOfFunctionDSE = false;
1995 for (MemoryDef *Def : llvm::reverse(C&: MemDefs)) {
1996 if (SkipStores.contains(Ptr: Def))
1997 continue;
1998
1999 Instruction *DefI = Def->getMemoryInst();
2000 auto DefLoc = getLocForWrite(I: DefI);
2001 if (!DefLoc || !isRemovable(I: DefI)) {
2002 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
2003 "instruction not removable.\n");
2004 continue;
2005 }
2006
2007 // NOTE: Currently eliminating writes at the end of a function is
2008 // limited to MemoryDefs with a single underlying object, to save
2009 // compile-time. In practice it appears the case with multiple
2010 // underlying objects is very uncommon. If it turns out to be important,
2011 // we can use getUnderlyingObjects here instead.
2012 const Value *UO = getUnderlyingObject(V: DefLoc->Ptr);
2013 if (!isInvisibleToCallerAfterRet(V: UO, Ptr: DefLoc->Ptr, StoreSize: DefLoc->Size))
2014 continue;
2015
2016 if (isWriteAtEndOfFunction(Def, DefLoc: *DefLoc)) {
2017 // See through pointer-to-pointer bitcasts
2018 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
2019 "of the function\n");
2020 deleteDeadInstruction(SI: DefI);
2021 ++NumFastStores;
2022 MadeChange = true;
2023 }
2024 }
2025 } while (ShouldIterateEndOfFunctionDSE);
2026 return MadeChange;
2027 }
2028
2029 /// If we have a zero initializing memset following a call to malloc,
2030 /// try folding it into a call to calloc.
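/// Sketch of the transformation:
///   %p = call ptr @malloc(i64 %n)
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
/// becomes
///   %p = call ptr @calloc(i64 1, i64 %n)
/// provided the memset is reached whenever the allocation succeeds and nothing
/// writes to the memory in between.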
2031 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
2032 Instruction *DefI = Def->getMemoryInst();
2033 MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
2034 if (!MemSet)
2035 // TODO: Could handle zero store to small allocation as well.
2036 return false;
2037 Constant *StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
2038 if (!StoredConstant || !StoredConstant->isNullValue())
2039 return false;
2040
2041 if (!isRemovable(I: DefI))
2042 // The memset might be volatile.
2043 return false;
2044
2045 if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
2046 F.hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
2047 F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress) ||
2048 F.getName() == "calloc")
2049 return false;
2050 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(Val: DefUO));
2051 if (!Malloc)
2052 return false;
2053 auto *InnerCallee = Malloc->getCalledFunction();
2054 if (!InnerCallee)
2055 return false;
2056 LibFunc Func = NotLibFunc;
2057 StringRef ZeroedVariantName;
2058 if (!TLI.getLibFunc(FDecl: *InnerCallee, F&: Func) || !TLI.has(F: Func) ||
2059 Func != LibFunc_malloc) {
2060 Attribute Attr = Malloc->getFnAttr(Kind: "alloc-variant-zeroed");
2061 if (!Attr.isValid())
2062 return false;
2063 ZeroedVariantName = Attr.getValueAsString();
2064 if (ZeroedVariantName.empty())
2065 return false;
2066 }
2067
2068 // Gracefully handle malloc with unexpected memory attributes.
2069 auto *MallocDef = dyn_cast_or_null<MemoryDef>(Val: MSSA.getMemoryAccess(I: Malloc));
2070 if (!MallocDef)
2071 return false;
2072
2073 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
2074 // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
2075 // of the malloc block.
2076 auto *MallocBB = Malloc->getParent(),
2077 *MemsetBB = Memset->getParent();
2078 if (MallocBB == MemsetBB)
2079 return true;
2080 auto *Ptr = Memset->getArgOperand(i: 0);
2081 auto *TI = MallocBB->getTerminator();
2082 BasicBlock *TrueBB, *FalseBB;
2083 if (!match(V: TI, P: m_Br(C: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: Ptr),
2084 R: m_Zero()),
2085 T&: TrueBB, F&: FalseBB)))
2086 return false;
2087 if (MemsetBB != FalseBB)
2088 return false;
2089 return true;
2090 };
2091
2092 if (Malloc->getOperand(i_nocapture: 0) != MemSet->getLength())
2093 return false;
2094 if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Def: Malloc, User: MemSet) ||
2095 !memoryIsNotModifiedBetween(FirstI: Malloc, SecondI: MemSet, AA&: BatchAA, DL, DT: &DT))
2096 return false;
2097 IRBuilder<> IRB(Malloc);
2098 assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
2099 Value *Calloc = nullptr;
2100 if (!ZeroedVariantName.empty()) {
2101 LLVMContext &Ctx = Malloc->getContext();
2102 AttributeList Attrs = InnerCallee->getAttributes();
2103 AllocFnKind AllocKind =
2104 Attrs.getFnAttr(Kind: Attribute::AllocKind).getAllocKind() |
2105 AllocFnKind::Zeroed;
2106 AllocKind &= ~AllocFnKind::Uninitialized;
2107 Attrs =
2108 Attrs.addFnAttribute(C&: Ctx, Attr: Attribute::getWithAllocKind(Context&: Ctx, Kind: AllocKind))
2109 .removeFnAttribute(C&: Ctx, Kind: "alloc-variant-zeroed");
2110 FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2111 Name: ZeroedVariantName, T: InnerCallee->getFunctionType(), AttributeList: Attrs);
2112 cast<Function>(Val: ZeroedVariant.getCallee())
2113 ->setCallingConv(Malloc->getCallingConv());
2114 SmallVector<Value *, 3> Args;
2115 Args.append(in_start: Malloc->arg_begin(), in_end: Malloc->arg_end());
2116 CallInst *CI = IRB.CreateCall(Callee: ZeroedVariant, Args, Name: ZeroedVariantName);
2117 CI->setCallingConv(Malloc->getCallingConv());
2118 Calloc = CI;
2119 } else {
2120 Type *SizeTTy = Malloc->getArgOperand(i: 0)->getType();
2121 Calloc =
2122 emitCalloc(Num: ConstantInt::get(Ty: SizeTTy, V: 1), Size: Malloc->getArgOperand(i: 0),
2123 B&: IRB, TLI, AddrSpace: Malloc->getType()->getPointerAddressSpace());
2124 }
2125 if (!Calloc)
2126 return false;
2127
2128 MemorySSAUpdater Updater(&MSSA);
2129 auto *NewAccess =
2130 Updater.createMemoryAccessAfter(I: cast<Instruction>(Val: Calloc), Definition: nullptr,
2131 InsertPt: MallocDef);
2132 auto *NewAccessMD = cast<MemoryDef>(Val: NewAccess);
2133 Updater.insertDef(Def: NewAccessMD, /*RenameUses=*/true);
2134 Malloc->replaceAllUsesWith(V: Calloc);
2135 deleteDeadInstruction(SI: Malloc);
2136 return true;
2137 }
2138
2139 // Check if there is a dominating condition that implies that the value
2140 // being stored to a pointer is already present at that location.
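// E.g. (C-like sketch)
//   if (*p == v)
//     *p = v;   // no-op: the dominating condition implies *p already holds v
// as long as nothing may clobber *p between the load feeding the compare and
// the store.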
2141 bool dominatingConditionImpliesValue(MemoryDef *Def) {
2142 auto *StoreI = cast<StoreInst>(Val: Def->getMemoryInst());
2143 BasicBlock *StoreBB = StoreI->getParent();
2144 Value *StorePtr = StoreI->getPointerOperand();
2145 Value *StoreVal = StoreI->getValueOperand();
2146
2147 DomTreeNode *IDom = DT.getNode(BB: StoreBB)->getIDom();
2148 if (!IDom)
2149 return false;
2150
2151 auto *BI = dyn_cast<BranchInst>(Val: IDom->getBlock()->getTerminator());
2152 if (!BI || !BI->isConditional())
2153 return false;
2154
2155 // If both successors are the same block, it is not possible to determine
2156 // whether the optimization is safe. (We would not want to optimize a store
2157 // in the FalseBB if the condition is true and vice versa.)
2158 if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1))
2159 return false;
2160
2161 Instruction *ICmpL;
2162 CmpPredicate Pred;
2163 if (!match(V: BI->getCondition(),
2164 P: m_c_ICmp(Pred,
2165 L: m_CombineAnd(L: m_Load(Op: m_Specific(V: StorePtr)),
2166 R: m_Instruction(I&: ICmpL)),
2167 R: m_Specific(V: StoreVal))) ||
2168 !ICmpInst::isEquality(P: Pred))
2169 return false;
2170
2171 // If the else block also branches to the if block, or the other way
2172 // around, it is not possible to determine whether the optimization is safe.
2173 if (Pred == ICmpInst::ICMP_EQ &&
2174 !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 0)),
2175 BB: StoreBB))
2176 return false;
2177
2178 if (Pred == ICmpInst::ICMP_NE &&
2179 !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 1)),
2180 BB: StoreBB))
2181 return false;
2182
2183 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(I: ICmpL);
2184 MemoryAccess *ClobAcc =
2185 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2186
2187 return MSSA.dominates(A: ClobAcc, B: LoadAcc);
2188 }
2189
2190 /// \returns true if \p Def is a no-op store, either because it
2191 /// directly stores back a loaded value or stores zero to a calloced object.
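/// E.g. storing back a just-loaded value,
///   %v = load i32, ptr %p
///   store i32 %v, ptr %p
/// or storing zero into memory known to be zero-initialized by its allocation
/// (e.g. calloc) with no intervening clobber.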
2192 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2193 Instruction *DefI = Def->getMemoryInst();
2194 StoreInst *Store = dyn_cast<StoreInst>(Val: DefI);
2195 MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
2196 Constant *StoredConstant = nullptr;
2197 if (Store)
2198 StoredConstant = dyn_cast<Constant>(Val: Store->getOperand(i_nocapture: 0));
2199 else if (MemSet)
2200 StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
2201 else
2202 return false;
2203
2204 if (!isRemovable(I: DefI))
2205 return false;
2206
2207 if (StoredConstant) {
2208 Constant *InitC =
2209 getInitialValueOfAllocation(V: DefUO, TLI: &TLI, Ty: StoredConstant->getType());
2210 // If the clobbering access is LiveOnEntry, no instructions between them
2211 // can modify the memory location.
2212 if (InitC && InitC == StoredConstant)
2213 return MSSA.isLiveOnEntryDef(
2214 MA: MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA));
2215 }
2216
2217 if (!Store)
2218 return false;
2219
2220 if (dominatingConditionImpliesValue(Def))
2221 return true;
2222
2223 if (auto *LoadI = dyn_cast<LoadInst>(Val: Store->getOperand(i_nocapture: 0))) {
2224 if (LoadI->getPointerOperand() == Store->getOperand(i_nocapture: 1)) {
2225 // Get the defining access for the load.
2226 auto *LoadAccess = MSSA.getMemoryAccess(I: LoadI)->getDefiningAccess();
2227 // Fast path: the defining accesses are the same.
2228 if (LoadAccess == Def->getDefiningAccess())
2229 return true;
2230
2231 // Look through phi accesses. Recursively scan all phi accesses by
2232 // adding them to a worklist. Bail when we run into a memory def that
2233 // does not match LoadAccess.
2234 SetVector<MemoryAccess *> ToCheck;
2235 MemoryAccess *Current =
2236 MSSA.getWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2237 // We don't want to bail when we run into the store memory def. But,
2238 // the phi access may point to it. So, pretend like we've already
2239 // checked it.
2240 ToCheck.insert(X: Def);
2241 ToCheck.insert(X: Current);
2242 // Start at index 1 (i.e. at Current) to simulate already having checked Def.
2243 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2244 Current = ToCheck[I];
2245 if (auto PhiAccess = dyn_cast<MemoryPhi>(Val: Current)) {
2246 // Check all the operands.
2247 for (auto &Use : PhiAccess->incoming_values())
2248 ToCheck.insert(X: cast<MemoryAccess>(Val: &Use));
2249 continue;
2250 }
2251
2252 // If we found a memory def, bail. This happens when we have an
2253 // unrelated write in between an otherwise noop store.
2254 assert(isa<MemoryDef>(Current) &&
2255 "Only MemoryDefs should reach here.");
2256 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2257 // We are searching for the definition of the store's destination.
2258 // So, if that is the same definition as the load, then this is a
2259 // noop. Otherwise, fail.
2260 if (LoadAccess != Current)
2261 return false;
2262 }
2263 return true;
2264 }
2265 }
2266
2267 return false;
2268 }
2269
2270 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2271 bool Changed = false;
2272 for (auto OI : IOL) {
2273 Instruction *DeadI = OI.first;
2274 MemoryLocation Loc = *getLocForWrite(I: DeadI);
2275 assert(isRemovable(DeadI) && "Expect only removable instruction");
2276
2277 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2278 int64_t DeadStart = 0;
2279 uint64_t DeadSize = Loc.Size.getValue();
2280 GetPointerBaseWithConstantOffset(Ptr, Offset&: DeadStart, DL);
2281 OverlapIntervalsTy &IntervalMap = OI.second;
2282 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2283 if (IntervalMap.empty())
2284 continue;
2285 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2286 }
2287 return Changed;
2288 }
2289
2290 /// Eliminates writes to locations where the value that is being written
2291 /// is already stored at the same location.
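/// E.g. the second store in
///   store i32 0, ptr %p
///   store i32 0, ptr %p
/// is redundant, as is a store of a byte value that a covering memset has
/// already written.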
2292 bool eliminateRedundantStoresOfExistingValues() {
2293 bool MadeChange = false;
2294 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2295 "already existing value\n");
2296 for (auto *Def : MemDefs) {
2297 if (SkipStores.contains(Ptr: Def) || MSSA.isLiveOnEntryDef(MA: Def))
2298 continue;
2299
2300 Instruction *DefInst = Def->getMemoryInst();
2301 auto MaybeDefLoc = getLocForWrite(I: DefInst);
2302 if (!MaybeDefLoc || !isRemovable(I: DefInst))
2303 continue;
2304
2305 MemoryDef *UpperDef;
2306 // To conserve compile-time, we avoid walking to the next clobbering def.
2307 // Instead, we just try to get the optimized access, if it exists. DSE
2308 // will try to optimize defs during the earlier traversal.
2309 if (Def->isOptimized())
2310 UpperDef = dyn_cast<MemoryDef>(Val: Def->getOptimized());
2311 else
2312 UpperDef = dyn_cast<MemoryDef>(Val: Def->getDefiningAccess());
2313 if (!UpperDef || MSSA.isLiveOnEntryDef(MA: UpperDef))
2314 continue;
2315
2316 Instruction *UpperInst = UpperDef->getMemoryInst();
2317 auto IsRedundantStore = [&]() {
2318 // We don't care about differences in call attributes here.
2319 if (DefInst->isIdenticalToWhenDefined(I: UpperInst,
2320 /*IntersectAttrs=*/true))
2321 return true;
2322 if (auto *MemSetI = dyn_cast<MemSetInst>(Val: UpperInst)) {
2323 if (auto *SI = dyn_cast<StoreInst>(Val: DefInst)) {
2324 // MemSetInst must have a write location.
2325 auto UpperLoc = getLocForWrite(I: UpperInst);
2326 if (!UpperLoc)
2327 return false;
2328 int64_t InstWriteOffset = 0;
2329 int64_t DepWriteOffset = 0;
2330 auto OR = isOverwrite(KillingI: UpperInst, DeadI: DefInst, KillingLoc: *UpperLoc, DeadLoc: *MaybeDefLoc,
2331 KillingOff&: InstWriteOffset, DeadOff&: DepWriteOffset);
2332 Value *StoredByte = isBytewiseValue(V: SI->getValueOperand(), DL);
2333 return StoredByte && StoredByte == MemSetI->getOperand(i_nocapture: 1) &&
2334 OR == OW_Complete;
2335 }
2336 }
2337 return false;
2338 };
2339
2340 if (!IsRedundantStore() || isReadClobber(DefLoc: *MaybeDefLoc, UseInst: DefInst))
2341 continue;
2342 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2343 << '\n');
2344 deleteDeadInstruction(SI: DefInst);
2345 NumRedundantStores++;
2346 MadeChange = true;
2347 }
2348 return MadeChange;
2349 }
2350
2351 // Return the locations written by the initializes attribute.
2352 // Note that this function considers:
2353 // 1. Unwind edge: use "initializes" attribute only if the callee has
2354 // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
2355 // or the argument is invisible to caller on unwind. That is, we don't
2356 // perform incorrect DSE on unwind edges in the current function.
2357 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
2358 // the intersected range list of their "initializes" attributes.
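// Illustrative example: for a call
//   call void @init(ptr initializes((0, 4)) %p)
// this returns the 4-byte location starting at %p, which may then kill
// earlier stores to those bytes (subject to the unwind and aliasing rules
// above). @init is a placeholder callee.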
2359 SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
2360
2361 // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
2362 // killed by `KillingLocWrapper.MemDef`. Return whether
2363 // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
2364 std::pair<bool, bool>
2365 eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
2366
2367 // Try to eliminate dead defs killed by `KillingDefWrapper`. Returns whether
2368 // any change was made.
2369 bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
2370};
2371} // namespace
2372
2373// Return true if "Arg" is function local and isn't captured before "CB".
2374static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
2375 EarliestEscapeAnalysis &EA) {
2376 const Value *UnderlyingObj = getUnderlyingObject(V: Arg);
2377 return isIdentifiedFunctionLocal(V: UnderlyingObj) &&
2378 capturesNothing(
2379 CC: EA.getCapturesBefore(Object: UnderlyingObj, I: CB, /*OrAt*/ true));
2380}
2381
2382SmallVector<MemoryLocation, 1>
2383DSEState::getInitializesArgMemLoc(const Instruction *I) {
2384 const CallBase *CB = dyn_cast<CallBase>(Val: I);
2385 if (!CB)
2386 return {};
2387
2388 // Collect aliasing arguments and their initializes ranges.
2389 SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
2390 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2391 Value *CurArg = CB->getArgOperand(i: Idx);
2392 if (!CurArg->getType()->isPointerTy())
2393 continue;
2394
2395 ConstantRangeList Inits;
2396 Attribute InitializesAttr = CB->getParamAttr(ArgNo: Idx, Kind: Attribute::Initializes);
2397 // initializes on byval arguments refers to the callee copy, not the
2398 // original memory the caller passed in.
2399 if (InitializesAttr.isValid() && !CB->isByValArgument(ArgNo: Idx))
2400 Inits = InitializesAttr.getValueAsConstantRangeList();
2401
2402 // Check whether "CurArg" could alias with global variables. We require
2403 // either it's function local and isn't captured before or the "CB" only
2404 // accesses arg or inaccessible mem.
2405 if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2406 !isFuncLocalAndNotCaptured(Arg: CurArg, CB, EA))
2407 Inits = ConstantRangeList();
2408
2409 // We don't perform incorrect DSE on unwind edges in the current function,
2410 // and use the "initializes" attribute to kill dead stores if:
2411 // - The call does not throw exceptions, "CB->doesNotThrow()".
2412 // - Or the callee parameter has "dead_on_unwind" attribute.
2413 // - Or the argument is invisible to caller on unwind, and there are no
2414 // unwind edges from this call in the current function (e.g. `CallInst`).
2415 bool IsDeadOrInvisibleOnUnwind =
2416 CB->paramHasAttr(ArgNo: Idx, Kind: Attribute::DeadOnUnwind) ||
2417 (isa<CallInst>(Val: CB) && isInvisibleToCallerOnUnwind(V: CurArg));
2418 ArgumentInitInfo InitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind, .Inits: Inits};
2419 bool FoundAliasing = false;
2420 for (auto &[Arg, AliasList] : Arguments) {
2421 auto AAR = BatchAA.alias(LocA: MemoryLocation::getBeforeOrAfter(Ptr: Arg),
2422 LocB: MemoryLocation::getBeforeOrAfter(Ptr: CurArg));
2423 if (AAR == AliasResult::NoAlias) {
2424 continue;
2425 } else if (AAR == AliasResult::MustAlias) {
2426 FoundAliasing = true;
2427 AliasList.push_back(Elt: InitInfo);
2428 } else {
2429 // For PartialAlias and MayAlias, there is an offset or may be an
2430 // unknown offset between the arguments and we insert an empty init
2431 // range to discard the entire initializes info while intersecting.
2432 FoundAliasing = true;
2433 AliasList.push_back(Elt: ArgumentInitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind,
2434 .Inits: ConstantRangeList()});
2435 }
2436 }
2437 if (!FoundAliasing)
2438 Arguments[CurArg] = {InitInfo};
2439 }
2440
2441 SmallVector<MemoryLocation, 1> Locations;
2442 for (const auto &[_, Args] : Arguments) {
2443 auto IntersectedRanges =
2444 getIntersectedInitRangeList(Args, CallHasNoUnwindAttr: CB->doesNotThrow());
2445 if (IntersectedRanges.empty())
2446 continue;
2447
2448 for (const auto &Arg : Args) {
2449 for (const auto &Range : IntersectedRanges) {
2450 int64_t Start = Range.getLower().getSExtValue();
2451 int64_t End = Range.getUpper().getSExtValue();
2452 // For now, we only handle locations starting at offset 0.
2453 if (Start == 0)
2454 Locations.push_back(Elt: MemoryLocation(CB->getArgOperand(i: Arg.Idx),
2455 LocationSize::precise(Value: End - Start),
2456 CB->getAAMetadata()));
2457 }
2458 }
2459 }
2460 return Locations;
2461}
2462
2463std::pair<bool, bool>
2464DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2465 bool Changed = false;
2466 bool DeletedKillingLoc = false;
2467 unsigned ScanLimit = MemorySSAScanLimit;
2468 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2469 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2470 // Worklist of MemoryAccesses that may be killed by
2471 // "KillingLocWrapper.MemDef".
2472 SmallSetVector<MemoryAccess *, 8> ToCheck;
2473 // Track MemoryAccesses that have been deleted in the loop below, so we can
2474 // skip them. Don't use SkipStores for this, which may contain reused
2475 // MemoryAccess addresses.
2476 SmallPtrSet<MemoryAccess *, 8> Deleted;
2477 [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2478 ToCheck.insert(X: KillingLocWrapper.MemDef->getDefiningAccess());
2479
2480 // Check if MemoryAccesses in the worklist are killed by
2481 // "KillingLocWrapper.MemDef".
2482 for (unsigned I = 0; I < ToCheck.size(); I++) {
2483 MemoryAccess *Current = ToCheck[I];
2484 if (Deleted.contains(Ptr: Current))
2485 continue;
2486 std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2487 KillingDef: KillingLocWrapper.MemDef, StartAccess: Current, KillingLoc: KillingLocWrapper.MemLoc,
2488 KillingUndObj: KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2489 IsMemTerm: isMemTerminatorInst(I: KillingLocWrapper.DefInst), PartialLimit,
2490 IsInitializesAttrMemLoc: KillingLocWrapper.DefByInitializesAttr);
2491
2492 if (!MaybeDeadAccess) {
2493 LLVM_DEBUG(dbgs() << " finished walk\n");
2494 continue;
2495 }
2496 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2497 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2498 if (isa<MemoryPhi>(Val: DeadAccess)) {
2499 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2500 for (Value *V : cast<MemoryPhi>(Val: DeadAccess)->incoming_values()) {
2501 MemoryAccess *IncomingAccess = cast<MemoryAccess>(Val: V);
2502 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2503 BasicBlock *PhiBlock = DeadAccess->getBlock();
2504
2505 // We only consider incoming MemoryAccesses that come before the
2506 // MemoryPhi. Otherwise we could discover candidates that do not
2507 // strictly dominate our starting def.
2508 if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2509 ToCheck.insert(X: IncomingAccess);
2510 }
2511 continue;
2512 }
2513 // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2514 // It would incorrectly consider a call instruction as a redundant store
2515 // and remove this call instruction.
2516 // TODO: this conflates the existence of a MemoryLocation with being able
2517 // to delete the instruction. Fix isRemovable() to consider calls with
2518 // side effects that cannot be removed, e.g. calls with the initializes
2519 // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2520 MemoryDefWrapper DeadDefWrapper(
2521 cast<MemoryDef>(Val: DeadAccess),
2522 getLocForInst(I: cast<MemoryDef>(Val: DeadAccess)->getMemoryInst(),
2523 /*ConsiderInitializesAttr=*/false));
2524 assert(DeadDefWrapper.DefinedLocations.size() == 1);
2525 MemoryLocationWrapper &DeadLocWrapper =
2526 DeadDefWrapper.DefinedLocations.front();
2527 LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2528 ToCheck.insert(X: DeadLocWrapper.MemDef->getDefiningAccess());
2529 NumGetDomMemoryDefPassed++;
2530
2531 if (!DebugCounter::shouldExecute(Counter&: MemorySSACounter))
2532 continue;
2533 if (isMemTerminatorInst(I: KillingLocWrapper.DefInst)) {
2534 if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2535 continue;
2536 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2537 << *DeadLocWrapper.DefInst << "\n KILLER: "
2538 << *KillingLocWrapper.DefInst << '\n');
2539 deleteDeadInstruction(SI: DeadLocWrapper.DefInst, Deleted: &Deleted);
2540 ++NumFastStores;
2541 Changed = true;
2542 } else {
2543 // Check if DeadI overwrites KillingI.
2544 int64_t KillingOffset = 0;
2545 int64_t DeadOffset = 0;
2546 OverwriteResult OR =
2547 isOverwrite(KillingI: KillingLocWrapper.DefInst, DeadI: DeadLocWrapper.DefInst,
2548 KillingLoc: KillingLocWrapper.MemLoc, DeadLoc: DeadLocWrapper.MemLoc,
2549 KillingOff&: KillingOffset, DeadOff&: DeadOffset);
2550 if (OR == OW_MaybePartial) {
2551 auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2552 OR = isPartialOverwrite(KillingLoc: KillingLocWrapper.MemLoc, DeadLoc: DeadLocWrapper.MemLoc,
2553 KillingOff: KillingOffset, DeadOff: DeadOffset,
2554 DeadI: DeadLocWrapper.DefInst, IOL);
2555 }
2556 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2557 auto *DeadSI = dyn_cast<StoreInst>(Val: DeadLocWrapper.DefInst);
2558 auto *KillingSI = dyn_cast<StoreInst>(Val: KillingLocWrapper.DefInst);
2559 // We are re-using tryToMergePartialOverlappingStores, which requires
2560 // DeadSI to dominate KillingSI.
2561 // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2562 if (DeadSI && KillingSI && DT.dominates(Def: DeadSI, User: KillingSI)) {
2563 if (Constant *Merged = tryToMergePartialOverlappingStores(
2564 KillingI: KillingSI, DeadI: DeadSI, KillingOffset, DeadOffset, DL, AA&: BatchAA,
2565 DT: &DT)) {
2566
2567 // Update stored value of earlier store to merged constant.
2568 DeadSI->setOperand(i_nocapture: 0, Val_nocapture: Merged);
2569 ++NumModifiedStores;
2570 Changed = true;
2571 DeletedKillingLoc = true;
2572
2573 // Remove killing store and remove any outstanding overlap
2574 // intervals for the updated store.
2575 deleteDeadInstruction(SI: KillingSI, Deleted: &Deleted);
2576 auto I = IOLs.find(Key: DeadSI->getParent());
2577 if (I != IOLs.end())
2578 I->second.erase(Key: DeadSI);
2579 break;
2580 }
2581 }
2582 }
2583 if (OR == OW_Complete) {
2584 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2585 << *DeadLocWrapper.DefInst << "\n KILLER: "
2586 << *KillingLocWrapper.DefInst << '\n');
2587 deleteDeadInstruction(SI: DeadLocWrapper.DefInst, Deleted: &Deleted);
2588 ++NumFastStores;
2589 Changed = true;
2590 }
2591 }
2592 }
2593
2594 assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2595 "SkipStores and Deleted out of sync?");
2596
2597 return {Changed, DeletedKillingLoc};
2598}
2599
2600bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2601 if (KillingDefWrapper.DefinedLocations.empty()) {
2602 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2603 << *KillingDefWrapper.DefInst << "\n");
2604 return false;
2605 }
2606
2607 bool MadeChange = false;
2608 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2609 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2610 << *KillingLocWrapper.MemDef << " ("
2611 << *KillingLocWrapper.DefInst << ")\n");
2612 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2613 MadeChange |= Changed;
2614
2615 // Check if the store is a no-op.
2616 if (!DeletedKillingLoc && storeIsNoop(Def: KillingLocWrapper.MemDef,
2617 DefUO: KillingLocWrapper.UnderlyingObject)) {
2618 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2619 << *KillingLocWrapper.DefInst << '\n');
2620 deleteDeadInstruction(SI: KillingLocWrapper.DefInst);
2621 NumRedundantStores++;
2622 MadeChange = true;
2623 continue;
2624 }
2625 // Can we form a calloc from a memset/malloc pair?
2626 if (!DeletedKillingLoc &&
2627 tryFoldIntoCalloc(Def: KillingLocWrapper.MemDef,
2628 DefUO: KillingLocWrapper.UnderlyingObject)) {
2629 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2630 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2631 deleteDeadInstruction(SI: KillingLocWrapper.DefInst);
2632 MadeChange = true;
2633 continue;
2634 }
2635 }
2636 return MadeChange;
2637}
2638
2639static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2640 DominatorTree &DT, PostDominatorTree &PDT,
2641 const TargetLibraryInfo &TLI,
2642 const LoopInfo &LI) {
2643 bool MadeChange = false;
2644 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2645 // For each store:
2646 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2647 MemoryDef *KillingDef = State.MemDefs[I];
2648 if (State.SkipStores.count(Ptr: KillingDef))
2649 continue;
2650
2651 MemoryDefWrapper KillingDefWrapper(
2652 KillingDef, State.getLocForInst(I: KillingDef->getMemoryInst(),
2653 ConsiderInitializesAttr: EnableInitializesImprovement));
2654 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2655 }
2656
2657 if (EnablePartialOverwriteTracking)
2658 for (auto &KV : State.IOLs)
2659 MadeChange |= State.removePartiallyOverlappedStores(IOL&: KV.second);
2660
2661 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2662 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2663
2664 while (!State.ToRemove.empty()) {
2665 Instruction *DeadInst = State.ToRemove.pop_back_val();
2666 DeadInst->eraseFromParent();
2667 }
2668
2669 return MadeChange;
2670}
2671
2672//===----------------------------------------------------------------------===//
2673// DSE Pass
2674//===----------------------------------------------------------------------===//
2675PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2676 AliasAnalysis &AA = AM.getResult<AAManager>(IR&: F);
2677 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
2678 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
2679 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2680 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F);
2681 LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
2682
2683 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2684
2685#ifdef LLVM_ENABLE_STATS
2686 if (AreStatisticsEnabled())
2687 for (auto &I : instructions(F))
2688 NumRemainingStores += isa<StoreInst>(Val: &I);
2689#endif
2690
2691 if (!Changed)
2692 return PreservedAnalyses::all();
2693
2694 PreservedAnalyses PA;
2695 PA.preserveSet<CFGAnalyses>();
2696 PA.preserve<MemorySSAAnalysis>();
2697 PA.preserve<LoopAnalysis>();
2698 return PA;
2699}
2700
2701namespace {
2702
2703/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2704class DSELegacyPass : public FunctionPass {
2705public:
2706 static char ID; // Pass identification, replacement for typeid
2707
2708 DSELegacyPass() : FunctionPass(ID) {
2709 initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2710 }
2711
2712 bool runOnFunction(Function &F) override {
2713 if (skipFunction(F))
2714 return false;
2715
2716 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2717 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2718 const TargetLibraryInfo &TLI =
2719 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2720 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2721 PostDominatorTree &PDT =
2722 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2723 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2724
2725 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2726
2727#ifdef LLVM_ENABLE_STATS
2728 if (AreStatisticsEnabled())
2729 for (auto &I : instructions(F))
2730 NumRemainingStores += isa<StoreInst>(Val: &I);
2731#endif
2732
2733 return Changed;
2734 }
2735
2736 void getAnalysisUsage(AnalysisUsage &AU) const override {
2737 AU.setPreservesCFG();
2738 AU.addRequired<AAResultsWrapperPass>();
2739 AU.addRequired<TargetLibraryInfoWrapperPass>();
2740 AU.addPreserved<GlobalsAAWrapperPass>();
2741 AU.addRequired<DominatorTreeWrapperPass>();
2742 AU.addPreserved<DominatorTreeWrapperPass>();
2743 AU.addRequired<PostDominatorTreeWrapperPass>();
2744 AU.addRequired<MemorySSAWrapperPass>();
2745 AU.addPreserved<PostDominatorTreeWrapperPass>();
2746 AU.addPreserved<MemorySSAWrapperPass>();
2747 AU.addRequired<LoopInfoWrapperPass>();
2748 AU.addPreserved<LoopInfoWrapperPass>();
2749 AU.addRequired<AssumptionCacheTracker>();
2750 }
2751};
2752
2753} // end anonymous namespace
2754
2755char DSELegacyPass::ID = 0;
2756
2757INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2758 false)
2759INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2760INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
2761INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2762INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2763INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2764INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2765INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2766INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2767INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2768INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2769 false)
2770
2771LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() {
2772 return new DSELegacyPass();
2773}
2774