1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
29
30#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
34#include "llvm/ADT/PostOrderIterator.h"
35#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/SmallPtrSet.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
40#include "llvm/Analysis/AliasAnalysis.h"
41#include "llvm/Analysis/AssumptionCache.h"
42#include "llvm/Analysis/CaptureTracking.h"
43#include "llvm/Analysis/GlobalsModRef.h"
44#include "llvm/Analysis/Loads.h"
45#include "llvm/Analysis/LoopInfo.h"
46#include "llvm/Analysis/MemoryBuiltins.h"
47#include "llvm/Analysis/MemoryLocation.h"
48#include "llvm/Analysis/MemorySSA.h"
49#include "llvm/Analysis/MemorySSAUpdater.h"
50#include "llvm/Analysis/MustExecute.h"
51#include "llvm/Analysis/PostDominators.h"
52#include "llvm/Analysis/TargetLibraryInfo.h"
53#include "llvm/Analysis/ValueTracking.h"
54#include "llvm/IR/Argument.h"
55#include "llvm/IR/AttributeMask.h"
56#include "llvm/IR/BasicBlock.h"
57#include "llvm/IR/Constant.h"
58#include "llvm/IR/ConstantRangeList.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/DataLayout.h"
61#include "llvm/IR/DebugInfo.h"
62#include "llvm/IR/Dominators.h"
63#include "llvm/IR/Function.h"
64#include "llvm/IR/IRBuilder.h"
65#include "llvm/IR/InstIterator.h"
66#include "llvm/IR/InstrTypes.h"
67#include "llvm/IR/Instruction.h"
68#include "llvm/IR/Instructions.h"
69#include "llvm/IR/IntrinsicInst.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/PassManager.h"
72#include "llvm/IR/PatternMatch.h"
73#include "llvm/IR/Value.h"
74#include "llvm/InitializePasses.h"
75#include "llvm/Support/Casting.h"
76#include "llvm/Support/CommandLine.h"
77#include "llvm/Support/Debug.h"
78#include "llvm/Support/DebugCounter.h"
79#include "llvm/Support/ErrorHandling.h"
80#include "llvm/Support/raw_ostream.h"
81#include "llvm/Transforms/Scalar.h"
82#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
83#include "llvm/Transforms/Utils/BuildLibCalls.h"
84#include "llvm/Transforms/Utils/Local.h"
85#include <algorithm>
86#include <cassert>
87#include <cstdint>
88#include <map>
89#include <optional>
90#include <utility>
91
92using namespace llvm;
93using namespace PatternMatch;
94
95#define DEBUG_TYPE "dse"
96
97STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
98STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
99STATISTIC(NumFastStores, "Number of stores deleted");
100STATISTIC(NumFastOther, "Number of other instrs removed");
101STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
102STATISTIC(NumModifiedStores, "Number of stores modified");
103STATISTIC(NumCFGChecks, "Number of stores modified");
104STATISTIC(NumCFGTries, "Number of stores modified");
105STATISTIC(NumCFGSuccess, "Number of stores modified");
106STATISTIC(NumGetDomMemoryDefPassed,
107 "Number of times a valid candidate is returned from getDomMemoryDef");
108STATISTIC(NumDomMemDefChecks,
109 "Number iterations check for reads in getDomMemoryDef");
110
// Debug counter for bisecting miscompiles: lets -debug-counter select which
// individual MemoryDef eliminations are performed.
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
              "Controls which MemoryDefs are eliminated.");

// Track multiple partial overwrites of a dead store so that together they can
// add up to a complete overwrite (see isPartialOverwrite).
static cl::opt<bool>
EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
                               cl::init(Val: true), cl::Hidden,
                               cl::desc("Enable partial-overwrite tracking in DSE"));

// Allow merging a small constant store into a larger constant store it
// partially overwrites (see tryToMergePartialOverlappingStores).
static cl::opt<bool>
EnablePartialStoreMerging("enable-dse-partial-store-merging",
                          cl::init(Val: true), cl::Hidden,
                          cl::desc("Enable partial store merging in DSE"));

// Cap on the number of memory instructions scanned per query; keeps DSE's
// worst-case compile time bounded.
static cl::opt<unsigned>
    MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(Val: 150), cl::Hidden,
                       cl::desc("The number of memory instructions to scan for "
                                "dead store elimination (default = 150)"));

// Budget for the upwards MemorySSA walk; steps are weighted by the
// same-BB/other-BB costs defined below.
static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
    "dse-memoryssa-walklimit", cl::init(Val: 90), cl::Hidden,
    cl::desc("The maximum number of steps while walking upwards to find "
             "MemoryDefs that may be killed (default = 90)"));

static cl::opt<unsigned> MemorySSAPartialStoreLimit(
    "dse-memoryssa-partial-store-limit", cl::init(Val: 5), cl::Hidden,
    cl::desc("The maximum number candidates that only partially overwrite the "
             "killing MemoryDef to consider"
             " (default = 5)"));

static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
    "dse-memoryssa-defs-per-block-limit", cl::init(Val: 5000), cl::Hidden,
    cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
             "other stores per basic block (default = 5000)"));

// Weight of an upwards-walk step that stays in the killing def's block.
static cl::opt<unsigned> MemorySSASameBBStepCost(
    "dse-memoryssa-samebb-cost", cl::init(Val: 1), cl::Hidden,
    cl::desc(
        "The cost of a step in the same basic block as the killing MemoryDef"
        "(default = 1)"));

// Weight of an upwards-walk step that crosses into a different block.
static cl::opt<unsigned>
    MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(Val: 5),
                             cl::Hidden,
                             cl::desc("The cost of a step in a different basic "
                                      "block than the killing MemoryDef"
                                      "(default = 5)"));

static cl::opt<unsigned> MemorySSAPathCheckLimit(
    "dse-memoryssa-path-check-limit", cl::init(Val: 50), cl::Hidden,
    cl::desc("The maximum number of blocks to check when trying to prove that "
             "all paths to an exit go through a killing block (default = 50)"));

// This flag allows or disallows DSE to optimize MemorySSA during its
// traversal. Note that DSE optimizing MemorySSA may impact other passes
// downstream of the DSE invocation and can lead to issues not being
// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
// those cases, the flag can be used to check if DSE's MemorySSA optimizations
// impact follow-up passes.
static cl::opt<bool>
    OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(Val: true), cl::Hidden,
                      cl::desc("Allow DSE to optimize memory accesses."));

// TODO: remove this flag.
static cl::opt<bool> EnableInitializesImprovement(
    "enable-dse-initializes-attr-improvement", cl::init(Val: true), cl::Hidden,
    cl::desc("Enable the initializes attr improvement in DSE"));
176
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
// Map of non-overlapping overwritten intervals of a dead store, keyed by the
// interval's (half-open) end offset, with the start offset as the value. See
// isPartialOverwrite for how intervals are inserted and merged.
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
// For each (dead) instruction, the set of intervals overwritten by killing
// stores, accumulated across calls to isPartialOverwrite.
using InstOverlapIntervalsTy = MapVector<Instruction *, OverlapIntervalsTy>;
182
183/// Returns true if the end of this instruction can be safely shortened in
184/// length.
185static bool isShortenableAtTheEnd(Instruction *I) {
186 // Don't shorten stores for now
187 if (isa<StoreInst>(Val: I))
188 return false;
189
190 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
191 switch (II->getIntrinsicID()) {
192 default: return false;
193 case Intrinsic::memset:
194 case Intrinsic::memcpy:
195 case Intrinsic::memcpy_element_unordered_atomic:
196 case Intrinsic::memset_element_unordered_atomic:
197 // Do shorten memory intrinsics.
198 // FIXME: Add memmove if it's also safe to transform.
199 return true;
200 }
201 }
202
203 // Don't shorten libcalls calls for now.
204
205 return false;
206}
207
208/// Returns true if the beginning of this instruction can be safely shortened
209/// in length.
210static bool isShortenableAtTheBeginning(Instruction *I) {
211 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
212 // easily done by offsetting the source address.
213 return isa<AnyMemSetInst>(Val: I);
214}
215
216static std::optional<TypeSize> getPointerSize(const Value *V,
217 const DataLayout &DL,
218 const TargetLibraryInfo &TLI,
219 const Function *F) {
220 uint64_t Size;
221 ObjectSizeOpts Opts;
222 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
223
224 if (getObjectSize(Ptr: V, Size, DL, TLI: &TLI, Opts))
225 return TypeSize::getFixed(ExactSize: Size);
226 return std::nullopt;
227}
228
namespace {

/// Classifies how a killing store overwrites a dead store; see the comment on
/// isPartialOverwrite for the precise definitions it produces.
enum OverwriteResult {
  // Killing store overwrites the beginning of the dead store.
  OW_Begin,
  // Killing store completely overwrites the dead store.
  OW_Complete,
  // Killing store overwrites the end of the dead store.
  OW_End,
  // Dead (larger) store is overwritten by a killing (smaller) store that
  // writes only within the dead store's memory locations.
  OW_PartialEarlierWithFullLater,
  // NOTE(review): not documented in this chunk — presumably a partial overlap
  // requiring further analysis; confirm against users of this enum.
  OW_MaybePartial,
  // NOTE(review): not documented in this chunk — presumably no overlap;
  // confirm against users of this enum.
  OW_None,
  // Nothing could be determined about the relationship of the two stores.
  OW_Unknown
};

} // end anonymous namespace
242
243/// Check if two instruction are masked stores that completely
244/// overwrite one another. More specifically, \p KillingI has to
245/// overwrite \p DeadI.
246static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
247 const Instruction *DeadI,
248 BatchAAResults &AA) {
249 const auto *KillingII = dyn_cast<IntrinsicInst>(Val: KillingI);
250 const auto *DeadII = dyn_cast<IntrinsicInst>(Val: DeadI);
251 if (KillingII == nullptr || DeadII == nullptr)
252 return OW_Unknown;
253 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
254 return OW_Unknown;
255
256 switch (KillingII->getIntrinsicID()) {
257 case Intrinsic::masked_store:
258 case Intrinsic::vp_store: {
259 const DataLayout &DL = KillingII->getDataLayout();
260 auto *KillingTy = KillingII->getArgOperand(i: 0)->getType();
261 auto *DeadTy = DeadII->getArgOperand(i: 0)->getType();
262 if (DL.getTypeSizeInBits(Ty: KillingTy) != DL.getTypeSizeInBits(Ty: DeadTy))
263 return OW_Unknown;
264 // Element count.
265 if (cast<VectorType>(Val: KillingTy)->getElementCount() !=
266 cast<VectorType>(Val: DeadTy)->getElementCount())
267 return OW_Unknown;
268 // Pointers.
269 Value *KillingPtr = KillingII->getArgOperand(i: 1);
270 Value *DeadPtr = DeadII->getArgOperand(i: 1);
271 if (KillingPtr != DeadPtr && !AA.isMustAlias(V1: KillingPtr, V2: DeadPtr))
272 return OW_Unknown;
273 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
274 // Masks.
275 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
276 if (KillingII->getArgOperand(i: 2) != DeadII->getArgOperand(i: 2))
277 return OW_Unknown;
278 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
279 // Masks.
280 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
281 if (KillingII->getArgOperand(i: 2) != DeadII->getArgOperand(i: 2))
282 return OW_Unknown;
283 // Lengths.
284 if (KillingII->getArgOperand(i: 3) != DeadII->getArgOperand(i: 3))
285 return OW_Unknown;
286 }
287 return OW_Complete;
288 }
289 default:
290 return OW_Unknown;
291 }
292}
293
/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
/// overwritten by a killing (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
/// Partial overlaps are accumulated per dead instruction in \p IOL so that
/// several killing stores can together amount to a complete overwrite.
/// NOTE: This function must only be called if both \p KillingLoc and \p
/// DeadLoc belong to the same underlying object with valid \p KillingOff and
/// \p DeadOff.
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
                                          const MemoryLocation &DeadLoc,
                                          int64_t KillingOff, int64_t DeadOff,
                                          Instruction *DeadI,
                                          InstOverlapIntervalsTy &IOL) {
  const uint64_t KillingSize = KillingLoc.Size.getValue();
  const uint64_t DeadSize = DeadLoc.Size.getValue();
  // We may now overlap, although the overlap is not complete. There might also
  // be other incomplete overlaps, and together, they might cover the complete
  // dead store.
  // Note: The correctness of this logic depends on the fact that this function
  // is not even called providing DepWrite when there are any intervening reads.
  if (EnablePartialOverwriteTracking &&
      KillingOff < int64_t(DeadOff + DeadSize) &&
      int64_t(KillingOff + KillingSize) >= DeadOff) {

    // Insert our part of the overlap into the map.
    auto &IM = IOL[DeadI];
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
                      << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
                      << KillingOff << ", " << int64_t(KillingOff + KillingSize)
                      << ")\n");

    // Make sure that we only insert non-overlapping intervals and combine
    // adjacent intervals. The intervals are stored in the map with the ending
    // offset as the key (in the half-open sense) and the starting offset as
    // the value.
    int64_t KillingIntStart = KillingOff;
    int64_t KillingIntEnd = KillingOff + KillingSize;

    // Find any intervals ending at, or after, KillingIntStart which start
    // before KillingIntEnd.
    auto ILI = IM.lower_bound(x: KillingIntStart);
    if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
      // This existing interval is overlapped with the current store somewhere
      // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
      // intervals and adjusting our start and end.
      KillingIntStart = std::min(a: KillingIntStart, b: ILI->second);
      KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first);
      ILI = IM.erase(position: ILI);

      // Continue erasing and adjusting our end in case other previous
      // intervals are also overlapped with the current store.
      //
      //     |--- dead 1 ---|  |--- dead 2 ---|
      //         |------- killing---------|
      //
      while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
        assert(ILI->second > KillingIntStart && "Unexpected interval");
        KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first);
        ILI = IM.erase(position: ILI);
      }
    }

    IM[KillingIntEnd] = KillingIntStart;

    // If the first (lowest-ending) interval now spans the whole dead store,
    // the accumulated killing stores cover it completely.
    ILI = IM.begin();
    if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
      LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
                        << DeadOff << ", " << int64_t(DeadOff + DeadSize)
                        << ") Composite KillingLoc [" << ILI->second << ", "
                        << ILI->first << ")\n");
      ++NumCompletePartials;
      return OW_Complete;
    }
  }

  // Check for a dead store which writes to all the memory locations that
  // the killing store writes to.
  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
      int64_t(DeadOff + DeadSize) > KillingOff &&
      uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
                      << ", " << int64_t(DeadOff + DeadSize)
                      << ") by a killing store [" << KillingOff << ", "
                      << int64_t(KillingOff + KillingSize) << ")\n");
    // TODO: Maybe come up with a better name?
    return OW_PartialEarlierWithFullLater;
  }

  // Another interesting case is if the killing store overwrites the end of the
  // dead store.
  //
  //      |--dead--|
  //                |--   killing   --|
  //
  // In this case we may want to trim the size of dead store to avoid
  // generating stores to addresses which will definitely be overwritten killing
  // store.
  if (!EnablePartialOverwriteTracking &&
      (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
       int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
    return OW_End;

  // Finally, we also need to check if the killing store overwrites the
  // beginning of the dead store.
  //
  //                |--dead--|
  //      |--  killing  --|
  //
  // In this case we may want to move the destination address and trim the size
  // of dead store to avoid generating stores to addresses which will definitely
  // be overwritten killing store.
  if (!EnablePartialOverwriteTracking &&
      (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
    assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
           "Expect to be handled as OW_Complete");
    return OW_Begin;
  }
  // Otherwise, they don't completely overlap.
  return OW_Unknown;
}
416
/// Returns true if the memory which is accessed by the second instruction is
/// not modified between the first and the second instruction. Performs a
/// backwards CFG walk from \p SecondI towards \p FirstI, PHI-translating the
/// accessed address across block boundaries.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
static bool
memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
                           BatchAAResults &AA, const DataLayout &DL,
                           DominatorTree *DT) {
  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
  // instructions which can modify the memory location accessed by SecondI.
  //
  // While doing the walk keep track of the address to check. It might be
  // different in different basic blocks due to PHI translation.
  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
  SmallVector<BlockAddressPair, 16> WorkList;
  // Keep track of the address we visited each block with. Bail out if we
  // visit a block with different addresses.
  DenseMap<BasicBlock *, Value *> Visited;

  // Start scanning just past FirstI: FirstI itself is not a candidate clobber.
  BasicBlock::iterator FirstBBI(FirstI);
  ++FirstBBI;
  BasicBlock::iterator SecondBBI(SecondI);
  BasicBlock *FirstBB = FirstI->getParent();
  BasicBlock *SecondBB = SecondI->getParent();
  MemoryLocation MemLoc;
  if (auto *MemSet = dyn_cast<MemSetInst>(Val: SecondI))
    MemLoc = MemoryLocation::getForDest(MI: MemSet);
  else
    MemLoc = MemoryLocation::get(Inst: SecondI);

  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);

  // Start checking the SecondBB.
  WorkList.push_back(
      Elt: std::make_pair(x&: SecondBB, y: PHITransAddr(MemLocPtr, DL, nullptr)));
  bool isFirstBlock = true;

  // Check all blocks going backward until we reach the FirstBB.
  while (!WorkList.empty()) {
    BlockAddressPair Current = WorkList.pop_back_val();
    BasicBlock *B = Current.first;
    PHITransAddr &Addr = Current.second;
    Value *Ptr = Addr.getAddr();

    // Ignore instructions before FirstI if this is the FirstBB.
    BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());

    BasicBlock::iterator EI;
    if (isFirstBlock) {
      // Ignore instructions after SecondI if this is the first visit of SecondBB.
      assert(B == SecondBB && "first block is not the store block");
      EI = SecondBBI;
      isFirstBlock = false;
    } else {
      // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
      // In this case we also have to look at instructions after SecondI.
      EI = B->end();
    }
    // Any mod of the (possibly translated) location means memory may have
    // changed between the two instructions.
    for (; BI != EI; ++BI) {
      Instruction *I = &*BI;
      if (I->mayWriteToMemory() && I != SecondI)
        if (isModSet(MRI: AA.getModRefInfo(I, OptLoc: MemLoc.getWithNewPtr(NewPtr: Ptr))))
          return false;
    }
    if (B != FirstBB) {
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
             "Should not hit the entry block because SI must be dominated by LI");
      // Continue the walk through all predecessors, translating the address
      // across PHIs where necessary.
      for (BasicBlock *Pred : predecessors(BB: B)) {
        PHITransAddr PredAddr = Addr;
        if (PredAddr.needsPHITranslationFromBlock(BB: B)) {
          if (!PredAddr.isPotentiallyPHITranslatable())
            return false;
          if (!PredAddr.translateValue(CurBB: B, PredBB: Pred, DT, MustDominate: false))
            return false;
        }
        Value *TranslatedPtr = PredAddr.getAddr();
        auto Inserted = Visited.insert(KV: std::make_pair(x&: Pred, y&: TranslatedPtr));
        if (!Inserted.second) {
          // We already visited this block before. If it was with a different
          // address - bail out!
          if (TranslatedPtr != Inserted.first->second)
            return false;
          // ... otherwise just skip it.
          continue;
        }
        WorkList.push_back(Elt: std::make_pair(x&: Pred, y&: PredAddr));
      }
    }
  }
  return true;
}
508
/// Fix up the dbg.assign records attached to \p Inst after the store was
/// shortened. The no-longer-written ("dead") part of the original slice
/// [\p OldOffsetInBits, \p OldOffsetInBits + \p OldSizeInBits) gets a new,
/// unlinked dbg.assign so debug info does not claim those bits were stored.
/// \p IsOverwriteEnd selects whether the tail (true) or head (false) of the
/// slice was removed. All offsets/sizes are in bits.
static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
                              uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
                              uint64_t NewSizeInBits, bool IsOverwriteEnd) {
  const DataLayout &DL = Inst->getDataLayout();
  // Compute the dead fragment: the bits that are no longer written. When the
  // end was trimmed the dead bits start after the new (shorter) region;
  // otherwise they start at the original offset.
  uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
  uint64_t DeadSliceOffsetInBits =
      OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
  // Rewrite an assignment's expression to describe only DeadFragment; if a
  // fragment expression cannot be built, fall back to a kill location.
  auto SetDeadFragExpr = [](auto *Assign,
                            DIExpression::FragmentInfo DeadFragment) {
    // createFragmentExpression expects an offset relative to the existing
    // fragment offset if there is one.
    uint64_t RelativeOffset = DeadFragment.OffsetInBits -
                              Assign->getExpression()
                                  ->getFragmentInfo()
                                  .value_or(DIExpression::FragmentInfo(0, 0))
                                  .OffsetInBits;
    if (auto NewExpr = DIExpression::createFragmentExpression(
            Expr: Assign->getExpression(), OffsetInBits: RelativeOffset, SizeInBits: DeadFragment.SizeInBits)) {
      Assign->setExpression(*NewExpr);
      return;
    }
    // Failed to create a fragment expression for this so discard the value,
    // making this a kill location.
    auto *Expr = *DIExpression::createFragmentExpression(
        Expr: DIExpression::get(Context&: Assign->getContext(), Elements: {}), OffsetInBits: DeadFragment.OffsetInBits,
        SizeInBits: DeadFragment.SizeInBits);
    Assign->setExpression(Expr);
    Assign->setKillLocation();
  };

  // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
  // link to any instructions. Created in the loop below (once).
  DIAssignID *LinkToNothing = nullptr;
  LLVMContext &Ctx = Inst->getContext();
  auto GetDeadLink = [&Ctx, &LinkToNothing]() {
    if (!LinkToNothing)
      LinkToNothing = DIAssignID::getDistinct(Context&: Ctx);
    return LinkToNothing;
  };

  // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
  // overlapping dbg.assign intrinsic.
  for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
    std::optional<DIExpression::FragmentInfo> NewFragment;
    if (!at::calculateFragmentIntersect(DL, Dest: OriginalDest, SliceOffsetInBits: DeadSliceOffsetInBits,
                                        SliceSizeInBits: DeadSliceSizeInBits, DVRAssign: Assign,
                                        Result&: NewFragment) ||
        !NewFragment) {
      // We couldn't calculate the intersecting fragment for some reason. Be
      // cautious and unlink the whole assignment from the store.
      Assign->setKillAddress();
      Assign->setAssignId(GetDeadLink());
      continue;
    }
    // No intersect.
    if (NewFragment->SizeInBits == 0)
      continue;

    // Fragments overlap: insert a new dbg.assign for this dead part.
    auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
    NewAssign->insertAfter(InsertAfter: Assign->getIterator());
    NewAssign->setAssignId(GetDeadLink());
    if (NewFragment)
      SetDeadFragExpr(NewAssign, *NewFragment);
    NewAssign->setKillAddress();
  }
}
576
577/// Update the attributes given that a memory access is updated (the
578/// dereferenced pointer could be moved forward when shortening a
579/// mem intrinsic).
580static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
581 uint64_t PtrOffset) {
582 // Remember old attributes.
583 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
584
585 // Find attributes that should be kept, and remove the rest.
586 AttributeMask AttrsToRemove;
587 for (auto &Attr : OldAttrs) {
588 if (Attr.hasKindAsEnum()) {
589 switch (Attr.getKindAsEnum()) {
590 default:
591 break;
592 case Attribute::Alignment:
593 // Only keep alignment if PtrOffset satisfy the alignment.
594 if (isAligned(Lhs: Attr.getAlignment().valueOrOne(), SizeInBytes: PtrOffset))
595 continue;
596 break;
597 case Attribute::Dereferenceable:
598 case Attribute::DereferenceableOrNull:
599 // We could reduce the size of these attributes according to
600 // PtrOffset. But we simply drop these for now.
601 break;
602 case Attribute::NonNull:
603 case Attribute::NoUndef:
604 continue;
605 }
606 }
607 AttrsToRemove.addAttribute(A: Attr);
608 }
609
610 // Remove the attributes that should be dropped.
611 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
612}
613
/// Shorten the dead memory intrinsic \p DeadI so that it no longer writes the
/// region overwritten by the killing store [\p KillingStart,
/// \p KillingStart + \p KillingSize). \p IsOverwriteEnd selects whether the
/// tail (true) or head (false) of the dead store is trimmed. On success,
/// updates \p DeadStart / \p DeadSize to the remaining region and returns
/// true; returns false if shortening is not profitable/possible while keeping
/// the preferred alignment (or atomic element size) intact.
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                         uint64_t &DeadSize, int64_t KillingStart,
                         uint64_t KillingSize, bool IsOverwriteEnd) {
  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(Val: DeadI);
  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();

  // We assume that memset/memcpy operates in chunks of the "largest" native
  // type size and aligned on the same value. That means optimal start and size
  // of memset/memcpy should be modulo of preferred alignment of that type, so
  // there is no sense in trying to reduce store size any further since any
  // "extra" stores come for free anyway.
  // On the other hand, maximum alignment we can achieve is limited by alignment
  // of initial store.

  // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
  // "largest" native type.
  // Note: What is the proper way to get that value?
  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);

  int64_t ToRemoveStart = 0;
  uint64_t ToRemoveSize = 0;
  // Compute start and size of the region to remove. Make sure 'PrefAlign' is
  // maintained on the remaining store.
  if (IsOverwriteEnd) {
    // Calculate required adjustment for 'KillingStart' in order to keep
    // remaining store size aligned on 'PrefAlign'.
    uint64_t Off =
        offsetToAlignment(Value: uint64_t(KillingStart - DeadStart), Alignment: PrefAlign);
    ToRemoveStart = KillingStart + Off;
    if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
      return false;
    ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
  } else {
    ToRemoveStart = DeadStart;
    assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
           "Not overlapping accesses?");
    ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
    // Calculate required adjustment for 'ToRemoveSize'in order to keep
    // start of the remaining store aligned on 'PrefAlign'.
    uint64_t Off = offsetToAlignment(Value: ToRemoveSize, Alignment: PrefAlign);
    if (Off != 0) {
      if (ToRemoveSize <= (PrefAlign.value() - Off))
        return false;
      ToRemoveSize -= PrefAlign.value() - Off;
    }
    assert(isAligned(PrefAlign, ToRemoveSize) &&
           "Should preserve selected alignment");
  }

  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");

  uint64_t NewSize = DeadSize - ToRemoveSize;
  if (DeadIntrinsic->isAtomic()) {
    // When shortening an atomic memory intrinsic, the newly shortened
    // length must remain an integer multiple of the element size.
    const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
    if (0 != NewSize % ElementSize)
      return false;
  }

  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
                    << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
                    << "\n  KILLER [" << ToRemoveStart << ", "
                    << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");

  DeadIntrinsic->setLength(NewSize);
  DeadIntrinsic->setDestAlignment(PrefAlign);

  Value *OrigDest = DeadIntrinsic->getRawDest();
  if (!IsOverwriteEnd) {
    // Trimming the head: advance the destination pointer past the removed
    // bytes and re-validate the pointer argument's attributes.
    Value *Indices[1] = {
        ConstantInt::get(Ty: DeadIntrinsic->getLength()->getType(), V: ToRemoveSize)};
    Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
        PointeeType: Type::getInt8Ty(C&: DeadIntrinsic->getContext()), Ptr: OrigDest, IdxList: Indices, NameStr: "",
        InsertBefore: DeadI->getIterator());
    NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
    DeadIntrinsic->setDest(NewDestGEP);
    adjustArgAttributes(Intrinsic: DeadIntrinsic, ArgNo: 0, PtrOffset: ToRemoveSize);
  }

  // Update attached dbg.assign intrinsics. Assume 8-bit byte.
  shortenAssignment(Inst: DeadI, OriginalDest: OrigDest, OldOffsetInBits: DeadStart * 8, OldSizeInBits: DeadSize * 8, NewSizeInBits: NewSize * 8,
                    IsOverwriteEnd);

  // Finally update start and size of dead access.
  if (!IsOverwriteEnd)
    DeadStart += ToRemoveSize;
  DeadSize = NewSize;

  return true;
}
707
/// If the last recorded overwritten interval covers the end of the dead store
/// \p DeadI, try to trim that tail. Erases the interval from \p IntervalMap
/// and returns true on success.
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
                            int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheEnd(I: DeadI))
    return false;

  // The interval with the highest end offset (map is keyed by end offset).
  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
  int64_t KillingStart = OII->second;
  uint64_t KillingSize = OII->first - KillingStart;

  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");

  if (KillingStart > DeadStart &&
      // Note: "KillingStart - DeadStart" is known to be positive due to
      // preceding check.
      (uint64_t)(KillingStart - DeadStart) < DeadSize &&
      // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
      // be non negative due to preceding checks.
      KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
    if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                     IsOverwriteEnd: true)) {
      IntervalMap.erase(position: OII);
      return true;
    }
  }
  return false;
}
734
/// If the first recorded overwritten interval covers the beginning of the
/// dead store \p DeadI, try to trim that head. Erases the interval from
/// \p IntervalMap and returns true on success.
static bool tryToShortenBegin(Instruction *DeadI,
                              OverlapIntervalsTy &IntervalMap,
                              int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheBeginning(I: DeadI))
    return false;

  // The interval with the lowest end offset (map is keyed by end offset).
  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
  int64_t KillingStart = OII->second;
  uint64_t KillingSize = OII->first - KillingStart;

  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");

  if (KillingStart <= DeadStart &&
      // Note: "DeadStart - KillingStart" is known to be non negative due to
      // preceding check.
      KillingSize > (uint64_t)(DeadStart - KillingStart)) {
    // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
    // be positive due to preceding checks.
    assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
           "Should have been handled as OW_Complete");
    if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                     IsOverwriteEnd: false)) {
      IntervalMap.erase(position: OII);
      return true;
    }
  }
  return false;
}
763
/// If the killing store \p KillingI writes a constant integer entirely within
/// the bytes written by the dead constant-integer store \p DeadI (at offsets
/// \p KillingOffset / \p DeadOffset; only their difference is used), and
/// memory is not modified in between, return the merged constant that a
/// single replacement store of the dead store's type should write. Returns
/// nullptr if the stores cannot be merged.
static Constant *
tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
                                   int64_t KillingOffset, int64_t DeadOffset,
                                   const DataLayout &DL, BatchAAResults &AA,
                                   DominatorTree *DT) {

  if (DeadI && isa<ConstantInt>(Val: DeadI->getValueOperand()) &&
      DL.typeSizeEqualsStoreSize(Ty: DeadI->getValueOperand()->getType()) &&
      KillingI && isa<ConstantInt>(Val: KillingI->getValueOperand()) &&
      DL.typeSizeEqualsStoreSize(Ty: KillingI->getValueOperand()->getType()) &&
      memoryIsNotModifiedBetween(FirstI: DeadI, SecondI: KillingI, AA, DL, DT)) {
    // If the store we find is:
    //   a) partially overwritten by the store to 'Loc'
    //   b) the killing store is fully contained in the dead one and
    //   c) they both have a constant value
    //   d) none of the two stores need padding
    // Merge the two stores, replacing the dead store's value with a
    // merge of both values.
    // TODO: Deal with other constant types (vectors, etc), and probably
    // some mem intrinsics (if needed)

    APInt DeadValue = cast<ConstantInt>(Val: DeadI->getValueOperand())->getValue();
    APInt KillingValue =
        cast<ConstantInt>(Val: KillingI->getValueOperand())->getValue();
    unsigned KillingBits = KillingValue.getBitWidth();
    assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
    KillingValue = KillingValue.zext(width: DeadValue.getBitWidth());

    // Offset of the smaller store inside the larger store
    unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
    // On big-endian targets byte offset 0 holds the most significant bits, so
    // the shift is measured from the top of the wide value instead.
    unsigned LShiftAmount =
        DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
                         : BitOffsetDiff;
    APInt Mask = APInt::getBitsSet(numBits: DeadValue.getBitWidth(), loBit: LShiftAmount,
                                   hiBit: LShiftAmount + KillingBits);
    // Clear the bits we'll be replacing, then OR with the smaller
    // store, shifted appropriately.
    APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
    LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n  Dead: " << *DeadI
                      << "\n  Killing: " << *KillingI
                      << "\n  Merged Value: " << Merged << '\n');
    return ConstantInt::get(Ty: DeadI->getValueOperand()->getType(), V: Merged);
  }
  return nullptr;
}
809
810// Returns true if \p I is an intrinsic that does not read or write memory.
811static bool isNoopIntrinsic(Instruction *I) {
812 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
813 switch (II->getIntrinsicID()) {
814 case Intrinsic::lifetime_start:
815 case Intrinsic::lifetime_end:
816 case Intrinsic::invariant_end:
817 case Intrinsic::launder_invariant_group:
818 case Intrinsic::assume:
819 return true;
820 case Intrinsic::dbg_declare:
821 case Intrinsic::dbg_label:
822 case Intrinsic::dbg_value:
823 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
824 default:
825 return false;
826 }
827 }
828 return false;
829}
830
831// Check if we can ignore \p D for DSE.
832static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
833 Instruction *DI = D->getMemoryInst();
834 // Calls that only access inaccessible memory cannot read or write any memory
835 // locations we consider for elimination.
836 if (auto *CB = dyn_cast<CallBase>(Val: DI))
837 if (CB->onlyAccessesInaccessibleMemory())
838 return true;
839
840 // We can eliminate stores to locations not visible to the caller across
841 // throwing instructions.
842 if (DI->mayThrow() && !DefVisibleToCaller)
843 return true;
844
845 // We can remove the dead stores, irrespective of the fence and its ordering
846 // (release/acquire/seq_cst). Fences only constraints the ordering of
847 // already visible stores, it does not make a store visible to other
848 // threads. So, skipping over a fence does not change a store from being
849 // dead.
850 if (isa<FenceInst>(Val: DI))
851 return true;
852
853 // Skip intrinsics that do not really read or modify memory.
854 if (isNoopIntrinsic(I: DI))
855 return true;
856
857 return false;
858}
859
namespace {

// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`.
struct MemoryLocationWrapper {
  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
                        bool DefByInitializesAttr)
      : MemLoc(MemLoc), MemDef(MemDef),
        DefByInitializesAttr(DefByInitializesAttr) {
    assert(MemLoc.Ptr && "MemLoc should be not null");
    UnderlyingObject = getUnderlyingObject(V: MemLoc.Ptr);
    DefInst = MemDef->getMemoryInst();
  }

  // The written location.
  MemoryLocation MemLoc;
  // Underlying object of MemLoc.Ptr, cached at construction.
  const Value *UnderlyingObject;
  // The MemorySSA def that defines MemLoc.
  MemoryDef *MemDef;
  // The IR instruction of MemDef, cached at construction.
  Instruction *DefInst;
  // True if this location stems from a call argument's "initializes"
  // attribute rather than a directly analyzable write.
  bool DefByInitializesAttr = false;
};

// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
// defined by this MemoryDef.
struct MemoryDefWrapper {
  MemoryDefWrapper(MemoryDef *MemDef,
                   ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
    DefInst = MemDef->getMemoryInst();
    for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
      DefinedLocations.push_back(
          Elt: MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
  }
  Instruction *DefInst;
  SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
};

// Per-argument info used when intersecting "initializes" range lists of
// aliasing call arguments.
struct ArgumentInitInfo {
  // Index of the argument in the call.
  unsigned Idx;
  // Whether the argument's memory is dead or invisible to the caller on
  // unwind (relevant when the call itself may unwind).
  bool IsDeadOrInvisibleOnUnwind;
  // Ranges from the argument's "initializes" attribute.
  ConstantRangeList Inits;
};
} // namespace
901
902static bool hasInitializesAttr(Instruction *I) {
903 CallBase *CB = dyn_cast<CallBase>(Val: I);
904 return CB && CB->getArgOperandWithAttribute(Kind: Attribute::Initializes);
905}
906
907// Return the intersected range list of the initializes attributes of "Args".
908// "Args" are call arguments that alias to each other.
909// If any argument in "Args" doesn't have dead_on_unwind attr and
910// "CallHasNoUnwindAttr" is false, return empty.
911static ConstantRangeList
912getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
913 bool CallHasNoUnwindAttr) {
914 if (Args.empty())
915 return {};
916
917 // To address unwind, the function should have nounwind attribute or the
918 // arguments have dead or invisible on unwind. Otherwise, return empty.
919 for (const auto &Arg : Args) {
920 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
921 return {};
922 if (Arg.Inits.empty())
923 return {};
924 }
925
926 ConstantRangeList IntersectedIntervals = Args.front().Inits;
927 for (auto &Arg : Args.drop_front())
928 IntersectedIntervals = IntersectedIntervals.intersectWith(CRL: Arg.Inits);
929
930 return IntersectedIntervals;
931}
932
namespace {

/// Holds all per-function state and helper methods for MemorySSA-backed dead
/// store elimination.
struct DSEState {
  Function &F;
  AliasAnalysis &AA;
  EarliestEscapeAnalysis EA;

  /// The single BatchAA instance that is used to cache AA queries. It will
  /// not be invalidated over the whole run. This is safe, because:
  /// 1. Only memory writes are removed, so the alias cache for memory
  ///    locations remains valid.
  /// 2. No new instructions are added (only instructions removed), so cached
  ///    information for a deleted value cannot be accessed by a re-used new
  ///    value pointer.
  BatchAAResults BatchAA;

  MemorySSA &MSSA;
  DominatorTree &DT;
  PostDominatorTree &PDT;
  const TargetLibraryInfo &TLI;
  const DataLayout &DL;
  const LoopInfo &LI;

  // Whether the function contains any irreducible control flow, useful for
  // being accurately able to detect loops.
  bool ContainsIrreducibleLoops;

  // All MemoryDefs that potentially could kill other MemDefs.
  SmallVector<MemoryDef *, 64> MemDefs;
  // Any that should be skipped as they are already deleted
  SmallPtrSet<MemoryAccess *, 4> SkipStores;
  // Keep track whether a given object is captured before return or not.
  DenseMap<const Value *, bool> CapturedBeforeReturn;
  // Keep track of all of the objects that are invisible to the caller after
  // the function returns.
  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
  // For arguments whose dead_on_return info does not cover all reachable
  // memory, the number of dead bytes (see the DSEState constructor).
  DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
  // Post-order numbers for each basic block. Used to figure out if memory
  // accesses are executed before another access.
  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;

  /// Keep track of instructions (partly) overlapping with killing MemoryDefs
  /// per basic block.
  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
  // Check if there are root nodes that are terminated by UnreachableInst.
  // Those roots pessimize post-dominance queries. If there are such roots,
  // fall back to CFG scan starting from all non-unreachable roots.
  bool AnyUnreachableExit;

  // Whether or not we should iterate on removing dead stores at the end of the
  // function due to removing a store causing a previously captured pointer to
  // no longer be captured.
  bool ShouldIterateEndOfFunctionDSE;

  /// Dead instructions to be removed at the end of DSE.
  SmallVector<Instruction *> ToRemove;

  // Class contains self-reference, make sure it's not copied/moved.
  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
           const LoopInfo &LI);
  DSEState(const DSEState &) = delete;
  DSEState &operator=(const DSEState &) = delete;

  LocationSize strengthenLocationSize(const Instruction *I,
                                      LocationSize Size) const;

  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
  /// location (by \p DeadI instruction).
  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
  /// \p DeadI, but they both write to the same underlying object. In that
  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
  OverwriteResult isOverwrite(const Instruction *KillingI,
                              const Instruction *DeadI,
                              const MemoryLocation &KillingLoc,
                              const MemoryLocation &DeadLoc,
                              int64_t &KillingOff, int64_t &DeadOff);

  bool isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
                                   const LocationSize StoreSize);

  bool isInvisibleToCallerOnUnwind(const Value *V);

  std::optional<MemoryLocation> getLocForWrite(Instruction *I) const;

  // Returns a list of <MemoryLocation, bool> pairs written by I.
  // The bool means whether the write is from Initializes attr.
  SmallVector<std::pair<MemoryLocation, bool>, 1>
  getLocForInst(Instruction *I, bool ConsiderInitializesAttr);

  /// Assuming this instruction has a dead analyzable write, can we delete
  /// this instruction?
  bool isRemovable(Instruction *I);

  /// Returns true if \p UseInst completely overwrites \p DefLoc
  /// (stored by \p DefInst).
  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
                           Instruction *UseInst);

  /// Returns true if \p Def is not read before returning from the function.
  bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc);

  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
  /// pair with the MemoryLocation terminated by \p I and a boolean flag
  /// indicating whether \p I is a free-like call.
  std::optional<std::pair<MemoryLocation, bool>>
  getLocForTerminator(Instruction *I) const;

  /// Returns true if \p I is a memory terminator instruction like
  /// llvm.lifetime.end or free.
  bool isMemTerminatorInst(Instruction *I) const;

  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
  /// instruction \p AccessI.
  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
                       Instruction *MaybeTerm);

  // Returns true if \p Use may read from \p DefLoc.
  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);

  /// Returns true if a dependency between \p Current and \p KillingDef is
  /// guaranteed to be loop invariant for the loops that they are in. Either
  /// because they are known to be in the same block, in the same loop level or
  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
  /// during execution of the containing function.
  bool isGuaranteedLoopIndependent(const Instruction *Current,
                                   const Instruction *KillingDef,
                                   const MemoryLocation &CurrentLoc);

  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
  /// loop. In particular, this guarantees that it only references a single
  /// MemoryLocation during execution of the containing function.
  bool isGuaranteedLoopInvariant(const Value *Ptr);

  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
  // with no read access between them or on any other path to a function exit
  // block if \p KillingLoc is not accessible after the function returns. If
  // there is no such MemoryDef, return std::nullopt. The returned value may not
  // (completely) overwrite \p KillingLoc. Currently we bail out when we
  // encounter an aliasing MemoryUse (read).
  std::optional<MemoryAccess *>
  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
                  bool IsMemTerm, unsigned &PartialLimit,
                  bool IsInitializesAttrMemLoc);

  /// Delete dead memory defs and recursively add their operands to ToRemove if
  /// they became dead.
  void
  deleteDeadInstruction(Instruction *SI,
                        SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr);

  // Check for any extra throws between \p KillingI and \p DeadI that block
  // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
  // MemoryDef that may throw are handled during the walk from one def to the
  // next.
  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
                       const Value *KillingUndObj);

  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
  // instructions act as barriers:
  // * A memory instruction that may throw and \p KillingI accesses a non-stack
  //   object.
  // * Atomic stores stronger than monotonic.
  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI);

  /// Eliminate writes to objects that are not visible in the caller and are not
  /// accessed before returning from the function.
  bool eliminateDeadWritesAtEndOfFunction();

  /// If we have a zero initializing memset following a call to malloc,
  /// try folding it into a call to calloc.
  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO);

  // Check if there is a dominating condition, that implies that the value
  // being stored in a ptr is already present in the ptr.
  bool dominatingConditionImpliesValue(MemoryDef *Def);

  /// \returns true if \p Def is a no-op store, either because it
  /// directly stores back a loaded value or stores zero to a calloced object.
  bool storeIsNoop(MemoryDef *Def, const Value *DefUO);

  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);

  /// Eliminates writes to locations where the value that is being written
  /// is already stored at the same location.
  bool eliminateRedundantStoresOfExistingValues();

  // Return the locations written by the initializes attribute.
  // Note that this function considers:
  // 1. Unwind edge: use "initializes" attribute only if the callee has
  //    "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
  //    or the argument is invisible to caller on unwind. That is, we don't
  //    perform incorrect DSE on unwind edges in the current function.
  // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
  //    the intersected range list of their "initializes" attributes.
  SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);

  // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
  // killed by `KillingLocWrapper.MemDef`. Return whether
  // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
  std::pair<bool, bool>
  eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);

  // Try to eliminate dead defs killed by `KillingDefWrapper` and return the
  // change state: whether make any change.
  bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
};

} // end anonymous namespace
1149
1150static void pushMemUses(MemoryAccess *Acc,
1151 SmallVectorImpl<MemoryAccess *> &WorkList,
1152 SmallPtrSetImpl<MemoryAccess *> &Visited) {
1153 for (Use &U : Acc->uses()) {
1154 auto *MA = cast<MemoryAccess>(Val: U.getUser());
1155 if (Visited.insert(Ptr: MA).second)
1156 WorkList.push_back(Elt: MA);
1157 }
1158}
1159
1160// Return true if "Arg" is function local and isn't captured before "CB".
1161static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
1162 EarliestEscapeAnalysis &EA) {
1163 const Value *UnderlyingObj = getUnderlyingObject(V: Arg);
1164 return isIdentifiedFunctionLocal(V: UnderlyingObj) &&
1165 capturesNothing(
1166 CC: EA.getCapturesBefore(Object: UnderlyingObj, I: CB, /*OrAt*/ true));
1167}
1168
DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                   DominatorTree &DT, PostDominatorTree &PDT,
                   const TargetLibraryInfo &TLI, const LoopInfo &LI)
    : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT), PDT(PDT),
      TLI(TLI), DL(F.getDataLayout()), LI(LI) {
  // Collect blocks with throwing instructions not modeled in MemorySSA and
  // alloc-like objects.
  unsigned PO = 0;
  for (BasicBlock *BB : post_order(G: &F)) {
    PostOrderNumbers[BB] = PO++;
    for (Instruction &I : *BB) {
      MemoryAccess *MA = MSSA.getMemoryAccess(I: &I);
      if (I.mayThrow() && !MA)
        ThrowingBlocks.insert(Ptr: I.getParent());

      // Only track defs with an analyzable write location, memory
      // terminators, or (when enabled) calls with an "initializes"
      // attribute, up to a limit on the total number of defs.
      auto *MD = dyn_cast_or_null<MemoryDef>(Val: MA);
      if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
          (getLocForWrite(I: &I) || isMemTerminatorInst(I: &I) ||
           (EnableInitializesImprovement && hasInitializesAttr(I: &I))))
        MemDefs.push_back(Elt: MD);
    }
  }

  // Treat byval, inalloca or dead on return arguments the same as Allocas,
  // stores to them are dead at the end of the function.
  for (Argument &AI : F.args()) {
    if (AI.hasPassPointeeByValueCopyAttr()) {
      InvisibleToCallerAfterRet.insert(KV: {&AI, true});
      continue;
    }

    if (!AI.getType()->isPointerTy())
      continue;

    const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
    if (Info.coversAllReachableMemory())
      InvisibleToCallerAfterRet.insert(KV: {&AI, true});
    else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
      // Only part of the argument's memory is known dead; record the byte
      // count for the bounded invisibility check.
      InvisibleToCallerAfterRetBounded.insert(KV: {&AI, DeadBytes});
  }

  // Collect whether there is any irreducible control flow in the function.
  ContainsIrreducibleLoops = mayContainIrreducibleControl(F, LI: &LI);

  // Roots terminated by UnreachableInst pessimize post-dominance queries;
  // remember whether any exist so we can fall back to a CFG scan.
  AnyUnreachableExit = any_of(Range: PDT.roots(), P: [](const BasicBlock *E) {
    return isa<UnreachableInst>(Val: E->getTerminator());
  });
}
1217
1218LocationSize DSEState::strengthenLocationSize(const Instruction *I,
1219 LocationSize Size) const {
1220 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1221 LibFunc F;
1222 if (TLI.getLibFunc(CB: *CB, F) && TLI.has(F) &&
1223 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
1224 // Use the precise location size specified by the 3rd argument
1225 // for determining KillingI overwrites DeadLoc if it is a memset_chk
1226 // instruction. memset_chk will write either the amount specified as 3rd
1227 // argument or the function will immediately abort and exit the program.
1228 // NOTE: AA may determine NoAlias if it can prove that the access size
1229 // is larger than the allocation size due to that being UB. To avoid
1230 // returning potentially invalid NoAlias results by AA, limit the use of
1231 // the precise location size to isOverwrite.
1232 if (const auto *Len = dyn_cast<ConstantInt>(Val: CB->getArgOperand(i: 2)))
1233 return LocationSize::precise(Value: Len->getZExtValue());
1234 }
1235 }
1236 return Size;
1237}
1238
OverwriteResult DSEState::isOverwrite(const Instruction *KillingI,
                                      const Instruction *DeadI,
                                      const MemoryLocation &KillingLoc,
                                      const MemoryLocation &DeadLoc,
                                      int64_t &KillingOff, int64_t &DeadOff) {
  // AliasAnalysis does not always account for loops. Limit overwrite checks
  // to dependencies for which we can guarantee they are independent of any
  // loops they are in.
  if (!isGuaranteedLoopIndependent(Current: DeadI, KillingDef: KillingI, CurrentLoc: DeadLoc))
    return OW_Unknown;

  LocationSize KillingLocSize =
      strengthenLocationSize(I: KillingI, Size: KillingLoc.Size);
  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
  const Value *DeadUndObj = getUnderlyingObject(V: DeadPtr);
  const Value *KillingUndObj = getUnderlyingObject(V: KillingPtr);

  // Check whether the killing store overwrites the whole object, in which
  // case the size/offset of the dead store does not matter.
  if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
      isIdentifiedObject(V: KillingUndObj)) {
    std::optional<TypeSize> KillingUndObjSize =
        getPointerSize(V: KillingUndObj, DL, TLI, F: &F);
    if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
      return OW_Complete;
  }

  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
  // get imprecise values here, though (except for unknown sizes).
  if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
    // In case no constant size is known, try to use the IR values for the
    // number of bytes written and check if they match.
    const auto *KillingMemI = dyn_cast<MemIntrinsic>(Val: KillingI);
    const auto *DeadMemI = dyn_cast<MemIntrinsic>(Val: DeadI);
    if (KillingMemI && DeadMemI) {
      const Value *KillingV = KillingMemI->getLength();
      const Value *DeadV = DeadMemI->getLength();
      if (KillingV == DeadV && BatchAA.isMustAlias(LocA: DeadLoc, LocB: KillingLoc))
        return OW_Complete;
    }

    // Masked stores have imprecise locations, but we can reason about them
    // to some extent.
    return isMaskedStoreOverwrite(KillingI, DeadI, AA&: BatchAA);
  }

  const TypeSize KillingSize = KillingLocSize.getValue();
  const TypeSize DeadSize = DeadLoc.Size.getValue();
  // Bail on doing Size comparison which depends on AA for now
  // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
  const bool AnyScalable = DeadSize.isScalable() || KillingLocSize.isScalable();

  if (AnyScalable)
    return OW_Unknown;
  // Query the alias information
  AliasResult AAR = BatchAA.alias(LocA: KillingLoc, LocB: DeadLoc);

  // If the start pointers are the same, we just have to compare sizes to see if
  // the killing store was larger than the dead store.
  if (AAR == AliasResult::MustAlias) {
    // Make sure that the KillingSize size is >= the DeadSize size.
    if (KillingSize >= DeadSize)
      return OW_Complete;
  }

  // If we hit a partial alias we may have a full overwrite
  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
    int32_t Off = AAR.getOffset();
    if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
      return OW_Complete;
  }

  // If we can't resolve the same pointers to the same object, then we can't
  // analyze them at all.
  if (DeadUndObj != KillingUndObj) {
    // Non aliasing stores to different objects don't overlap. Note that
    // if the killing store is known to overwrite whole object (out of
    // bounds access overwrites whole object as well) then it is assumed to
    // completely overwrite any store to the same object even if they don't
    // actually alias (see next check).
    if (AAR == AliasResult::NoAlias)
      return OW_None;
    return OW_Unknown;
  }

  // Okay, both stores access the same underlying object via pointers AA
  // could not directly relate. Try to decompose each pointer into a
  // "base + constant_offset" form. If the base pointers are equal, then we
  // can reason about the two stores.
  DeadOff = 0;
  KillingOff = 0;
  const Value *DeadBasePtr =
      GetPointerBaseWithConstantOffset(Ptr: DeadPtr, Offset&: DeadOff, DL);
  const Value *KillingBasePtr =
      GetPointerBaseWithConstantOffset(Ptr: KillingPtr, Offset&: KillingOff, DL);

  // If the base pointers still differ, we have two completely different
  // stores.
  if (DeadBasePtr != KillingBasePtr)
    return OW_Unknown;

  // The killing access completely overlaps the dead store if and only if
  // both start and end of the dead one is "inside" the killing one:
  //    |<->|--dead--|<->|
  //    |-----killing------|
  // Accesses may overlap if and only if start of one of them is "inside"
  // another one:
  //    |<->|--dead--|<-------->|
  //    |-------killing--------|
  //           OR
  //    |-------dead-------|
  //    |<->|---killing---|<----->|
  //
  // We have to be careful here as *Off is signed while *.Size is unsigned.

  // Check if the dead access starts "not before" the killing one.
  if (DeadOff >= KillingOff) {
    // If the dead access ends "not after" the killing access then the
    // dead one is completely overwritten by the killing one.
    if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
      return OW_Complete;
    // If start of the dead access is "before" end of the killing access
    // then accesses overlap.
    else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
      return OW_MaybePartial;
  }
  // If start of the killing access is "before" end of the dead access then
  // accesses overlap.
  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
    return OW_MaybePartial;
  }

  // Can reach here only if accesses are known not to overlap.
  return OW_None;
}
1374
// Returns true if a store of \p StoreSize bytes to \p Ptr (whose underlying
// object is \p V) cannot be observed by the caller after this function
// returns.
bool DSEState::isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
                                           const LocationSize StoreSize) {
  // Allocas never outlive the function.
  if (isa<AllocaInst>(Val: V))
    return true;

  // Arguments with a bounded dead_on_return range: the store is only
  // invisible if it provably stays within the dead byte range.
  auto IBounded = InvisibleToCallerAfterRetBounded.find(Val: V);
  if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
    int64_t ValueOffset;
    [[maybe_unused]] const Value *BaseValue =
        GetPointerBaseWithConstantOffset(Ptr, Offset&: ValueOffset, DL);
    // If we are not able to find a constant offset from the UO, we have to
    // pessimistically assume that the store writes to memory out of the
    // dead_on_return bounds.
    if (BaseValue != V)
      return false;
    // This store is only invisible after return if we are in bounds of the
    // range marked dead.
    if (StoreSize.hasValue() &&
        ValueOffset + StoreSize.getValue() <= IBounded->second &&
        ValueOffset >= 0)
      return true;
  }
  // Cache the result per underlying object: insert a pessimistic "false"
  // default and compute the real answer only on first insertion.
  auto I = InvisibleToCallerAfterRet.insert(KV: {V, false});
  if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
    I.first->second = capturesNothing(CC: PointerMayBeCaptured(
        V, /*ReturnCaptures=*/true, Mask: CaptureComponents::Provenance));
  return I.first->second;
}
1403
// Returns true if the memory object \p V cannot be observed by the caller if
// this function unwinds.
bool DSEState::isInvisibleToCallerOnUnwind(const Value *V) {
  bool RequiresNoCaptureBeforeUnwind;
  if (!isNotVisibleOnUnwind(Object: V, RequiresNoCaptureBeforeUnwind))
    return false;
  if (!RequiresNoCaptureBeforeUnwind)
    return true;

  // Cache the capture query per object: insert a pessimistic "captured"
  // default and compute the real answer only on first insertion.
  auto I = CapturedBeforeReturn.insert(KV: {V, true});
  if (I.second)
    // NOTE: This could be made more precise by PointerMayBeCapturedBefore
    // with the killing MemoryDef. But we refrain from doing so for now to
    // limit compile-time and this does not cause any changes to the number
    // of stores removed on a large test set in practice.
    I.first->second = capturesAnything(CC: PointerMayBeCaptured(
        V, /*ReturnCaptures=*/false, Mask: CaptureComponents::Provenance));
  return !I.first->second;
}
1421
1422std::optional<MemoryLocation> DSEState::getLocForWrite(Instruction *I) const {
1423 if (!I->mayWriteToMemory())
1424 return std::nullopt;
1425
1426 if (auto *CB = dyn_cast<CallBase>(Val: I))
1427 return MemoryLocation::getForDest(CI: CB, TLI);
1428
1429 return MemoryLocation::getOrNone(Inst: I);
1430}
1431
1432SmallVector<std::pair<MemoryLocation, bool>, 1>
1433DSEState::getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1434 SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
1435 if (isMemTerminatorInst(I)) {
1436 if (auto Loc = getLocForTerminator(I))
1437 Locations.push_back(Elt: std::make_pair(x&: Loc->first, y: false));
1438 return Locations;
1439 }
1440
1441 if (auto Loc = getLocForWrite(I))
1442 Locations.push_back(Elt: std::make_pair(x&: *Loc, y: false));
1443
1444 if (ConsiderInitializesAttr) {
1445 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1446 Locations.push_back(Elt: std::make_pair(x&: MemLoc, y: true));
1447 }
1448 }
1449 return Locations;
1450}
1451
1452bool DSEState::isRemovable(Instruction *I) {
1453 assert(getLocForWrite(I) && "Must have analyzable write");
1454
1455 // Don't remove volatile/atomic stores.
1456 if (StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1457 return SI->isUnordered();
1458
1459 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1460 // Don't remove volatile memory intrinsics.
1461 if (auto *MI = dyn_cast<MemIntrinsic>(Val: CB))
1462 return !MI->isVolatile();
1463
1464 // Never remove dead lifetime intrinsics, e.g. because they are followed
1465 // by a free.
1466 if (CB->isLifetimeStartOrEnd())
1467 return false;
1468
1469 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1470 !CB->isTerminator();
1471 }
1472
1473 return false;
1474}
1475
1476bool DSEState::isCompleteOverwrite(const MemoryLocation &DefLoc,
1477 Instruction *DefInst, Instruction *UseInst) {
1478 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1479 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1480 // MemoryDef.
1481 if (!UseInst->mayWriteToMemory())
1482 return false;
1483
1484 if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1485 if (CB->onlyAccessesInaccessibleMemory())
1486 return false;
1487
1488 int64_t InstWriteOffset, DepWriteOffset;
1489 if (auto CC = getLocForWrite(I: UseInst))
1490 return isOverwrite(KillingI: UseInst, DeadI: DefInst, KillingLoc: *CC, DeadLoc: DefLoc, KillingOff&: InstWriteOffset,
1491 DeadOff&: DepWriteOffset) == OW_Complete;
1492 return false;
1493}
1494
bool DSEState::isWriteAtEndOfFunction(MemoryDef *Def,
                                      const MemoryLocation &DefLoc) {
  LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
                    << *Def->getMemoryInst()
                    << ") is at the end the function \n");
  SmallVector<MemoryAccess *, 4> WorkList;
  SmallPtrSet<MemoryAccess *, 8> Visited;

  // Worklist walk over all accesses reachable from Def's uses. WorkList
  // grows while we iterate, so the index-based loop is intentional.
  pushMemUses(Acc: Def, WorkList, Visited);
  for (unsigned I = 0; I < WorkList.size(); I++) {
    if (WorkList.size() >= MemorySSAScanLimit) {
      LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
      return false;
    }

    MemoryAccess *UseAccess = WorkList[I];
    if (isa<MemoryPhi>(Val: UseAccess)) {
      // AliasAnalysis does not account for loops. Limit elimination to
      // candidates for which we can guarantee they always store to the same
      // memory location.
      if (!isGuaranteedLoopInvariant(Ptr: DefLoc.Ptr))
        return false;

      pushMemUses(Acc: cast<MemoryPhi>(Val: UseAccess), WorkList, Visited);
      continue;
    }
    // TODO: Checking for aliasing is expensive. Consider reducing the amount
    // of times this is called and/or caching it.
    Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
    if (isReadClobber(DefLoc, UseInst)) {
      LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
      return false;
    }

    if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess))
      pushMemUses(Acc: UseDef, WorkList, Visited);
  }
  // No read of DefLoc found on any path to a function exit.
  return true;
}
1534
1535std::optional<std::pair<MemoryLocation, bool>>
1536DSEState::getLocForTerminator(Instruction *I) const {
1537 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1538 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1539 return {
1540 std::make_pair(x: MemoryLocation::getForArgument(Call: CB, ArgIdx: 0, TLI: &TLI), y: false)};
1541 if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI))
1542 return {std::make_pair(x: MemoryLocation::getAfter(Ptr: FreedOp), y: true)};
1543 }
1544
1545 return std::nullopt;
1546}
1547
1548bool DSEState::isMemTerminatorInst(Instruction *I) const {
1549 auto *CB = dyn_cast<CallBase>(Val: I);
1550 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1551 getFreedOperand(CB, TLI: &TLI) != nullptr);
1552}
1553
/// Returns true if \p MaybeTerm terminates the memory accessed by \p AccessI
/// at \p Loc: a free-like call that must-aliases Loc's underlying object, or
/// a lifetime.end whose location completely overwrites Loc.
bool DSEState::isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
                               Instruction *MaybeTerm) {
  std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
      getLocForTerminator(I: MaybeTerm);

  if (!MaybeTermLoc)
    return false;

  // If the terminator is a free-like call, all accesses to the underlying
  // object can be considered terminated.
  // Cheap reject: both must refer to the same underlying object at all.
  if (getUnderlyingObject(V: Loc.Ptr) !=
      getUnderlyingObject(V: MaybeTermLoc->first.Ptr))
    return false;

  auto TermLoc = MaybeTermLoc->first;
  // second == true: free-like call; require the freed pointer to must-alias
  // the underlying object so the whole object is known dead.
  if (MaybeTermLoc->second) {
    const Value *LocUO = getUnderlyingObject(V: Loc.Ptr);
    return BatchAA.isMustAlias(V1: TermLoc.Ptr, V2: LocUO);
  }
  // Otherwise (lifetime.end): the terminated location itself must completely
  // cover Loc.
  int64_t InstWriteOffset = 0;
  int64_t DepWriteOffset = 0;
  return isOverwrite(KillingI: MaybeTerm, DeadI: AccessI, KillingLoc: TermLoc, DeadLoc: Loc, KillingOff&: InstWriteOffset,
                     DeadOff&: DepWriteOffset) == OW_Complete;
}
1578
1579bool DSEState::isReadClobber(const MemoryLocation &DefLoc,
1580 Instruction *UseInst) {
1581 if (isNoopIntrinsic(I: UseInst))
1582 return false;
1583
1584 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1585 // treated as read clobber.
1586 if (auto SI = dyn_cast<StoreInst>(Val: UseInst))
1587 return isStrongerThan(AO: SI->getOrdering(), Other: AtomicOrdering::Monotonic);
1588
1589 if (!UseInst->mayReadFromMemory())
1590 return false;
1591
1592 if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1593 if (CB->onlyAccessesInaccessibleMemory())
1594 return false;
1595
1596 return isRefSet(MRI: BatchAA.getModRefInfo(I: UseInst, OptLoc: DefLoc));
1597}
1598
1599bool DSEState::isGuaranteedLoopIndependent(const Instruction *Current,
1600 const Instruction *KillingDef,
1601 const MemoryLocation &CurrentLoc) {
1602 // If the dependency is within the same block or loop level (being careful
1603 // of irreducible loops), we know that AA will return a valid result for the
1604 // memory dependency. (Both at the function level, outside of any loop,
1605 // would also be valid but we currently disable that to limit compile time).
1606 if (Current->getParent() == KillingDef->getParent())
1607 return true;
1608 const Loop *CurrentLI = LI.getLoopFor(BB: Current->getParent());
1609 if (!ContainsIrreducibleLoops && CurrentLI &&
1610 CurrentLI == LI.getLoopFor(BB: KillingDef->getParent()))
1611 return true;
1612 // Otherwise check the memory location is invariant to any loops.
1613 return isGuaranteedLoopInvariant(Ptr: CurrentLoc.Ptr);
1614}
1615
1616bool DSEState::isGuaranteedLoopInvariant(const Value *Ptr) {
1617 Ptr = Ptr->stripPointerCasts();
1618 if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr))
1619 if (GEP->hasAllConstantIndices())
1620 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1621
1622 if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
1623 return I->getParent()->isEntryBlock() ||
1624 (!ContainsIrreducibleLoops && !LI.getLoopFor(BB: I->getParent()));
1625 }
1626 return true;
1627}
1628
/// Starting at \p StartAccess, walk MemorySSA upwards to find the next
/// MemoryDef that may be killed by \p KillingDef (which writes \p KillingLoc
/// with underlying object \p KillingUndObj). Returns the candidate access (a
/// MemoryDef the caller may eliminate, or a MemoryPhi the caller must
/// traverse), or std::nullopt if no candidate exists or a limit/barrier was
/// hit. \p ScanLimit, \p WalkerStepLimit and \p PartialLimit are budget
/// counters shared with the caller and decremented in place.
std::optional<MemoryAccess *> DSEState::getDomMemoryDef(
    MemoryDef *KillingDef, MemoryAccess *StartAccess,
    const MemoryLocation &KillingLoc, const Value *KillingUndObj,
    unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm,
    unsigned &PartialLimit, bool IsInitializesAttrMemLoc) {
  if (ScanLimit == 0 || WalkerStepLimit == 0) {
    LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
    return std::nullopt;
  }

  MemoryAccess *Current = StartAccess;
  Instruction *KillingI = KillingDef->getMemoryInst();
  LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");

  // Only optimize defining access of KillingDef when directly starting at its
  // defining access. The defining access also must only access KillingLoc. At
  // the moment we only support instructions with a single write location, so
  // it should be sufficient to disable optimizations for instructions that
  // also read from memory.
  bool CanOptimize = OptimizeMemorySSA &&
                     KillingDef->getDefiningAccess() == StartAccess &&
                     !KillingI->mayReadFromMemory();

  // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
  std::optional<MemoryLocation> CurrentLoc;
  // Walk the def chain upwards; each 'continue' advances Current to its
  // defining access. Exits via 'return' (no candidate) or 'break' (found).
  for (;; Current = cast<MemoryDef>(Val: Current)->getDefiningAccess()) {
    LLVM_DEBUG({
      dbgs() << "   visiting " << *Current;
      if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
        dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
               << ")";
      dbgs() << "\n";
    });

    // Reached TOP.
    if (MSSA.isLiveOnEntryDef(MA: Current)) {
      LLVM_DEBUG(dbgs() << "   ...  found LiveOnEntryDef\n");
      if (CanOptimize && Current != KillingDef->getDefiningAccess())
        // The first clobbering def is... none.
        KillingDef->setOptimized(Current);
      return std::nullopt;
    }

    // Cost of a step. Accesses in the same block are more likely to be valid
    // candidates for elimination, hence consider them cheaper.
    unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
                            ? MemorySSASameBBStepCost
                            : MemorySSAOtherBBStepCost;
    if (WalkerStepLimit <= StepCost) {
      LLVM_DEBUG(dbgs() << "   ...  hit walker step limit\n");
      return std::nullopt;
    }
    WalkerStepLimit -= StepCost;

    // Return for MemoryPhis. They cannot be eliminated directly and the
    // caller is responsible for traversing them.
    if (isa<MemoryPhi>(Val: Current)) {
      LLVM_DEBUG(dbgs() << "   ...  found MemoryPhi\n");
      return Current;
    }

    // Below, check if CurrentDef is a valid candidate to be eliminated by
    // KillingDef. If it is not, check the next candidate.
    MemoryDef *CurrentDef = cast<MemoryDef>(Val: Current);
    Instruction *CurrentI = CurrentDef->getMemoryInst();

    if (canSkipDef(D: CurrentDef, DefVisibleToCaller: !isInvisibleToCallerOnUnwind(V: KillingUndObj))) {
      CanOptimize = false;
      continue;
    }

    // Before we try to remove anything, check for any extra throwing
    // instructions that block us from DSEing
    if (mayThrowBetween(KillingI, DeadI: CurrentI, KillingUndObj)) {
      LLVM_DEBUG(dbgs() << "  ... skip, may throw!\n");
      return std::nullopt;
    }

    // Check for anything that looks like it will be a barrier to further
    // removal
    if (isDSEBarrier(KillingUndObj, DeadI: CurrentI)) {
      LLVM_DEBUG(dbgs() << "  ... skip, barrier\n");
      return std::nullopt;
    }

    // If Current is known to be on path that reads DefLoc or is a read
    // clobber, bail out, as the path is not profitable. We skip this check
    // for intrinsic calls, because the code knows how to handle memcpy
    // intrinsics.
    if (!isa<IntrinsicInst>(Val: CurrentI) && isReadClobber(DefLoc: KillingLoc, UseInst: CurrentI))
      return std::nullopt;

    // Quick check if there are direct uses that are read-clobbers.
    if (any_of(Range: Current->uses(), P: [this, &KillingLoc, StartAccess](Use &U) {
          if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U.getUser()))
            return !MSSA.dominates(A: StartAccess, B: UseOrDef) &&
                   isReadClobber(DefLoc: KillingLoc, UseInst: UseOrDef->getMemoryInst());
          return false;
        })) {
      LLVM_DEBUG(dbgs() << "   ...  found a read clobber\n");
      return std::nullopt;
    }

    // If Current does not have an analyzable write location or is not
    // removable, skip it.
    CurrentLoc = getLocForWrite(I: CurrentI);
    if (!CurrentLoc || !isRemovable(I: CurrentI)) {
      CanOptimize = false;
      continue;
    }

    // AliasAnalysis does not account for loops. Limit elimination to
    // candidates for which we can guarantee they always store to the same
    // memory location and not located in different loops.
    if (!isGuaranteedLoopIndependent(Current: CurrentI, KillingDef: KillingI, CurrentLoc: *CurrentLoc)) {
      LLVM_DEBUG(dbgs() << "  ... not guaranteed loop independent\n");
      CanOptimize = false;
      continue;
    }

    if (IsMemTerm) {
      // If the killing def is a memory terminator (e.g. lifetime.end), check
      // the next candidate if the current Current does not write the same
      // underlying object as the terminator.
      if (!isMemTerminator(Loc: *CurrentLoc, AccessI: CurrentI, MaybeTerm: KillingI)) {
        CanOptimize = false;
        continue;
      }
    } else {
      int64_t KillingOffset = 0;
      int64_t DeadOffset = 0;
      auto OR = isOverwrite(KillingI, DeadI: CurrentI, KillingLoc, DeadLoc: *CurrentLoc,
                            KillingOff&: KillingOffset, DeadOff&: DeadOffset);
      if (CanOptimize) {
        // CurrentDef is the earliest write clobber of KillingDef. Use it as
        // optimized access. Do not optimize if CurrentDef is already the
        // defining access of KillingDef.
        if (CurrentDef != KillingDef->getDefiningAccess() &&
            (OR == OW_Complete || OR == OW_MaybePartial))
          KillingDef->setOptimized(CurrentDef);

        // Once a may-aliasing def is encountered do not set an optimized
        // access.
        if (OR != OW_None)
          CanOptimize = false;
      }

      // If Current does not write to the same object as KillingDef, check
      // the next candidate.
      if (OR == OW_Unknown || OR == OW_None)
        continue;
      else if (OR == OW_MaybePartial) {
        // If KillingDef only partially overwrites Current, check the next
        // candidate if the partial step limit is exceeded. This aggressively
        // limits the number of candidates for partial store elimination,
        // which are less likely to be removable in the end.
        if (PartialLimit <= 1) {
          WalkerStepLimit -= 1;
          LLVM_DEBUG(dbgs() << "   ... reached partial limit ... continue with "
                               "next access\n");
          continue;
        }
        PartialLimit -= 1;
      }
    }
    break;
  };

  // Accesses to objects accessible after the function returns can only be
  // eliminated if the access is dead along all paths to the exit. Collect
  // the blocks with killing (=completely overwriting MemoryDefs) and check if
  // they cover all paths from MaybeDeadAccess to any function exit.
  SmallPtrSet<Instruction *, 16> KillingDefs;
  KillingDefs.insert(Ptr: KillingDef->getMemoryInst());
  MemoryAccess *MaybeDeadAccess = Current;
  MemoryLocation MaybeDeadLoc = *CurrentLoc;
  Instruction *MaybeDeadI = cast<MemoryDef>(Val: MaybeDeadAccess)->getMemoryInst();
  LLVM_DEBUG(dbgs() << "  Checking for reads of " << *MaybeDeadAccess << " ("
                    << *MaybeDeadI << ")\n");

  SmallVector<MemoryAccess *, 32> WorkList;
  SmallPtrSet<MemoryAccess *, 32> Visited;
  pushMemUses(Acc: MaybeDeadAccess, WorkList, Visited);

  // Check if DeadDef may be read.
  // Forward walk over uses; WorkList grows while iterating.
  for (unsigned I = 0; I < WorkList.size(); I++) {
    MemoryAccess *UseAccess = WorkList[I];

    LLVM_DEBUG(dbgs() << "   " << *UseAccess);
    // Bail out if the number of accesses to check exceeds the scan limit.
    if (ScanLimit < (WorkList.size() - I)) {
      LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
      return std::nullopt;
    }
    --ScanLimit;
    NumDomMemDefChecks++;

    if (isa<MemoryPhi>(Val: UseAccess)) {
      // Phis dominated by a killing def cannot expose the dead value; skip.
      if (any_of(Range&: KillingDefs, P: [this, UseAccess](Instruction *KI) {
            return DT.properlyDominates(A: KI->getParent(), B: UseAccess->getBlock());
          })) {
        LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
        continue;
      }
      LLVM_DEBUG(dbgs() << "\n    ... adding PHI uses\n");
      pushMemUses(Acc: UseAccess, WorkList, Visited);
      continue;
    }

    Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
    LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");

    if (any_of(Range&: KillingDefs, P: [this, UseInst](Instruction *KI) {
          return DT.dominates(Def: KI, User: UseInst);
        })) {
      LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
      continue;
    }

    // A memory terminator kills all preceeding MemoryDefs and all succeeding
    // MemoryAccesses. We do not have to check it's users.
    if (isMemTerminator(Loc: MaybeDeadLoc, AccessI: MaybeDeadI, MaybeTerm: UseInst)) {
      LLVM_DEBUG(
          dbgs()
          << " ... skipping, memterminator invalidates following accesses\n");
      continue;
    }

    if (isNoopIntrinsic(I: cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst())) {
      LLVM_DEBUG(dbgs() << "    ... adding uses of intrinsic\n");
      pushMemUses(Acc: UseAccess, WorkList, Visited);
      continue;
    }

    if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) {
      LLVM_DEBUG(dbgs() << "  ... found throwing instruction\n");
      return std::nullopt;
    }

    // Uses which may read the original MemoryDef mean we cannot eliminate the
    // original MD. Stop walk.
    // If KillingDef is a CallInst with "initializes" attribute, the reads in
    // the callee would be dominated by initializations, so it should be safe.
    bool IsKillingDefFromInitAttr = false;
    if (IsInitializesAttrMemLoc) {
      if (KillingI == UseInst &&
          KillingUndObj == getUnderlyingObject(V: MaybeDeadLoc.Ptr))
        IsKillingDefFromInitAttr = true;
    }

    if (isReadClobber(DefLoc: MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
      LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
      return std::nullopt;
    }

    // If this worklist walks back to the original memory access (and the
    // pointer is not guarenteed loop invariant) then we cannot assume that a
    // store kills itself.
    if (MaybeDeadAccess == UseAccess &&
        !isGuaranteedLoopInvariant(Ptr: MaybeDeadLoc.Ptr)) {
      LLVM_DEBUG(dbgs() << "    ... found not loop invariant self access\n");
      return std::nullopt;
    }
    // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
    // if it reads the memory location.
    // TODO: It would probably be better to check for self-reads before
    // calling the function.
    if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
      LLVM_DEBUG(dbgs() << "    ... skipping killing def/dom access\n");
      continue;
    }

    // Check all uses for MemoryDefs, except for defs completely overwriting
    // the original location. Otherwise we have to check uses of *all*
    // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
    // miss cases like the following
    //   1 = Def(LoE) ; <----- DeadDef stores [0,1]
    //   2 = Def(1)   ; (2, 1) = NoAlias,   stores [2,3]
    //   Use(2)       ; MayAlias 2 *and* 1, loads [0, 3].
    //                  (The Use points to the *first* Def it may alias)
    //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
    //                  stores [0,1]
    if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) {
      if (isCompleteOverwrite(DefLoc: MaybeDeadLoc, DefInst: MaybeDeadI, UseInst)) {
        BasicBlock *MaybeKillingBlock = UseInst->getParent();
        // Post-order number comparison distinguishes defs after (killing)
        // from defs before (invalidating) MaybeDeadAccess in the CFG.
        if (PostOrderNumbers.find(Val: MaybeKillingBlock)->second <
            PostOrderNumbers.find(Val: MaybeDeadAccess->getBlock())->second) {
          if (!isInvisibleToCallerAfterRet(V: KillingUndObj, Ptr: KillingLoc.Ptr,
                                           StoreSize: KillingLoc.Size)) {
            LLVM_DEBUG(dbgs()
                       << "    ... found killing def " << *UseInst << "\n");
            KillingDefs.insert(Ptr: UseInst);
          }
        } else {
          LLVM_DEBUG(dbgs()
                     << "    ... found preceeding def " << *UseInst << "\n");
          return std::nullopt;
        }
      } else
        pushMemUses(Acc: UseDef, WorkList, Visited);
    }
  }

  // For accesses to locations visible after the function returns, make sure
  // that the location is dead (=overwritten) along all paths from
  // MaybeDeadAccess to the exit.
  if (!isInvisibleToCallerAfterRet(V: KillingUndObj, Ptr: KillingLoc.Ptr,
                                   StoreSize: KillingLoc.Size)) {
    SmallPtrSet<BasicBlock *, 16> KillingBlocks;
    for (Instruction *KD : KillingDefs)
      KillingBlocks.insert(Ptr: KD->getParent());
    assert(!KillingBlocks.empty() &&
           "Expected at least a single killing block");

    // Find the common post-dominator of all killing blocks.
    BasicBlock *CommonPred = *KillingBlocks.begin();
    for (BasicBlock *BB : llvm::drop_begin(RangeOrContainer&: KillingBlocks)) {
      if (!CommonPred)
        break;
      CommonPred = PDT.findNearestCommonDominator(A: CommonPred, B: BB);
    }

    // If the common post-dominator does not post-dominate MaybeDeadAccess,
    // there is a path from MaybeDeadAccess to an exit not going through a
    // killing block.
    if (!PDT.dominates(A: CommonPred, B: MaybeDeadAccess->getBlock())) {
      if (!AnyUnreachableExit)
        return std::nullopt;

      // Fall back to CFG scan starting at all non-unreachable roots if not
      // all paths to the exit go through CommonPred.
      CommonPred = nullptr;
    }

    // If CommonPred itself is in the set of killing blocks, we're done.
    if (KillingBlocks.count(Ptr: CommonPred))
      return {MaybeDeadAccess};

    SetVector<BasicBlock *> WorkList;
    // If CommonPred is null, there are multiple exits from the function.
    // They all have to be added to the worklist.
    if (CommonPred)
      WorkList.insert(X: CommonPred);
    else
      for (BasicBlock *R : PDT.roots()) {
        if (!isa<UnreachableInst>(Val: R->getTerminator()))
          WorkList.insert(X: R);
      }

    NumCFGTries++;
    // Check if all paths starting from an exit node go through one of the
    // killing blocks before reaching MaybeDeadAccess.
    // Backwards CFG walk from the exits; WorkList grows while iterating.
    for (unsigned I = 0; I < WorkList.size(); I++) {
      NumCFGChecks++;
      BasicBlock *Current = WorkList[I];
      if (KillingBlocks.count(Ptr: Current))
        continue;
      if (Current == MaybeDeadAccess->getBlock())
        return std::nullopt;

      // MaybeDeadAccess is reachable from the entry, so we don't have to
      // explore unreachable blocks further.
      if (!DT.isReachableFromEntry(A: Current))
        continue;

      WorkList.insert_range(R: predecessors(BB: Current));

      if (WorkList.size() >= MemorySSAPathCheckLimit)
        return std::nullopt;
    }
    NumCFGSuccess++;
  }

  // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
  // potentially dead.
  return {MaybeDeadAccess};
}
2006
/// Delete dead instruction \p SI and, transitively, any operands that become
/// trivially dead, keeping MemorySSA and the DSE bookkeeping structures in
/// sync. If \p Deleted is non-null, every removed MemoryDef is recorded in it.
void DSEState::deleteDeadInstruction(Instruction *SI,
                                     SmallPtrSetImpl<MemoryAccess *> *Deleted) {
  MemorySSAUpdater Updater(&MSSA);
  SmallVector<Instruction *, 32> NowDeadInsts;
  NowDeadInsts.push_back(Elt: SI);
  // Pre-decrement so the unconditional ++ in the loop does not count SI
  // itself; only transitively deleted instructions add to NumFastOther.
  --NumFastOther;

  while (!NowDeadInsts.empty()) {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();
    ++NumFastOther;

    // Try to preserve debug information attached to the dead instruction.
    salvageDebugInfo(I&: *DeadInst);
    salvageKnowledge(I: DeadInst);

    // Remove the Instruction from MSSA.
    MemoryAccess *MA = MSSA.getMemoryAccess(I: DeadInst);
    bool IsMemDef = MA && isa<MemoryDef>(Val: MA);
    if (MA) {
      if (IsMemDef) {
        auto *MD = cast<MemoryDef>(Val: MA);
        SkipStores.insert(Ptr: MD);
        if (Deleted)
          Deleted->insert(Ptr: MD);
        // Deleting a pointer store may make its stored-to object eligible
        // for end-of-function DSE again; invalidate the cached visibility
        // facts and request another iteration.
        if (auto *SI = dyn_cast<StoreInst>(Val: MD->getMemoryInst())) {
          if (SI->getValueOperand()->getType()->isPointerTy()) {
            const Value *UO = getUnderlyingObject(V: SI->getValueOperand());
            if (CapturedBeforeReturn.erase(Val: UO))
              ShouldIterateEndOfFunctionDSE = true;
            InvisibleToCallerAfterRet.erase(Val: UO);
            InvisibleToCallerAfterRetBounded.erase(Val: UO);
          }
        }
      }

      Updater.removeMemoryAccess(MA);
    }

    // Drop any cached overlap-interval info for this instruction.
    auto I = IOLs.find(Key: DeadInst->getParent());
    if (I != IOLs.end())
      I->second.erase(Key: DeadInst);
    // Remove its operands
    for (Use &O : DeadInst->operands())
      if (Instruction *OpI = dyn_cast<Instruction>(Val&: O)) {
        O.set(PoisonValue::get(T: O->getType()));
        if (isInstructionTriviallyDead(I: OpI, TLI: &TLI))
          NowDeadInsts.push_back(Elt: OpI);
      }

    EA.removeInstruction(I: DeadInst);
    // Remove memory defs directly if they don't produce results, but only
    // queue other dead instructions for later removal. They may have been
    // used as memory locations that have been cached by BatchAA. Removing
    // them here may lead to newly created instructions to be allocated at the
    // same address, yielding stale cache entries.
    if (IsMemDef && DeadInst->getType()->isVoidTy())
      DeadInst->eraseFromParent();
    else
      ToRemove.push_back(Elt: DeadInst);
  }
}
2068
2069bool DSEState::mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
2070 const Value *KillingUndObj) {
2071 // First see if we can ignore it by using the fact that KillingI is an
2072 // alloca/alloca like object that is not visible to the caller during
2073 // execution of the function.
2074 if (KillingUndObj && isInvisibleToCallerOnUnwind(V: KillingUndObj))
2075 return false;
2076
2077 if (KillingI->getParent() == DeadI->getParent())
2078 return ThrowingBlocks.count(Ptr: KillingI->getParent());
2079 return !ThrowingBlocks.empty();
2080}
2081
/// Returns true if \p DeadI acts as a barrier that prevents eliminating or
/// reordering it: a throwing instruction whose effects are visible to the
/// caller on unwind, or an atomic access stronger than monotonic.
bool DSEState::isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
  // If DeadI may throw it acts as a barrier, unless we are to an
  // alloca/alloca like object that does not escape.
  if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj))
    return true;

  // If DeadI is an atomic load/store stronger than monotonic, do not try to
  // eliminate/reorder it.
  if (DeadI->isAtomic()) {
    if (auto *LI = dyn_cast<LoadInst>(Val: DeadI))
      return isStrongerThanMonotonic(AO: LI->getOrdering());
    if (auto *SI = dyn_cast<StoreInst>(Val: DeadI))
      return isStrongerThanMonotonic(AO: SI->getOrdering());
    if (auto *ARMW = dyn_cast<AtomicRMWInst>(Val: DeadI))
      return isStrongerThanMonotonic(AO: ARMW->getOrdering());
    // cmpxchg has two orderings; either being stronger than monotonic makes
    // it a barrier.
    if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Val: DeadI))
      return isStrongerThanMonotonic(AO: CmpXchg->getSuccessOrdering()) ||
             isStrongerThanMonotonic(AO: CmpXchg->getFailureOrdering());
    llvm_unreachable("other instructions should be skipped in MemorySSA");
  }
  return false;
}
2104
/// Remove MemoryDefs that write locations which are invisible to the caller
/// after the function returns and are never read before any exit. Iterates to
/// a fixed point because deleting a store can expose further candidates
/// (ShouldIterateEndOfFunctionDSE is set by deleteDeadInstruction). Returns
/// true if any store was removed.
bool DSEState::eliminateDeadWritesAtEndOfFunction() {
  bool MadeChange = false;
  LLVM_DEBUG(
      dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n");
  do {
    ShouldIterateEndOfFunctionDSE = false;
    // Visit defs bottom-up so later (covering) stores are considered first.
    for (MemoryDef *Def : llvm::reverse(C&: MemDefs)) {
      if (SkipStores.contains(Ptr: Def))
        continue;

      Instruction *DefI = Def->getMemoryInst();
      auto DefLoc = getLocForWrite(I: DefI);
      if (!DefLoc || !isRemovable(I: DefI)) {
        LLVM_DEBUG(dbgs() << "  ... could not get location for write or "
                             "instruction not removable.\n");
        continue;
      }

      // NOTE: Currently eliminating writes at the end of a function is
      // limited to MemoryDefs with a single underlying object, to save
      // compile-time. In practice it appears the case with multiple
      // underlying objects is very uncommon. If it turns out to be important,
      // we can use getUnderlyingObjects here instead.
      const Value *UO = getUnderlyingObject(V: DefLoc->Ptr);
      if (!isInvisibleToCallerAfterRet(V: UO, Ptr: DefLoc->Ptr, StoreSize: DefLoc->Size))
        continue;

      if (isWriteAtEndOfFunction(Def, DefLoc: *DefLoc)) {
        // See through pointer-to-pointer bitcasts
        LLVM_DEBUG(dbgs() << "   ... MemoryDef is not accessed until the end "
                             "of the function\n");
        deleteDeadInstruction(SI: DefI);
        ++NumFastStores;
        MadeChange = true;
      }
    }
  } while (ShouldIterateEndOfFunctionDSE);
  return MadeChange;
}
2144
/// If \p Def is a memset(p, 0, n) whose underlying object \p DefUO is a
/// malloc(n) (or a call carrying an "alloc-variant-zeroed" attribute), fold
/// both into a single zero-initializing allocation (calloc or the named
/// zeroed variant) and delete the malloc. Returns true on success.
bool DSEState::tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
  Instruction *DefI = Def->getMemoryInst();
  MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
  if (!MemSet)
    // TODO: Could handle zero store to small allocation as well.
    return false;
  // Only an all-zero memset can be replaced by a zeroed allocation.
  Constant *StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
  if (!StoredConstant || !StoredConstant->isNullValue())
    return false;

  if (!isRemovable(I: DefI))
    // The memset might be volatile..
    return false;

  // Sanitizers instrument malloc/memset separately, and rewriting calloc's
  // own body would recurse; bail out in those cases.
  if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
      F.hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
      F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress) || F.getName() == "calloc")
    return false;
  auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(Val: DefUO));
  if (!Malloc)
    return false;
  auto *InnerCallee = Malloc->getCalledFunction();
  if (!InnerCallee)
    return false;
  // Either a recognized malloc, or any allocator that names a zeroed variant
  // via the "alloc-variant-zeroed" call-site attribute.
  LibFunc Func = NotLibFunc;
  StringRef ZeroedVariantName;
  if (!TLI.getLibFunc(FDecl: *InnerCallee, F&: Func) || !TLI.has(F: Func) ||
      Func != LibFunc_malloc) {
    Attribute Attr = Malloc->getFnAttr(Kind: "alloc-variant-zeroed");
    if (!Attr.isValid())
      return false;
    ZeroedVariantName = Attr.getValueAsString();
    if (ZeroedVariantName.empty())
      return false;
  }

  // Gracefully handle malloc with unexpected memory attributes.
  auto *MallocDef = dyn_cast_or_null<MemoryDef>(Val: MSSA.getMemoryAccess(I: Malloc));
  if (!MallocDef)
    return false;

  // Only fold when the memset is either in the malloc's block or in the
  // branch taken when the returned pointer is non-null.
  auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
    // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
    // of malloc block
    auto *MallocBB = Malloc->getParent(), *MemsetBB = Memset->getParent();
    if (MallocBB == MemsetBB)
      return true;
    auto *Ptr = Memset->getArgOperand(i: 0);
    auto *TI = MallocBB->getTerminator();
    BasicBlock *TrueBB, *FalseBB;
    if (!match(V: TI, P: m_Br(C: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: Ptr),
                                        R: m_Zero()),
                         T&: TrueBB, F&: FalseBB)))
      return false;
    if (MemsetBB != FalseBB)
      return false;
    return true;
  };

  // The memset must cover the entire allocation, and nothing may modify the
  // memory between the allocation and the memset.
  if (Malloc->getOperand(i_nocapture: 0) != MemSet->getLength())
    return false;
  if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Def: Malloc, User: MemSet) ||
      !memoryIsNotModifiedBetween(FirstI: Malloc, SecondI: MemSet, AA&: BatchAA, DL, DT: &DT))
    return false;
  IRBuilder<> IRB(Malloc);
  assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
  Value *Calloc = nullptr;
  if (!ZeroedVariantName.empty()) {
    // Build a call to the named zeroed variant, copying the allocator's
    // attributes but marking the result zero-initialized.
    LLVMContext &Ctx = Malloc->getContext();
    AttributeList Attrs = InnerCallee->getAttributes();
    AllocFnKind AllocKind =
        Attrs.getFnAttr(Kind: Attribute::AllocKind).getAllocKind() |
        AllocFnKind::Zeroed;
    AllocKind &= ~AllocFnKind::Uninitialized;
    Attrs =
        Attrs.addFnAttribute(C&: Ctx, Attr: Attribute::getWithAllocKind(Context&: Ctx, Kind: AllocKind))
            .removeFnAttribute(C&: Ctx, Kind: "alloc-variant-zeroed");
    FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
        Name: ZeroedVariantName, T: InnerCallee->getFunctionType(), AttributeList: Attrs);
    cast<Function>(Val: ZeroedVariant.getCallee())
        ->setCallingConv(Malloc->getCallingConv());
    SmallVector<Value *, 3> Args;
    Args.append(in_start: Malloc->arg_begin(), in_end: Malloc->arg_end());
    CallInst *CI = IRB.CreateCall(Callee: ZeroedVariant, Args, Name: ZeroedVariantName);
    CI->setCallingConv(Malloc->getCallingConv());
    Calloc = CI;
  } else {
    // Standard path: malloc(n) + memset 0 -> calloc(1, n).
    Type *SizeTTy = Malloc->getArgOperand(i: 0)->getType();
    Calloc = emitCalloc(Num: ConstantInt::get(Ty: SizeTTy, V: 1), Size: Malloc->getArgOperand(i: 0),
                        B&: IRB, TLI, AddrSpace: Malloc->getType()->getPointerAddressSpace());
  }
  if (!Calloc)
    return false;

  // Splice the new allocation into MemorySSA, replace malloc's uses, and
  // delete the malloc (the caller handles removing the memset).
  MemorySSAUpdater Updater(&MSSA);
  auto *NewAccess = Updater.createMemoryAccessAfter(I: cast<Instruction>(Val: Calloc),
                                                    Definition: nullptr, InsertPt: MallocDef);
  auto *NewAccessMD = cast<MemoryDef>(Val: NewAccess);
  Updater.insertDef(Def: NewAccessMD, /*RenameUses=*/true);
  Malloc->replaceAllUsesWith(V: Calloc);
  deleteDeadInstruction(SI: Malloc);
  return true;
}
2248
2249bool DSEState::dominatingConditionImpliesValue(MemoryDef *Def) {
2250 auto *StoreI = cast<StoreInst>(Val: Def->getMemoryInst());
2251 BasicBlock *StoreBB = StoreI->getParent();
2252 Value *StorePtr = StoreI->getPointerOperand();
2253 Value *StoreVal = StoreI->getValueOperand();
2254
2255 DomTreeNode *IDom = DT.getNode(BB: StoreBB)->getIDom();
2256 if (!IDom)
2257 return false;
2258
2259 auto *BI = dyn_cast<CondBrInst>(Val: IDom->getBlock()->getTerminator());
2260 if (!BI)
2261 return false;
2262
2263 // In case both blocks are the same, it is not possible to determine
2264 // if optimization is possible. (We would not want to optimize a store
2265 // in the FalseBB if condition is true and vice versa.)
2266 if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1))
2267 return false;
2268
2269 Instruction *ICmpL;
2270 CmpPredicate Pred;
2271 if (!match(V: BI->getCondition(),
2272 P: m_c_ICmp(Pred,
2273 L: m_CombineAnd(L: m_Load(Op: m_Specific(V: StorePtr)),
2274 R: m_Instruction(I&: ICmpL)),
2275 R: m_Specific(V: StoreVal))) ||
2276 !ICmpInst::isEquality(P: Pred))
2277 return false;
2278
2279 // Ensure the replacement is allowed when comparing pointers, as
2280 // the equality compares addresses only, not pointers' provenance.
2281 if (StoreVal->getType()->isPointerTy() &&
2282 !canReplacePointersIfEqual(From: StoreVal, To: ICmpL, DL))
2283 return false;
2284
2285 // In case the else blocks also branches to the if block or the other way
2286 // around it is not possible to determine if the optimization is possible.
2287 if (Pred == ICmpInst::ICMP_EQ &&
2288 !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 0)),
2289 BB: StoreBB))
2290 return false;
2291
2292 if (Pred == ICmpInst::ICMP_NE &&
2293 !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 1)),
2294 BB: StoreBB))
2295 return false;
2296
2297 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(I: ICmpL);
2298 MemoryAccess *ClobAcc =
2299 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2300
2301 return MSSA.dominates(A: ClobAcc, B: LoadAcc);
2302}
2303
// Check whether \p Def is a no-op store: it writes a value that is already
// present at the written location, so removing it does not change observable
// memory state. \p DefUO is the underlying object of the stored-to pointer.
bool DSEState::storeIsNoop(MemoryDef *Def, const Value *DefUO) {
  Instruction *DefI = Def->getMemoryInst();
  // Only plain stores and memsets are considered; for those, extract the
  // written value if it is a constant. Any other writing instruction bails.
  StoreInst *Store = dyn_cast<StoreInst>(Val: DefI);
  MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
  Constant *StoredConstant = nullptr;
  if (Store)
    StoredConstant = dyn_cast<Constant>(Val: Store->getOperand(i_nocapture: 0));
  else if (MemSet)
    StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
  else
    return false;

  if (!isRemovable(I: DefI))
    return false;

  // Case 1: the store writes the allocation's known initial value, and the
  // clobbering access for Def is LiveOnEntry, i.e. nothing since the
  // allocation has modified the location.
  if (StoredConstant) {
    Constant *InitC =
        getInitialValueOfAllocation(V: DefUO, TLI: &TLI, Ty: StoredConstant->getType());
    // If the clobbering access is LiveOnEntry, no instructions between them
    // can modify the memory location.
    if (InitC && InitC == StoredConstant)
      return MSSA.isLiveOnEntryDef(
          MA: MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA));
  }

  // The remaining cases apply to plain stores only.
  if (!Store)
    return false;

  // Case 2: a dominating branch condition proves the memory already holds
  // the stored value.
  if (dominatingConditionImpliesValue(Def))
    return true;

  // Case 3: storing back a value that was just loaded from the same pointer.
  if (auto *LoadI = dyn_cast<LoadInst>(Val: Store->getOperand(i_nocapture: 0))) {
    if (LoadI->getPointerOperand() == Store->getOperand(i_nocapture: 1)) {
      // Get the defining access for the load.
      auto *LoadAccess = MSSA.getMemoryAccess(I: LoadI)->getDefiningAccess();
      // Fast path: the defining accesses are the same.
      if (LoadAccess == Def->getDefiningAccess())
        return true;

      // Look through phi accesses. Recursively scan all phi accesses by
      // adding them to a worklist. Bail when we run into a memory def that
      // does not match LoadAccess.
      SetVector<MemoryAccess *> ToCheck;
      MemoryAccess *Current =
          MSSA.getWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
      // We don't want to bail when we run into the store memory def. But,
      // the phi access may point to it. So, pretend like we've already
      // checked it.
      ToCheck.insert(X: Def);
      ToCheck.insert(X: Current);
      // Start at current (1) to simulate already having checked Def.
      // Note: the loop bound is re-read each iteration because the worklist
      // grows as phi operands are discovered.
      for (unsigned I = 1; I < ToCheck.size(); ++I) {
        Current = ToCheck[I];
        if (auto PhiAccess = dyn_cast<MemoryPhi>(Val: Current)) {
          // Check all the operands.
          for (auto &Use : PhiAccess->incoming_values())
            ToCheck.insert(X: cast<MemoryAccess>(Val: &Use));
          continue;
        }

        // If we found a memory def, bail. This happens when we have an
        // unrelated write in between an otherwise noop store.
        assert(isa<MemoryDef>(Current) && "Only MemoryDefs should reach here.");
        // TODO: Skip no alias MemoryDefs that have no aliasing reads.
        // We are searching for the definition of the store's destination.
        // So, if that is the same definition as the load, then this is a
        // noop. Otherwise, fail.
        if (LoadAccess != Current)
          return false;
      }
      return true;
    }
  }

  return false;
}
2380
2381bool DSEState::removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2382 bool Changed = false;
2383 for (auto OI : IOL) {
2384 Instruction *DeadI = OI.first;
2385 MemoryLocation Loc = *getLocForWrite(I: DeadI);
2386 assert(isRemovable(DeadI) && "Expect only removable instruction");
2387
2388 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2389 int64_t DeadStart = 0;
2390 uint64_t DeadSize = Loc.Size.getValue();
2391 GetPointerBaseWithConstantOffset(Ptr, Offset&: DeadStart, DL);
2392 OverlapIntervalsTy &IntervalMap = OI.second;
2393 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2394 if (IntervalMap.empty())
2395 continue;
2396 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2397 }
2398 return Changed;
2399}
2400
// Remove stores that write a value which the next dominating MemoryDef
// (UpperDef) already wrote to the same location, e.g. an identical store
// repeated, or a byte store into memory already filled by a covering memset.
bool DSEState::eliminateRedundantStoresOfExistingValues() {
  bool MadeChange = false;
  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
                       "already existing value\n");
  for (auto *Def : MemDefs) {
    if (SkipStores.contains(Ptr: Def) || MSSA.isLiveOnEntryDef(MA: Def))
      continue;

    Instruction *DefInst = Def->getMemoryInst();
    auto MaybeDefLoc = getLocForWrite(I: DefInst);
    if (!MaybeDefLoc || !isRemovable(I: DefInst))
      continue;

    MemoryDef *UpperDef;
    // To conserve compile-time, we avoid walking to the next clobbering def.
    // Instead, we just try to get the optimized access, if it exists. DSE
    // will try to optimize defs during the earlier traversal.
    if (Def->isOptimized())
      UpperDef = dyn_cast<MemoryDef>(Val: Def->getOptimized());
    else
      UpperDef = dyn_cast<MemoryDef>(Val: Def->getDefiningAccess());
    if (!UpperDef || MSSA.isLiveOnEntryDef(MA: UpperDef))
      continue;

    Instruction *UpperInst = UpperDef->getMemoryInst();
    // True if UpperInst already establishes the value DefInst would store:
    // either the instructions are identical, or UpperInst is a memset whose
    // fill byte matches the (bytewise-splattable) stored value and whose
    // write completely covers DefInst's location.
    auto IsRedundantStore = [&]() {
      // We don't care about differences in call attributes here.
      if (DefInst->isIdenticalToWhenDefined(I: UpperInst,
                                            /*IntersectAttrs=*/true))
        return true;
      if (auto *MemSetI = dyn_cast<MemSetInst>(Val: UpperInst)) {
        if (auto *SI = dyn_cast<StoreInst>(Val: DefInst)) {
          // MemSetInst must have a write location.
          auto UpperLoc = getLocForWrite(I: UpperInst);
          if (!UpperLoc)
            return false;
          int64_t InstWriteOffset = 0;
          int64_t DepWriteOffset = 0;
          auto OR = isOverwrite(KillingI: UpperInst, DeadI: DefInst, KillingLoc: *UpperLoc, DeadLoc: *MaybeDefLoc,
                                KillingOff&: InstWriteOffset, DeadOff&: DepWriteOffset);
          Value *StoredByte = isBytewiseValue(V: SI->getValueOperand(), DL);
          return StoredByte && StoredByte == MemSetI->getOperand(i_nocapture: 1) &&
                 OR == OW_Complete;
        }
      }
      return false;
    };

    // The store is only removable if nothing may read the location between
    // UpperDef and Def.
    if (!IsRedundantStore() || isReadClobber(DefLoc: *MaybeDefLoc, UseInst: DefInst))
      continue;
    LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: " << *DefInst
                      << '\n');
    deleteDeadInstruction(SI: DefInst);
    NumRedundantStores++;
    MadeChange = true;
  }
  return MadeChange;
}
2459
// Collect the memory locations that call \p I is guaranteed to initialize,
// based on "initializes" attributes on its pointer arguments. Aliasing
// arguments are grouped and their init ranges intersected so only memory
// initialized through every aliasing path is reported. Returns one precise
// MemoryLocation per usable zero-based range.
SmallVector<MemoryLocation, 1>
DSEState::getInitializesArgMemLoc(const Instruction *I) {
  const CallBase *CB = dyn_cast<CallBase>(Val: I);
  if (!CB)
    return {};

  // Collect aliasing arguments and their initializes ranges.
  SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
  for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
    Value *CurArg = CB->getArgOperand(i: Idx);
    if (!CurArg->getType()->isPointerTy())
      continue;

    ConstantRangeList Inits;
    Attribute InitializesAttr = CB->getParamAttr(ArgNo: Idx, Kind: Attribute::Initializes);
    // initializes on byval arguments refers to the callee copy, not the
    // original memory the caller passed in.
    if (InitializesAttr.isValid() && !CB->isByValArgument(ArgNo: Idx))
      Inits = InitializesAttr.getValueAsConstantRangeList();

    // Check whether "CurArg" could alias with global variables. We require
    // either it's function local and isn't captured before or the "CB" only
    // accesses arg or inaccessible mem.
    if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
        !isFuncLocalAndNotCaptured(Arg: CurArg, CB, EA))
      Inits = ConstantRangeList();

    // We don't perform incorrect DSE on unwind edges in the current function,
    // and use the "initializes" attribute to kill dead stores if:
    // - The call does not throw exceptions, "CB->doesNotThrow()".
    // - Or the callee parameter has "dead_on_unwind" attribute.
    // - Or the argument is invisible to caller on unwind, and there are no
    //   unwind edges from this call in the current function (e.g. `CallInst`).
    bool IsDeadOrInvisibleOnUnwind =
        CB->paramHasAttr(ArgNo: Idx, Kind: Attribute::DeadOnUnwind) ||
        (isa<CallInst>(Val: CB) && isInvisibleToCallerOnUnwind(V: CurArg));
    ArgumentInitInfo InitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind, .Inits: Inits};
    // Attach this argument's info to every already-seen argument it may
    // alias; if it aliases none, it starts a new group.
    bool FoundAliasing = false;
    for (auto &[Arg, AliasList] : Arguments) {
      auto AAR = BatchAA.alias(LocA: MemoryLocation::getBeforeOrAfter(Ptr: Arg),
                               LocB: MemoryLocation::getBeforeOrAfter(Ptr: CurArg));
      if (AAR == AliasResult::NoAlias) {
        continue;
      } else if (AAR == AliasResult::MustAlias) {
        FoundAliasing = true;
        AliasList.push_back(Elt: InitInfo);
      } else {
        // For PartialAlias and MayAlias, there is an offset or may be an
        // unknown offset between the arguments and we insert an empty init
        // range to discard the entire initializes info while intersecting.
        FoundAliasing = true;
        AliasList.push_back(Elt: ArgumentInitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind,
                                             .Inits: ConstantRangeList()});
      }
    }
    if (!FoundAliasing)
      Arguments[CurArg] = {InitInfo};
  }

  // Build the resulting locations from each group's intersected ranges.
  SmallVector<MemoryLocation, 1> Locations;
  for (const auto &[_, Args] : Arguments) {
    auto IntersectedRanges =
        getIntersectedInitRangeList(Args, CallHasNoUnwindAttr: CB->doesNotThrow());
    if (IntersectedRanges.empty())
      continue;

    for (const auto &Arg : Args) {
      for (const auto &Range : IntersectedRanges) {
        int64_t Start = Range.getLower().getSExtValue();
        int64_t End = Range.getUpper().getSExtValue();
        // For now, we only handle locations starting at offset 0.
        if (Start == 0)
          Locations.push_back(Elt: MemoryLocation(CB->getArgOperand(i: Arg.Idx),
                                             LocationSize::precise(Value: End - Start),
                                             CB->getAAMetadata()));
      }
    }
  }
  return Locations;
}
2540
// Walk upwards from \p KillingLocWrapper.MemDef and eliminate (or shorten /
// merge) MemoryDefs it kills. Returns {Changed, DeletedKillingLoc}, where the
// second flag indicates the killing instruction itself was deleted as part of
// partial-store merging.
std::pair<bool, bool>
DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
  bool Changed = false;
  bool DeletedKillingLoc = false;
  // Per-query budgets that bound the MemorySSA walks for compile time.
  unsigned ScanLimit = MemorySSAScanLimit;
  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
  unsigned PartialLimit = MemorySSAPartialStoreLimit;
  // Worklist of MemoryAccesses that may be killed by
  // "KillingLocWrapper.MemDef".
  SmallSetVector<MemoryAccess *, 8> ToCheck;
  // Track MemoryAccesses that have been deleted in the loop below, so we can
  // skip them. Don't use SkipStores for this, which may contain reused
  // MemoryAccess addresses.
  SmallPtrSet<MemoryAccess *, 8> Deleted;
  [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
  ToCheck.insert(X: KillingLocWrapper.MemDef->getDefiningAccess());

  // Check if MemoryAccesses in the worklist are killed by
  // "KillingLocWrapper.MemDef". The bound is re-read each iteration because
  // the worklist grows as phis and further defs are discovered.
  for (unsigned I = 0; I < ToCheck.size(); I++) {
    MemoryAccess *Current = ToCheck[I];
    if (Deleted.contains(Ptr: Current))
      continue;
    std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
        KillingDef: KillingLocWrapper.MemDef, StartAccess: Current, KillingLoc: KillingLocWrapper.MemLoc,
        KillingUndObj: KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
        IsMemTerm: isMemTerminatorInst(I: KillingLocWrapper.DefInst), PartialLimit,
        IsInitializesAttrMemLoc: KillingLocWrapper.DefByInitializesAttr);

    if (!MaybeDeadAccess) {
      LLVM_DEBUG(dbgs() << "  finished walk\n");
      continue;
    }
    MemoryAccess *DeadAccess = *MaybeDeadAccess;
    LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
    if (isa<MemoryPhi>(Val: DeadAccess)) {
      LLVM_DEBUG(dbgs() << "\n  ... adding incoming values to worklist\n");
      for (Value *V : cast<MemoryPhi>(Val: DeadAccess)->incoming_values()) {
        MemoryAccess *IncomingAccess = cast<MemoryAccess>(Val: V);
        BasicBlock *IncomingBlock = IncomingAccess->getBlock();
        BasicBlock *PhiBlock = DeadAccess->getBlock();

        // We only consider incoming MemoryAccesses that come before the
        // MemoryPhi. Otherwise we could discover candidates that do not
        // strictly dominate our starting def.
        if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
          ToCheck.insert(X: IncomingAccess);
      }
      continue;
    }
    // We cannot apply the initializes attribute to DeadAccess/DeadDef.
    // It would incorrectly consider a call instruction as redundant store
    // and remove this call instruction.
    // TODO: this conflates the existence of a MemoryLocation with being able
    // to delete the instruction. Fix isRemovable() to consider calls with
    // side effects that cannot be removed, e.g. calls with the initializes
    // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
    MemoryDefWrapper DeadDefWrapper(
        cast<MemoryDef>(Val: DeadAccess),
        getLocForInst(I: cast<MemoryDef>(Val: DeadAccess)->getMemoryInst(),
                      /*ConsiderInitializesAttr=*/false));
    assert(DeadDefWrapper.DefinedLocations.size() == 1);
    MemoryLocationWrapper &DeadLocWrapper =
        DeadDefWrapper.DefinedLocations.front();
    LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
    // Continue the walk past the dead def so further killable defs upstream
    // can still be found.
    ToCheck.insert(X: DeadLocWrapper.MemDef->getDefiningAccess());
    NumGetDomMemoryDefPassed++;

    if (!DebugCounter::shouldExecute(Counter&: MemorySSACounter))
      continue;
    if (isMemTerminatorInst(I: KillingLocWrapper.DefInst)) {
      // Memory terminators (e.g. lifetime end / free) kill any write to the
      // same underlying object.
      if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
        continue;
      LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                        << *DeadLocWrapper.DefInst << "\n  KILLER: "
                        << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(SI: DeadLocWrapper.DefInst, Deleted: &Deleted);
      ++NumFastStores;
      Changed = true;
    } else {
      // Check if DeadI overwrites KillingI.
      int64_t KillingOffset = 0;
      int64_t DeadOffset = 0;
      OverwriteResult OR =
          isOverwrite(KillingI: KillingLocWrapper.DefInst, DeadI: DeadLocWrapper.DefInst,
                      KillingLoc: KillingLocWrapper.MemLoc, DeadLoc: DeadLocWrapper.MemLoc,
                      KillingOff&: KillingOffset, DeadOff&: DeadOffset);
      if (OR == OW_MaybePartial) {
        auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
        OR = isPartialOverwrite(KillingLoc: KillingLocWrapper.MemLoc, DeadLoc: DeadLocWrapper.MemLoc,
                                KillingOff: KillingOffset, DeadOff: DeadOffset,
                                DeadI: DeadLocWrapper.DefInst, IOL);
      }
      if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
        auto *DeadSI = dyn_cast<StoreInst>(Val: DeadLocWrapper.DefInst);
        auto *KillingSI = dyn_cast<StoreInst>(Val: KillingLocWrapper.DefInst);
        // We are re-using tryToMergePartialOverlappingStores, which requires
        // DeadSI to dominate KillingSI.
        // TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
        if (DeadSI && KillingSI && DT.dominates(Def: DeadSI, User: KillingSI)) {
          if (Constant *Merged = tryToMergePartialOverlappingStores(
                  KillingI: KillingSI, DeadI: DeadSI, KillingOffset, DeadOffset, DL, AA&: BatchAA,
                  DT: &DT)) {

            // Update stored value of earlier store to merged constant.
            DeadSI->setOperand(i_nocapture: 0, Val_nocapture: Merged);
            ++NumModifiedStores;
            Changed = true;
            DeletedKillingLoc = true;

            // Remove killing store and remove any outstanding overlap
            // intervals for the updated store.
            deleteDeadInstruction(SI: KillingSI, Deleted: &Deleted);
            auto I = IOLs.find(Key: DeadSI->getParent());
            if (I != IOLs.end())
              I->second.erase(Key: DeadSI);
            // The killing def is gone; nothing more can be killed by it.
            break;
          }
        }
      }
      if (OR == OW_Complete) {
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                          << *DeadLocWrapper.DefInst << "\n  KILLER: "
                          << *KillingLocWrapper.DefInst << '\n');
        deleteDeadInstruction(SI: DeadLocWrapper.DefInst, Deleted: &Deleted);
        ++NumFastStores;
        Changed = true;
      }
    }
  }

  assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
         "SkipStores and Deleted out of sync?");

  return {Changed, DeletedKillingLoc};
}
2677
2678bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2679 if (KillingDefWrapper.DefinedLocations.empty()) {
2680 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2681 << *KillingDefWrapper.DefInst << "\n");
2682 return false;
2683 }
2684
2685 bool MadeChange = false;
2686 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2687 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2688 << *KillingLocWrapper.MemDef << " ("
2689 << *KillingLocWrapper.DefInst << ")\n");
2690 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2691 MadeChange |= Changed;
2692
2693 // Check if the store is a no-op.
2694 if (!DeletedKillingLoc && storeIsNoop(Def: KillingLocWrapper.MemDef,
2695 DefUO: KillingLocWrapper.UnderlyingObject)) {
2696 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2697 << *KillingLocWrapper.DefInst << '\n');
2698 deleteDeadInstruction(SI: KillingLocWrapper.DefInst);
2699 NumRedundantStores++;
2700 MadeChange = true;
2701 continue;
2702 }
2703 // Can we form a calloc from a memset/malloc pair?
2704 if (!DeletedKillingLoc &&
2705 tryFoldIntoCalloc(Def: KillingLocWrapper.MemDef,
2706 DefUO: KillingLocWrapper.UnderlyingObject)) {
2707 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2708 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2709 deleteDeadInstruction(SI: KillingLocWrapper.DefInst);
2710 MadeChange = true;
2711 continue;
2712 }
2713 }
2714 return MadeChange;
2715}
2716
2717static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2718 DominatorTree &DT, PostDominatorTree &PDT,
2719 const TargetLibraryInfo &TLI,
2720 const LoopInfo &LI) {
2721 bool MadeChange = false;
2722 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2723 // For each store:
2724 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2725 MemoryDef *KillingDef = State.MemDefs[I];
2726 if (State.SkipStores.count(Ptr: KillingDef))
2727 continue;
2728
2729 MemoryDefWrapper KillingDefWrapper(
2730 KillingDef, State.getLocForInst(I: KillingDef->getMemoryInst(),
2731 ConsiderInitializesAttr: EnableInitializesImprovement));
2732 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2733 }
2734
2735 if (EnablePartialOverwriteTracking)
2736 for (auto &KV : State.IOLs)
2737 MadeChange |= State.removePartiallyOverlappedStores(IOL&: KV.second);
2738
2739 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2740 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2741
2742 while (!State.ToRemove.empty()) {
2743 Instruction *DeadInst = State.ToRemove.pop_back_val();
2744 DeadInst->eraseFromParent();
2745 }
2746
2747 return MadeChange;
2748}
2749
2750//===----------------------------------------------------------------------===//
2751// DSE Pass
2752//===----------------------------------------------------------------------===//
2753PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2754 AliasAnalysis &AA = AM.getResult<AAManager>(IR&: F);
2755 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
2756 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
2757 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2758 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F);
2759 LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
2760
2761 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2762
2763#ifdef LLVM_ENABLE_STATS
2764 if (AreStatisticsEnabled())
2765 for (auto &I : instructions(F))
2766 NumRemainingStores += isa<StoreInst>(Val: &I);
2767#endif
2768
2769 if (!Changed)
2770 return PreservedAnalyses::all();
2771
2772 PreservedAnalyses PA;
2773 PA.preserveSet<CFGAnalyses>();
2774 PA.preserve<MemorySSAAnalysis>();
2775 PA.preserve<LoopAnalysis>();
2776 return PA;
2777}
2778
2779namespace {
2780
2781/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
class DSELegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  DSELegacyPass() : FunctionPass(ID) {
    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  // Gather the analyses from the legacy pass manager and delegate to the
  // shared eliminateDeadStores() driver.
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    const TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
    PostDominatorTree &PDT =
        getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
    if (AreStatisticsEnabled())
      for (auto &I : instructions(F))
        NumRemainingStores += isa<StoreInst>(Val: &I);
#endif

    return Changed;
  }

  // Declare required and preserved analyses; DSE preserves the CFG.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<PostDominatorTreeWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addPreserved<PostDominatorTreeWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
  }
};
2830
2831} // end anonymous namespace
2832
char DSELegacyPass::ID = 0;

// Register the legacy "dse" pass and its analysis dependencies with the
// global pass registry.
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                    false)
2848
// Factory for clients of the legacy pass manager; the caller owns the
// returned pass.
LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() {
  return new DSELegacyPass();
}
2852