1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
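//
// As an illustrative example (not taken from a test), in the IR below the
// first store is dead because the second store completely overwrites it and
// there is no read of %p in between:
//
//   store i32 0, ptr %p
//   store i32 1, ptr %p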
27//
28//===----------------------------------------------------------------------===//
29
30#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
34#include "llvm/ADT/PostOrderIterator.h"
35#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/SmallPtrSet.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
40#include "llvm/Analysis/AliasAnalysis.h"
41#include "llvm/Analysis/AssumptionCache.h"
42#include "llvm/Analysis/CaptureTracking.h"
43#include "llvm/Analysis/GlobalsModRef.h"
44#include "llvm/Analysis/LoopInfo.h"
45#include "llvm/Analysis/MemoryBuiltins.h"
46#include "llvm/Analysis/MemoryLocation.h"
47#include "llvm/Analysis/MemorySSA.h"
48#include "llvm/Analysis/MemorySSAUpdater.h"
49#include "llvm/Analysis/MustExecute.h"
50#include "llvm/Analysis/PostDominators.h"
51#include "llvm/Analysis/TargetLibraryInfo.h"
52#include "llvm/Analysis/ValueTracking.h"
53#include "llvm/IR/Argument.h"
54#include "llvm/IR/AttributeMask.h"
55#include "llvm/IR/BasicBlock.h"
56#include "llvm/IR/Constant.h"
57#include "llvm/IR/ConstantRangeList.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/DataLayout.h"
60#include "llvm/IR/DebugInfo.h"
61#include "llvm/IR/Dominators.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/IRBuilder.h"
64#include "llvm/IR/InstIterator.h"
65#include "llvm/IR/InstrTypes.h"
66#include "llvm/IR/Instruction.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/IntrinsicInst.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/PassManager.h"
71#include "llvm/IR/PatternMatch.h"
72#include "llvm/IR/Value.h"
73#include "llvm/InitializePasses.h"
74#include "llvm/Support/Casting.h"
75#include "llvm/Support/CommandLine.h"
76#include "llvm/Support/Debug.h"
77#include "llvm/Support/DebugCounter.h"
78#include "llvm/Support/ErrorHandling.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Transforms/Scalar.h"
81#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82#include "llvm/Transforms/Utils/BuildLibCalls.h"
83#include "llvm/Transforms/Utils/Local.h"
84#include <algorithm>
85#include <cassert>
86#include <cstdint>
87#include <map>
88#include <optional>
89#include <utility>
90
91using namespace llvm;
92using namespace PatternMatch;
93
94#define DEBUG_TYPE "dse"
95
96STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
97STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
98STATISTIC(NumFastStores, "Number of stores deleted");
99STATISTIC(NumFastOther, "Number of other instrs removed");
100STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
101STATISTIC(NumModifiedStores, "Number of stores modified");
STATISTIC(NumCFGChecks, "Number of blocks checked during CFG-based checks");
STATISTIC(NumCFGTries, "Number of CFG-based checks attempted");
STATISTIC(NumCFGSuccess, "Number of successful CFG-based checks");
STATISTIC(NumGetDomMemoryDefPassed,
          "Number of times a valid candidate is returned from getDomMemoryDef");
STATISTIC(NumDomMemDefChecks,
          "Number of iterations checking for reads in getDomMemoryDef");
109
110DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
111 "Controls which MemoryDefs are eliminated.");
112
113static cl::opt<bool>
114EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
115 cl::init(Val: true), cl::Hidden,
116 cl::desc("Enable partial-overwrite tracking in DSE"));
117
118static cl::opt<bool>
119EnablePartialStoreMerging("enable-dse-partial-store-merging",
120 cl::init(Val: true), cl::Hidden,
121 cl::desc("Enable partial store merging in DSE"));
122
123static cl::opt<unsigned>
124 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(Val: 150), cl::Hidden,
125 cl::desc("The number of memory instructions to scan for "
126 "dead store elimination (default = 150)"));
127static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
128 "dse-memoryssa-walklimit", cl::init(Val: 90), cl::Hidden,
129 cl::desc("The maximum number of steps while walking upwards to find "
130 "MemoryDefs that may be killed (default = 90)"));
131
static cl::opt<unsigned> MemorySSAPartialStoreLimit(
    "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
    cl::desc("The maximum number of candidates that only partially overwrite "
             "the killing MemoryDef to consider (default = 5)"));
137
static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
    "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
    cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
             "other stores per basic block (default = 5000)"));
142
static cl::opt<unsigned> MemorySSASameBBStepCost(
    "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
    cl::desc(
        "The cost of a step in the same basic block as the killing MemoryDef "
        "(default = 1)"));

static cl::opt<unsigned>
    MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
                             cl::Hidden,
                             cl::desc("The cost of a step in a different basic "
                                      "block than the killing MemoryDef "
                                      "(default = 5)"));
155
156static cl::opt<unsigned> MemorySSAPathCheckLimit(
157 "dse-memoryssa-path-check-limit", cl::init(Val: 50), cl::Hidden,
158 cl::desc("The maximum number of blocks to check when trying to prove that "
159 "all paths to an exit go through a killing block (default = 50)"));
160
// This flag allows or disallows DSE to optimize MemorySSA during its
162// traversal. Note that DSE optimizing MemorySSA may impact other passes
163// downstream of the DSE invocation and can lead to issues not being
164// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
165// those cases, the flag can be used to check if DSE's MemorySSA optimizations
166// impact follow-up passes.
167static cl::opt<bool>
168 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(Val: true), cl::Hidden,
169 cl::desc("Allow DSE to optimize memory accesses."));
170
171// TODO: remove this flag.
172static cl::opt<bool> EnableInitializesImprovement(
173 "enable-dse-initializes-attr-improvement", cl::init(Val: true), cl::Hidden,
174 cl::desc("Enable the initializes attr improvement in DSE"));
175
176//===----------------------------------------------------------------------===//
177// Helper functions
178//===----------------------------------------------------------------------===//
179using OverlapIntervalsTy = std::map<int64_t, int64_t>;
180using InstOverlapIntervalsTy = MapVector<Instruction *, OverlapIntervalsTy>;
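// Note: OverlapIntervalsTy maps the (half-open) end offset of an overwritten
// interval to its start offset; see isPartialOverwrite below for how these
// intervals are merged per dead instruction.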
181
182/// Returns true if the end of this instruction can be safely shortened in
183/// length.
184static bool isShortenableAtTheEnd(Instruction *I) {
185 // Don't shorten stores for now
186 if (isa<StoreInst>(Val: I))
187 return false;
188
189 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
190 switch (II->getIntrinsicID()) {
191 default: return false;
192 case Intrinsic::memset:
193 case Intrinsic::memcpy:
194 case Intrinsic::memcpy_element_unordered_atomic:
195 case Intrinsic::memset_element_unordered_atomic:
196 // Do shorten memory intrinsics.
197 // FIXME: Add memmove if it's also safe to transform.
198 return true;
199 }
200 }
201
  // Don't shorten libcalls for now.
203
204 return false;
205}
206
207/// Returns true if the beginning of this instruction can be safely shortened
208/// in length.
209static bool isShortenableAtTheBeginning(Instruction *I) {
210 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
211 // easily done by offsetting the source address.
212 return isa<AnyMemSetInst>(Val: I);
213}
214
215static std::optional<TypeSize> getPointerSize(const Value *V,
216 const DataLayout &DL,
217 const TargetLibraryInfo &TLI,
218 const Function *F) {
219 uint64_t Size;
220 ObjectSizeOpts Opts;
221 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
222
223 if (getObjectSize(Ptr: V, Size, DL, TLI: &TLI, Opts))
224 return TypeSize::getFixed(ExactSize: Size);
225 return std::nullopt;
226}
227
228namespace {
229
230enum OverwriteResult {
231 OW_Begin,
232 OW_Complete,
233 OW_End,
234 OW_PartialEarlierWithFullLater,
235 OW_MaybePartial,
236 OW_None,
237 OW_Unknown
238};
239
240} // end anonymous namespace
241
/// Check if two instructions are masked stores that completely
243/// overwrite one another. More specifically, \p KillingI has to
244/// overwrite \p DeadI.
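/// For example, two calls to llvm.masked.store with the same pointer, the same
/// vector type and the same mask completely overwrite one another; note that
/// the masks are currently required to be identical rather than the killing
/// mask merely being a superset of the dead one (see the TODOs below).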
245static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
246 const Instruction *DeadI,
247 BatchAAResults &AA) {
248 const auto *KillingII = dyn_cast<IntrinsicInst>(Val: KillingI);
249 const auto *DeadII = dyn_cast<IntrinsicInst>(Val: DeadI);
250 if (KillingII == nullptr || DeadII == nullptr)
251 return OW_Unknown;
252 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
253 return OW_Unknown;
254
255 switch (KillingII->getIntrinsicID()) {
256 case Intrinsic::masked_store:
257 case Intrinsic::vp_store: {
258 const DataLayout &DL = KillingII->getDataLayout();
259 auto *KillingTy = KillingII->getArgOperand(i: 0)->getType();
260 auto *DeadTy = DeadII->getArgOperand(i: 0)->getType();
261 if (DL.getTypeSizeInBits(Ty: KillingTy) != DL.getTypeSizeInBits(Ty: DeadTy))
262 return OW_Unknown;
263 // Element count.
264 if (cast<VectorType>(Val: KillingTy)->getElementCount() !=
265 cast<VectorType>(Val: DeadTy)->getElementCount())
266 return OW_Unknown;
267 // Pointers.
268 Value *KillingPtr = KillingII->getArgOperand(i: 1);
269 Value *DeadPtr = DeadII->getArgOperand(i: 1);
270 if (KillingPtr != DeadPtr && !AA.isMustAlias(V1: KillingPtr, V2: DeadPtr))
271 return OW_Unknown;
272 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
273 // Masks.
274 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
275 if (KillingII->getArgOperand(i: 2) != DeadII->getArgOperand(i: 2))
276 return OW_Unknown;
277 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
278 // Masks.
279 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
280 if (KillingII->getArgOperand(i: 2) != DeadII->getArgOperand(i: 2))
281 return OW_Unknown;
282 // Lengths.
283 if (KillingII->getArgOperand(i: 3) != DeadII->getArgOperand(i: 3))
284 return OW_Unknown;
285 }
286 return OW_Complete;
287 }
288 default:
289 return OW_Unknown;
290 }
291}
292
293/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
294/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
295/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
296/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
297/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
298/// overwritten by a killing (smaller) store which doesn't write outside the big
299/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
300/// NOTE: This function must only be called if both \p KillingLoc and \p
301/// DeadLoc belong to the same underlying object with valid \p KillingOff and
302/// \p DeadOff.
303static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
304 const MemoryLocation &DeadLoc,
305 int64_t KillingOff, int64_t DeadOff,
306 Instruction *DeadI,
307 InstOverlapIntervalsTy &IOL) {
308 const uint64_t KillingSize = KillingLoc.Size.getValue();
309 const uint64_t DeadSize = DeadLoc.Size.getValue();
310 // We may now overlap, although the overlap is not complete. There might also
311 // be other incomplete overlaps, and together, they might cover the complete
312 // dead store.
  // Note: The correctness of this logic depends on the fact that this function
  // is never called for a given dead store when there are any intervening
  // reads.
315 if (EnablePartialOverwriteTracking &&
316 KillingOff < int64_t(DeadOff + DeadSize) &&
317 int64_t(KillingOff + KillingSize) >= DeadOff) {
318
319 // Insert our part of the overlap into the map.
320 auto &IM = IOL[DeadI];
321 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
322 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
323 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
324 << ")\n");
325
326 // Make sure that we only insert non-overlapping intervals and combine
327 // adjacent intervals. The intervals are stored in the map with the ending
328 // offset as the key (in the half-open sense) and the starting offset as
329 // the value.
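    // For example, with a dead store covering [10, 20): inserting the killing
    // interval [10, 14) creates IM[14] = 10; inserting a later killing
    // interval [14, 20) merges it into a single entry IM[20] = 10, which
    // covers the whole dead store and is reported as OW_Complete below.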
330 int64_t KillingIntStart = KillingOff;
331 int64_t KillingIntEnd = KillingOff + KillingSize;
332
333 // Find any intervals ending at, or after, KillingIntStart which start
334 // before KillingIntEnd.
335 auto ILI = IM.lower_bound(x: KillingIntStart);
336 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
337 // This existing interval is overlapped with the current store somewhere
338 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
339 // intervals and adjusting our start and end.
340 KillingIntStart = std::min(a: KillingIntStart, b: ILI->second);
341 KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first);
342 ILI = IM.erase(position: ILI);
343
344 // Continue erasing and adjusting our end in case other previous
345 // intervals are also overlapped with the current store.
346 //
347 // |--- dead 1 ---| |--- dead 2 ---|
348 // |------- killing---------|
349 //
350 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
351 assert(ILI->second > KillingIntStart && "Unexpected interval");
352 KillingIntEnd = std::max(a: KillingIntEnd, b: ILI->first);
353 ILI = IM.erase(position: ILI);
354 }
355 }
356
357 IM[KillingIntEnd] = KillingIntStart;
358
359 ILI = IM.begin();
360 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
361 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
362 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
363 << ") Composite KillingLoc [" << ILI->second << ", "
364 << ILI->first << ")\n");
365 ++NumCompletePartials;
366 return OW_Complete;
367 }
368 }
369
370 // Check for a dead store which writes to all the memory locations that
371 // the killing store writes to.
372 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
373 int64_t(DeadOff + DeadSize) > KillingOff &&
374 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
                      << ", " << int64_t(DeadOff + DeadSize)
                      << ") by a killing store [" << KillingOff << ", "
                      << int64_t(KillingOff + KillingSize) << ")\n");
379 // TODO: Maybe come up with a better name?
380 return OW_PartialEarlierWithFullLater;
381 }
382
383 // Another interesting case is if the killing store overwrites the end of the
384 // dead store.
385 //
386 // |--dead--|
387 // |-- killing --|
388 //
  // In this case we may want to trim the size of the dead store to avoid
  // generating stores to addresses which will definitely be overwritten by the
  // killing store.
392 if (!EnablePartialOverwriteTracking &&
393 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
394 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
395 return OW_End;
396
397 // Finally, we also need to check if the killing store overwrites the
398 // beginning of the dead store.
399 //
400 // |--dead--|
401 // |-- killing --|
402 //
  // In this case we may want to move the destination address and trim the size
  // of the dead store to avoid generating stores to addresses which will
  // definitely be overwritten by the killing store.
406 if (!EnablePartialOverwriteTracking &&
407 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
408 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
409 "Expect to be handled as OW_Complete");
410 return OW_Begin;
411 }
412 // Otherwise, they don't completely overlap.
413 return OW_Unknown;
414}
415
416/// Returns true if the memory which is accessed by the second instruction is not
417/// modified between the first and the second instruction.
418/// Precondition: Second instruction must be dominated by the first
419/// instruction.
420static bool
421memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
422 BatchAAResults &AA, const DataLayout &DL,
423 DominatorTree *DT) {
424 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
425 // instructions which can modify the memory location accessed by SecondI.
426 //
427 // While doing the walk keep track of the address to check. It might be
428 // different in different basic blocks due to PHI translation.
429 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
430 SmallVector<BlockAddressPair, 16> WorkList;
431 // Keep track of the address we visited each block with. Bail out if we
432 // visit a block with different addresses.
433 DenseMap<BasicBlock *, Value *> Visited;
434
435 BasicBlock::iterator FirstBBI(FirstI);
436 ++FirstBBI;
437 BasicBlock::iterator SecondBBI(SecondI);
438 BasicBlock *FirstBB = FirstI->getParent();
439 BasicBlock *SecondBB = SecondI->getParent();
440 MemoryLocation MemLoc;
441 if (auto *MemSet = dyn_cast<MemSetInst>(Val: SecondI))
442 MemLoc = MemoryLocation::getForDest(MI: MemSet);
443 else
444 MemLoc = MemoryLocation::get(Inst: SecondI);
445
446 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
447
448 // Start checking the SecondBB.
449 WorkList.push_back(
450 Elt: std::make_pair(x&: SecondBB, y: PHITransAddr(MemLocPtr, DL, nullptr)));
451 bool isFirstBlock = true;
452
453 // Check all blocks going backward until we reach the FirstBB.
454 while (!WorkList.empty()) {
455 BlockAddressPair Current = WorkList.pop_back_val();
456 BasicBlock *B = Current.first;
457 PHITransAddr &Addr = Current.second;
458 Value *Ptr = Addr.getAddr();
459
460 // Ignore instructions before FirstI if this is the FirstBB.
461 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
462
463 BasicBlock::iterator EI;
464 if (isFirstBlock) {
465 // Ignore instructions after SecondI if this is the first visit of SecondBB.
466 assert(B == SecondBB && "first block is not the store block");
467 EI = SecondBBI;
468 isFirstBlock = false;
469 } else {
470 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
471 // In this case we also have to look at instructions after SecondI.
472 EI = B->end();
473 }
474 for (; BI != EI; ++BI) {
475 Instruction *I = &*BI;
476 if (I->mayWriteToMemory() && I != SecondI)
477 if (isModSet(MRI: AA.getModRefInfo(I, OptLoc: MemLoc.getWithNewPtr(NewPtr: Ptr))))
478 return false;
479 }
480 if (B != FirstBB) {
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
             "Should not hit the entry block because SecondI must be dominated "
             "by FirstI");
483 for (BasicBlock *Pred : predecessors(BB: B)) {
484 PHITransAddr PredAddr = Addr;
485 if (PredAddr.needsPHITranslationFromBlock(BB: B)) {
486 if (!PredAddr.isPotentiallyPHITranslatable())
487 return false;
488 if (!PredAddr.translateValue(CurBB: B, PredBB: Pred, DT, MustDominate: false))
489 return false;
490 }
491 Value *TranslatedPtr = PredAddr.getAddr();
492 auto Inserted = Visited.insert(KV: std::make_pair(x&: Pred, y&: TranslatedPtr));
493 if (!Inserted.second) {
494 // We already visited this block before. If it was with a different
495 // address - bail out!
496 if (TranslatedPtr != Inserted.first->second)
497 return false;
498 // ... otherwise just skip it.
499 continue;
500 }
501 WorkList.push_back(Elt: std::make_pair(x&: Pred, y&: PredAddr));
502 }
503 }
504 }
505 return true;
506}
507
508static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
509 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
510 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
511 const DataLayout &DL = Inst->getDataLayout();
512 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
513 uint64_t DeadSliceOffsetInBits =
514 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
515 auto SetDeadFragExpr = [](auto *Assign,
516 DIExpression::FragmentInfo DeadFragment) {
517 // createFragmentExpression expects an offset relative to the existing
518 // fragment offset if there is one.
519 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
520 Assign->getExpression()
521 ->getFragmentInfo()
522 .value_or(DIExpression::FragmentInfo(0, 0))
523 .OffsetInBits;
524 if (auto NewExpr = DIExpression::createFragmentExpression(
525 Expr: Assign->getExpression(), OffsetInBits: RelativeOffset, SizeInBits: DeadFragment.SizeInBits)) {
526 Assign->setExpression(*NewExpr);
527 return;
528 }
529 // Failed to create a fragment expression for this so discard the value,
530 // making this a kill location.
531 auto *Expr = *DIExpression::createFragmentExpression(
532 Expr: DIExpression::get(Context&: Assign->getContext(), Elements: {}), OffsetInBits: DeadFragment.OffsetInBits,
533 SizeInBits: DeadFragment.SizeInBits);
534 Assign->setExpression(Expr);
535 Assign->setKillLocation();
536 };
537
538 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
539 // link to any instructions. Created in the loop below (once).
540 DIAssignID *LinkToNothing = nullptr;
541 LLVMContext &Ctx = Inst->getContext();
542 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
543 if (!LinkToNothing)
544 LinkToNothing = DIAssignID::getDistinct(Context&: Ctx);
545 return LinkToNothing;
546 };
547
548 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
549 // overlapping dbg.assign intrinsic.
550 for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
551 std::optional<DIExpression::FragmentInfo> NewFragment;
552 if (!at::calculateFragmentIntersect(DL, Dest: OriginalDest, SliceOffsetInBits: DeadSliceOffsetInBits,
553 SliceSizeInBits: DeadSliceSizeInBits, DVRAssign: Assign,
554 Result&: NewFragment) ||
555 !NewFragment) {
556 // We couldn't calculate the intersecting fragment for some reason. Be
557 // cautious and unlink the whole assignment from the store.
558 Assign->setKillAddress();
559 Assign->setAssignId(GetDeadLink());
560 continue;
561 }
562 // No intersect.
563 if (NewFragment->SizeInBits == 0)
564 continue;
565
566 // Fragments overlap: insert a new dbg.assign for this dead part.
567 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
568 NewAssign->insertAfter(InsertAfter: Assign->getIterator());
569 NewAssign->setAssignId(GetDeadLink());
570 if (NewFragment)
571 SetDeadFragExpr(NewAssign, *NewFragment);
572 NewAssign->setKillAddress();
573 }
574}
575
576/// Update the attributes given that a memory access is updated (the
577/// dereferenced pointer could be moved forward when shortening a
578/// mem intrinsic).
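/// For example, if the destination argument was "align 16 nonnull noundef
/// dereferenceable(32)" and the pointer is advanced by 8 bytes, only nonnull
/// and noundef are kept: an offset of 8 does not satisfy the 16-byte
/// alignment, and the dereferenceable bytes are conservatively dropped.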
579static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
580 uint64_t PtrOffset) {
581 // Remember old attributes.
582 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
583
584 // Find attributes that should be kept, and remove the rest.
585 AttributeMask AttrsToRemove;
586 for (auto &Attr : OldAttrs) {
587 if (Attr.hasKindAsEnum()) {
588 switch (Attr.getKindAsEnum()) {
589 default:
590 break;
591 case Attribute::Alignment:
        // Only keep alignment if PtrOffset satisfies the alignment.
593 if (isAligned(Lhs: Attr.getAlignment().valueOrOne(), SizeInBytes: PtrOffset))
594 continue;
595 break;
596 case Attribute::Dereferenceable:
597 case Attribute::DereferenceableOrNull:
598 // We could reduce the size of these attributes according to
599 // PtrOffset. But we simply drop these for now.
600 break;
601 case Attribute::NonNull:
602 case Attribute::NoUndef:
603 continue;
604 }
605 }
606 AttrsToRemove.addAttribute(A: Attr);
607 }
608
609 // Remove the attributes that should be dropped.
610 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
611}
612
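// Illustrative sketch of the shortening performed below (not from a test):
// if a memset covering [0, 32) of %p is later completely overwritten on
// [0, 16) by another store, and the destination alignment is 8, the memset is
// rewritten to write only the remaining 16 bytes starting at %p + 16, i.e.
// "memset(%p, 0, 32)" effectively becomes "memset(%p + 16, 0, 16)".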
613static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
614 uint64_t &DeadSize, int64_t KillingStart,
615 uint64_t KillingSize, bool IsOverwriteEnd) {
616 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(Val: DeadI);
617 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
618
  // We assume that memset/memcpy operates in chunks of the "largest" native
  // type size, aligned on the same value. That means the optimal start and
  // size of a memset/memcpy should be a multiple of the preferred alignment of
  // that type, i.e. there is no point in trying to reduce the store size any
  // further since any "extra" stores come for free anyway.
  // On the other hand, the maximum alignment we can achieve is limited by the
  // alignment of the initial store.
626
627 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
628 // "largest" native type.
629 // Note: What is the proper way to get that value?
630 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
631 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
632
633 int64_t ToRemoveStart = 0;
634 uint64_t ToRemoveSize = 0;
635 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
636 // maintained on the remaining store.
637 if (IsOverwriteEnd) {
    // Calculate the required adjustment for 'KillingStart' in order to keep
    // the remaining store size aligned on 'PrefAlign'.
640 uint64_t Off =
641 offsetToAlignment(Value: uint64_t(KillingStart - DeadStart), Alignment: PrefAlign);
642 ToRemoveStart = KillingStart + Off;
643 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
644 return false;
645 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
646 } else {
647 ToRemoveStart = DeadStart;
648 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
649 "Not overlapping accesses?");
650 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
    // Calculate the required adjustment for 'ToRemoveSize' in order to keep
    // the start of the remaining store aligned on 'PrefAlign'.
653 uint64_t Off = offsetToAlignment(Value: ToRemoveSize, Alignment: PrefAlign);
654 if (Off != 0) {
655 if (ToRemoveSize <= (PrefAlign.value() - Off))
656 return false;
657 ToRemoveSize -= PrefAlign.value() - Off;
658 }
659 assert(isAligned(PrefAlign, ToRemoveSize) &&
660 "Should preserve selected alignment");
661 }
662
663 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
664 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
665
666 uint64_t NewSize = DeadSize - ToRemoveSize;
667 if (DeadIntrinsic->isAtomic()) {
668 // When shortening an atomic memory intrinsic, the newly shortened
669 // length must remain an integer multiple of the element size.
670 const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
671 if (0 != NewSize % ElementSize)
672 return false;
673 }
674
675 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
676 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
677 << "\n KILLER [" << ToRemoveStart << ", "
678 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
679
680 DeadIntrinsic->setLength(NewSize);
681 DeadIntrinsic->setDestAlignment(PrefAlign);
682
683 Value *OrigDest = DeadIntrinsic->getRawDest();
684 if (!IsOverwriteEnd) {
685 Value *Indices[1] = {
686 ConstantInt::get(Ty: DeadIntrinsic->getLength()->getType(), V: ToRemoveSize)};
687 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
688 PointeeType: Type::getInt8Ty(C&: DeadIntrinsic->getContext()), Ptr: OrigDest, IdxList: Indices, NameStr: "",
689 InsertBefore: DeadI->getIterator());
690 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
691 DeadIntrinsic->setDest(NewDestGEP);
692 adjustArgAttributes(Intrinsic: DeadIntrinsic, ArgNo: 0, PtrOffset: ToRemoveSize);
693 }
694
695 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
696 shortenAssignment(Inst: DeadI, OriginalDest: OrigDest, OldOffsetInBits: DeadStart * 8, OldSizeInBits: DeadSize * 8, NewSizeInBits: NewSize * 8,
697 IsOverwriteEnd);
698
699 // Finally update start and size of dead access.
700 if (!IsOverwriteEnd)
701 DeadStart += ToRemoveSize;
702 DeadSize = NewSize;
703
704 return true;
705}
706
707static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
708 int64_t &DeadStart, uint64_t &DeadSize) {
709 if (IntervalMap.empty() || !isShortenableAtTheEnd(I: DeadI))
710 return false;
711
712 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
713 int64_t KillingStart = OII->second;
714 uint64_t KillingSize = OII->first - KillingStart;
715
716 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
717
718 if (KillingStart > DeadStart &&
719 // Note: "KillingStart - KillingStart" is known to be positive due to
720 // preceding check.
721 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
722 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
723 // be non negative due to preceding checks.
724 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
725 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
726 IsOverwriteEnd: true)) {
727 IntervalMap.erase(position: OII);
728 return true;
729 }
730 }
731 return false;
732}
733
734static bool tryToShortenBegin(Instruction *DeadI,
735 OverlapIntervalsTy &IntervalMap,
736 int64_t &DeadStart, uint64_t &DeadSize) {
737 if (IntervalMap.empty() || !isShortenableAtTheBeginning(I: DeadI))
738 return false;
739
740 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
741 int64_t KillingStart = OII->second;
742 uint64_t KillingSize = OII->first - KillingStart;
743
744 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
745
746 if (KillingStart <= DeadStart &&
747 // Note: "DeadStart - KillingStart" is known to be non negative due to
748 // preceding check.
749 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
750 // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
751 // be positive due to preceding checks.
752 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
753 "Should have been handled as OW_Complete");
754 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
755 IsOverwriteEnd: false)) {
756 IntervalMap.erase(position: OII);
757 return true;
758 }
759 }
760 return false;
761}
762
763static Constant *
764tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
765 int64_t KillingOffset, int64_t DeadOffset,
766 const DataLayout &DL, BatchAAResults &AA,
767 DominatorTree *DT) {
768
769 if (DeadI && isa<ConstantInt>(Val: DeadI->getValueOperand()) &&
770 DL.typeSizeEqualsStoreSize(Ty: DeadI->getValueOperand()->getType()) &&
771 KillingI && isa<ConstantInt>(Val: KillingI->getValueOperand()) &&
772 DL.typeSizeEqualsStoreSize(Ty: KillingI->getValueOperand()->getType()) &&
773 memoryIsNotModifiedBetween(FirstI: DeadI, SecondI: KillingI, AA, DL, DT)) {
774 // If the store we find is:
775 // a) partially overwritten by the store to 'Loc'
776 // b) the killing store is fully contained in the dead one and
777 // c) they both have a constant value
778 // d) none of the two stores need padding
779 // Merge the two stores, replacing the dead store's value with a
780 // merge of both values.
781 // TODO: Deal with other constant types (vectors, etc), and probably
782 // some mem intrinsics (if needed)
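    // As an illustrative example (little-endian): merging a dead
    // "store i32 0xAABBCCDD, ptr %p" with a killing "store i8 0x11, ptr %q",
    // where %q is %p + 2, gives BitOffsetDiff = 16, so the merged value is
    // (0xAABBCCDD & ~0x00FF0000) | (0x11 << 16) = 0xAA11CCDD.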
783
784 APInt DeadValue = cast<ConstantInt>(Val: DeadI->getValueOperand())->getValue();
785 APInt KillingValue =
786 cast<ConstantInt>(Val: KillingI->getValueOperand())->getValue();
787 unsigned KillingBits = KillingValue.getBitWidth();
788 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
789 KillingValue = KillingValue.zext(width: DeadValue.getBitWidth());
790
791 // Offset of the smaller store inside the larger store
792 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
793 unsigned LShiftAmount =
794 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
795 : BitOffsetDiff;
796 APInt Mask = APInt::getBitsSet(numBits: DeadValue.getBitWidth(), loBit: LShiftAmount,
797 hiBit: LShiftAmount + KillingBits);
798 // Clear the bits we'll be replacing, then OR with the smaller
799 // store, shifted appropriately.
800 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
801 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
802 << "\n Killing: " << *KillingI
803 << "\n Merged Value: " << Merged << '\n');
804 return ConstantInt::get(Ty: DeadI->getValueOperand()->getType(), V: Merged);
805 }
806 return nullptr;
807}
808
809// Returns true if \p I is an intrinsic that does not read or write memory.
810static bool isNoopIntrinsic(Instruction *I) {
811 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
812 switch (II->getIntrinsicID()) {
813 case Intrinsic::lifetime_start:
814 case Intrinsic::lifetime_end:
815 case Intrinsic::invariant_end:
816 case Intrinsic::launder_invariant_group:
817 case Intrinsic::assume:
818 return true;
819 case Intrinsic::dbg_declare:
820 case Intrinsic::dbg_label:
821 case Intrinsic::dbg_value:
822 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
823 default:
824 return false;
825 }
826 }
827 return false;
828}
829
830// Check if we can ignore \p D for DSE.
831static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
832 Instruction *DI = D->getMemoryInst();
833 // Calls that only access inaccessible memory cannot read or write any memory
834 // locations we consider for elimination.
835 if (auto *CB = dyn_cast<CallBase>(Val: DI))
836 if (CB->onlyAccessesInaccessibleMemory())
837 return true;
838
839 // We can eliminate stores to locations not visible to the caller across
840 // throwing instructions.
841 if (DI->mayThrow() && !DefVisibleToCaller)
842 return true;
843
  // We can remove the dead stores, irrespective of the fence and its ordering
  // (release/acquire/seq_cst). Fences only constrain the ordering of
  // already visible stores; they do not make a store visible to other
  // threads. So, skipping over a fence does not change a store from being
  // dead.
849 if (isa<FenceInst>(Val: DI))
850 return true;
851
852 // Skip intrinsics that do not really read or modify memory.
853 if (isNoopIntrinsic(I: DI))
854 return true;
855
856 return false;
857}
858
859namespace {
860
861// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
862// defined by `MemDef`.
863struct MemoryLocationWrapper {
864 MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
865 bool DefByInitializesAttr)
866 : MemLoc(MemLoc), MemDef(MemDef),
867 DefByInitializesAttr(DefByInitializesAttr) {
868 assert(MemLoc.Ptr && "MemLoc should be not null");
869 UnderlyingObject = getUnderlyingObject(V: MemLoc.Ptr);
870 DefInst = MemDef->getMemoryInst();
871 }
872
873 MemoryLocation MemLoc;
874 const Value *UnderlyingObject;
875 MemoryDef *MemDef;
876 Instruction *DefInst;
877 bool DefByInitializesAttr = false;
878};
879
880// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
881// defined by this MemoryDef.
882struct MemoryDefWrapper {
883 MemoryDefWrapper(MemoryDef *MemDef,
884 ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
885 DefInst = MemDef->getMemoryInst();
886 for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
887 DefinedLocations.push_back(
888 Elt: MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
889 }
890 Instruction *DefInst;
891 SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
892};
893
894struct ArgumentInitInfo {
895 unsigned Idx;
896 bool IsDeadOrInvisibleOnUnwind;
897 ConstantRangeList Inits;
898};
899} // namespace
900
901static bool hasInitializesAttr(Instruction *I) {
902 CallBase *CB = dyn_cast<CallBase>(Val: I);
903 return CB && CB->getArgOperandWithAttribute(Kind: Attribute::Initializes);
904}
905
906// Return the intersected range list of the initializes attributes of "Args".
907// "Args" are call arguments that alias to each other.
// If any argument in "Args" is not dead or invisible on unwind and
// "CallHasNoUnwindAttr" is false, return an empty list.
910static ConstantRangeList
911getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
912 bool CallHasNoUnwindAttr) {
913 if (Args.empty())
914 return {};
915
916 // To address unwind, the function should have nounwind attribute or the
917 // arguments have dead or invisible on unwind. Otherwise, return empty.
918 for (const auto &Arg : Args) {
919 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
920 return {};
921 if (Arg.Inits.empty())
922 return {};
923 }
924
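  // For example, for two aliasing arguments with initializes ranges [0, 8) and
  // [4, 16), the intersected range list is [4, 8).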
925 ConstantRangeList IntersectedIntervals = Args.front().Inits;
926 for (auto &Arg : Args.drop_front())
927 IntersectedIntervals = IntersectedIntervals.intersectWith(CRL: Arg.Inits);
928
929 return IntersectedIntervals;
930}
931
932namespace {
933
934struct DSEState {
935 Function &F;
936 AliasAnalysis &AA;
937 EarliestEscapeAnalysis EA;
938
939 /// The single BatchAA instance that is used to cache AA queries. It will
940 /// not be invalidated over the whole run. This is safe, because:
941 /// 1. Only memory writes are removed, so the alias cache for memory
942 /// locations remains valid.
943 /// 2. No new instructions are added (only instructions removed), so cached
944 /// information for a deleted value cannot be accessed by a re-used new
945 /// value pointer.
946 BatchAAResults BatchAA;
947
948 MemorySSA &MSSA;
949 DominatorTree &DT;
950 PostDominatorTree &PDT;
951 const TargetLibraryInfo &TLI;
952 const DataLayout &DL;
953 const LoopInfo &LI;
954
  // Whether the function contains any irreducible control flow, useful for
  // accurately detecting loops.
957 bool ContainsIrreducibleLoops;
958
959 // All MemoryDefs that potentially could kill other MemDefs.
960 SmallVector<MemoryDef *, 64> MemDefs;
961 // Any that should be skipped as they are already deleted
962 SmallPtrSet<MemoryAccess *, 4> SkipStores;
963 // Keep track whether a given object is captured before return or not.
964 DenseMap<const Value *, bool> CapturedBeforeReturn;
965 // Keep track of all of the objects that are invisible to the caller after
966 // the function returns.
967 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
968 DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
969 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
970 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
971 // Post-order numbers for each basic block. Used to figure out if memory
972 // accesses are executed before another access.
973 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
974
975 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
976 /// basic block.
977 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
978 // Check if there are root nodes that are terminated by UnreachableInst.
979 // Those roots pessimize post-dominance queries. If there are such roots,
980 // fall back to CFG scan starting from all non-unreachable roots.
981 bool AnyUnreachableExit;
982
983 // Whether or not we should iterate on removing dead stores at the end of the
984 // function due to removing a store causing a previously captured pointer to
985 // no longer be captured.
986 bool ShouldIterateEndOfFunctionDSE;
987
988 /// Dead instructions to be removed at the end of DSE.
989 SmallVector<Instruction *> ToRemove;
990
991 // Class contains self-reference, make sure it's not copied/moved.
992 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
993 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
994 const LoopInfo &LI);
995 DSEState(const DSEState &) = delete;
996 DSEState &operator=(const DSEState &) = delete;
997
998 LocationSize strengthenLocationSize(const Instruction *I,
999 LocationSize Size) const;
1000
1001 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1002 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1003 /// location (by \p DeadI instruction).
1004 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1005 /// \p DeadI, but they both write to the same underlying object. In that
1006 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
1008 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1009 OverwriteResult isOverwrite(const Instruction *KillingI,
1010 const Instruction *DeadI,
1011 const MemoryLocation &KillingLoc,
1012 const MemoryLocation &DeadLoc,
1013 int64_t &KillingOff, int64_t &DeadOff);
1014
1015 bool isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1016 const LocationSize StoreSize);
1017
1018 bool isInvisibleToCallerOnUnwind(const Value *V);
1019
1020 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const;
1021
1022 // Returns a list of <MemoryLocation, bool> pairs written by I.
1023 // The bool means whether the write is from Initializes attr.
1024 SmallVector<std::pair<MemoryLocation, bool>, 1>
1025 getLocForInst(Instruction *I, bool ConsiderInitializesAttr);
1026
1027 /// Assuming this instruction has a dead analyzable write, can we delete
1028 /// this instruction?
1029 bool isRemovable(Instruction *I);
1030
1031 /// Returns true if \p UseInst completely overwrites \p DefLoc
1032 /// (stored by \p DefInst).
1033 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1034 Instruction *UseInst);
1035
1036 /// Returns true if \p Def is not read before returning from the function.
1037 bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc);
1038
1039 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1040 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1041 /// indicating whether \p I is a free-like call.
1042 std::optional<std::pair<MemoryLocation, bool>>
1043 getLocForTerminator(Instruction *I) const;
1044
1045 /// Returns true if \p I is a memory terminator instruction like
1046 /// llvm.lifetime.end or free.
1047 bool isMemTerminatorInst(Instruction *I) const;
1048
1049 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1050 /// instruction \p AccessI.
1051 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1052 Instruction *MaybeTerm);
1053
1054 // Returns true if \p Use may read from \p DefLoc.
1055 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);
1056
1057 /// Returns true if a dependency between \p Current and \p KillingDef is
1058 /// guaranteed to be loop invariant for the loops that they are in. Either
1059 /// because they are known to be in the same block, in the same loop level or
1060 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1061 /// during execution of the containing function.
1062 bool isGuaranteedLoopIndependent(const Instruction *Current,
1063 const Instruction *KillingDef,
1064 const MemoryLocation &CurrentLoc);
1065
1066 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1067 /// loop. In particular, this guarantees that it only references a single
1068 /// MemoryLocation during execution of the containing function.
1069 bool isGuaranteedLoopInvariant(const Value *Ptr);
1070
1071 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1072 // with no read access between them or on any other path to a function exit
1073 // block if \p KillingLoc is not accessible after the function returns. If
1074 // there is no such MemoryDef, return std::nullopt. The returned value may not
1075 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1076 // encounter an aliasing MemoryUse (read).
1077 std::optional<MemoryAccess *>
1078 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1079 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1080 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1081 bool IsMemTerm, unsigned &PartialLimit,
1082 bool IsInitializesAttrMemLoc);
1083
1084 /// Delete dead memory defs and recursively add their operands to ToRemove if
1085 /// they became dead.
1086 void
1087 deleteDeadInstruction(Instruction *SI,
1088 SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr);
1089
1090 // Check for any extra throws between \p KillingI and \p DeadI that block
1091 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
  // MemoryDefs that may throw are handled during the walk from one def to the
  // next.
1094 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1095 const Value *KillingUndObj);
1096
1097 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1098 // instructions act as barriers:
1099 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1100 // object.
  // * Atomic stores stronger than monotonic.
1102 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI);
1103
1104 /// Eliminate writes to objects that are not visible in the caller and are not
1105 /// accessed before returning from the function.
1106 bool eliminateDeadWritesAtEndOfFunction();
1107
1108 /// If we have a zero initializing memset following a call to malloc,
1109 /// try folding it into a call to calloc.
1110 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO);
1111
1112 // Check if there is a dominating condition, that implies that the value
1113 // being stored in a ptr is already present in the ptr.
1114 bool dominatingConditionImpliesValue(MemoryDef *Def);
1115
1116 /// \returns true if \p Def is a no-op store, either because it
1117 /// directly stores back a loaded value or stores zero to a calloced object.
1118 bool storeIsNoop(MemoryDef *Def, const Value *DefUO);
1119
1120 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);
1121
1122 /// Eliminates writes to locations where the value that is being written
1123 /// is already stored at the same location.
1124 bool eliminateRedundantStoresOfExistingValues();
1125
1126 // Return the locations written by the initializes attribute.
1127 // Note that this function considers:
1128 // 1. Unwind edge: use "initializes" attribute only if the callee has
1129 // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
1130 // or the argument is invisible to caller on unwind. That is, we don't
1131 // perform incorrect DSE on unwind edges in the current function.
1132 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
1133 // the intersected range list of their "initializes" attributes.
1134 SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
1135
1136 // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
1137 // killed by `KillingLocWrapper.MemDef`. Return whether
1138 // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
1139 std::pair<bool, bool>
1140 eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
1141
  // Try to eliminate dead defs killed by `KillingDefWrapper` and return
  // whether any change was made.
1144 bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
1145};
1146
1147} // end anonymous namespace
1148
1149static void pushMemUses(MemoryAccess *Acc,
1150 SmallVectorImpl<MemoryAccess *> &WorkList,
1151 SmallPtrSetImpl<MemoryAccess *> &Visited) {
1152 for (Use &U : Acc->uses()) {
1153 auto *MA = cast<MemoryAccess>(Val: U.getUser());
1154 if (Visited.insert(Ptr: MA).second)
1155 WorkList.push_back(Elt: MA);
1156 }
1157}
1158
1159// Return true if "Arg" is function local and isn't captured before "CB".
1160static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
1161 EarliestEscapeAnalysis &EA) {
1162 const Value *UnderlyingObj = getUnderlyingObject(V: Arg);
1163 return isIdentifiedFunctionLocal(V: UnderlyingObj) &&
1164 capturesNothing(
1165 CC: EA.getCapturesBefore(Object: UnderlyingObj, I: CB, /*OrAt*/ true));
1166}
1167
1168DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1169 DominatorTree &DT, PostDominatorTree &PDT,
1170 const TargetLibraryInfo &TLI, const LoopInfo &LI)
1171 : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT), PDT(PDT),
1172 TLI(TLI), DL(F.getDataLayout()), LI(LI) {
1173 // Collect blocks with throwing instructions not modeled in MemorySSA and
1174 // alloc-like objects.
1175 unsigned PO = 0;
1176 for (BasicBlock *BB : post_order(G: &F)) {
1177 PostOrderNumbers[BB] = PO++;
1178 for (Instruction &I : *BB) {
1179 MemoryAccess *MA = MSSA.getMemoryAccess(I: &I);
1180 if (I.mayThrow() && !MA)
1181 ThrowingBlocks.insert(Ptr: I.getParent());
1182
1183 auto *MD = dyn_cast_or_null<MemoryDef>(Val: MA);
1184 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
1185 (getLocForWrite(I: &I) || isMemTerminatorInst(I: &I) ||
1186 (EnableInitializesImprovement && hasInitializesAttr(I: &I))))
1187 MemDefs.push_back(Elt: MD);
1188 }
1189 }
1190
  // Treat byval, inalloca, or dead-on-return arguments the same as allocas:
  // stores to them are dead at the end of the function.
1193 for (Argument &AI : F.args()) {
1194 if (AI.hasPassPointeeByValueCopyAttr()) {
1195 InvisibleToCallerAfterRet.insert(KV: {&AI, true});
1196 continue;
1197 }
1198
1199 if (!AI.getType()->isPointerTy())
1200 continue;
1201
1202 const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
1203 if (Info.coversAllReachableMemory())
1204 InvisibleToCallerAfterRet.insert(KV: {&AI, true});
1205 else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
1206 InvisibleToCallerAfterRetBounded.insert(KV: {&AI, DeadBytes});
1207 }
1208
1209 // Collect whether there is any irreducible control flow in the function.
1210 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, LI: &LI);
1211
1212 AnyUnreachableExit = any_of(Range: PDT.roots(), P: [](const BasicBlock *E) {
1213 return isa<UnreachableInst>(Val: E->getTerminator());
1214 });
1215}
1216
1217LocationSize DSEState::strengthenLocationSize(const Instruction *I,
1218 LocationSize Size) const {
1219 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1220 LibFunc F;
1221 if (TLI.getLibFunc(CB: *CB, F) && TLI.has(F) &&
1222 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
      // Use the precise location size specified by the 3rd argument
      // for determining whether KillingI overwrites DeadLoc if it is a
      // memset_chk instruction. memset_chk will either write the amount
      // specified by the 3rd argument or immediately abort and exit the
      // program.
1227 // NOTE: AA may determine NoAlias if it can prove that the access size
1228 // is larger than the allocation size due to that being UB. To avoid
1229 // returning potentially invalid NoAlias results by AA, limit the use of
1230 // the precise location size to isOverwrite.
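      // For example, "__memset_chk(p, 0, 16, n)" is treated as writing exactly
      // 16 bytes, regardless of the object-size argument "n".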
1231 if (const auto *Len = dyn_cast<ConstantInt>(Val: CB->getArgOperand(i: 2)))
1232 return LocationSize::precise(Value: Len->getZExtValue());
1233 }
1234 }
1235 return Size;
1236}
1237
1238OverwriteResult DSEState::isOverwrite(const Instruction *KillingI,
1239 const Instruction *DeadI,
1240 const MemoryLocation &KillingLoc,
1241 const MemoryLocation &DeadLoc,
1242 int64_t &KillingOff, int64_t &DeadOff) {
1243 // AliasAnalysis does not always account for loops. Limit overwrite checks
1244 // to dependencies for which we can guarantee they are independent of any
1245 // loops they are in.
1246 if (!isGuaranteedLoopIndependent(Current: DeadI, KillingDef: KillingI, CurrentLoc: DeadLoc))
1247 return OW_Unknown;
1248
1249 LocationSize KillingLocSize =
1250 strengthenLocationSize(I: KillingI, Size: KillingLoc.Size);
1251 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1252 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1253 const Value *DeadUndObj = getUnderlyingObject(V: DeadPtr);
1254 const Value *KillingUndObj = getUnderlyingObject(V: KillingPtr);
1255
1256 // Check whether the killing store overwrites the whole object, in which
1257 // case the size/offset of the dead store does not matter.
1258 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
1259 isIdentifiedObject(V: KillingUndObj)) {
1260 std::optional<TypeSize> KillingUndObjSize =
1261 getPointerSize(V: KillingUndObj, DL, TLI, F: &F);
1262 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
1263 return OW_Complete;
1264 }
1265
1266 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
1267 // get imprecise values here, though (except for unknown sizes).
1268 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
    // In case no constant size is known, try to compare the IR values for the
    // number of bytes written and check if they match.
1271 const auto *KillingMemI = dyn_cast<MemIntrinsic>(Val: KillingI);
1272 const auto *DeadMemI = dyn_cast<MemIntrinsic>(Val: DeadI);
1273 if (KillingMemI && DeadMemI) {
1274 const Value *KillingV = KillingMemI->getLength();
1275 const Value *DeadV = DeadMemI->getLength();
1276 if (KillingV == DeadV && BatchAA.isMustAlias(LocA: DeadLoc, LocB: KillingLoc))
1277 return OW_Complete;
1278 }
1279
1280 // Masked stores have imprecise locations, but we can reason about them
1281 // to some extent.
1282 return isMaskedStoreOverwrite(KillingI, DeadI, AA&: BatchAA);
1283 }
1284
1285 const TypeSize KillingSize = KillingLocSize.getValue();
1286 const TypeSize DeadSize = DeadLoc.Size.getValue();
1287 // Bail on doing Size comparison which depends on AA for now
1288 // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
1289 const bool AnyScalable = DeadSize.isScalable() || KillingLocSize.isScalable();
1290
1291 if (AnyScalable)
1292 return OW_Unknown;
1293 // Query the alias information
1294 AliasResult AAR = BatchAA.alias(LocA: KillingLoc, LocB: DeadLoc);
1295
1296 // If the start pointers are the same, we just have to compare sizes to see if
1297 // the killing store was larger than the dead store.
1298 if (AAR == AliasResult::MustAlias) {
1299 // Make sure that the KillingSize size is >= the DeadSize size.
1300 if (KillingSize >= DeadSize)
1301 return OW_Complete;
1302 }
1303
1304 // If we hit a partial alias we may have a full overwrite
1305 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1306 int32_t Off = AAR.getOffset();
1307 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1308 return OW_Complete;
1309 }
1310
1311 // If we can't resolve the same pointers to the same object, then we can't
1312 // analyze them at all.
1313 if (DeadUndObj != KillingUndObj) {
1314 // Non aliasing stores to different objects don't overlap. Note that
1315 // if the killing store is known to overwrite whole object (out of
1316 // bounds access overwrites whole object as well) then it is assumed to
1317 // completely overwrite any store to the same object even if they don't
1318 // actually alias (see next check).
1319 if (AAR == AliasResult::NoAlias)
1320 return OW_None;
1321 return OW_Unknown;
1322 }
1323
1324 // Okay, we have stores to two completely different pointers. Try to
1325 // decompose the pointer into a "base + constant_offset" form. If the base
1326 // pointers are equal, then we can reason about the two stores.
1327 DeadOff = 0;
1328 KillingOff = 0;
1329 const Value *DeadBasePtr =
1330 GetPointerBaseWithConstantOffset(Ptr: DeadPtr, Offset&: DeadOff, DL);
1331 const Value *KillingBasePtr =
1332 GetPointerBaseWithConstantOffset(Ptr: KillingPtr, Offset&: KillingOff, DL);
1333
1334 // If the base pointers still differ, we have two completely different
1335 // stores.
1336 if (DeadBasePtr != KillingBasePtr)
1337 return OW_Unknown;
1338
1339  // The killing access completely overlaps the dead store if and only if
1340  // both the start and the end of the dead one are "inside" the killing one:
1341 // |<->|--dead--|<->|
1342 // |-----killing------|
1343  // Accesses may overlap if and only if the start of one of them is "inside"
1344  // the other one:
1345 // |<->|--dead--|<-------->|
1346 // |-------killing--------|
1347 // OR
1348 // |-------dead-------|
1349 // |<->|---killing---|<----->|
1350 //
1351 // We have to be careful here as *Off is signed while *.Size is unsigned.
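  // Worked example with hypothetical values: with KillingOff = 0,
  // KillingSize = 16, DeadOff = 4 and DeadSize = 4, the dead bytes [4, 8) lie
  // entirely inside the killing bytes [0, 16), so the first branch below
  // returns OW_Complete. With DeadOff = 14 instead, the dead bytes [14, 18)
  // only partially overlap the killing range and the result is OW_MaybePartial.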
1352
1353 // Check if the dead access starts "not before" the killing one.
1354 if (DeadOff >= KillingOff) {
1355 // If the dead access ends "not after" the killing access then the
1356 // dead one is completely overwritten by the killing one.
1357 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1358 return OW_Complete;
1359 // If start of the dead access is "before" end of the killing access
1360 // then accesses overlap.
1361 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1362 return OW_MaybePartial;
1363 }
1364 // If start of the killing access is "before" end of the dead access then
1365 // accesses overlap.
1366 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1367 return OW_MaybePartial;
1368 }
1369
1370 // Can reach here only if accesses are known not to overlap.
1371 return OW_None;
1372}
1373
1374bool DSEState::isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1375 const LocationSize StoreSize) {
1376 if (isa<AllocaInst>(Val: V))
1377 return true;
1378
1379 auto IBounded = InvisibleToCallerAfterRetBounded.find(Val: V);
1380 if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
1381 int64_t ValueOffset;
1382 [[maybe_unused]] const Value *BaseValue =
1383 GetPointerBaseWithConstantOffset(Ptr, Offset&: ValueOffset, DL);
1384 // If we are not able to find a constant offset from the UO, we have to
1385 // pessimistically assume that the store writes to memory out of the
1386 // dead_on_return bounds.
1387 if (BaseValue != V)
1388 return false;
1389 // This store is only invisible after return if we are in bounds of the
1390 // range marked dead.
1391 if (StoreSize.hasValue() &&
1392 ValueOffset + StoreSize.getValue() <= IBounded->second &&
1393 ValueOffset >= 0)
1394 return true;
1395 }
1396 auto I = InvisibleToCallerAfterRet.insert(KV: {V, false});
1397 if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
1398 I.first->second = capturesNothing(CC: PointerMayBeCaptured(
1399 V, /*ReturnCaptures=*/true, Mask: CaptureComponents::Provenance));
1400 return I.first->second;
1401}
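// Illustrative sketch of the bounded case above, with hypothetical numbers: if
// the first 16 bytes of V are recorded as dead after return, a 4-byte store at
// constant offset 8 satisfies 8 >= 0 and 8 + 4 <= 16 and is treated as
// invisible after return, while a 4-byte store at offset 14 (reaching byte 18)
// is not.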
1402
1403bool DSEState::isInvisibleToCallerOnUnwind(const Value *V) {
1404 bool RequiresNoCaptureBeforeUnwind;
1405 if (!isNotVisibleOnUnwind(Object: V, RequiresNoCaptureBeforeUnwind))
1406 return false;
1407 if (!RequiresNoCaptureBeforeUnwind)
1408 return true;
1409
1410 auto I = CapturedBeforeReturn.insert(KV: {V, true});
1411 if (I.second)
1412    // NOTE: This could be made more precise by using PointerMayBeCapturedBefore
1413    // with the killing MemoryDef. But we refrain from doing so for now to
1414    // limit compile time, and in practice this does not change the number
1415    // of stores removed on a large test set.
1416 I.first->second = capturesAnything(CC: PointerMayBeCaptured(
1417 V, /*ReturnCaptures=*/false, Mask: CaptureComponents::Provenance));
1418 return !I.first->second;
1419}
1420
1421std::optional<MemoryLocation> DSEState::getLocForWrite(Instruction *I) const {
1422 if (!I->mayWriteToMemory())
1423 return std::nullopt;
1424
1425 if (auto *CB = dyn_cast<CallBase>(Val: I))
1426 return MemoryLocation::getForDest(CI: CB, TLI);
1427
1428 return MemoryLocation::getOrNone(Inst: I);
1429}
1430
1431SmallVector<std::pair<MemoryLocation, bool>, 1>
1432DSEState::getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1433 SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
1434 if (isMemTerminatorInst(I)) {
1435 if (auto Loc = getLocForTerminator(I))
1436 Locations.push_back(Elt: std::make_pair(x&: Loc->first, y: false));
1437 return Locations;
1438 }
1439
1440 if (auto Loc = getLocForWrite(I))
1441 Locations.push_back(Elt: std::make_pair(x&: *Loc, y: false));
1442
1443 if (ConsiderInitializesAttr) {
1444 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1445 Locations.push_back(Elt: std::make_pair(x&: MemLoc, y: true));
1446 }
1447 }
1448 return Locations;
1449}
1450
1451bool DSEState::isRemovable(Instruction *I) {
1452 assert(getLocForWrite(I) && "Must have analyzable write");
1453
1454 // Don't remove volatile/atomic stores.
1455 if (StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1456 return SI->isUnordered();
1457
1458 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1459 // Don't remove volatile memory intrinsics.
1460 if (auto *MI = dyn_cast<MemIntrinsic>(Val: CB))
1461 return !MI->isVolatile();
1462
1463 // Never remove dead lifetime intrinsics, e.g. because they are followed
1464 // by a free.
1465 if (CB->isLifetimeStartOrEnd())
1466 return false;
1467
1468 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1469 !CB->isTerminator();
1470 }
1471
1472 return false;
1473}
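// Illustrative, non-exhaustive examples: an unordered 'store i32 0, ptr %p' is
// removable; a volatile or release-ordered store is not; a non-volatile
// llvm.memset is removable; llvm.lifetime.start/end are never removed here.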
1474
1475bool DSEState::isCompleteOverwrite(const MemoryLocation &DefLoc,
1476 Instruction *DefInst, Instruction *UseInst) {
1477 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1478 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1479 // MemoryDef.
1480 if (!UseInst->mayWriteToMemory())
1481 return false;
1482
1483 if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1484 if (CB->onlyAccessesInaccessibleMemory())
1485 return false;
1486
1487 int64_t InstWriteOffset, DepWriteOffset;
1488 if (auto CC = getLocForWrite(I: UseInst))
1489 return isOverwrite(KillingI: UseInst, DeadI: DefInst, KillingLoc: *CC, DeadLoc: DefLoc, KillingOff&: InstWriteOffset,
1490 DeadOff&: DepWriteOffset) == OW_Complete;
1491 return false;
1492}
1493
1494bool DSEState::isWriteAtEndOfFunction(MemoryDef *Def,
1495 const MemoryLocation &DefLoc) {
1496 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1497 << *Def->getMemoryInst()
1498                    << ") is at the end of the function\n");
1499 SmallVector<MemoryAccess *, 4> WorkList;
1500 SmallPtrSet<MemoryAccess *, 8> Visited;
1501
1502 pushMemUses(Acc: Def, WorkList, Visited);
1503 for (unsigned I = 0; I < WorkList.size(); I++) {
1504 if (WorkList.size() >= MemorySSAScanLimit) {
1505 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1506 return false;
1507 }
1508
1509 MemoryAccess *UseAccess = WorkList[I];
1510 if (isa<MemoryPhi>(Val: UseAccess)) {
1511 // AliasAnalysis does not account for loops. Limit elimination to
1512 // candidates for which we can guarantee they always store to the same
1513 // memory location.
1514 if (!isGuaranteedLoopInvariant(Ptr: DefLoc.Ptr))
1515 return false;
1516
1517 pushMemUses(Acc: cast<MemoryPhi>(Val: UseAccess), WorkList, Visited);
1518 continue;
1519 }
1520    // TODO: Checking for aliasing is expensive. Consider reducing the number
1521    // of times this is called and/or caching the results.
1522 Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1523 if (isReadClobber(DefLoc, UseInst)) {
1524 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1525 return false;
1526 }
1527
1528 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess))
1529 pushMemUses(Acc: UseDef, WorkList, Visited);
1530 }
1531 return true;
1532}
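// Minimal hypothetical IR sketch of a write this check allows to be removed at
// the end of the function:
//   %a = alloca i32
//   store i32 1, ptr %a   ; no read of %a follows before...
//   ret void              ; ...the function returns
// Any MemoryUse of %a reachable from the def (a read clobber) makes the
// function return false.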
1533
1534std::optional<std::pair<MemoryLocation, bool>>
1535DSEState::getLocForTerminator(Instruction *I) const {
1536 if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1537 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1538 return {
1539 std::make_pair(x: MemoryLocation::getForArgument(Call: CB, ArgIdx: 0, TLI: &TLI), y: false)};
1540 if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI))
1541 return {std::make_pair(x: MemoryLocation::getAfter(Ptr: FreedOp), y: true)};
1542 }
1543
1544 return std::nullopt;
1545}
1546
1547bool DSEState::isMemTerminatorInst(Instruction *I) const {
1548 auto *CB = dyn_cast<CallBase>(Val: I);
1549 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1550 getFreedOperand(CB, TLI: &TLI) != nullptr);
1551}
1552
1553bool DSEState::isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1554 Instruction *MaybeTerm) {
1555 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1556 getLocForTerminator(I: MaybeTerm);
1557
1558 if (!MaybeTermLoc)
1559 return false;
1560
1561 // If the terminator is a free-like call, all accesses to the underlying
1562 // object can be considered terminated.
1563 if (getUnderlyingObject(V: Loc.Ptr) !=
1564 getUnderlyingObject(V: MaybeTermLoc->first.Ptr))
1565 return false;
1566
1567 auto TermLoc = MaybeTermLoc->first;
1568 if (MaybeTermLoc->second) {
1569 const Value *LocUO = getUnderlyingObject(V: Loc.Ptr);
1570 return BatchAA.isMustAlias(V1: TermLoc.Ptr, V2: LocUO);
1571 }
1572 int64_t InstWriteOffset = 0;
1573 int64_t DepWriteOffset = 0;
1574 return isOverwrite(KillingI: MaybeTerm, DeadI: AccessI, KillingLoc: TermLoc, DeadLoc: Loc, KillingOff&: InstWriteOffset,
1575 DeadOff&: DepWriteOffset) == OW_Complete;
1576}
1577
1578bool DSEState::isReadClobber(const MemoryLocation &DefLoc,
1579 Instruction *UseInst) {
1580 if (isNoopIntrinsic(I: UseInst))
1581 return false;
1582
1583  // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1584  // treated as read clobbers.
1585 if (auto SI = dyn_cast<StoreInst>(Val: UseInst))
1586 return isStrongerThan(AO: SI->getOrdering(), Other: AtomicOrdering::Monotonic);
1587
1588 if (!UseInst->mayReadFromMemory())
1589 return false;
1590
1591 if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1592 if (CB->onlyAccessesInaccessibleMemory())
1593 return false;
1594
1595 return isRefSet(MRI: BatchAA.getModRefInfo(I: UseInst, OptLoc: DefLoc));
1596}
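// Illustrative examples (hypothetical): a plain 'load i32, ptr %p' that may
// alias DefLoc is a read clobber; a monotonic or unordered store to it is not
// (stores do not read, and such orderings may be reordered); a call that only
// accesses inaccessible memory is not a read clobber either.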
1597
1598bool DSEState::isGuaranteedLoopIndependent(const Instruction *Current,
1599 const Instruction *KillingDef,
1600 const MemoryLocation &CurrentLoc) {
1601 // If the dependency is within the same block or loop level (being careful
1602 // of irreducible loops), we know that AA will return a valid result for the
1603 // memory dependency. (Both at the function level, outside of any loop,
1604 // would also be valid but we currently disable that to limit compile time).
1605 if (Current->getParent() == KillingDef->getParent())
1606 return true;
1607 const Loop *CurrentLI = LI.getLoopFor(BB: Current->getParent());
1608 if (!ContainsIrreducibleLoops && CurrentLI &&
1609 CurrentLI == LI.getLoopFor(BB: KillingDef->getParent()))
1610 return true;
1611 // Otherwise check the memory location is invariant to any loops.
1612 return isGuaranteedLoopInvariant(Ptr: CurrentLoc.Ptr);
1613}
1614
1615bool DSEState::isGuaranteedLoopInvariant(const Value *Ptr) {
1616 Ptr = Ptr->stripPointerCasts();
1617 if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr))
1618 if (GEP->hasAllConstantIndices())
1619 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1620
1621 if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
1622 return I->getParent()->isEntryBlock() ||
1623 (!ContainsIrreducibleLoops && !LI.getLoopFor(BB: I->getParent()));
1624 }
1625 return true;
1626}
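// Illustrative contrast (hypothetical IR): for
//   %p = getelementptr inbounds i32, ptr %base, i64 4
// with %base defined in the entry block, the pointer is treated as loop
// invariant. A GEP whose index is a loop-carried induction value is not,
// because it cannot be stripped and its parent block lies in a loop.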
1627
1628std::optional<MemoryAccess *> DSEState::getDomMemoryDef(
1629 MemoryDef *KillingDef, MemoryAccess *StartAccess,
1630 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1631 unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm,
1632 unsigned &PartialLimit, bool IsInitializesAttrMemLoc) {
1633 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1634 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1635 return std::nullopt;
1636 }
1637
1638 MemoryAccess *Current = StartAccess;
1639 Instruction *KillingI = KillingDef->getMemoryInst();
1640 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1641
1642 // Only optimize defining access of KillingDef when directly starting at its
1643 // defining access. The defining access also must only access KillingLoc. At
1644 // the moment we only support instructions with a single write location, so
1645 // it should be sufficient to disable optimizations for instructions that
1646 // also read from memory.
1647 bool CanOptimize = OptimizeMemorySSA &&
1648 KillingDef->getDefiningAccess() == StartAccess &&
1649 !KillingI->mayReadFromMemory();
1650
1651 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1652 std::optional<MemoryLocation> CurrentLoc;
1653 for (;; Current = cast<MemoryDef>(Val: Current)->getDefiningAccess()) {
1654 LLVM_DEBUG({
1655 dbgs() << " visiting " << *Current;
1656 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1657 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1658 << ")";
1659 dbgs() << "\n";
1660 });
1661
1662 // Reached TOP.
1663 if (MSSA.isLiveOnEntryDef(MA: Current)) {
1664 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1665 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1666 // The first clobbering def is... none.
1667 KillingDef->setOptimized(Current);
1668 return std::nullopt;
1669 }
1670
1671 // Cost of a step. Accesses in the same block are more likely to be valid
1672 // candidates for elimination, hence consider them cheaper.
1673 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1674 ? MemorySSASameBBStepCost
1675 : MemorySSAOtherBBStepCost;
1676 if (WalkerStepLimit <= StepCost) {
1677 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1678 return std::nullopt;
1679 }
1680 WalkerStepLimit -= StepCost;
1681
1682 // Return for MemoryPhis. They cannot be eliminated directly and the
1683 // caller is responsible for traversing them.
1684 if (isa<MemoryPhi>(Val: Current)) {
1685 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1686 return Current;
1687 }
1688
1689 // Below, check if CurrentDef is a valid candidate to be eliminated by
1690 // KillingDef. If it is not, check the next candidate.
1691 MemoryDef *CurrentDef = cast<MemoryDef>(Val: Current);
1692 Instruction *CurrentI = CurrentDef->getMemoryInst();
1693
1694 if (canSkipDef(D: CurrentDef, DefVisibleToCaller: !isInvisibleToCallerOnUnwind(V: KillingUndObj))) {
1695 CanOptimize = false;
1696 continue;
1697 }
1698
1699    // Before we try to remove anything, check for any extra throwing
1700    // instructions that block us from performing DSE.
1701 if (mayThrowBetween(KillingI, DeadI: CurrentI, KillingUndObj)) {
1702 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1703 return std::nullopt;
1704 }
1705
1706 // Check for anything that looks like it will be a barrier to further
1707 // removal
1708 if (isDSEBarrier(KillingUndObj, DeadI: CurrentI)) {
1709 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1710 return std::nullopt;
1711 }
1712
1713 // If Current is known to be on path that reads DefLoc or is a read
1714 // clobber, bail out, as the path is not profitable. We skip this check
1715 // for intrinsic calls, because the code knows how to handle memcpy
1716 // intrinsics.
1717 if (!isa<IntrinsicInst>(Val: CurrentI) && isReadClobber(DefLoc: KillingLoc, UseInst: CurrentI))
1718 return std::nullopt;
1719
1720 // Quick check if there are direct uses that are read-clobbers.
1721 if (any_of(Range: Current->uses(), P: [this, &KillingLoc, StartAccess](Use &U) {
1722 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U.getUser()))
1723 return !MSSA.dominates(A: StartAccess, B: UseOrDef) &&
1724 isReadClobber(DefLoc: KillingLoc, UseInst: UseOrDef->getMemoryInst());
1725 return false;
1726 })) {
1727 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1728 return std::nullopt;
1729 }
1730
1731 // If Current does not have an analyzable write location or is not
1732 // removable, skip it.
1733 CurrentLoc = getLocForWrite(I: CurrentI);
1734 if (!CurrentLoc || !isRemovable(I: CurrentI)) {
1735 CanOptimize = false;
1736 continue;
1737 }
1738
1739    // AliasAnalysis does not account for loops. Limit elimination to
1740    // candidates for which we can guarantee that they always store to the
1741    // same memory location and are not located in different loops.
1742 if (!isGuaranteedLoopIndependent(Current: CurrentI, KillingDef: KillingI, CurrentLoc: *CurrentLoc)) {
1743 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1744 CanOptimize = false;
1745 continue;
1746 }
1747
1748 if (IsMemTerm) {
1749 // If the killing def is a memory terminator (e.g. lifetime.end), check
1750 // the next candidate if the current Current does not write the same
1751 // underlying object as the terminator.
1752 if (!isMemTerminator(Loc: *CurrentLoc, AccessI: CurrentI, MaybeTerm: KillingI)) {
1753 CanOptimize = false;
1754 continue;
1755 }
1756 } else {
1757 int64_t KillingOffset = 0;
1758 int64_t DeadOffset = 0;
1759 auto OR = isOverwrite(KillingI, DeadI: CurrentI, KillingLoc, DeadLoc: *CurrentLoc,
1760 KillingOff&: KillingOffset, DeadOff&: DeadOffset);
1761 if (CanOptimize) {
1762 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1763 // optimized access. Do not optimize if CurrentDef is already the
1764 // defining access of KillingDef.
1765 if (CurrentDef != KillingDef->getDefiningAccess() &&
1766 (OR == OW_Complete || OR == OW_MaybePartial))
1767 KillingDef->setOptimized(CurrentDef);
1768
1769 // Once a may-aliasing def is encountered do not set an optimized
1770 // access.
1771 if (OR != OW_None)
1772 CanOptimize = false;
1773 }
1774
1775 // If Current does not write to the same object as KillingDef, check
1776 // the next candidate.
1777 if (OR == OW_Unknown || OR == OW_None)
1778 continue;
1779 else if (OR == OW_MaybePartial) {
1780 // If KillingDef only partially overwrites Current, check the next
1781 // candidate if the partial step limit is exceeded. This aggressively
1782 // limits the number of candidates for partial store elimination,
1783 // which are less likely to be removable in the end.
1784 if (PartialLimit <= 1) {
1785 WalkerStepLimit -= 1;
1786 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with "
1787 "next access\n");
1788 continue;
1789 }
1790 PartialLimit -= 1;
1791 }
1792 }
1793 break;
1794 };
1795
1796 // Accesses to objects accessible after the function returns can only be
1797 // eliminated if the access is dead along all paths to the exit. Collect
1798 // the blocks with killing (=completely overwriting MemoryDefs) and check if
1799 // they cover all paths from MaybeDeadAccess to any function exit.
1800 SmallPtrSet<Instruction *, 16> KillingDefs;
1801 KillingDefs.insert(Ptr: KillingDef->getMemoryInst());
1802 MemoryAccess *MaybeDeadAccess = Current;
1803 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1804 Instruction *MaybeDeadI = cast<MemoryDef>(Val: MaybeDeadAccess)->getMemoryInst();
1805 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1806 << *MaybeDeadI << ")\n");
1807
1808 SmallVector<MemoryAccess *, 32> WorkList;
1809 SmallPtrSet<MemoryAccess *, 32> Visited;
1810 pushMemUses(Acc: MaybeDeadAccess, WorkList, Visited);
1811
1812 // Check if DeadDef may be read.
1813 for (unsigned I = 0; I < WorkList.size(); I++) {
1814 MemoryAccess *UseAccess = WorkList[I];
1815
1816 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1817 // Bail out if the number of accesses to check exceeds the scan limit.
1818 if (ScanLimit < (WorkList.size() - I)) {
1819 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1820 return std::nullopt;
1821 }
1822 --ScanLimit;
1823 NumDomMemDefChecks++;
1824
1825 if (isa<MemoryPhi>(Val: UseAccess)) {
1826 if (any_of(Range&: KillingDefs, P: [this, UseAccess](Instruction *KI) {
1827 return DT.properlyDominates(A: KI->getParent(), B: UseAccess->getBlock());
1828 })) {
1829 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1830 continue;
1831 }
1832 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1833 pushMemUses(Acc: UseAccess, WorkList, Visited);
1834 continue;
1835 }
1836
1837 Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1838 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1839
1840 if (any_of(Range&: KillingDefs, P: [this, UseInst](Instruction *KI) {
1841 return DT.dominates(Def: KI, User: UseInst);
1842 })) {
1843 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1844 continue;
1845 }
1846
1847    // A memory terminator kills all preceding MemoryDefs and all succeeding
1848    // MemoryAccesses. We do not have to check its users.
1849 if (isMemTerminator(Loc: MaybeDeadLoc, AccessI: MaybeDeadI, MaybeTerm: UseInst)) {
1850 LLVM_DEBUG(
1851 dbgs()
1852 << " ... skipping, memterminator invalidates following accesses\n");
1853 continue;
1854 }
1855
1856 if (isNoopIntrinsic(I: cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst())) {
1857 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1858 pushMemUses(Acc: UseAccess, WorkList, Visited);
1859 continue;
1860 }
1861
1862 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) {
1863 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1864 return std::nullopt;
1865 }
1866
1867    // Uses which may read the original MemoryDef mean we cannot eliminate the
1868    // original MemoryDef. Stop the walk.
1869    // If KillingDef is a CallInst with the "initializes" attribute, the reads
1870    // in the callee would be dominated by the initializations, so it is safe.
1871 bool IsKillingDefFromInitAttr = false;
1872 if (IsInitializesAttrMemLoc) {
1873 if (KillingI == UseInst &&
1874 KillingUndObj == getUnderlyingObject(V: MaybeDeadLoc.Ptr))
1875 IsKillingDefFromInitAttr = true;
1876 }
1877
1878 if (isReadClobber(DefLoc: MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1879 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1880 return std::nullopt;
1881 }
1882
1883 // If this worklist walks back to the original memory access (and the
1884    // pointer is not guaranteed loop invariant) then we cannot assume that a
1885 // store kills itself.
1886 if (MaybeDeadAccess == UseAccess &&
1887 !isGuaranteedLoopInvariant(Ptr: MaybeDeadLoc.Ptr)) {
1888 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1889 return std::nullopt;
1890 }
1891 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1892 // if it reads the memory location.
1893 // TODO: It would probably be better to check for self-reads before
1894 // calling the function.
1895 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1896 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1897 continue;
1898 }
1899
1900    // Check the uses of all MemoryDefs we discover, including non-aliasing
1901    // ones, except for defs completely overwriting the original location
1902    // (their uses do not need to be followed). Skipping any other defs might
1903    // miss cases like the following
1904 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1905 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1906 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1907 // (The Use points to the *first* Def it may alias)
1908 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1909 // stores [0,1]
1910 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) {
1911 if (isCompleteOverwrite(DefLoc: MaybeDeadLoc, DefInst: MaybeDeadI, UseInst)) {
1912 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1913 if (PostOrderNumbers.find(Val: MaybeKillingBlock)->second <
1914 PostOrderNumbers.find(Val: MaybeDeadAccess->getBlock())->second) {
1915 if (!isInvisibleToCallerAfterRet(V: KillingUndObj, Ptr: KillingLoc.Ptr,
1916 StoreSize: KillingLoc.Size)) {
1917 LLVM_DEBUG(dbgs()
1918 << " ... found killing def " << *UseInst << "\n");
1919 KillingDefs.insert(Ptr: UseInst);
1920 }
1921 } else {
1922 LLVM_DEBUG(dbgs()
1923                     << "  ... found preceding def " << *UseInst << "\n");
1924 return std::nullopt;
1925 }
1926 } else
1927 pushMemUses(Acc: UseDef, WorkList, Visited);
1928 }
1929 }
1930
1931 // For accesses to locations visible after the function returns, make sure
1932 // that the location is dead (=overwritten) along all paths from
1933 // MaybeDeadAccess to the exit.
1934 if (!isInvisibleToCallerAfterRet(V: KillingUndObj, Ptr: KillingLoc.Ptr,
1935 StoreSize: KillingLoc.Size)) {
1936 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1937 for (Instruction *KD : KillingDefs)
1938 KillingBlocks.insert(Ptr: KD->getParent());
1939 assert(!KillingBlocks.empty() &&
1940 "Expected at least a single killing block");
1941
1942 // Find the common post-dominator of all killing blocks.
1943 BasicBlock *CommonPred = *KillingBlocks.begin();
1944 for (BasicBlock *BB : llvm::drop_begin(RangeOrContainer&: KillingBlocks)) {
1945 if (!CommonPred)
1946 break;
1947 CommonPred = PDT.findNearestCommonDominator(A: CommonPred, B: BB);
1948 }
1949
1950 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1951 // there is a path from MaybeDeadAccess to an exit not going through a
1952 // killing block.
1953 if (!PDT.dominates(A: CommonPred, B: MaybeDeadAccess->getBlock())) {
1954 if (!AnyUnreachableExit)
1955 return std::nullopt;
1956
1957 // Fall back to CFG scan starting at all non-unreachable roots if not
1958 // all paths to the exit go through CommonPred.
1959 CommonPred = nullptr;
1960 }
1961
1962 // If CommonPred itself is in the set of killing blocks, we're done.
1963 if (KillingBlocks.count(Ptr: CommonPred))
1964 return {MaybeDeadAccess};
1965
1966 SetVector<BasicBlock *> WorkList;
1967 // If CommonPred is null, there are multiple exits from the function.
1968 // They all have to be added to the worklist.
1969 if (CommonPred)
1970 WorkList.insert(X: CommonPred);
1971 else
1972 for (BasicBlock *R : PDT.roots()) {
1973 if (!isa<UnreachableInst>(Val: R->getTerminator()))
1974 WorkList.insert(X: R);
1975 }
1976
1977 NumCFGTries++;
1978 // Check if all paths starting from an exit node go through one of the
1979 // killing blocks before reaching MaybeDeadAccess.
1980 for (unsigned I = 0; I < WorkList.size(); I++) {
1981 NumCFGChecks++;
1982 BasicBlock *Current = WorkList[I];
1983 if (KillingBlocks.count(Ptr: Current))
1984 continue;
1985 if (Current == MaybeDeadAccess->getBlock())
1986 return std::nullopt;
1987
1988 // MaybeDeadAccess is reachable from the entry, so we don't have to
1989 // explore unreachable blocks further.
1990 if (!DT.isReachableFromEntry(A: Current))
1991 continue;
1992
1993 WorkList.insert_range(R: predecessors(BB: Current));
1994
1995 if (WorkList.size() >= MemorySSAPathCheckLimit)
1996 return std::nullopt;
1997 }
1998 NumCFGSuccess++;
1999 }
2000
2001 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
2002 // potentially dead.
2003 return {MaybeDeadAccess};
2004}
2005
2006void DSEState::deleteDeadInstruction(Instruction *SI,
2007 SmallPtrSetImpl<MemoryAccess *> *Deleted) {
2008 MemorySSAUpdater Updater(&MSSA);
2009 SmallVector<Instruction *, 32> NowDeadInsts;
2010 NowDeadInsts.push_back(Elt: SI);
2011 --NumFastOther;
2012
2013 while (!NowDeadInsts.empty()) {
2014 Instruction *DeadInst = NowDeadInsts.pop_back_val();
2015 ++NumFastOther;
2016
2017 // Try to preserve debug information attached to the dead instruction.
2018 salvageDebugInfo(I&: *DeadInst);
2019 salvageKnowledge(I: DeadInst);
2020
2021 // Remove the Instruction from MSSA.
2022 MemoryAccess *MA = MSSA.getMemoryAccess(I: DeadInst);
2023 bool IsMemDef = MA && isa<MemoryDef>(Val: MA);
2024 if (MA) {
2025 if (IsMemDef) {
2026 auto *MD = cast<MemoryDef>(Val: MA);
2027 SkipStores.insert(Ptr: MD);
2028 if (Deleted)
2029 Deleted->insert(Ptr: MD);
2030 if (auto *SI = dyn_cast<StoreInst>(Val: MD->getMemoryInst())) {
2031 if (SI->getValueOperand()->getType()->isPointerTy()) {
2032 const Value *UO = getUnderlyingObject(V: SI->getValueOperand());
2033 if (CapturedBeforeReturn.erase(Val: UO))
2034 ShouldIterateEndOfFunctionDSE = true;
2035 InvisibleToCallerAfterRet.erase(Val: UO);
2036 InvisibleToCallerAfterRetBounded.erase(Val: UO);
2037 }
2038 }
2039 }
2040
2041 Updater.removeMemoryAccess(MA);
2042 }
2043
2044 auto I = IOLs.find(Key: DeadInst->getParent());
2045 if (I != IOLs.end())
2046 I->second.erase(Key: DeadInst);
2047 // Remove its operands
2048 for (Use &O : DeadInst->operands())
2049 if (Instruction *OpI = dyn_cast<Instruction>(Val&: O)) {
2050 O.set(PoisonValue::get(T: O->getType()));
2051 if (isInstructionTriviallyDead(I: OpI, TLI: &TLI))
2052 NowDeadInsts.push_back(Elt: OpI);
2053 }
2054
2055 EA.removeInstruction(I: DeadInst);
2056 // Remove memory defs directly if they don't produce results, but only
2057 // queue other dead instructions for later removal. They may have been
2058 // used as memory locations that have been cached by BatchAA. Removing
2059    // them here may lead to newly created instructions being allocated at the
2060    // same address, yielding stale cache entries.
2061 if (IsMemDef && DeadInst->getType()->isVoidTy())
2062 DeadInst->eraseFromParent();
2063 else
2064 ToRemove.push_back(Elt: DeadInst);
2065 }
2066}
2067
2068bool DSEState::mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
2069 const Value *KillingUndObj) {
2070  // First see if we can ignore it by using the fact that the underlying
2071  // object of the killing store is an alloca-like object that is not visible
2072  // to the caller during execution of the function.
2073 if (KillingUndObj && isInvisibleToCallerOnUnwind(V: KillingUndObj))
2074 return false;
2075
2076 if (KillingI->getParent() == DeadI->getParent())
2077 return ThrowingBlocks.count(Ptr: KillingI->getParent());
2078 return !ThrowingBlocks.empty();
2079}
2080
2081bool DSEState::isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
2082  // If DeadI may throw, it acts as a barrier, unless we are writing to an
2083  // alloca-like object that is not visible to the caller on unwind.
2084 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj))
2085 return true;
2086
2087 // If DeadI is an atomic load/store stronger than monotonic, do not try to
2088 // eliminate/reorder it.
2089 if (DeadI->isAtomic()) {
2090 if (auto *LI = dyn_cast<LoadInst>(Val: DeadI))
2091 return isStrongerThanMonotonic(AO: LI->getOrdering());
2092 if (auto *SI = dyn_cast<StoreInst>(Val: DeadI))
2093 return isStrongerThanMonotonic(AO: SI->getOrdering());
2094 if (auto *ARMW = dyn_cast<AtomicRMWInst>(Val: DeadI))
2095 return isStrongerThanMonotonic(AO: ARMW->getOrdering());
2096 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Val: DeadI))
2097 return isStrongerThanMonotonic(AO: CmpXchg->getSuccessOrdering()) ||
2098 isStrongerThanMonotonic(AO: CmpXchg->getFailureOrdering());
2099 llvm_unreachable("other instructions should be skipped in MemorySSA");
2100 }
2101 return false;
2102}
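// Illustrative examples (hypothetical): a release-ordered atomic store or a
// cmpxchg with acquire success ordering between the dead and killing stores is
// a barrier; monotonic atomic loads and stores are not; a potentially throwing
// instruction is a barrier unless the underlying object is invisible to the
// caller on unwind (e.g. a non-escaping alloca).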
2103
2104bool DSEState::eliminateDeadWritesAtEndOfFunction() {
2105 bool MadeChange = false;
2106 LLVM_DEBUG(
2107 dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n");
2108 do {
2109 ShouldIterateEndOfFunctionDSE = false;
2110 for (MemoryDef *Def : llvm::reverse(C&: MemDefs)) {
2111 if (SkipStores.contains(Ptr: Def))
2112 continue;
2113
2114 Instruction *DefI = Def->getMemoryInst();
2115 auto DefLoc = getLocForWrite(I: DefI);
2116 if (!DefLoc || !isRemovable(I: DefI)) {
2117 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
2118 "instruction not removable.\n");
2119 continue;
2120 }
2121
2122 // NOTE: Currently eliminating writes at the end of a function is
2123 // limited to MemoryDefs with a single underlying object, to save
2124 // compile-time. In practice it appears the case with multiple
2125 // underlying objects is very uncommon. If it turns out to be important,
2126 // we can use getUnderlyingObjects here instead.
2127 const Value *UO = getUnderlyingObject(V: DefLoc->Ptr);
2128 if (!isInvisibleToCallerAfterRet(V: UO, Ptr: DefLoc->Ptr, StoreSize: DefLoc->Size))
2129 continue;
2130
2131 if (isWriteAtEndOfFunction(Def, DefLoc: *DefLoc)) {
2132 // See through pointer-to-pointer bitcasts
2133 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
2134 "of the function\n");
2135 deleteDeadInstruction(SI: DefI);
2136 ++NumFastStores;
2137 MadeChange = true;
2138 }
2139 }
2140 } while (ShouldIterateEndOfFunctionDSE);
2141 return MadeChange;
2142}
2143
2144bool DSEState::tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
2145 Instruction *DefI = Def->getMemoryInst();
2146 MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
2147 if (!MemSet)
2148 // TODO: Could handle zero store to small allocation as well.
2149 return false;
2150 Constant *StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
2151 if (!StoredConstant || !StoredConstant->isNullValue())
2152 return false;
2153
2154 if (!isRemovable(I: DefI))
2155    // The memset might be volatile.
2156 return false;
2157
2158 if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
2159 F.hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
2160 F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress) || F.getName() == "calloc")
2161 return false;
2162 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(Val: DefUO));
2163 if (!Malloc)
2164 return false;
2165 auto *InnerCallee = Malloc->getCalledFunction();
2166 if (!InnerCallee)
2167 return false;
2168 LibFunc Func = NotLibFunc;
2169 StringRef ZeroedVariantName;
2170 if (!TLI.getLibFunc(FDecl: *InnerCallee, F&: Func) || !TLI.has(F: Func) ||
2171 Func != LibFunc_malloc) {
2172 Attribute Attr = Malloc->getFnAttr(Kind: "alloc-variant-zeroed");
2173 if (!Attr.isValid())
2174 return false;
2175 ZeroedVariantName = Attr.getValueAsString();
2176 if (ZeroedVariantName.empty())
2177 return false;
2178 }
2179
2180 // Gracefully handle malloc with unexpected memory attributes.
2181 auto *MallocDef = dyn_cast_or_null<MemoryDef>(Val: MSSA.getMemoryAccess(I: Malloc));
2182 if (!MallocDef)
2183 return false;
2184
2185 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
2186    // Check for a 'br (icmp eq ptr, null), truebb, falsebb' pattern at the
2187    // end of the malloc block.
2188 auto *MallocBB = Malloc->getParent(), *MemsetBB = Memset->getParent();
2189 if (MallocBB == MemsetBB)
2190 return true;
2191 auto *Ptr = Memset->getArgOperand(i: 0);
2192 auto *TI = MallocBB->getTerminator();
2193 BasicBlock *TrueBB, *FalseBB;
2194 if (!match(V: TI, P: m_Br(C: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: Ptr),
2195 R: m_Zero()),
2196 T&: TrueBB, F&: FalseBB)))
2197 return false;
2198 if (MemsetBB != FalseBB)
2199 return false;
2200 return true;
2201 };
2202
2203 if (Malloc->getOperand(i_nocapture: 0) != MemSet->getLength())
2204 return false;
2205 if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Def: Malloc, User: MemSet) ||
2206 !memoryIsNotModifiedBetween(FirstI: Malloc, SecondI: MemSet, AA&: BatchAA, DL, DT: &DT))
2207 return false;
2208 IRBuilder<> IRB(Malloc);
2209 assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
2210 Value *Calloc = nullptr;
2211 if (!ZeroedVariantName.empty()) {
2212 LLVMContext &Ctx = Malloc->getContext();
2213 AttributeList Attrs = InnerCallee->getAttributes();
2214 AllocFnKind AllocKind =
2215 Attrs.getFnAttr(Kind: Attribute::AllocKind).getAllocKind() |
2216 AllocFnKind::Zeroed;
2217 AllocKind &= ~AllocFnKind::Uninitialized;
2218 Attrs =
2219 Attrs.addFnAttribute(C&: Ctx, Attr: Attribute::getWithAllocKind(Context&: Ctx, Kind: AllocKind))
2220 .removeFnAttribute(C&: Ctx, Kind: "alloc-variant-zeroed");
2221 FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2222 Name: ZeroedVariantName, T: InnerCallee->getFunctionType(), AttributeList: Attrs);
2223 cast<Function>(Val: ZeroedVariant.getCallee())
2224 ->setCallingConv(Malloc->getCallingConv());
2225 SmallVector<Value *, 3> Args;
2226 Args.append(in_start: Malloc->arg_begin(), in_end: Malloc->arg_end());
2227 CallInst *CI = IRB.CreateCall(Callee: ZeroedVariant, Args, Name: ZeroedVariantName);
2228 CI->setCallingConv(Malloc->getCallingConv());
2229 Calloc = CI;
2230 } else {
2231 Type *SizeTTy = Malloc->getArgOperand(i: 0)->getType();
2232 Calloc = emitCalloc(Num: ConstantInt::get(Ty: SizeTTy, V: 1), Size: Malloc->getArgOperand(i: 0),
2233 B&: IRB, TLI, AddrSpace: Malloc->getType()->getPointerAddressSpace());
2234 }
2235 if (!Calloc)
2236 return false;
2237
2238 MemorySSAUpdater Updater(&MSSA);
2239 auto *NewAccess = Updater.createMemoryAccessAfter(I: cast<Instruction>(Val: Calloc),
2240 Definition: nullptr, InsertPt: MallocDef);
2241 auto *NewAccessMD = cast<MemoryDef>(Val: NewAccess);
2242 Updater.insertDef(Def: NewAccessMD, /*RenameUses=*/true);
2243 Malloc->replaceAllUsesWith(V: Calloc);
2244 deleteDeadInstruction(SI: Malloc);
2245 return true;
2246}
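// Minimal hypothetical IR sketch of the pattern folded above:
//   %p = call ptr @malloc(i64 %n)
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
// When the malloc dominates the memset and the memory is not modified in
// between, this becomes:
//   %p = call ptr @calloc(i64 1, i64 %n)
// Allocators carrying an "alloc-variant-zeroed" attribute are instead
// rewritten to the named zeroed variant.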
2247
2248bool DSEState::dominatingConditionImpliesValue(MemoryDef *Def) {
2249 auto *StoreI = cast<StoreInst>(Val: Def->getMemoryInst());
2250 BasicBlock *StoreBB = StoreI->getParent();
2251 Value *StorePtr = StoreI->getPointerOperand();
2252 Value *StoreVal = StoreI->getValueOperand();
2253
2254 DomTreeNode *IDom = DT.getNode(BB: StoreBB)->getIDom();
2255 if (!IDom)
2256 return false;
2257
2258 auto *BI = dyn_cast<BranchInst>(Val: IDom->getBlock()->getTerminator());
2259 if (!BI || !BI->isConditional())
2260 return false;
2261
2262  // In case both successors are the same block, it is not possible to
2263  // determine if the optimization is valid. (We would not want to optimize a
2264  // store in the FalseBB if the condition is true, and vice versa.)
2265 if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1))
2266 return false;
2267
2268 Instruction *ICmpL;
2269 CmpPredicate Pred;
2270 if (!match(V: BI->getCondition(),
2271 P: m_c_ICmp(Pred,
2272 L: m_CombineAnd(L: m_Load(Op: m_Specific(V: StorePtr)),
2273 R: m_Instruction(I&: ICmpL)),
2274 R: m_Specific(V: StoreVal))) ||
2275 !ICmpInst::isEquality(P: Pred))
2276 return false;
2277
2278  // In case the else block also branches to the if block, or the other way
2279  // around, it is not possible to determine if the optimization is valid.
2280 if (Pred == ICmpInst::ICMP_EQ &&
2281 !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 0)),
2282 BB: StoreBB))
2283 return false;
2284
2285 if (Pred == ICmpInst::ICMP_NE &&
2286 !DT.dominates(BBE: BasicBlockEdge(BI->getParent(), BI->getSuccessor(i: 1)),
2287 BB: StoreBB))
2288 return false;
2289
2290 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(I: ICmpL);
2291 MemoryAccess *ClobAcc =
2292 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2293
2294 return MSSA.dominates(A: ClobAcc, B: LoadAcc);
2295}
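// Minimal hypothetical IR sketch of a store made redundant by a dominating
// condition:
//   %v = load i32, ptr %p
//   %c = icmp eq i32 %v, 0
//   br i1 %c, label %then, label %exit
// then:
//   store i32 0, ptr %p   ; %p already holds 0 on this path
// The store can be treated as a no-op provided nothing clobbers %p between the
// load feeding the compare and the store.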
2296
2297bool DSEState::storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2298 Instruction *DefI = Def->getMemoryInst();
2299 StoreInst *Store = dyn_cast<StoreInst>(Val: DefI);
2300 MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
2301 Constant *StoredConstant = nullptr;
2302 if (Store)
2303 StoredConstant = dyn_cast<Constant>(Val: Store->getOperand(i_nocapture: 0));
2304 else if (MemSet)
2305 StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
2306 else
2307 return false;
2308
2309 if (!isRemovable(I: DefI))
2310 return false;
2311
2312 if (StoredConstant) {
2313 Constant *InitC =
2314 getInitialValueOfAllocation(V: DefUO, TLI: &TLI, Ty: StoredConstant->getType());
2315 // If the clobbering access is LiveOnEntry, no instructions between them
2316 // can modify the memory location.
2317 if (InitC && InitC == StoredConstant)
2318 return MSSA.isLiveOnEntryDef(
2319 MA: MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA));
2320 }
2321
2322 if (!Store)
2323 return false;
2324
2325 if (dominatingConditionImpliesValue(Def))
2326 return true;
2327
2328 if (auto *LoadI = dyn_cast<LoadInst>(Val: Store->getOperand(i_nocapture: 0))) {
2329 if (LoadI->getPointerOperand() == Store->getOperand(i_nocapture: 1)) {
2330 // Get the defining access for the load.
2331 auto *LoadAccess = MSSA.getMemoryAccess(I: LoadI)->getDefiningAccess();
2332 // Fast path: the defining accesses are the same.
2333 if (LoadAccess == Def->getDefiningAccess())
2334 return true;
2335
2336 // Look through phi accesses. Recursively scan all phi accesses by
2337 // adding them to a worklist. Bail when we run into a memory def that
2338 // does not match LoadAccess.
2339 SetVector<MemoryAccess *> ToCheck;
2340 MemoryAccess *Current =
2341 MSSA.getWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2342 // We don't want to bail when we run into the store memory def. But,
2343 // the phi access may point to it. So, pretend like we've already
2344 // checked it.
2345 ToCheck.insert(X: Def);
2346 ToCheck.insert(X: Current);
2347 // Start at current (1) to simulate already having checked Def.
2348 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2349 Current = ToCheck[I];
2350 if (auto PhiAccess = dyn_cast<MemoryPhi>(Val: Current)) {
2351 // Check all the operands.
2352 for (auto &Use : PhiAccess->incoming_values())
2353 ToCheck.insert(X: cast<MemoryAccess>(Val: &Use));
2354 continue;
2355 }
2356
2357        // If we found a memory def, bail. This happens when we have an
2358        // unrelated write in between the load and the otherwise no-op store.
2359 assert(isa<MemoryDef>(Current) && "Only MemoryDefs should reach here.");
2360 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2361 // We are searching for the definition of the store's destination.
2362 // So, if that is the same definition as the load, then this is a
2363 // noop. Otherwise, fail.
2364 if (LoadAccess != Current)
2365 return false;
2366 }
2367 return true;
2368 }
2369 }
2370
2371 return false;
2372}
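// Minimal hypothetical IR sketch of a no-op store detected above:
//   %v = load i32, ptr %p
//   ...                     ; no write to %p in between
//   store i32 %v, ptr %p    ; stores back the value just loaded
// A store is also a no-op when it writes the known initial value of the
// allocation, e.g. a zero memset into memory freshly returned by calloc.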
2373
2374bool DSEState::removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2375 bool Changed = false;
2376 for (auto OI : IOL) {
2377 Instruction *DeadI = OI.first;
2378 MemoryLocation Loc = *getLocForWrite(I: DeadI);
2379 assert(isRemovable(DeadI) && "Expect only removable instruction");
2380
2381 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2382 int64_t DeadStart = 0;
2383 uint64_t DeadSize = Loc.Size.getValue();
2384 GetPointerBaseWithConstantOffset(Ptr, Offset&: DeadStart, DL);
2385 OverlapIntervalsTy &IntervalMap = OI.second;
2386 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2387 if (IntervalMap.empty())
2388 continue;
2389 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2390 }
2391 return Changed;
2392}
2393
2394bool DSEState::eliminateRedundantStoresOfExistingValues() {
2395 bool MadeChange = false;
2396 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2397 "already existing value\n");
2398 for (auto *Def : MemDefs) {
2399 if (SkipStores.contains(Ptr: Def) || MSSA.isLiveOnEntryDef(MA: Def))
2400 continue;
2401
2402 Instruction *DefInst = Def->getMemoryInst();
2403 auto MaybeDefLoc = getLocForWrite(I: DefInst);
2404 if (!MaybeDefLoc || !isRemovable(I: DefInst))
2405 continue;
2406
2407 MemoryDef *UpperDef;
2408 // To conserve compile-time, we avoid walking to the next clobbering def.
2409 // Instead, we just try to get the optimized access, if it exists. DSE
2410 // will try to optimize defs during the earlier traversal.
2411 if (Def->isOptimized())
2412 UpperDef = dyn_cast<MemoryDef>(Val: Def->getOptimized());
2413 else
2414 UpperDef = dyn_cast<MemoryDef>(Val: Def->getDefiningAccess());
2415 if (!UpperDef || MSSA.isLiveOnEntryDef(MA: UpperDef))
2416 continue;
2417
2418 Instruction *UpperInst = UpperDef->getMemoryInst();
2419 auto IsRedundantStore = [&]() {
2420 // We don't care about differences in call attributes here.
2421 if (DefInst->isIdenticalToWhenDefined(I: UpperInst,
2422 /*IntersectAttrs=*/true))
2423 return true;
2424 if (auto *MemSetI = dyn_cast<MemSetInst>(Val: UpperInst)) {
2425 if (auto *SI = dyn_cast<StoreInst>(Val: DefInst)) {
2426 // MemSetInst must have a write location.
2427 auto UpperLoc = getLocForWrite(I: UpperInst);
2428 if (!UpperLoc)
2429 return false;
2430 int64_t InstWriteOffset = 0;
2431 int64_t DepWriteOffset = 0;
2432 auto OR = isOverwrite(KillingI: UpperInst, DeadI: DefInst, KillingLoc: *UpperLoc, DeadLoc: *MaybeDefLoc,
2433 KillingOff&: InstWriteOffset, DeadOff&: DepWriteOffset);
2434 Value *StoredByte = isBytewiseValue(V: SI->getValueOperand(), DL);
2435 return StoredByte && StoredByte == MemSetI->getOperand(i_nocapture: 1) &&
2436 OR == OW_Complete;
2437 }
2438 }
2439 return false;
2440 };
2441
2442 if (!IsRedundantStore() || isReadClobber(DefLoc: *MaybeDefLoc, UseInst: DefInst))
2443 continue;
2444 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2445 << '\n');
2446 deleteDeadInstruction(SI: DefInst);
2447 NumRedundantStores++;
2448 MadeChange = true;
2449 }
2450 return MadeChange;
2451}
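// Illustrative, hypothetical IR for a store removed by the routine above:
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
//   store i32 0, ptr %p   ; rewrites bytes the memset already zeroed
// The later store is redundant because it writes the byte pattern the
// dominating memset already produced and nothing clobbers %p in between.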
2452
2453SmallVector<MemoryLocation, 1>
2454DSEState::getInitializesArgMemLoc(const Instruction *I) {
2455 const CallBase *CB = dyn_cast<CallBase>(Val: I);
2456 if (!CB)
2457 return {};
2458
2459 // Collect aliasing arguments and their initializes ranges.
2460 SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
2461 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2462 Value *CurArg = CB->getArgOperand(i: Idx);
2463 if (!CurArg->getType()->isPointerTy())
2464 continue;
2465
2466 ConstantRangeList Inits;
2467 Attribute InitializesAttr = CB->getParamAttr(ArgNo: Idx, Kind: Attribute::Initializes);
2468 // initializes on byval arguments refers to the callee copy, not the
2469 // original memory the caller passed in.
2470 if (InitializesAttr.isValid() && !CB->isByValArgument(ArgNo: Idx))
2471 Inits = InitializesAttr.getValueAsConstantRangeList();
2472
2473    // Check whether "CurArg" could alias with global variables. We require
2474    // that either it is function-local and not captured before the call, or
2475    // that "CB" only accesses argument or inaccessible memory.
2476 if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2477 !isFuncLocalAndNotCaptured(Arg: CurArg, CB, EA))
2478 Inits = ConstantRangeList();
2479
2480    // To avoid performing incorrect DSE on unwind edges in the current
2481    // function, we only use the "initializes" attribute to kill dead stores if:
2482 // - The call does not throw exceptions, "CB->doesNotThrow()".
2483 // - Or the callee parameter has "dead_on_unwind" attribute.
2484 // - Or the argument is invisible to caller on unwind, and there are no
2485 // unwind edges from this call in the current function (e.g. `CallInst`).
2486 bool IsDeadOrInvisibleOnUnwind =
2487 CB->paramHasAttr(ArgNo: Idx, Kind: Attribute::DeadOnUnwind) ||
2488 (isa<CallInst>(Val: CB) && isInvisibleToCallerOnUnwind(V: CurArg));
2489 ArgumentInitInfo InitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind, .Inits: Inits};
2490 bool FoundAliasing = false;
2491 for (auto &[Arg, AliasList] : Arguments) {
2492 auto AAR = BatchAA.alias(LocA: MemoryLocation::getBeforeOrAfter(Ptr: Arg),
2493 LocB: MemoryLocation::getBeforeOrAfter(Ptr: CurArg));
2494 if (AAR == AliasResult::NoAlias) {
2495 continue;
2496 } else if (AAR == AliasResult::MustAlias) {
2497 FoundAliasing = true;
2498 AliasList.push_back(Elt: InitInfo);
2499 } else {
2500 // For PartialAlias and MayAlias, there is an offset or may be an
2501 // unknown offset between the arguments and we insert an empty init
2502 // range to discard the entire initializes info while intersecting.
2503 FoundAliasing = true;
2504 AliasList.push_back(Elt: ArgumentInitInfo{.Idx: Idx, .IsDeadOrInvisibleOnUnwind: IsDeadOrInvisibleOnUnwind,
2505 .Inits: ConstantRangeList()});
2506 }
2507 }
2508 if (!FoundAliasing)
2509 Arguments[CurArg] = {InitInfo};
2510 }
2511
2512 SmallVector<MemoryLocation, 1> Locations;
2513 for (const auto &[_, Args] : Arguments) {
2514 auto IntersectedRanges =
2515 getIntersectedInitRangeList(Args, CallHasNoUnwindAttr: CB->doesNotThrow());
2516 if (IntersectedRanges.empty())
2517 continue;
2518
2519 for (const auto &Arg : Args) {
2520 for (const auto &Range : IntersectedRanges) {
2521 int64_t Start = Range.getLower().getSExtValue();
2522 int64_t End = Range.getUpper().getSExtValue();
2523 // For now, we only handle locations starting at offset 0.
2524 if (Start == 0)
2525 Locations.push_back(Elt: MemoryLocation(CB->getArgOperand(i: Arg.Idx),
2526 LocationSize::precise(Value: End - Start),
2527 CB->getAAMetadata()));
2528 }
2529 }
2530 }
2531 return Locations;
2532}
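// Illustrative sketch (hypothetical attribute use): for a call such as
//   call void @f(ptr initializes((0, 8)) %p)
// this returns a precise 8-byte location at %p, which the caller of this
// helper treats as a location written by the call, allowing earlier stores to
// those bytes to be killed. As noted above, ranges not starting at offset 0
// are currently ignored.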
2533
2534std::pair<bool, bool>
2535DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2536 bool Changed = false;
2537 bool DeletedKillingLoc = false;
2538 unsigned ScanLimit = MemorySSAScanLimit;
2539 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2540 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2541 // Worklist of MemoryAccesses that may be killed by
2542 // "KillingLocWrapper.MemDef".
2543 SmallSetVector<MemoryAccess *, 8> ToCheck;
2544 // Track MemoryAccesses that have been deleted in the loop below, so we can
2545 // skip them. Don't use SkipStores for this, which may contain reused
2546 // MemoryAccess addresses.
2547 SmallPtrSet<MemoryAccess *, 8> Deleted;
2548 [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2549 ToCheck.insert(X: KillingLocWrapper.MemDef->getDefiningAccess());
2550
2551 // Check if MemoryAccesses in the worklist are killed by
2552 // "KillingLocWrapper.MemDef".
2553 for (unsigned I = 0; I < ToCheck.size(); I++) {
2554 MemoryAccess *Current = ToCheck[I];
2555 if (Deleted.contains(Ptr: Current))
2556 continue;
2557 std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2558 KillingDef: KillingLocWrapper.MemDef, StartAccess: Current, KillingLoc: KillingLocWrapper.MemLoc,
2559 KillingUndObj: KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2560 IsMemTerm: isMemTerminatorInst(I: KillingLocWrapper.DefInst), PartialLimit,
2561 IsInitializesAttrMemLoc: KillingLocWrapper.DefByInitializesAttr);
2562
2563 if (!MaybeDeadAccess) {
2564 LLVM_DEBUG(dbgs() << " finished walk\n");
2565 continue;
2566 }
2567 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2568 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2569 if (isa<MemoryPhi>(Val: DeadAccess)) {
2570 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2571 for (Value *V : cast<MemoryPhi>(Val: DeadAccess)->incoming_values()) {
2572 MemoryAccess *IncomingAccess = cast<MemoryAccess>(Val: V);
2573 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2574 BasicBlock *PhiBlock = DeadAccess->getBlock();
2575
2576 // We only consider incoming MemoryAccesses that come before the
2577 // MemoryPhi. Otherwise we could discover candidates that do not
2578 // strictly dominate our starting def.
2579 if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2580 ToCheck.insert(X: IncomingAccess);
2581 }
2582 continue;
2583 }
2584 // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2585 // It would incorrectly consider a call instruction as redundant store
2586 // and remove this call instruction.
2587 // TODO: this conflates the existence of a MemoryLocation with being able
2588 // to delete the instruction. Fix isRemovable() to consider calls with
2589 // side effects that cannot be removed, e.g. calls with the initializes
2590 // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2591 MemoryDefWrapper DeadDefWrapper(
2592 cast<MemoryDef>(Val: DeadAccess),
2593 getLocForInst(I: cast<MemoryDef>(Val: DeadAccess)->getMemoryInst(),
2594 /*ConsiderInitializesAttr=*/false));
2595 assert(DeadDefWrapper.DefinedLocations.size() == 1);
2596 MemoryLocationWrapper &DeadLocWrapper =
2597 DeadDefWrapper.DefinedLocations.front();
2598 LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2599 ToCheck.insert(X: DeadLocWrapper.MemDef->getDefiningAccess());
2600 NumGetDomMemoryDefPassed++;
2601
2602 if (!DebugCounter::shouldExecute(Counter&: MemorySSACounter))
2603 continue;
2604 if (isMemTerminatorInst(I: KillingLocWrapper.DefInst)) {
2605 if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2606 continue;
2607 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2608 << *DeadLocWrapper.DefInst << "\n KILLER: "
2609 << *KillingLocWrapper.DefInst << '\n');
2610 deleteDeadInstruction(SI: DeadLocWrapper.DefInst, Deleted: &Deleted);
2611 ++NumFastStores;
2612 Changed = true;
2613 } else {
2614      // Check if KillingI completely overwrites DeadI.
2615 int64_t KillingOffset = 0;
2616 int64_t DeadOffset = 0;
2617 OverwriteResult OR =
2618 isOverwrite(KillingI: KillingLocWrapper.DefInst, DeadI: DeadLocWrapper.DefInst,
2619 KillingLoc: KillingLocWrapper.MemLoc, DeadLoc: DeadLocWrapper.MemLoc,
2620 KillingOff&: KillingOffset, DeadOff&: DeadOffset);
2621 if (OR == OW_MaybePartial) {
2622 auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2623 OR = isPartialOverwrite(KillingLoc: KillingLocWrapper.MemLoc, DeadLoc: DeadLocWrapper.MemLoc,
2624 KillingOff: KillingOffset, DeadOff: DeadOffset,
2625 DeadI: DeadLocWrapper.DefInst, IOL);
2626 }
2627 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2628 auto *DeadSI = dyn_cast<StoreInst>(Val: DeadLocWrapper.DefInst);
2629 auto *KillingSI = dyn_cast<StoreInst>(Val: KillingLocWrapper.DefInst);
2630 // We are re-using tryToMergePartialOverlappingStores, which requires
2631 // DeadSI to dominate KillingSI.
2632          // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2633 if (DeadSI && KillingSI && DT.dominates(Def: DeadSI, User: KillingSI)) {
2634 if (Constant *Merged = tryToMergePartialOverlappingStores(
2635 KillingI: KillingSI, DeadI: DeadSI, KillingOffset, DeadOffset, DL, AA&: BatchAA,
2636 DT: &DT)) {
2637
2638 // Update stored value of earlier store to merged constant.
2639 DeadSI->setOperand(i_nocapture: 0, Val_nocapture: Merged);
2640 ++NumModifiedStores;
2641 Changed = true;
2642 DeletedKillingLoc = true;
2643
2644 // Remove killing store and remove any outstanding overlap
2645 // intervals for the updated store.
2646 deleteDeadInstruction(SI: KillingSI, Deleted: &Deleted);
2647 auto I = IOLs.find(Key: DeadSI->getParent());
2648 if (I != IOLs.end())
2649 I->second.erase(Key: DeadSI);
2650 break;
2651 }
2652 }
2653 }
2654 if (OR == OW_Complete) {
2655 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2656 << *DeadLocWrapper.DefInst << "\n KILLER: "
2657 << *KillingLocWrapper.DefInst << '\n');
2658 deleteDeadInstruction(SI: DeadLocWrapper.DefInst, Deleted: &Deleted);
2659 ++NumFastStores;
2660 Changed = true;
2661 }
2662 }
2663 }
2664
2665 assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2666 "SkipStores and Deleted out of sync?");
2667
2668 return {Changed, DeletedKillingLoc};
2669}
2670
2671bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2672 if (KillingDefWrapper.DefinedLocations.empty()) {
2673 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2674 << *KillingDefWrapper.DefInst << "\n");
2675 return false;
2676 }
2677
2678 bool MadeChange = false;
2679 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2680 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2681 << *KillingLocWrapper.MemDef << " ("
2682 << *KillingLocWrapper.DefInst << ")\n");
2683 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2684 MadeChange |= Changed;
2685
2686 // Check if the store is a no-op.
2687 if (!DeletedKillingLoc && storeIsNoop(Def: KillingLocWrapper.MemDef,
2688 DefUO: KillingLocWrapper.UnderlyingObject)) {
2689 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2690 << *KillingLocWrapper.DefInst << '\n');
2691 deleteDeadInstruction(SI: KillingLocWrapper.DefInst);
2692 NumRedundantStores++;
2693 MadeChange = true;
2694 continue;
2695 }
2696 // Can we form a calloc from a memset/malloc pair?
2697 if (!DeletedKillingLoc &&
2698 tryFoldIntoCalloc(Def: KillingLocWrapper.MemDef,
2699 DefUO: KillingLocWrapper.UnderlyingObject)) {
2700 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2701 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2702 deleteDeadInstruction(SI: KillingLocWrapper.DefInst);
2703 MadeChange = true;
2704 continue;
2705 }
2706 }
2707 return MadeChange;
2708}
2709
2710static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2711 DominatorTree &DT, PostDominatorTree &PDT,
2712 const TargetLibraryInfo &TLI,
2713 const LoopInfo &LI) {
2714 bool MadeChange = false;
2715 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2716 // For each store:
2717 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2718 MemoryDef *KillingDef = State.MemDefs[I];
2719 if (State.SkipStores.count(Ptr: KillingDef))
2720 continue;
2721
2722 MemoryDefWrapper KillingDefWrapper(
2723 KillingDef, State.getLocForInst(I: KillingDef->getMemoryInst(),
2724 ConsiderInitializesAttr: EnableInitializesImprovement));
2725 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2726 }
2727
2728 if (EnablePartialOverwriteTracking)
2729 for (auto &KV : State.IOLs)
2730 MadeChange |= State.removePartiallyOverlappedStores(IOL&: KV.second);
2731
2732 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2733 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2734
  while (!State.ToRemove.empty()) {
    Instruction *DeadInst = State.ToRemove.pop_back_val();
    DeadInst->eraseFromParent();
  }

  return MadeChange;
}

//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);

  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
  if (AreStatisticsEnabled())
    for (auto &I : instructions(F))
      NumRemainingStores += isa<StoreInst>(&I);
#endif

  if (!Changed)
    return PreservedAnalyses::all();

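  // DSE never changes the CFG and keeps MemorySSA and LoopInfo up to date,
  // so those analyses remain valid.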
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}

namespace {

/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
class DSELegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  DSELegacyPass() : FunctionPass(ID) {
    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

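    // Gather the same analyses the new pass manager version uses and run the
    // shared implementation.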
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    const TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
    PostDominatorTree &PDT =
        getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
    if (AreStatisticsEnabled())
      for (auto &I : instructions(F))
        NumRemainingStores += isa<StoreInst>(&I);
#endif

    return Changed;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<PostDominatorTreeWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addPreserved<PostDominatorTreeWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
  }
};

} // end anonymous namespace

char DSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                    false)

LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() {
  return new DSELegacyPass();
}