1//===- Loads.cpp - Local load analysis ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines simple local analyses for load instructions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Analysis/Loads.h"
14#include "llvm/Analysis/AliasAnalysis.h"
15#include "llvm/Analysis/AssumeBundleQueries.h"
16#include "llvm/Analysis/LoopAccessAnalysis.h"
17#include "llvm/Analysis/LoopInfo.h"
18#include "llvm/Analysis/MemoryBuiltins.h"
19#include "llvm/Analysis/MemoryLocation.h"
20#include "llvm/Analysis/ScalarEvolution.h"
21#include "llvm/Analysis/ScalarEvolutionExpressions.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/GetElementPtrTypeIterator.h"
25#include "llvm/IR/IntrinsicInst.h"
26#include "llvm/IR/Operator.h"
27
28using namespace llvm;
29
30static bool isAligned(const Value *Base, Align Alignment,
31 const DataLayout &DL) {
32 return Base->getPointerAlignment(DL) >= Alignment;
33}
34
35static bool isDereferenceableAndAlignedPointerViaAssumption(
36 const Value *Ptr, Align Alignment, const SimplifyQuery &SQ, bool IgnoreFree,
37 function_ref<bool(const RetainedKnowledge &RK)> CheckSize) {
38 if (!SQ.CxtI)
39 return false;
40 // Look through assumes to see if both dereferenceability and alignment can
41 // be proven by an assume if needed.
42 bool PtrCanBeFreed = Ptr->canBeFreed() && !IgnoreFree;
43 bool IsAligned = Ptr->getPointerAlignment(DL: SQ.DL) >= Alignment;
44 bool IsDerefable = false;
45 return getKnowledgeForValue(
46 V: Ptr, AttrKinds: {Attribute::Dereferenceable, Attribute::Alignment}, AC&: *SQ.AC,
47 Filter: [&](RetainedKnowledge RK, Instruction *Assume, auto) {
48 if (!isValidAssumeForContext(I: Assume, CxtI: SQ.CxtI, DT: SQ.DT))
49 return false;
50 if (RK.AttrKind == Attribute::Alignment) {
51 IsAligned |= RK.ArgValue >= Alignment.value();
52 } else {
53 assert(RK.AttrKind == Attribute::Dereferenceable);
54 // Dereferenceable information from assumptions is only valid if the
55 // value cannot be freed between the assumption and use.
56 if (!IsDerefable &&
57 (!PtrCanBeFreed || willNotFreeBetween(Assume, CtxI: SQ.CxtI)) &&
58 CheckSize(RK))
59 IsDerefable = true;
60 }
61 // Stop looking if we have proven both necessary facts.
62 return IsAligned && IsDerefable;
63 });
64}
65
66/// Test if V is always a pointer to allocated and suitably aligned memory for
67/// a simple load or store.
68static bool isDereferenceableAndAlignedPointer(
69 const Value *V, Align Alignment, const APInt &Size, const SimplifyQuery &SQ,
70 bool IgnoreFree, SmallPtrSetImpl<const Value *> &Visited,
71 unsigned MaxDepth) {
72 assert(V->getType()->isPointerTy() && "Base must be pointer");
73
74 // Recursion limit.
75 if (MaxDepth-- == 0)
76 return false;
77
78 // Already visited? Bail out, we've likely hit unreachable code.
79 if (!Visited.insert(Ptr: V).second)
80 return false;
81
82 // Note that it is not safe to speculate into a malloc'd region because
83 // malloc may return null.
84
85 // For GEPs, determine if the indexing lands within the allocated object.
86 if (const GEPOperator *GEP = dyn_cast<GEPOperator>(Val: V)) {
87 const Value *Base = GEP->getPointerOperand();
88
89 APInt Offset(SQ.DL.getIndexTypeSizeInBits(Ty: GEP->getType()), 0);
90 if (!GEP->accumulateConstantOffset(DL: SQ.DL, Offset) || Offset.isNegative() ||
91 !Offset.urem(RHS: APInt(Offset.getBitWidth(), Alignment.value()))
92 .isMinValue())
93 return false;
94
95 // If the base pointer is dereferenceable for Offset+Size bytes, then the
96 // GEP (== Base + Offset) is dereferenceable for Size bytes. If the base
97 // pointer is aligned to Align bytes, and the Offset is divisible by Align
98 // then the GEP (== Base + Offset == k_0 * Align + k_1 * Align) is also
99 // aligned to Align bytes.
100
101 // Offset and Size may have different bit widths if we have visited an
102 // addrspacecast, so we can't do arithmetic directly on the APInt values.
103 return isDereferenceableAndAlignedPointer(
104 V: Base, Alignment, Size: Offset + Size.sextOrTrunc(width: Offset.getBitWidth()), SQ,
105 IgnoreFree, Visited, MaxDepth);
106 }
107
108 // bitcast instructions are no-ops as far as dereferenceability is concerned.
109 if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(Val: V)) {
110 if (BC->getSrcTy()->isPointerTy())
111 return isDereferenceableAndAlignedPointer(V: BC->getOperand(i_nocapture: 0), Alignment,
112 Size, SQ, IgnoreFree, Visited,
113 MaxDepth);
114 }
115
116 // Recurse into both hands of select.
117 if (const SelectInst *Sel = dyn_cast<SelectInst>(Val: V)) {
118 return isDereferenceableAndAlignedPointer(V: Sel->getTrueValue(), Alignment,
119 Size, SQ, IgnoreFree, Visited,
120 MaxDepth) &&
121 isDereferenceableAndAlignedPointer(V: Sel->getFalseValue(), Alignment,
122 Size, SQ, IgnoreFree, Visited,
123 MaxDepth);
124 }
125
126 auto IsKnownDeref = [&]() {
127 bool CheckForNonNull, CheckForFreed;
128 if (!Size.ule(RHS: V->getPointerDereferenceableBytes(DL: SQ.DL, CanBeNull&: CheckForNonNull,
129 CanBeFreed: &CheckForFreed)))
130 return false;
131 if (CheckForNonNull && !isKnownNonZero(V, Q: SQ))
132 return false;
133
134 auto *I = dyn_cast<Instruction>(Val: V);
135 if (CheckForFreed && !IgnoreFree) {
136 const Instruction *DefI;
137 if (I) {
138 // We don't want to consider frees by the instruction producing the
139 // pointer, so skip it if we can.
140 if (auto *II = dyn_cast<InvokeInst>(Val: V)) {
141 DefI = &II->getNormalDest()->front();
142 } else if (!I->isTerminator()) {
143 DefI = I->getNextNode();
144 } else {
145 DefI = I;
146 }
147 } else {
148 // For arguments, check frees from the start of the entry block.
149 DefI = &cast<Argument>(Val: V)->getParent()->getEntryBlock().front();
150 }
151
152 if (!SQ.CxtI || !willNotFreeBetween(Assume: DefI, CtxI: SQ.CxtI))
153 return false;
154 }
155
156 // When using something like !dereferenceable on a load, the
157 // dereferenceability may only be valid on a specific control-flow path.
158 // If the instruction doesn't dominate the context instruction, we're
159 // asking about dereferenceability under the assumption that the
160 // instruction has been speculated to the point of the context instruction,
161 // in which case we don't know if the dereferenceability info still holds.
162 // We don't bother handling allocas here, as they aren't speculatable
163 // anyway.
164 if (I && !isa<AllocaInst>(Val: I))
165 return SQ.CxtI && isValidAssumeForContext(I, CxtI: SQ.CxtI, DT: SQ.DT);
166 return true;
167 };
168 if (IsKnownDeref()) {
169 // As we recursed through GEPs to get here, we've incrementally checked
170 // that each step advanced by a multiple of the alignment. If our base is
171 // properly aligned, then the original offset accessed must also be.
172 return isAligned(Base: V, Alignment, DL: SQ.DL);
173 }
174
175 /// TODO refactor this function to be able to search independently for
176 /// Dereferencability and Alignment requirements.
177
178
179 if (const auto *Call = dyn_cast<CallBase>(Val: V)) {
180 if (auto *RP = getArgumentAliasingToReturnedPointer(
181 Call, /*MustPreserveOffset=*/true))
182 return isDereferenceableAndAlignedPointer(V: RP, Alignment, Size, SQ,
183 IgnoreFree, Visited, MaxDepth);
184
185 // If we have a call we can't recurse through, check to see if this is an
186 // allocation function for which we can establish an minimum object size.
187 // Such a minimum object size is analogous to a deref_or_null attribute in
188 // that we still need to prove the result non-null at point of use.
189 // NOTE: We can only use the object size as a base fact as we a) need to
190 // prove alignment too, and b) don't want the compile time impact of a
191 // separate recursive walk.
192 ObjectSizeOpts Opts;
193 // TODO: It may be okay to round to align, but that would imply that
194 // accessing slightly out of bounds was legal, and we're currently
195 // inconsistent about that. For the moment, be conservative.
196 Opts.RoundToAlign = false;
197 Opts.NullIsUnknownSize = true;
198 uint64_t ObjSize;
199 if (getObjectSize(Ptr: V, Size&: ObjSize, DL: SQ.DL, TLI: SQ.TLI, Opts)) {
200 APInt KnownDerefBytes(Size.getBitWidth(), ObjSize);
201 if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(RHS: Size) &&
202 isKnownNonZero(V, Q: SQ) && !V->canBeFreed()) {
203 // As we recursed through GEPs to get here, we've incrementally
204 // checked that each step advanced by a multiple of the alignment. If
205 // our base is properly aligned, then the original offset accessed
206 // must also be.
207 return isAligned(Base: V, Alignment, DL: SQ.DL);
208 }
209 }
210 }
211
212 // For gc.relocate, look through relocations
213 if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(Val: V))
214 return isDereferenceableAndAlignedPointer(V: RelocateInst->getDerivedPtr(),
215 Alignment, Size, SQ, IgnoreFree,
216 Visited, MaxDepth);
217
218 if (const AddrSpaceCastOperator *ASC = dyn_cast<AddrSpaceCastOperator>(Val: V))
219 return isDereferenceableAndAlignedPointer(
220 V: ASC->getOperand(i_nocapture: 0), Alignment, Size, SQ, IgnoreFree, Visited, MaxDepth);
221
222 return SQ.AC &&
223 isDereferenceableAndAlignedPointerViaAssumption(
224 Ptr: V, Alignment, SQ, IgnoreFree, CheckSize: [Size](const RetainedKnowledge &RK) {
225 return RK.ArgValue >= Size.getZExtValue();
226 });
227}
228
229bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
230 const APInt &Size,
231 const SimplifyQuery &SQ,
232 bool IgnoreFree) {
233 // Note: At the moment, Size can be zero. This ends up being interpreted as
234 // a query of whether [Base, V] is dereferenceable and V is aligned (since
235 // that's what the implementation happened to do). It's unclear if this is
236 // the desired semantic, but at least SelectionDAG does exercise this case.
237
238 SmallPtrSet<const Value *, 32> Visited;
239 return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, SQ,
240 IgnoreFree, Visited,
241 /*MaxDepth=*/16);
242}
243
244bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
245 Align Alignment,
246 const SimplifyQuery &SQ,
247 bool IgnoreFree) {
248 // For unsized types or scalable vectors we don't know exactly how many bytes
249 // are dereferenced, so bail out.
250 if (!Ty->isSized() || Ty->isScalableTy())
251 return false;
252
253 // When dereferenceability information is provided by a dereferenceable
254 // attribute, we know exactly how many bytes are dereferenceable. If we can
255 // determine the exact offset to the attributed variable, we can use that
256 // information here.
257
258 APInt AccessSize(SQ.DL.getPointerTypeSizeInBits(V->getType()),
259 SQ.DL.getTypeStoreSize(Ty));
260 return isDereferenceableAndAlignedPointer(V, Alignment, Size: AccessSize, SQ,
261 IgnoreFree);
262}
263
264bool llvm::isDereferenceablePointer(const Value *V, Type *Ty,
265 const SimplifyQuery &SQ, bool IgnoreFree) {
266 return isDereferenceableAndAlignedPointer(V, Ty, Alignment: Align(1), SQ, IgnoreFree);
267}
268
269bool llvm::isDereferenceablePointer(const Value *V, const APInt &Size,
270 const SimplifyQuery &Q, bool IgnoreFree) {
271 return isDereferenceableAndAlignedPointer(V, Alignment: Align(1), Size, SQ: Q, IgnoreFree);
272}
273
274/// Test if A and B will obviously have the same value.
275///
276/// This includes recognizing that %t0 and %t1 will have the same
277/// value in code like this:
278/// \code
279/// %t0 = getelementptr \@a, 0, 3
280/// store i32 0, i32* %t0
281/// %t1 = getelementptr \@a, 0, 3
282/// %t2 = load i32* %t1
283/// \endcode
284///
285static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
286 // Test if the values are trivially equivalent.
287 if (A == B)
288 return true;
289
290 // Test if the values come from identical arithmetic instructions.
291 // Use isIdenticalToWhenDefined instead of isIdenticalTo because
292 // this function is only used when one address use dominates the
293 // other, which means that they'll always either have the same
294 // value or one of them will have an undefined value.
295 if (isa<CastInst>(Val: A) || isa<PHINode>(Val: A) || isa<GetElementPtrInst>(Val: A))
296 if (const Instruction *BI = dyn_cast<Instruction>(Val: B))
297 if (cast<Instruction>(Val: A)->isIdenticalToWhenDefined(I: BI))
298 return true;
299
300 // Otherwise they may not be equivalent.
301 return false;
302}
303
304bool llvm::isDereferenceableAndAlignedInLoop(
305 LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
306 AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
307 auto &DL = LI->getDataLayout();
308 Value *Ptr = LI->getPointerOperand();
309 const SCEV *PtrSCEV = SE.getSCEV(V: Ptr);
310 APInt EltSize(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()),
311 DL.getTypeStoreSize(Ty: LI->getType()).getFixedValue());
312
313 // If given a uniform (i.e. non-varying) address, see if we can prove the
314 // access is safe within the loop w/o needing predication.
315 if (L->isLoopInvariant(V: Ptr))
316 return isDereferenceableAndAlignedPointer(
317 V: Ptr, Alignment: LI->getAlign(), Size: EltSize,
318 SQ: SimplifyQuery(DL, &DT, AC, &*L->getHeader()->getFirstNonPHIIt()));
319
320 const SCEV *EltSizeSCEV = SE.getConstant(Val: EltSize);
321 return isDereferenceableAndAlignedInLoop(PtrSCEV, Alignment: LI->getAlign(), EltSizeSCEV,
322 L, SE, DT, AC, Predicates);
323}
324
325bool llvm::isDereferenceableAndAlignedInLoop(
326 const SCEV *PtrSCEV, Align Alignment, const SCEV *EltSizeSCEV, Loop *L,
327 ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC,
328 SmallVectorImpl<const SCEVPredicate *> *Predicates) {
329 auto *AddRec = dyn_cast<SCEVAddRecExpr>(Val: PtrSCEV);
330
331 // Check to see if we have a repeating access pattern and it's possible
332 // to prove all accesses are well aligned.
333 if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
334 return false;
335
336 auto *Step = dyn_cast<SCEVConstant>(Val: AddRec->getStepRecurrence(SE));
337 if (!Step)
338 return false;
339
340 const APInt &EltSize = cast<SCEVConstant>(Val: EltSizeSCEV)->getAPInt();
341 // For the moment, restrict ourselves to the case where the access size is a
342 // multiple of the requested alignment and the base is aligned.
343 // TODO: generalize if a case found which warrants
344 if (EltSize.urem(RHS: Alignment.value()) != 0)
345 return false;
346
347 // TODO: Handle overlapping accesses.
348 if (EltSize.ugt(RHS: Step->getAPInt().abs()))
349 return false;
350
351 const SCEV *MaxBECount =
352 Predicates ? SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, Predicates&: *Predicates)
353 : SE.getSymbolicMaxBackedgeTakenCount(L);
354 const SCEV *BECount = Predicates
355 ? SE.getPredicatedBackedgeTakenCount(L, Predicates&: *Predicates)
356 : SE.getBackedgeTakenCount(L);
357 if (isa<SCEVCouldNotCompute>(Val: MaxBECount))
358 return false;
359 std::optional<ScalarEvolution::LoopGuards> LoopGuards;
360
361 auto &DL = L->getHeader()->getDataLayout();
362 const auto &[AccessStart, AccessEnd] =
363 getStartAndEndForAccess(Lp: L, PtrExpr: PtrSCEV, EltSizeSCEV, BTC: BECount, MaxBTC: MaxBECount, SE: &SE,
364 PointerBounds: nullptr, DT: &DT, AC, LoopGuards);
365 if (isa<SCEVCouldNotCompute>(Val: AccessStart) ||
366 isa<SCEVCouldNotCompute>(Val: AccessEnd))
367 return false;
368
369 // Try to get the access size.
370 const SCEV *PtrDiff = SE.getMinusSCEV(LHS: AccessEnd, RHS: AccessStart);
371 if (isa<SCEVCouldNotCompute>(Val: PtrDiff))
372 return false;
373
374 if (!LoopGuards)
375 LoopGuards.emplace(
376 args: ScalarEvolution::LoopGuards::collect(L: AddRec->getLoop(), SE));
377
378 APInt MaxPtrDiff =
379 SE.getUnsignedRangeMax(S: SE.applyLoopGuards(Expr: PtrDiff, Guards: *LoopGuards));
380
381 Value *Base = nullptr;
382 APInt AccessSize;
383 const SCEV *AccessSizeSCEV = nullptr;
384 if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(Val: AccessStart)) {
385 Base = NewBase->getValue();
386 AccessSize = std::move(MaxPtrDiff);
387 AccessSizeSCEV = PtrDiff;
388 } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(Val: AccessStart)) {
389 if (MinAdd->getNumOperands() != 2)
390 return false;
391
392 const auto *Offset = dyn_cast<SCEVConstant>(Val: MinAdd->getOperand(i: 0));
393 const auto *NewBase = dyn_cast<SCEVUnknown>(Val: MinAdd->getOperand(i: 1));
394 if (!Offset || !NewBase)
395 return false;
396
397 // The following code below assumes the offset is unsigned, but GEP
398 // offsets are treated as signed so we can end up with a signed value
399 // here too. For example, suppose the initial PHI value is (i8 255),
400 // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
401 if (Offset->getAPInt().isNegative())
402 return false;
403
404 // For the moment, restrict ourselves to the case where the offset is a
405 // multiple of the requested alignment and the base is aligned.
406 // TODO: generalize if a case found which warrants
407 if (Offset->getAPInt().urem(RHS: Alignment.value()) != 0)
408 return false;
409
410 bool Overflow = false;
411 AccessSize = MaxPtrDiff.uadd_ov(RHS: Offset->getAPInt(), Overflow);
412 if (Overflow)
413 return false;
414 AccessSizeSCEV = SE.getAddExpr(LHS: PtrDiff, RHS: Offset);
415 Base = NewBase->getValue();
416 } else
417 return false;
418
419 Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
420 if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
421 if (isa<UncondBrInst, CondBrInst>(Val: LoopPred->getTerminator()))
422 CtxI = LoopPred->getTerminator();
423 }
424 SimplifyQuery SQ(DL, &DT, AC, CtxI);
425 return isDereferenceableAndAlignedPointerViaAssumption(
426 Ptr: Base, Alignment, SQ, /*IgnoreFree=*/false,
427 CheckSize: [&SE, AccessSizeSCEV, &LoopGuards](const RetainedKnowledge &RK) {
428 return SE.isKnownPredicate(
429 Pred: CmpInst::ICMP_ULE,
430 LHS: SE.applyLoopGuards(Expr: AccessSizeSCEV, Guards: *LoopGuards),
431 RHS: SE.applyLoopGuards(Expr: SE.getSCEV(V: RK.IRArgValue), Guards: *LoopGuards));
432 }) ||
433 isDereferenceableAndAlignedPointer(V: Base, Alignment, Size: AccessSize, SQ);
434}
435
436static bool suppressSpeculativeLoadForSanitizers(const Instruction &CtxI) {
437 const Function &F = *CtxI.getFunction();
438 // Speculative load may create a race that did not exist in the source.
439 return F.hasFnAttribute(Kind: Attribute::SanitizeThread) ||
440 // Speculative load may load data from dirty regions.
441 F.hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
442 F.hasFnAttribute(Kind: Attribute::SanitizeHWAddress);
443}
444
445bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
446 return !LI.isUnordered() || suppressSpeculativeLoadForSanitizers(CtxI: LI);
447}
448
449bool llvm::isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size,
450 const DataLayout &DL,
451 Instruction *ScanFrom,
452 AssumptionCache *AC,
453 const DominatorTree *DT,
454 const TargetLibraryInfo *TLI) {
455 if (isDereferenceableAndAlignedPointer(
456 V, Alignment, Size, SQ: SimplifyQuery(DL, TLI, DT, AC, ScanFrom))) {
457 // With sanitizers `Dereferenceable` is not always enough for unconditional
458 // load.
459 if (!ScanFrom || !suppressSpeculativeLoadForSanitizers(CtxI: *ScanFrom))
460 return true;
461 }
462
463 if (!ScanFrom)
464 return false;
465
466 if (Size.getBitWidth() > 64)
467 return false;
468 const TypeSize LoadSize = TypeSize::getFixed(ExactSize: Size.getZExtValue());
469
470 // Otherwise, be a little bit aggressive by scanning the local block where we
471 // want to check to see if the pointer is already being loaded or stored
472 // from/to. If so, the previous load or store would have already trapped,
473 // so there is no harm doing an extra load (also, CSE will later eliminate
474 // the load entirely).
475 BasicBlock::iterator BBI = ScanFrom->getIterator(),
476 E = ScanFrom->getParent()->begin();
477
478 // We can at least always strip pointer casts even though we can't use the
479 // base here.
480 V = V->stripPointerCasts();
481
482 while (BBI != E) {
483 --BBI;
484
485 // If we see a free or a call which may write to memory (i.e. which might do
486 // a free) the pointer could be marked invalid.
487 if (isa<CallInst>(Val: BBI) && BBI->mayWriteToMemory() &&
488 !isa<LifetimeIntrinsic>(Val: BBI))
489 return false;
490
491 Value *AccessedPtr;
492 Type *AccessedTy;
493 Align AccessedAlign;
494 if (LoadInst *LI = dyn_cast<LoadInst>(Val&: BBI)) {
495 // Ignore volatile loads. The execution of a volatile load cannot
496 // be used to prove an address is backed by regular memory; it can,
497 // for example, point to an MMIO register.
498 if (LI->isVolatile())
499 continue;
500 AccessedPtr = LI->getPointerOperand();
501 AccessedTy = LI->getType();
502 AccessedAlign = LI->getAlign();
503 } else if (StoreInst *SI = dyn_cast<StoreInst>(Val&: BBI)) {
504 // Ignore volatile stores (see comment for loads).
505 if (SI->isVolatile())
506 continue;
507 AccessedPtr = SI->getPointerOperand();
508 AccessedTy = SI->getValueOperand()->getType();
509 AccessedAlign = SI->getAlign();
510 } else
511 continue;
512
513 if (AccessedAlign < Alignment)
514 continue;
515
516 // Handle trivial cases.
517 if (AccessedPtr == V &&
518 TypeSize::isKnownLE(LHS: LoadSize, RHS: DL.getTypeStoreSize(Ty: AccessedTy)))
519 return true;
520
521 if (AreEquivalentAddressValues(A: AccessedPtr->stripPointerCasts(), B: V) &&
522 TypeSize::isKnownLE(LHS: LoadSize, RHS: DL.getTypeStoreSize(Ty: AccessedTy)))
523 return true;
524 }
525 return false;
526}
527
528bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment,
529 const DataLayout &DL,
530 Instruction *ScanFrom,
531 AssumptionCache *AC,
532 const DominatorTree *DT,
533 const TargetLibraryInfo *TLI) {
534 TypeSize TySize = DL.getTypeStoreSize(Ty);
535 if (TySize.isScalable())
536 return false;
537 APInt Size(DL.getIndexTypeSizeInBits(Ty: V->getType()), TySize.getFixedValue());
538 return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, AC, DT,
539 TLI);
540}
541
542/// DefMaxInstsToScan - the default number of maximum instructions
543/// to scan in the block, used by FindAvailableLoadedValue().
544/// FindAvailableLoadedValue() was introduced in r60148, to improve jump
545/// threading in part by eliminating partially redundant loads.
546/// At that point, the value of MaxInstsToScan was already set to '6'
547/// without documented explanation.
548cl::opt<unsigned>
549llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(Val: 6), cl::Hidden,
550 cl::desc("Use this to specify the default maximum number of instructions "
551 "to scan backward from a given instruction, when searching for "
552 "available loaded value"));
553
554Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
555 BasicBlock::iterator &ScanFrom,
556 unsigned MaxInstsToScan,
557 BatchAAResults *AA, bool *IsLoad,
558 unsigned *NumScanedInst) {
559 // Don't CSE load that is volatile or anything stronger than unordered.
560 if (!Load->isUnordered())
561 return nullptr;
562
563 MemoryLocation Loc = MemoryLocation::get(LI: Load);
564 return findAvailablePtrLoadStore(Loc, AccessTy: Load->getType(), AtLeastAtomic: Load->isAtomic(),
565 ScanBB, ScanFrom, MaxInstsToScan, AA, IsLoadCSE: IsLoad,
566 NumScanedInst);
567}
568
569// Check if the load and the store have the same base, constant offsets and
570// non-overlapping access ranges.
571static bool areNonOverlapSameBaseLoadAndStore(const Value *LoadPtr,
572 Type *LoadTy,
573 const Value *StorePtr,
574 Type *StoreTy,
575 const DataLayout &DL) {
576 APInt LoadOffset(DL.getIndexTypeSizeInBits(Ty: LoadPtr->getType()), 0);
577 APInt StoreOffset(DL.getIndexTypeSizeInBits(Ty: StorePtr->getType()), 0);
578 if (LoadOffset.getBitWidth() != StoreOffset.getBitWidth())
579 return false;
580 const Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets(
581 DL, Offset&: LoadOffset, /* AllowNonInbounds */ false);
582 const Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets(
583 DL, Offset&: StoreOffset, /* AllowNonInbounds */ false);
584 if (LoadBase != StoreBase)
585 return false;
586 auto LoadAccessSize = LocationSize::precise(Value: DL.getTypeStoreSize(Ty: LoadTy));
587 auto StoreAccessSize = LocationSize::precise(Value: DL.getTypeStoreSize(Ty: StoreTy));
588 ConstantRange LoadRange(LoadOffset,
589 LoadOffset + LoadAccessSize.toRaw());
590 ConstantRange StoreRange(StoreOffset,
591 StoreOffset + StoreAccessSize.toRaw());
592 return LoadRange.intersectWith(CR: StoreRange).isEmptySet();
593}
594
595static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
596 Type *AccessTy, bool AtLeastAtomic,
597 const DataLayout &DL, bool *IsLoadCSE) {
598 // If this is a load of Ptr, the loaded value is available.
599 // (This is true even if the load is volatile or atomic, although
600 // those cases are unlikely.)
601 if (LoadInst *LI = dyn_cast<LoadInst>(Val: Inst)) {
602 // We can value forward from an atomic to a non-atomic, but not the
603 // other way around.
604 if (LI->isAtomic() < AtLeastAtomic)
605 return nullptr;
606
607 Value *LoadPtr = LI->getPointerOperand()->stripPointerCasts();
608 if (!AreEquivalentAddressValues(A: LoadPtr, B: Ptr))
609 return nullptr;
610
611 if (CastInst::isBitOrNoopPointerCastable(SrcTy: LI->getType(), DestTy: AccessTy, DL)) {
612 if (IsLoadCSE)
613 *IsLoadCSE = true;
614 return LI;
615 }
616 }
617
618 // If this is a store through Ptr, the value is available!
619 // (This is true even if the store is volatile or atomic, although
620 // those cases are unlikely.)
621 if (StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
622 // We can value forward from an atomic to a non-atomic, but not the
623 // other way around.
624 if (SI->isAtomic() < AtLeastAtomic)
625 return nullptr;
626
627 Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
628 if (!AreEquivalentAddressValues(A: StorePtr, B: Ptr))
629 return nullptr;
630
631 if (IsLoadCSE)
632 *IsLoadCSE = false;
633
634 Value *Val = SI->getValueOperand();
635 if (CastInst::isBitOrNoopPointerCastable(SrcTy: Val->getType(), DestTy: AccessTy, DL))
636 return Val;
637
638 TypeSize StoreSize = DL.getTypeSizeInBits(Ty: Val->getType());
639 TypeSize LoadSize = DL.getTypeSizeInBits(Ty: AccessTy);
640 if (TypeSize::isKnownLE(LHS: LoadSize, RHS: StoreSize))
641 if (auto *C = dyn_cast<Constant>(Val))
642 return ConstantFoldLoadFromConst(C, Ty: AccessTy, DL);
643 }
644
645 if (auto *MSI = dyn_cast<MemSetInst>(Val: Inst)) {
646 // Don't forward from (non-atomic) memset to atomic load.
647 if (AtLeastAtomic)
648 return nullptr;
649
650 // Only handle constant memsets.
651 auto *Val = dyn_cast<ConstantInt>(Val: MSI->getValue());
652 auto *Len = dyn_cast<ConstantInt>(Val: MSI->getLength());
653 if (!Val || !Len)
654 return nullptr;
655
656 // Handle offsets.
657 int64_t StoreOffset = 0, LoadOffset = 0;
658 const Value *StoreBase =
659 GetPointerBaseWithConstantOffset(Ptr: MSI->getDest(), Offset&: StoreOffset, DL);
660 const Value *LoadBase =
661 GetPointerBaseWithConstantOffset(Ptr, Offset&: LoadOffset, DL);
662 if (StoreBase != LoadBase || LoadOffset < StoreOffset)
663 return nullptr;
664
665 if (IsLoadCSE)
666 *IsLoadCSE = false;
667
668 TypeSize LoadTypeSize = DL.getTypeSizeInBits(Ty: AccessTy);
669 if (LoadTypeSize.isScalable())
670 return nullptr;
671
672 // Make sure the read bytes are contained in the memset.
673 uint64_t LoadSize = LoadTypeSize.getFixedValue();
674 if ((Len->getValue() * 8).ult(RHS: LoadSize + (LoadOffset - StoreOffset) * 8))
675 return nullptr;
676
677 APInt Splat = LoadSize >= 8 ? APInt::getSplat(NewLen: LoadSize, V: Val->getValue())
678 : Val->getValue().trunc(width: LoadSize);
679 ConstantInt *SplatC = ConstantInt::get(Context&: MSI->getContext(), V: Splat);
680 if (CastInst::isBitOrNoopPointerCastable(SrcTy: SplatC->getType(), DestTy: AccessTy, DL))
681 return SplatC;
682
683 return nullptr;
684 }
685
686 return nullptr;
687}
688
689Value *llvm::findAvailablePtrLoadStore(
690 const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic,
691 BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan,
692 BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
693 if (MaxInstsToScan == 0)
694 MaxInstsToScan = ~0U;
695
696 const DataLayout &DL = ScanBB->getDataLayout();
697 const Value *StrippedPtr = Loc.Ptr->stripPointerCasts();
698
699 while (ScanFrom != ScanBB->begin()) {
700 // We must ignore debug info directives when counting (otherwise they
701 // would affect codegen).
702 Instruction *Inst = &*--ScanFrom;
703 if (Inst->isDebugOrPseudoInst())
704 continue;
705
706 // Restore ScanFrom to expected value in case next test succeeds
707 ScanFrom++;
708
709 if (NumScanedInst)
710 ++(*NumScanedInst);
711
712 // Don't scan huge blocks.
713 if (MaxInstsToScan-- == 0)
714 return nullptr;
715
716 --ScanFrom;
717
718 if (Value *Available = getAvailableLoadStore(Inst, Ptr: StrippedPtr, AccessTy,
719 AtLeastAtomic, DL, IsLoadCSE))
720 return Available;
721
722 // Try to get the store size for the type.
723 if (StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
724 Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
725
726 // If both StrippedPtr and StorePtr reach all the way to an alloca or
727 // global and they are different, ignore the store. This is a trivial form
728 // of alias analysis that is important for reg2mem'd code.
729 if ((isa<AllocaInst>(Val: StrippedPtr) || isa<GlobalVariable>(Val: StrippedPtr)) &&
730 (isa<AllocaInst>(Val: StorePtr) || isa<GlobalVariable>(Val: StorePtr)) &&
731 StrippedPtr != StorePtr)
732 continue;
733
734 if (!AA) {
735 // When AA isn't available, but if the load and the store have the same
736 // base, constant offsets and non-overlapping access ranges, ignore the
737 // store. This is a simple form of alias analysis that is used by the
738 // inliner. FIXME: use BasicAA if possible.
739 if (areNonOverlapSameBaseLoadAndStore(
740 LoadPtr: Loc.Ptr, LoadTy: AccessTy, StorePtr: SI->getPointerOperand(),
741 StoreTy: SI->getValueOperand()->getType(), DL))
742 continue;
743 } else {
744 // If we have alias analysis and it says the store won't modify the
745 // loaded value, ignore the store.
746 if (!isModSet(MRI: AA->getModRefInfo(I: SI, OptLoc: Loc)))
747 continue;
748 }
749
750 // Otherwise the store that may or may not alias the pointer, bail out.
751 ++ScanFrom;
752 return nullptr;
753 }
754
755 // If this is some other instruction that may clobber Ptr, bail out.
756 if (Inst->mayWriteToMemory()) {
757 // If alias analysis claims that it really won't modify the load,
758 // ignore it.
759 if (AA && !isModSet(MRI: AA->getModRefInfo(I: Inst, OptLoc: Loc)))
760 continue;
761
762 // May modify the pointer, bail out.
763 ++ScanFrom;
764 return nullptr;
765 }
766 }
767
768 // Got to the start of the block, we didn't find it, but are done for this
769 // block.
770 return nullptr;
771}
772
773Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
774 bool *IsLoadCSE,
775 unsigned MaxInstsToScan) {
776 const DataLayout &DL = Load->getDataLayout();
777 Value *StrippedPtr = Load->getPointerOperand()->stripPointerCasts();
778 BasicBlock *ScanBB = Load->getParent();
779 Type *AccessTy = Load->getType();
780 bool AtLeastAtomic = Load->isAtomic();
781
782 if (!Load->isUnordered())
783 return nullptr;
784
785 // Try to find an available value first, and delay expensive alias analysis
786 // queries until later.
787 Value *Available = nullptr;
788 SmallVector<Instruction *> MustNotAliasInsts;
789 for (Instruction &Inst : make_range(x: ++Load->getReverseIterator(),
790 y: ScanBB->rend())) {
791 if (Inst.isDebugOrPseudoInst())
792 continue;
793
794 if (MaxInstsToScan-- == 0)
795 return nullptr;
796
797 Available = getAvailableLoadStore(Inst: &Inst, Ptr: StrippedPtr, AccessTy,
798 AtLeastAtomic, DL, IsLoadCSE);
799 if (Available)
800 break;
801
802 if (Inst.mayWriteToMemory())
803 MustNotAliasInsts.push_back(Elt: &Inst);
804 }
805
806 // If we found an available value, ensure that the instructions in between
807 // did not modify the memory location.
808 if (Available) {
809 MemoryLocation Loc = MemoryLocation::get(LI: Load);
810 for (Instruction *Inst : MustNotAliasInsts)
811 if (isModSet(MRI: AA.getModRefInfo(I: Inst, OptLoc: Loc)))
812 return nullptr;
813 }
814
815 return Available;
816}
817
818// Returns true if a use is either in an ICmp/PtrToInt or a Phi/Select that only
819// feeds into them.
820static bool isPointerUseReplacable(const Use &U, bool HasNonAddressBits) {
821 unsigned Limit = 40;
822 SmallVector<const User *> Worklist({U.getUser()});
823 SmallPtrSet<const User *, 8> Visited;
824
825 while (!Worklist.empty() && --Limit) {
826 auto *User = Worklist.pop_back_val();
827 if (!Visited.insert(Ptr: User).second)
828 continue;
829 if (isa<ICmpInst, PtrToAddrInst>(Val: User))
830 continue;
831 // FIXME: The PtrToIntInst case here is not strictly correct, as it
832 // changes which provenance is exposed.
833 if (!HasNonAddressBits && isa<PtrToIntInst>(Val: User))
834 continue;
835 if (isa<PHINode, SelectInst>(Val: User))
836 Worklist.append(in_start: User->user_begin(), in_end: User->user_end());
837 else
838 return false;
839 }
840
841 return Limit != 0;
842}
843
844static bool isPointerAlwaysReplaceable(const Value *From, const Value *To,
845 const DataLayout &DL) {
846 // This is not strictly correct, but we do it for now to retain important
847 // optimizations.
848 if (isa<ConstantPointerNull>(Val: To))
849 return true;
850 // Conversely, replacing null in the default address space with destination
851 // pointer is always valid.
852 if (isa<ConstantPointerNull>(Val: From) &&
853 From->getType()->getPointerAddressSpace() == 0)
854 return true;
855 if (isa<Constant>(Val: To) && To->getType()->isPointerTy() &&
856 isDereferenceablePointer(V: To, Ty: Type::getInt8Ty(C&: To->getContext()), SQ: DL))
857 return true;
858 return getUnderlyingObjectAggressive(V: From) ==
859 getUnderlyingObjectAggressive(V: To);
860}
861
862bool llvm::canReplacePointersInUseIfEqual(const Use &U, const Value *To,
863 const DataLayout &DL) {
864 Type *Ty = To->getType();
865 assert(U->getType() == Ty && "values must have matching types");
866 // Not a pointer, just return true.
867 if (!Ty->isPtrOrPtrVectorTy())
868 return true;
869
870 // Do not perform replacements in lifetime intrinsic arguments.
871 if (isa<LifetimeIntrinsic>(Val: U.getUser()))
872 return false;
873
874 if (isPointerAlwaysReplaceable(From: &*U, To, DL))
875 return true;
876
877 bool HasNonAddressBits =
878 DL.getAddressSizeInBits(Ty) != DL.getPointerTypeSizeInBits(Ty);
879 return isPointerUseReplacable(U, HasNonAddressBits);
880}
881
882bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
883 const DataLayout &DL) {
884 assert(From->getType() == To->getType() && "values must have matching types");
885 // Not a pointer, just return true.
886 if (!From->getType()->isPtrOrPtrVectorTy())
887 return true;
888
889 return isPointerAlwaysReplaceable(From, To, DL);
890}
891
892bool llvm::isReadOnlyLoop(
893 Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
894 SmallVectorImpl<LoadInst *> &NonDereferenceableAndAlignedLoads,
895 SmallVectorImpl<const SCEVPredicate *> *Predicates) {
896 for (BasicBlock *BB : L->blocks()) {
897 for (Instruction &I : *BB) {
898 if (auto *LI = dyn_cast<LoadInst>(Val: &I)) {
899 if (!isDereferenceableAndAlignedInLoop(LI, L, SE&: *SE, DT&: *DT, AC, Predicates))
900 NonDereferenceableAndAlignedLoads.push_back(Elt: LI);
901 } else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
902 I.mayThrow()) {
903 return false;
904 }
905 }
906 }
907 return true;
908}
909
910LinearExpression llvm::decomposeLinearExpression(const DataLayout &DL,
911 Value *Ptr) {
912 assert(Ptr->getType()->isPointerTy() && "Must be called with pointer arg");
913
914 unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: Ptr->getType());
915 LinearExpression Expr(Ptr, BitWidth);
916
917 while (true) {
918 auto *GEP = dyn_cast<GEPOperator>(Val: Expr.BasePtr);
919 if (!GEP || GEP->getSourceElementType()->isScalableTy())
920 return Expr;
921
922 Value *VarIndex = nullptr;
923 for (Value *Index : GEP->indices()) {
924 if (isa<ConstantInt>(Val: Index))
925 continue;
926 // Only allow a single variable index. We do not bother to handle the
927 // case of the same variable index appearing multiple times.
928 if (Expr.Index || VarIndex)
929 return Expr;
930 VarIndex = Index;
931 }
932
933 // Don't return non-canonical indexes.
934 if (VarIndex && !VarIndex->getType()->isIntegerTy(BitWidth))
935 return Expr;
936
937 // We have verified that we can fully handle this GEP, so we can update Expr
938 // members past this point.
939 Expr.BasePtr = GEP->getPointerOperand();
940 Expr.Flags = Expr.Flags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags());
941 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
942 GTI != GTE; ++GTI) {
943 Value *Index = GTI.getOperand();
944 if (auto *ConstOffset = dyn_cast<ConstantInt>(Val: Index)) {
945 if (ConstOffset->isZero())
946 continue;
947 if (StructType *STy = GTI.getStructTypeOrNull()) {
948 unsigned ElementIdx = ConstOffset->getZExtValue();
949 const StructLayout *SL = DL.getStructLayout(Ty: STy);
950 Expr.Offset += SL->getElementOffset(Idx: ElementIdx);
951 continue;
952 }
953 // Truncate if type size exceeds index space.
954 APInt IndexedSize(BitWidth, GTI.getSequentialElementStride(DL),
955 /*isSigned=*/false,
956 /*implcitTrunc=*/true);
957 Expr.Offset += ConstOffset->getValue() * IndexedSize;
958 continue;
959 }
960
961 // FIXME: Also look through a mul/shl in the index.
962 assert(Expr.Index == nullptr && "Shouldn't have index yet");
963 Expr.Index = Index;
964 // Truncate if type size exceeds index space.
965 Expr.Scale = APInt(BitWidth, GTI.getSequentialElementStride(DL),
966 /*isSigned=*/false, /*implicitTrunc=*/true);
967 }
968 }
969
970 return Expr;
971}
972