1//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the TypeBasedAliasAnalysis pass, which implements
10// metadata-based TBAA.
11//
12// In LLVM IR, memory does not have types, so LLVM's own type system is not
13// suitable for doing TBAA. Instead, metadata is added to the IR to describe
14// a type system of a higher level language. This can be used to implement
15// typical C/C++ TBAA, but it can also be used to implement custom alias
16// analysis behavior for other languages.
17//
18// We now support two types of metadata format: scalar TBAA and struct-path
19// aware TBAA. After all testing cases are upgraded to use struct-path aware
20// TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA
21// can be dropped.
22//
23// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
24// three fields, e.g.:
25// !0 = !{ !"an example type tree" }
26// !1 = !{ !"int", !0 }
27// !2 = !{ !"float", !0 }
28// !3 = !{ !"const float", !2, i64 1 }
29//
30// The first field is an identity field. It can be any value, usually
31// an MDString, which uniquely identifies the type. The most important
32// name in the tree is the name of the root node. Two trees with
33// different root node names are entirely disjoint, even if they
34// have leaves with common names.
35//
36// The second field identifies the type's parent node in the tree, or
37// is null or omitted for a root node. A type is considered to alias
38// all of its descendants and all of its ancestors in the tree. Also,
39// a type is considered to alias all types in other trees, so that
40// bitcode produced from multiple front-ends is handled conservatively.
41//
42// If the third field is present, it's an integer which if equal to 1
43// indicates that the type is "constant" (meaning pointsToConstantMemory
44// should return true; see
45// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
46//
47// With struct-path aware TBAA, the MDNodes attached to an instruction using
48// "!tbaa" are called path tag nodes.
49//
50// The path tag node has 4 fields with the last field being optional.
51//
52// The first field is the base type node, it can be a struct type node
53// or a scalar type node. The second field is the access type node, it
54// must be a scalar type node. The third field is the offset into the base type.
55// The last field has the same meaning as the last field of our scalar TBAA:
56// it's an integer which if equal to 1 indicates that the access is "constant".
57//
58// The struct type node has a name and a list of pairs, one pair for each member
59// of the struct. The first element of each pair is a type node (a struct type
60// node or a scalar type node), specifying the type of the member, the second
61// element of each pair is the offset of the member.
62//
63// Given an example
64// typedef struct {
65// short s;
66// } A;
67// typedef struct {
68// uint16_t s;
69// A a;
70// } B;
71//
72// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
73// instruction. The base type is !4 (struct B), the access type is !2 (scalar
74// type short) and the offset is 4.
75//
76// !0 = !{!"Simple C/C++ TBAA"}
77// !1 = !{!"omnipotent char", !0} // Scalar type node
78// !2 = !{!"short", !1} // Scalar type node
79// !3 = !{!"A", !2, i64 0} // Struct type node
80// !4 = !{!"B", !2, i64 0, !3, i64 4}
81// // Struct type node
82// !5 = !{!4, !2, i64 4} // Path tag node
83//
84// The struct type nodes and the scalar type nodes form a type DAG.
85// Root (!0)
86// char (!1) -- edge to Root
87// short (!2) -- edge to char
88// A (!3) -- edge with offset 0 to short
89// B (!4) -- edge with offset 0 to short and edge with offset 4 to A
90//
91// To check if two tags (tagX and tagY) can alias, we start from the base type
92// of tagX, follow the edge with the correct offset in the type DAG and adjust
93// the offset until we reach the base type of tagY or until we reach the Root
94// node.
95// If we reach the base type of tagY, compare the adjusted offset with
96// offset of tagY, return Alias if the offsets are the same, return NoAlias
97// otherwise.
98// If we reach the Root node, perform the above starting from base type of tagY
99// to see if we reach base type of tagX.
100//
101// If they have different roots, they're part of different potentially
102// unrelated type systems, so we return Alias to be conservative.
103// If neither node is an ancestor of the other and they have the same root,
104// then we say NoAlias.
105//
106//===----------------------------------------------------------------------===//
107
108#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
109#include "llvm/ADT/SetVector.h"
110#include "llvm/Analysis/AliasAnalysis.h"
111#include "llvm/Analysis/MemoryLocation.h"
112#include "llvm/IR/Constants.h"
113#include "llvm/IR/DataLayout.h"
114#include "llvm/IR/DerivedTypes.h"
115#include "llvm/IR/InstrTypes.h"
116#include "llvm/IR/LLVMContext.h"
117#include "llvm/IR/Metadata.h"
118#include "llvm/IR/Module.h"
119#include "llvm/InitializePasses.h"
120#include "llvm/Pass.h"
121#include "llvm/Support/Casting.h"
122#include "llvm/Support/CommandLine.h"
123#include "llvm/Support/ErrorHandling.h"
124#include <cassert>
125#include <cstdint>
126
127using namespace llvm;
128
129// A handy option for disabling TBAA functionality. The same effect can also be
130// achieved by stripping the !tbaa tags from IR, but this option is sometimes
131// more convenient.
132static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(Val: true), cl::Hidden);
133
134namespace {
135
136/// isNewFormatTypeNode - Return true iff the given type node is in the new
137/// size-aware format.
138static bool isNewFormatTypeNode(const MDNode *N) {
139 if (N->getNumOperands() < 3)
140 return false;
141 // In the old format the first operand is a string.
142 if (!isa<MDNode>(Val: N->getOperand(I: 0)))
143 return false;
144 return true;
145}
146
147/// This is a simple wrapper around an MDNode which provides a higher-level
148/// interface by hiding the details of how alias analysis information is encoded
149/// in its operands.
150template<typename MDNodeTy>
151class TBAANodeImpl {
152 MDNodeTy *Node = nullptr;
153
154public:
155 TBAANodeImpl() = default;
156 explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {}
157
158 /// getNode - Get the MDNode for this TBAANode.
159 MDNodeTy *getNode() const { return Node; }
160
161 /// isNewFormat - Return true iff the wrapped type node is in the new
162 /// size-aware format.
163 bool isNewFormat() const { return isNewFormatTypeNode(Node); }
164
165 /// getParent - Get this TBAANode's Alias tree parent.
166 TBAANodeImpl<MDNodeTy> getParent() const {
167 if (isNewFormat())
168 return TBAANodeImpl(cast<MDNodeTy>(Node->getOperand(0)));
169
170 if (Node->getNumOperands() < 2)
171 return TBAANodeImpl<MDNodeTy>();
172 MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1));
173 if (!P)
174 return TBAANodeImpl<MDNodeTy>();
175 // Ok, this node has a valid parent. Return it.
176 return TBAANodeImpl<MDNodeTy>(P);
177 }
178
179 /// Test if this TBAANode represents a type for objects which are
180 /// not modified (by any means) in the context where this
181 /// AliasAnalysis is relevant.
182 bool isTypeImmutable() const {
183 if (Node->getNumOperands() < 3)
184 return false;
185 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
186 if (!CI)
187 return false;
188 return CI->getValue()[0];
189 }
190};
191
192/// \name Specializations of \c TBAANodeImpl for const and non const qualified
193/// \c MDNode.
194/// @{
195using TBAANode = TBAANodeImpl<const MDNode>;
196using MutableTBAANode = TBAANodeImpl<MDNode>;
197/// @}
198
199/// This is a simple wrapper around an MDNode which provides a
200/// higher-level interface by hiding the details of how alias analysis
201/// information is encoded in its operands.
202template<typename MDNodeTy>
203class TBAAStructTagNodeImpl {
204 /// This node should be created with createTBAAAccessTag().
205 MDNodeTy *Node;
206
207public:
208 explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {}
209
210 /// Get the MDNode for this TBAAStructTagNode.
211 MDNodeTy *getNode() const { return Node; }
212
213 /// isNewFormat - Return true iff the wrapped access tag is in the new
214 /// size-aware format.
215 bool isNewFormat() const {
216 if (Node->getNumOperands() < 4)
217 return false;
218 if (MDNodeTy *AccessType = getAccessType())
219 if (!TBAANodeImpl<MDNodeTy>(AccessType).isNewFormat())
220 return false;
221 return true;
222 }
223
224 MDNodeTy *getBaseType() const {
225 return dyn_cast_or_null<MDNode>(Node->getOperand(0));
226 }
227
228 MDNodeTy *getAccessType() const {
229 return dyn_cast_or_null<MDNode>(Node->getOperand(1));
230 }
231
232 uint64_t getOffset() const {
233 return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
234 }
235
236 uint64_t getSize() const {
237 if (!isNewFormat())
238 return UINT64_MAX;
239 return mdconst::extract<ConstantInt>(Node->getOperand(3))->getZExtValue();
240 }
241
242 /// Test if this TBAAStructTagNode represents a type for objects
243 /// which are not modified (by any means) in the context where this
244 /// AliasAnalysis is relevant.
245 bool isTypeImmutable() const {
246 unsigned OpNo = isNewFormat() ? 4 : 3;
247 if (Node->getNumOperands() < OpNo + 1)
248 return false;
249 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(OpNo));
250 if (!CI)
251 return false;
252 return CI->getValue()[0];
253 }
254};
255
256/// \name Specializations of \c TBAAStructTagNodeImpl for const and non const
257/// qualified \c MDNods.
258/// @{
259using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>;
260using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>;
261/// @}
262
263/// This is a simple wrapper around an MDNode which provides a
264/// higher-level interface by hiding the details of how alias analysis
265/// information is encoded in its operands.
266class TBAAStructTypeNode {
267 /// This node should be created with createTBAATypeNode().
268 const MDNode *Node = nullptr;
269
270public:
271 TBAAStructTypeNode() = default;
272 explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
273
274 /// Get the MDNode for this TBAAStructTypeNode.
275 const MDNode *getNode() const { return Node; }
276
277 /// isNewFormat - Return true iff the wrapped type node is in the new
278 /// size-aware format.
279 bool isNewFormat() const { return isNewFormatTypeNode(N: Node); }
280
281 bool operator==(const TBAAStructTypeNode &Other) const {
282 return getNode() == Other.getNode();
283 }
284
285 /// getId - Return type identifier.
286 Metadata *getId() const {
287 return Node->getOperand(I: isNewFormat() ? 2 : 0);
288 }
289
290 unsigned getNumFields() const {
291 unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1;
292 unsigned NumOpsPerField = isNewFormat() ? 3 : 2;
293 return (getNode()->getNumOperands() - FirstFieldOpNo) / NumOpsPerField;
294 }
295
296 TBAAStructTypeNode getFieldType(unsigned FieldIndex) const {
297 unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1;
298 unsigned NumOpsPerField = isNewFormat() ? 3 : 2;
299 unsigned OpIndex = FirstFieldOpNo + FieldIndex * NumOpsPerField;
300 auto *TypeNode = cast<MDNode>(Val: getNode()->getOperand(I: OpIndex));
301 return TBAAStructTypeNode(TypeNode);
302 }
303
304 /// Get this TBAAStructTypeNode's field in the type DAG with
305 /// given offset. Update the offset to be relative to the field type.
306 TBAAStructTypeNode getField(uint64_t &Offset) const {
307 bool NewFormat = isNewFormat();
308 const ArrayRef<MDOperand> Operands = Node->operands();
309 const unsigned NumOperands = Operands.size();
310
311 if (NewFormat) {
312 // New-format root and scalar type nodes have no fields.
313 if (NumOperands < 6)
314 return TBAAStructTypeNode();
315 } else {
316 // Parent can be omitted for the root node.
317 if (NumOperands < 2)
318 return TBAAStructTypeNode();
319
320 // Fast path for a scalar type node and a struct type node with a single
321 // field.
322 if (NumOperands <= 3) {
323 uint64_t Cur =
324 NumOperands == 2
325 ? 0
326 : mdconst::extract<ConstantInt>(MD: Operands[2])->getZExtValue();
327 Offset -= Cur;
328 MDNode *P = dyn_cast_or_null<MDNode>(Val: Operands[1]);
329 if (!P)
330 return TBAAStructTypeNode();
331 return TBAAStructTypeNode(P);
332 }
333 }
334
335 // Assume the offsets are in order. We return the previous field if
336 // the current offset is bigger than the given offset.
337 unsigned FirstFieldOpNo = NewFormat ? 3 : 1;
338 unsigned NumOpsPerField = NewFormat ? 3 : 2;
339 unsigned TheIdx = 0;
340
341 for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
342 Idx += NumOpsPerField) {
343 uint64_t Cur =
344 mdconst::extract<ConstantInt>(MD: Operands[Idx + 1])->getZExtValue();
345 if (Cur > Offset) {
346 assert(Idx >= FirstFieldOpNo + NumOpsPerField &&
347 "TBAAStructTypeNode::getField should have an offset match!");
348 TheIdx = Idx - NumOpsPerField;
349 break;
350 }
351 }
352 // Move along the last field.
353 if (TheIdx == 0)
354 TheIdx = NumOperands - NumOpsPerField;
355 uint64_t Cur =
356 mdconst::extract<ConstantInt>(MD: Operands[TheIdx + 1])->getZExtValue();
357 Offset -= Cur;
358 MDNode *P = dyn_cast_or_null<MDNode>(Val: Operands[TheIdx]);
359 if (!P)
360 return TBAAStructTypeNode();
361 return TBAAStructTypeNode(P);
362 }
363};
364
365} // end anonymous namespace
366
367/// Check the first operand of the tbaa tag node, if it is a MDNode, we treat
368/// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA
369/// format.
370static bool isStructPathTBAA(const MDNode *MD) {
371 // Anonymous TBAA root starts with a MDNode and dragonegg uses it as
372 // a TBAA tag.
373 return isa<MDNode>(Val: MD->getOperand(I: 0)) && MD->getNumOperands() >= 3;
374}
375
376AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
377 const MemoryLocation &LocB,
378 AAQueryInfo &AAQI, const Instruction *) {
379 if (!shouldUseTBAA())
380 return AliasResult::MayAlias;
381
382 if (Aliases(A: LocA.AATags.TBAA, B: LocB.AATags.TBAA))
383 return AliasResult::MayAlias;
384
385 // Otherwise return a definitive result.
386 return AliasResult::NoAlias;
387}
388
389AliasResult TypeBasedAAResult::aliasErrno(const MemoryLocation &Loc,
390 const Module *M) {
391 if (!shouldUseTBAA())
392 return AliasResult::MayAlias;
393
394 const auto *N = Loc.AATags.TBAA;
395 if (!N)
396 return AliasResult::MayAlias;
397
398 // There cannot be any alias with errno if TBAA proves the given memory
399 // location does not alias errno.
400 const auto *ErrnoTBAAMD = M->getNamedMetadata(Name: "llvm.errno.tbaa");
401 if (!ErrnoTBAAMD || any_of(Range: ErrnoTBAAMD->operands(), P: [&](const auto *Node) {
402 return Aliases(A: N, B: Node);
403 }))
404 return AliasResult::MayAlias;
405 return AliasResult::NoAlias;
406}
407
408ModRefInfo TypeBasedAAResult::getModRefInfoMask(const MemoryLocation &Loc,
409 AAQueryInfo &AAQI,
410 bool IgnoreLocals) {
411 if (!shouldUseTBAA())
412 return ModRefInfo::ModRef;
413
414 const MDNode *M = Loc.AATags.TBAA;
415 if (!M)
416 return ModRefInfo::ModRef;
417
418 // If this is an "immutable" type, we can assume the pointer is pointing
419 // to constant memory.
420 if ((!isStructPathTBAA(MD: M) && TBAANode(M).isTypeImmutable()) ||
421 (isStructPathTBAA(MD: M) && TBAAStructTagNode(M).isTypeImmutable()))
422 return ModRefInfo::NoModRef;
423
424 return ModRefInfo::ModRef;
425}
426
427MemoryEffects TypeBasedAAResult::getMemoryEffects(const CallBase *Call,
428 AAQueryInfo &AAQI) {
429 if (!shouldUseTBAA())
430 return MemoryEffects::unknown();
431
432 // If this is an "immutable" type, the access is not observable.
433 if (const MDNode *M = Call->getMetadata(KindID: LLVMContext::MD_tbaa))
434 if ((!isStructPathTBAA(MD: M) && TBAANode(M).isTypeImmutable()) ||
435 (isStructPathTBAA(MD: M) && TBAAStructTagNode(M).isTypeImmutable()))
436 return MemoryEffects::none();
437
438 return MemoryEffects::unknown();
439}
440
441MemoryEffects TypeBasedAAResult::getMemoryEffects(const Function *F) {
442 // Functions don't have metadata.
443 return MemoryEffects::unknown();
444}
445
446ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
447 const MemoryLocation &Loc,
448 AAQueryInfo &AAQI) {
449 if (!shouldUseTBAA())
450 return ModRefInfo::ModRef;
451
452 if (const MDNode *L = Loc.AATags.TBAA)
453 if (const MDNode *M = Call->getMetadata(KindID: LLVMContext::MD_tbaa))
454 if (!Aliases(A: L, B: M))
455 return ModRefInfo::NoModRef;
456
457 return ModRefInfo::ModRef;
458}
459
460ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
461 const CallBase *Call2,
462 AAQueryInfo &AAQI) {
463 if (!shouldUseTBAA())
464 return ModRefInfo::ModRef;
465
466 if (const MDNode *M1 = Call1->getMetadata(KindID: LLVMContext::MD_tbaa))
467 if (const MDNode *M2 = Call2->getMetadata(KindID: LLVMContext::MD_tbaa))
468 if (!Aliases(A: M1, B: M2))
469 return ModRefInfo::NoModRef;
470
471 return ModRefInfo::ModRef;
472}
473
474bool MDNode::isTBAAVtableAccess() const {
475 if (!isStructPathTBAA(MD: this)) {
476 if (getNumOperands() < 1)
477 return false;
478 if (MDString *Tag1 = dyn_cast<MDString>(Val: getOperand(I: 0))) {
479 if (Tag1->getString() == "vtable pointer")
480 return true;
481 }
482 return false;
483 }
484
485 // For struct-path aware TBAA, we use the access type of the tag.
486 TBAAStructTagNode Tag(this);
487 TBAAStructTypeNode AccessType(Tag.getAccessType());
488 if(auto *Id = dyn_cast<MDString>(Val: AccessType.getId()))
489 if (Id->getString() == "vtable pointer")
490 return true;
491 return false;
492}
493
494static bool matchAccessTags(const MDNode *A, const MDNode *B,
495 const MDNode **GenericTag = nullptr);
496
497MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
498 const MDNode *GenericTag;
499 matchAccessTags(A, B, GenericTag: &GenericTag);
500 return const_cast<MDNode*>(GenericTag);
501}
502
503static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) {
504 if (!A || !B)
505 return nullptr;
506
507 if (A == B)
508 return A;
509
510 SmallSetVector<const MDNode *, 4> PathA;
511 TBAANode TA(A);
512 while (TA.getNode()) {
513 if (!PathA.insert(X: TA.getNode()))
514 report_fatal_error(reason: "Cycle found in TBAA metadata.");
515 TA = TA.getParent();
516 }
517
518 SmallSetVector<const MDNode *, 4> PathB;
519 TBAANode TB(B);
520 while (TB.getNode()) {
521 if (!PathB.insert(X: TB.getNode()))
522 report_fatal_error(reason: "Cycle found in TBAA metadata.");
523 TB = TB.getParent();
524 }
525
526 int IA = PathA.size() - 1;
527 int IB = PathB.size() - 1;
528
529 const MDNode *Ret = nullptr;
530 while (IA >= 0 && IB >= 0) {
531 if (PathA[IA] == PathB[IB])
532 Ret = PathA[IA];
533 else
534 break;
535 --IA;
536 --IB;
537 }
538
539 return Ret;
540}
541
542AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
543 AAMDNodes Result;
544 Result.TBAA = MDNode::getMostGenericTBAA(A: TBAA, B: Other.TBAA);
545 Result.TBAAStruct = nullptr;
546 Result.Scope = MDNode::getMostGenericAliasScope(A: Scope, B: Other.Scope);
547 Result.NoAlias = MDNode::intersect(A: NoAlias, B: Other.NoAlias);
548 Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
549 A: NoAliasAddrSpace, B: Other.NoAliasAddrSpace);
550 return Result;
551}
552
553AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
554 AAMDNodes Result;
555 Result.TBAA = Result.TBAAStruct = nullptr;
556 Result.Scope = MDNode::getMostGenericAliasScope(A: Scope, B: Other.Scope);
557 Result.NoAlias = MDNode::intersect(A: NoAlias, B: Other.NoAlias);
558 Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
559 A: NoAliasAddrSpace, B: Other.NoAliasAddrSpace);
560 return Result;
561}
562
563static const MDNode *createAccessTag(const MDNode *AccessType) {
564 // If there is no access type or the access type is the root node, then
565 // we don't have any useful access tag to return.
566 if (!AccessType || AccessType->getNumOperands() < 2)
567 return nullptr;
568
569 Type *Int64 = IntegerType::get(C&: AccessType->getContext(), NumBits: 64);
570 auto *OffsetNode = ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int64, V: 0));
571
572 if (TBAAStructTypeNode(AccessType).isNewFormat()) {
573 // TODO: Take access ranges into account when matching access tags and
574 // fix this code to generate actual access sizes for generic tags.
575 uint64_t AccessSize = UINT64_MAX;
576 auto *SizeNode =
577 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int64, V: AccessSize));
578 Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
579 const_cast<MDNode*>(AccessType),
580 OffsetNode, SizeNode};
581 return MDNode::get(Context&: AccessType->getContext(), MDs: Ops);
582 }
583
584 Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
585 const_cast<MDNode*>(AccessType),
586 OffsetNode};
587 return MDNode::get(Context&: AccessType->getContext(), MDs: Ops);
588}
589
590static bool hasField(TBAAStructTypeNode BaseType,
591 TBAAStructTypeNode FieldType) {
592 for (unsigned I = 0, E = BaseType.getNumFields(); I != E; ++I) {
593 TBAAStructTypeNode T = BaseType.getFieldType(FieldIndex: I);
594 if (T == FieldType || hasField(BaseType: T, FieldType))
595 return true;
596 }
597 return false;
598}
599
600/// Return true if for two given accesses, one of the accessed objects may be a
601/// subobject of the other. The \p BaseTag and \p SubobjectTag parameters
602/// describe the accesses to the base object and the subobject respectively.
603/// \p CommonType must be the metadata node describing the common type of the
604/// accessed objects. On return, \p MayAlias is set to true iff these accesses
605/// may alias and \p Generic, if not null, points to the most generic access
606/// tag for the given two.
607static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
608 TBAAStructTagNode SubobjectTag,
609 const MDNode *CommonType,
610 const MDNode **GenericTag,
611 bool &MayAlias) {
612 // If the base object is of the least common type, then this may be an access
613 // to its subobject.
614 if (BaseTag.getAccessType() == BaseTag.getBaseType() &&
615 BaseTag.getAccessType() == CommonType) {
616 if (GenericTag)
617 *GenericTag = createAccessTag(AccessType: CommonType);
618 MayAlias = true;
619 return true;
620 }
621
622 // If the access to the base object is through a field of the subobject's
623 // type, then this may be an access to that field. To check for that we start
624 // from the base type, follow the edge with the correct offset in the type DAG
625 // and adjust the offset until we reach the field type or until we reach the
626 // access type.
627 bool NewFormat = BaseTag.isNewFormat();
628 TBAAStructTypeNode BaseType(BaseTag.getBaseType());
629 uint64_t OffsetInBase = BaseTag.getOffset();
630
631 for (;;) {
632 // In the old format there is no distinction between fields and parent
633 // types, so in this case we consider all nodes up to the root.
634 if (!BaseType.getNode()) {
635 assert(!NewFormat && "Did not see access type in access path!");
636 break;
637 }
638
639 if (BaseType.getNode() == SubobjectTag.getBaseType()) {
640 MayAlias = OffsetInBase == SubobjectTag.getOffset() ||
641 BaseType.getNode() == BaseTag.getAccessType() ||
642 SubobjectTag.getBaseType() == SubobjectTag.getAccessType();
643 if (GenericTag) {
644 *GenericTag =
645 MayAlias ? SubobjectTag.getNode() : createAccessTag(AccessType: CommonType);
646 }
647 return true;
648 }
649
650 // With new-format nodes we stop at the access type.
651 if (NewFormat && BaseType.getNode() == BaseTag.getAccessType())
652 break;
653
654 // Follow the edge with the correct offset. Offset will be adjusted to
655 // be relative to the field type.
656 BaseType = BaseType.getField(Offset&: OffsetInBase);
657 }
658
659 // If the base object has a direct or indirect field of the subobject's type,
660 // then this may be an access to that field. We need this to check now that
661 // we support aggregates as access types.
662 if (NewFormat) {
663 // TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType());
664 TBAAStructTypeNode FieldType(SubobjectTag.getBaseType());
665 if (hasField(BaseType, FieldType)) {
666 if (GenericTag)
667 *GenericTag = createAccessTag(AccessType: CommonType);
668 MayAlias = true;
669 return true;
670 }
671 }
672
673 return false;
674}
675
676/// matchTags - Return true if the given couple of accesses are allowed to
677/// overlap. If \arg GenericTag is not null, then on return it points to the
678/// most generic access descriptor for the given two.
679static bool matchAccessTags(const MDNode *A, const MDNode *B,
680 const MDNode **GenericTag) {
681 if (A == B) {
682 if (GenericTag)
683 *GenericTag = A;
684 return true;
685 }
686
687 // Accesses with no TBAA information may alias with any other accesses.
688 if (!A || !B) {
689 if (GenericTag)
690 *GenericTag = nullptr;
691 return true;
692 }
693
694 // Verify that both input nodes are struct-path aware. Auto-upgrade should
695 // have taken care of this.
696 assert(isStructPathTBAA(A) && "Access A is not struct-path aware!");
697 assert(isStructPathTBAA(B) && "Access B is not struct-path aware!");
698
699 TBAAStructTagNode TagA(A), TagB(B);
700 const MDNode *CommonType = getLeastCommonType(A: TagA.getAccessType(),
701 B: TagB.getAccessType());
702
703 // If the final access types have different roots, they're part of different
704 // potentially unrelated type systems, so we must be conservative.
705 if (!CommonType) {
706 if (GenericTag)
707 *GenericTag = nullptr;
708 return true;
709 }
710
711 // If one of the accessed objects may be a subobject of the other, then such
712 // accesses may alias.
713 bool MayAlias;
714 if (mayBeAccessToSubobjectOf(/* BaseTag= */ TagA, /* SubobjectTag= */ TagB,
715 CommonType, GenericTag, MayAlias) ||
716 mayBeAccessToSubobjectOf(/* BaseTag= */ TagB, /* SubobjectTag= */ TagA,
717 CommonType, GenericTag, MayAlias))
718 return MayAlias;
719
720 // Otherwise, we've proved there's no alias.
721 if (GenericTag)
722 *GenericTag = createAccessTag(AccessType: CommonType);
723 return false;
724}
725
726/// Aliases - Test whether the access represented by tag A may alias the
727/// access represented by tag B.
728bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
729 return matchAccessTags(A, B);
730}
731
732bool TypeBasedAAResult::shouldUseTBAA() const {
733 return EnableTBAA && !UsingTypeSanitizer;
734}
735
736AnalysisKey TypeBasedAA::Key;
737
738TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
739 return TypeBasedAAResult(F.hasFnAttribute(Kind: Attribute::SanitizeType));
740}
741
742char TypeBasedAAWrapperPass::ID = 0;
743INITIALIZE_PASS(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
744 false, true)
745
746ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
747 return new TypeBasedAAWrapperPass();
748}
749
750TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {}
751
752bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
753 Result.reset(p: new TypeBasedAAResult(/*UsingTypeSanitizer=*/false));
754 return false;
755}
756
757bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
758 Result.reset();
759 return false;
760}
761
762void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
763 AU.setPreservesAll();
764}
765
766MDNode *AAMDNodes::shiftTBAA(MDNode *MD, size_t Offset) {
767 // Fast path if there's no offset
768 if (Offset == 0)
769 return MD;
770 // Fast path if there's no path tbaa node (and thus scalar)
771 if (!isStructPathTBAA(MD))
772 return MD;
773
774 // The correct behavior here is to add the offset into the TBAA
775 // struct node offset. The base type, however may not have defined
776 // a type at this additional offset, resulting in errors. Since
777 // this method is only used within a given load/store access
778 // the offset provided is only used to subdivide the previous load
779 // maintaining the validity of the previous TBAA.
780 //
781 // This, however, should be revisited in the future.
782 return MD;
783}
784
785MDNode *AAMDNodes::shiftTBAAStruct(MDNode *MD, size_t Offset) {
786 // Fast path if there's no offset
787 if (Offset == 0)
788 return MD;
789 SmallVector<Metadata *, 3> Sub;
790 for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) {
791 ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i));
792 ConstantInt *InnerSize =
793 mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i + 1));
794 // Don't include any triples that aren't in bounds
795 if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset)
796 continue;
797
798 uint64_t NewSize = InnerSize->getZExtValue();
799 uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
800 if (InnerOffset->getZExtValue() < Offset) {
801 NewOffset = 0;
802 NewSize -= Offset - InnerOffset->getZExtValue();
803 }
804
805 // Shift the offset of the triple
806 Sub.push_back(Elt: ConstantAsMetadata::get(
807 C: ConstantInt::get(Ty: InnerOffset->getType(), V: NewOffset)));
808 Sub.push_back(Elt: ConstantAsMetadata::get(
809 C: ConstantInt::get(Ty: InnerSize->getType(), V: NewSize)));
810 Sub.push_back(Elt: MD->getOperand(I: i + 2));
811 }
812 return MDNode::get(Context&: MD->getContext(), MDs: Sub);
813}
814
815MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
816 // Fast path if 0-length
817 if (Len == 0)
818 return nullptr;
819
820 // Regular TBAA is invariant of length, so we only need to consider
821 // struct-path TBAA.
822 if (!isStructPathTBAA(MD))
823 return MD;
824
825 TBAAStructTagNode Tag(MD);
826
827 // Only new format TBAA has a size
828 if (!Tag.isNewFormat())
829 return MD;
830
831 // If unknown size, drop the TBAA.
832 if (Len == -1)
833 return nullptr;
834
835 // Otherwise, create TBAA with the new Len
836 ArrayRef<MDOperand> MDOperands = MD->operands();
837 SmallVector<Metadata *, 4> NextNodes(MDOperands);
838 ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(MD&: NextNodes[3]);
839
840 // Don't create a new MDNode if it is the same length.
841 if (PreviousSize->equalsInt(V: Len))
842 return MD;
843
844 NextNodes[3] =
845 ConstantAsMetadata::get(C: ConstantInt::get(Ty: PreviousSize->getType(), V: Len));
846 return MDNode::get(Context&: MD->getContext(), MDs: NextNodes);
847}
848
849AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
850 AAMDNodes New = *this;
851 MDNode *M = New.TBAAStruct;
852 if (!New.TBAA && M && M->getNumOperands() >= 3 && M->getOperand(I: 0) &&
853 mdconst::hasa<ConstantInt>(MD: M->getOperand(I: 0)) &&
854 mdconst::extract<ConstantInt>(MD: M->getOperand(I: 0))->isZero() &&
855 M->getOperand(I: 1) && mdconst::hasa<ConstantInt>(MD: M->getOperand(I: 1)) &&
856 mdconst::extract<ConstantInt>(MD: M->getOperand(I: 1))->getValue() ==
857 AccessSize &&
858 M->getOperand(I: 2) && isa<MDNode>(Val: M->getOperand(I: 2)))
859 New.TBAA = cast<MDNode>(Val: M->getOperand(I: 2));
860
861 New.TBAAStruct = nullptr;
862 return New;
863}
864
865AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy,
866 const DataLayout &DL) {
867 AAMDNodes New = shift(Offset);
868 if (!DL.typeSizeEqualsStoreSize(Ty: AccessTy))
869 return New;
870 TypeSize Size = DL.getTypeStoreSize(Ty: AccessTy);
871 if (Size.isScalable())
872 return New;
873
874 return New.adjustForAccess(AccessSize: Size.getKnownMinValue());
875}
876
877AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, unsigned AccessSize) {
878 AAMDNodes New = shift(Offset);
879 return New.adjustForAccess(AccessSize);
880}
881