| 1 | //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the TypeBasedAliasAnalysis pass, which implements |
| 10 | // metadata-based TBAA. |
| 11 | // |
| 12 | // In LLVM IR, memory does not have types, so LLVM's own type system is not |
| 13 | // suitable for doing TBAA. Instead, metadata is added to the IR to describe |
| 14 | // a type system of a higher level language. This can be used to implement |
| 15 | // typical C/C++ TBAA, but it can also be used to implement custom alias |
| 16 | // analysis behavior for other languages. |
| 17 | // |
| 18 | // Scalar type nodes have up to three fields, e.g.: |
| 19 | // !0 = !{ !"an example type tree" } |
| 20 | // !1 = !{ !"int", !0 } |
| 21 | // !2 = !{ !"float", !0 } |
| 22 | // !3 = !{ !"const float", !2, i64 1 } |
| 23 | // |
| 24 | // The first field is an identity field. It can be any value, usually |
| 25 | // an MDString, which uniquely identifies the type. The most important |
| 26 | // name in the tree is the name of the root node. Two trees with |
| 27 | // different root node names are entirely disjoint, even if they |
| 28 | // have leaves with common names. |
| 29 | // |
| 30 | // The second field identifies the type's parent node in the tree, or |
| 31 | // is null or omitted for a root node. A type is considered to alias |
| 32 | // all of its descendants and all of its ancestors in the tree. Also, |
| 33 | // a type is considered to alias all types in other trees, so that |
| 34 | // bitcode produced from multiple front-ends is handled conservatively. |
| 35 | // |
| 36 | // If the third field is present, it's an integer which if equal to 1 |
| 37 | // indicates that the type is "constant" (meaning pointsToConstantMemory |
| 38 | // should return true; see |
| 39 | // http://llvm.org/docs/AliasAnalysis.html#OtherItfs). |
| 40 | // |
| 41 | // The MDNodes attached to an instruction using "!tbaa" are not plain type |
| 42 | // nodes, but path tag nodes. |
| 43 | // |
| 44 | // The path tag node has 4 fields with the last field being optional. |
| 45 | // |
| 46 | // The first field is the base type node, it can be a struct type node |
| 47 | // or a scalar type node. The second field is the access type node, it |
| 48 | // must be a scalar type node. The third field is the offset into the base type. |
| 49 | // The last field has the same meaning as the last field of our scalar TBAA: |
| 50 | // it's an integer which if equal to 1 indicates that the access is "constant". |
| 51 | // |
| 52 | // The struct type node has a name and a list of pairs, one pair for each member |
| 53 | // of the struct. The first element of each pair is a type node (a struct type |
| 54 | // node or a scalar type node), specifying the type of the member, the second |
| 55 | // element of each pair is the offset of the member. |
| 56 | // |
| 57 | // Given an example |
| 58 | // typedef struct { |
| 59 | // short s; |
| 60 | // } A; |
| 61 | // typedef struct { |
| 62 | // uint16_t s; |
| 63 | // A a; |
| 64 | // } B; |
| 65 | // |
| 66 | // For an access to B.a.s, we attach !5 (a path tag node) to the load/store |
| 67 | // instruction. The base type is !4 (struct B), the access type is !2 (scalar |
| 68 | // type short) and the offset is 4. |
| 69 | // |
| 70 | // !0 = !{!"Simple C/C++ TBAA"} |
| 71 | // !1 = !{!"omnipotent char", !0} // Scalar type node |
| 72 | // !2 = !{!"short", !1} // Scalar type node |
| 73 | // !3 = !{!"A", !2, i64 0} // Struct type node |
| 74 | // !4 = !{!"B", !2, i64 0, !3, i64 4} |
| 75 | // // Struct type node |
| 76 | // !5 = !{!4, !2, i64 4} // Path tag node |
| 77 | // |
| 78 | // The struct type nodes and the scalar type nodes form a type DAG. |
| 79 | // Root (!0) |
| 80 | // char (!1) -- edge to Root |
| 81 | // short (!2) -- edge to char |
| 82 | // A (!3) -- edge with offset 0 to short |
| 83 | // B (!4) -- edge with offset 0 to short and edge with offset 4 to A |
| 84 | // |
| 85 | // To check if two tags (tagX and tagY) can alias, we start from the base type |
| 86 | // of tagX, follow the edge with the correct offset in the type DAG and adjust |
| 87 | // the offset until we reach the base type of tagY or until we reach the Root |
| 88 | // node. |
| 89 | // If we reach the base type of tagY, compare the adjusted offset with |
| 90 | // offset of tagY, return Alias if the offsets are the same, return NoAlias |
| 91 | // otherwise. |
| 92 | // If we reach the Root node, perform the above starting from base type of tagY |
| 93 | // to see if we reach base type of tagX. |
| 94 | // |
| 95 | // If they have different roots, they're part of different potentially |
| 96 | // unrelated type systems, so we return Alias to be conservative. |
| 97 | // If neither node is an ancestor of the other and they have the same root, |
| 98 | // then we say NoAlias. |
| 99 | // |
| 100 | //===----------------------------------------------------------------------===// |
| 101 | |
| 102 | #include "llvm/Analysis/TypeBasedAliasAnalysis.h" |
| 103 | #include "llvm/ADT/SetVector.h" |
| 104 | #include "llvm/Analysis/AliasAnalysis.h" |
| 105 | #include "llvm/Analysis/MemoryLocation.h" |
| 106 | #include "llvm/IR/Constants.h" |
| 107 | #include "llvm/IR/DataLayout.h" |
| 108 | #include "llvm/IR/DerivedTypes.h" |
| 109 | #include "llvm/IR/InstrTypes.h" |
| 110 | #include "llvm/IR/LLVMContext.h" |
| 111 | #include "llvm/IR/Metadata.h" |
| 112 | #include "llvm/IR/Module.h" |
| 113 | #include "llvm/InitializePasses.h" |
| 114 | #include "llvm/Pass.h" |
| 115 | #include "llvm/Support/Casting.h" |
| 116 | #include "llvm/Support/CommandLine.h" |
| 117 | #include "llvm/Support/ErrorHandling.h" |
| 118 | #include <cassert> |
| 119 | #include <cstdint> |
| 120 | |
| 121 | using namespace llvm; |
| 122 | |
| 123 | // A handy option for disabling TBAA functionality. The same effect can also be |
| 124 | // achieved by stripping the !tbaa tags from IR, but this option is sometimes |
| 125 | // more convenient. |
| 126 | static cl::opt<bool> EnableTBAA("enable-tbaa" , cl::init(Val: true), cl::Hidden); |
| 127 | |
| 128 | namespace { |
| 129 | |
| 130 | /// isNewFormatTypeNode - Return true iff the given type node is in the new |
| 131 | /// size-aware format. |
| 132 | static bool isNewFormatTypeNode(const MDNode *N) { |
| 133 | if (N->getNumOperands() < 3) |
| 134 | return false; |
| 135 | // In the old format the first operand is a string. |
| 136 | if (!isa<MDNode>(Val: N->getOperand(I: 0))) |
| 137 | return false; |
| 138 | return true; |
| 139 | } |
| 140 | |
| 141 | /// This is a simple wrapper around an MDNode which provides a higher-level |
| 142 | /// interface by hiding the details of how alias analysis information is encoded |
| 143 | /// in its operands. |
| 144 | template<typename MDNodeTy> |
| 145 | class TBAANodeImpl { |
| 146 | MDNodeTy *Node = nullptr; |
| 147 | |
| 148 | public: |
| 149 | TBAANodeImpl() = default; |
| 150 | explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {} |
| 151 | |
| 152 | /// getNode - Get the MDNode for this TBAANode. |
| 153 | MDNodeTy *getNode() const { return Node; } |
| 154 | |
| 155 | /// isNewFormat - Return true iff the wrapped type node is in the new |
| 156 | /// size-aware format. |
| 157 | bool isNewFormat() const { return isNewFormatTypeNode(Node); } |
| 158 | |
| 159 | /// getParent - Get this TBAANode's Alias tree parent. |
| 160 | TBAANodeImpl<MDNodeTy> getParent() const { |
| 161 | if (isNewFormat()) |
| 162 | return TBAANodeImpl(cast<MDNodeTy>(Node->getOperand(0))); |
| 163 | |
| 164 | if (Node->getNumOperands() < 2) |
| 165 | return TBAANodeImpl<MDNodeTy>(); |
| 166 | MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1)); |
| 167 | if (!P) |
| 168 | return TBAANodeImpl<MDNodeTy>(); |
| 169 | // Ok, this node has a valid parent. Return it. |
| 170 | return TBAANodeImpl<MDNodeTy>(P); |
| 171 | } |
| 172 | |
| 173 | /// Test if this TBAANode represents a type for objects which are |
| 174 | /// not modified (by any means) in the context where this |
| 175 | /// AliasAnalysis is relevant. |
| 176 | bool isTypeImmutable() const { |
| 177 | if (Node->getNumOperands() < 3) |
| 178 | return false; |
| 179 | ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); |
| 180 | if (!CI) |
| 181 | return false; |
| 182 | return CI->getValue()[0]; |
| 183 | } |
| 184 | }; |
| 185 | |
| 186 | /// \name Specializations of \c TBAANodeImpl for const and non const qualified |
| 187 | /// \c MDNode. |
| 188 | /// @{ |
| 189 | using TBAANode = TBAANodeImpl<const MDNode>; |
| 190 | using MutableTBAANode = TBAANodeImpl<MDNode>; |
| 191 | /// @} |
| 192 | |
| 193 | /// This is a simple wrapper around an MDNode which provides a |
| 194 | /// higher-level interface by hiding the details of how alias analysis |
| 195 | /// information is encoded in its operands. |
| 196 | template<typename MDNodeTy> |
| 197 | class TBAAStructTagNodeImpl { |
| 198 | /// This node should be created with createTBAAAccessTag(). |
| 199 | MDNodeTy *Node; |
| 200 | |
| 201 | public: |
| 202 | explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {} |
| 203 | |
| 204 | /// Get the MDNode for this TBAAStructTagNode. |
| 205 | MDNodeTy *getNode() const { return Node; } |
| 206 | |
| 207 | /// isNewFormat - Return true iff the wrapped access tag is in the new |
| 208 | /// size-aware format. |
| 209 | bool isNewFormat() const { |
| 210 | if (Node->getNumOperands() < 4) |
| 211 | return false; |
| 212 | if (MDNodeTy *AccessType = getAccessType()) |
| 213 | if (!TBAANodeImpl<MDNodeTy>(AccessType).isNewFormat()) |
| 214 | return false; |
| 215 | return true; |
| 216 | } |
| 217 | |
| 218 | MDNodeTy *getBaseType() const { |
| 219 | return dyn_cast_or_null<MDNode>(Node->getOperand(0)); |
| 220 | } |
| 221 | |
| 222 | MDNodeTy *getAccessType() const { |
| 223 | return dyn_cast_or_null<MDNode>(Node->getOperand(1)); |
| 224 | } |
| 225 | |
| 226 | uint64_t getOffset() const { |
| 227 | return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); |
| 228 | } |
| 229 | |
| 230 | uint64_t getSize() const { |
| 231 | if (!isNewFormat()) |
| 232 | return UINT64_MAX; |
| 233 | return mdconst::extract<ConstantInt>(Node->getOperand(3))->getZExtValue(); |
| 234 | } |
| 235 | |
| 236 | /// Test if this TBAAStructTagNode represents a type for objects |
| 237 | /// which are not modified (by any means) in the context where this |
| 238 | /// AliasAnalysis is relevant. |
| 239 | bool isTypeImmutable() const { |
| 240 | unsigned OpNo = isNewFormat() ? 4 : 3; |
| 241 | if (Node->getNumOperands() < OpNo + 1) |
| 242 | return false; |
| 243 | ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(OpNo)); |
| 244 | if (!CI) |
| 245 | return false; |
| 246 | return CI->getValue()[0]; |
| 247 | } |
| 248 | }; |
| 249 | |
| 250 | /// \name Specializations of \c TBAAStructTagNodeImpl for const and non const |
| 251 | /// qualified \c MDNods. |
| 252 | /// @{ |
| 253 | using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>; |
| 254 | using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>; |
| 255 | /// @} |
| 256 | |
| 257 | /// This is a simple wrapper around an MDNode which provides a |
| 258 | /// higher-level interface by hiding the details of how alias analysis |
| 259 | /// information is encoded in its operands. |
| 260 | class TBAAStructTypeNode { |
| 261 | /// This node should be created with createTBAATypeNode(). |
| 262 | const MDNode *Node = nullptr; |
| 263 | |
| 264 | public: |
| 265 | TBAAStructTypeNode() = default; |
| 266 | explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} |
| 267 | |
| 268 | /// Get the MDNode for this TBAAStructTypeNode. |
| 269 | const MDNode *getNode() const { return Node; } |
| 270 | |
| 271 | /// isNewFormat - Return true iff the wrapped type node is in the new |
| 272 | /// size-aware format. |
| 273 | bool isNewFormat() const { return isNewFormatTypeNode(N: Node); } |
| 274 | |
| 275 | bool operator==(const TBAAStructTypeNode &Other) const { |
| 276 | return getNode() == Other.getNode(); |
| 277 | } |
| 278 | |
| 279 | /// getId - Return type identifier. |
| 280 | Metadata *getId() const { |
| 281 | return Node->getOperand(I: isNewFormat() ? 2 : 0); |
| 282 | } |
| 283 | |
| 284 | unsigned getNumFields() const { |
| 285 | unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1; |
| 286 | unsigned NumOpsPerField = isNewFormat() ? 3 : 2; |
| 287 | return (getNode()->getNumOperands() - FirstFieldOpNo) / NumOpsPerField; |
| 288 | } |
| 289 | |
| 290 | TBAAStructTypeNode getFieldType(unsigned FieldIndex) const { |
| 291 | unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1; |
| 292 | unsigned NumOpsPerField = isNewFormat() ? 3 : 2; |
| 293 | unsigned OpIndex = FirstFieldOpNo + FieldIndex * NumOpsPerField; |
| 294 | auto *TypeNode = cast<MDNode>(Val: getNode()->getOperand(I: OpIndex)); |
| 295 | return TBAAStructTypeNode(TypeNode); |
| 296 | } |
| 297 | |
| 298 | /// Get this TBAAStructTypeNode's field in the type DAG with |
| 299 | /// given offset. Update the offset to be relative to the field type. |
| 300 | TBAAStructTypeNode getField(uint64_t &Offset) const { |
| 301 | bool NewFormat = isNewFormat(); |
| 302 | const ArrayRef<MDOperand> Operands = Node->operands(); |
| 303 | const unsigned NumOperands = Operands.size(); |
| 304 | |
| 305 | if (NewFormat) { |
| 306 | // New-format root and scalar type nodes have no fields. |
| 307 | if (NumOperands < 6) |
| 308 | return TBAAStructTypeNode(); |
| 309 | } else { |
| 310 | // Parent can be omitted for the root node. |
| 311 | if (NumOperands < 2) |
| 312 | return TBAAStructTypeNode(); |
| 313 | |
| 314 | // Fast path for a scalar type node and a struct type node with a single |
| 315 | // field. |
| 316 | if (NumOperands <= 3) { |
| 317 | uint64_t Cur = |
| 318 | NumOperands == 2 |
| 319 | ? 0 |
| 320 | : mdconst::extract<ConstantInt>(MD: Operands[2])->getZExtValue(); |
| 321 | Offset -= Cur; |
| 322 | MDNode *P = dyn_cast_or_null<MDNode>(Val: Operands[1]); |
| 323 | if (!P) |
| 324 | return TBAAStructTypeNode(); |
| 325 | return TBAAStructTypeNode(P); |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | // Assume the offsets are in order. We return the previous field if |
| 330 | // the current offset is bigger than the given offset. |
| 331 | unsigned FirstFieldOpNo = NewFormat ? 3 : 1; |
| 332 | unsigned NumOpsPerField = NewFormat ? 3 : 2; |
| 333 | unsigned TheIdx = 0; |
| 334 | |
| 335 | for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands; |
| 336 | Idx += NumOpsPerField) { |
| 337 | uint64_t Cur = |
| 338 | mdconst::extract<ConstantInt>(MD: Operands[Idx + 1])->getZExtValue(); |
| 339 | if (Cur > Offset) { |
| 340 | assert(Idx >= FirstFieldOpNo + NumOpsPerField && |
| 341 | "TBAAStructTypeNode::getField should have an offset match!" ); |
| 342 | TheIdx = Idx - NumOpsPerField; |
| 343 | break; |
| 344 | } |
| 345 | } |
| 346 | // Move along the last field. |
| 347 | if (TheIdx == 0) |
| 348 | TheIdx = NumOperands - NumOpsPerField; |
| 349 | uint64_t Cur = |
| 350 | mdconst::extract<ConstantInt>(MD: Operands[TheIdx + 1])->getZExtValue(); |
| 351 | Offset -= Cur; |
| 352 | MDNode *P = dyn_cast_or_null<MDNode>(Val: Operands[TheIdx]); |
| 353 | if (!P) |
| 354 | return TBAAStructTypeNode(); |
| 355 | return TBAAStructTypeNode(P); |
| 356 | } |
| 357 | }; |
| 358 | |
| 359 | } // end anonymous namespace |
| 360 | |
| 361 | AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, |
| 362 | const MemoryLocation &LocB, |
| 363 | AAQueryInfo &AAQI, const Instruction *) { |
| 364 | if (!shouldUseTBAA()) |
| 365 | return AliasResult::MayAlias; |
| 366 | |
| 367 | if (Aliases(A: LocA.AATags.TBAA, B: LocB.AATags.TBAA)) |
| 368 | return AliasResult::MayAlias; |
| 369 | |
| 370 | // Otherwise return a definitive result. |
| 371 | return AliasResult::NoAlias; |
| 372 | } |
| 373 | |
| 374 | AliasResult TypeBasedAAResult::aliasErrno(const MemoryLocation &Loc, |
| 375 | const Module *M) { |
| 376 | if (!shouldUseTBAA()) |
| 377 | return AliasResult::MayAlias; |
| 378 | |
| 379 | const auto *N = Loc.AATags.TBAA; |
| 380 | if (!N) |
| 381 | return AliasResult::MayAlias; |
| 382 | |
| 383 | // There cannot be any alias with errno if TBAA proves the given memory |
| 384 | // location does not alias errno. |
| 385 | const auto *ErrnoTBAAMD = M->getNamedMetadata(Name: "llvm.errno.tbaa" ); |
| 386 | if (!ErrnoTBAAMD || any_of(Range: ErrnoTBAAMD->operands(), P: [&](const auto *Node) { |
| 387 | return Aliases(A: N, B: Node); |
| 388 | })) |
| 389 | return AliasResult::MayAlias; |
| 390 | return AliasResult::NoAlias; |
| 391 | } |
| 392 | |
| 393 | ModRefInfo TypeBasedAAResult::getModRefInfoMask(const MemoryLocation &Loc, |
| 394 | AAQueryInfo &AAQI, |
| 395 | bool IgnoreLocals) { |
| 396 | if (!shouldUseTBAA()) |
| 397 | return ModRefInfo::ModRef; |
| 398 | |
| 399 | const MDNode *M = Loc.AATags.TBAA; |
| 400 | if (!M) |
| 401 | return ModRefInfo::ModRef; |
| 402 | |
| 403 | // If this is an "immutable" type, we can assume the pointer is pointing |
| 404 | // to constant memory. |
| 405 | if (TBAAStructTagNode(M).isTypeImmutable()) |
| 406 | return ModRefInfo::NoModRef; |
| 407 | |
| 408 | return ModRefInfo::ModRef; |
| 409 | } |
| 410 | |
| 411 | MemoryEffects TypeBasedAAResult::getMemoryEffects(const CallBase *Call, |
| 412 | AAQueryInfo &AAQI) { |
| 413 | if (!shouldUseTBAA()) |
| 414 | return MemoryEffects::unknown(); |
| 415 | |
| 416 | // If this is an "immutable" type, the access is not observable. |
| 417 | if (const MDNode *M = Call->getMetadata(KindID: LLVMContext::MD_tbaa)) |
| 418 | if (TBAAStructTagNode(M).isTypeImmutable()) |
| 419 | return MemoryEffects::none(); |
| 420 | |
| 421 | return MemoryEffects::unknown(); |
| 422 | } |
| 423 | |
| 424 | MemoryEffects TypeBasedAAResult::getMemoryEffects(const Function *F) { |
| 425 | // Functions don't have metadata. |
| 426 | return MemoryEffects::unknown(); |
| 427 | } |
| 428 | |
| 429 | ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call, |
| 430 | const MemoryLocation &Loc, |
| 431 | AAQueryInfo &AAQI) { |
| 432 | if (!shouldUseTBAA()) |
| 433 | return ModRefInfo::ModRef; |
| 434 | |
| 435 | if (const MDNode *L = Loc.AATags.TBAA) |
| 436 | if (const MDNode *M = Call->getMetadata(KindID: LLVMContext::MD_tbaa)) |
| 437 | if (!Aliases(A: L, B: M)) |
| 438 | return ModRefInfo::NoModRef; |
| 439 | |
| 440 | return ModRefInfo::ModRef; |
| 441 | } |
| 442 | |
| 443 | ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1, |
| 444 | const CallBase *Call2, |
| 445 | AAQueryInfo &AAQI) { |
| 446 | if (!shouldUseTBAA()) |
| 447 | return ModRefInfo::ModRef; |
| 448 | |
| 449 | if (const MDNode *M1 = Call1->getMetadata(KindID: LLVMContext::MD_tbaa)) |
| 450 | if (const MDNode *M2 = Call2->getMetadata(KindID: LLVMContext::MD_tbaa)) |
| 451 | if (!Aliases(A: M1, B: M2)) |
| 452 | return ModRefInfo::NoModRef; |
| 453 | |
| 454 | return ModRefInfo::ModRef; |
| 455 | } |
| 456 | |
| 457 | bool MDNode::isTBAAVtableAccess() const { |
| 458 | // For struct-path aware TBAA, we use the access type of the tag. |
| 459 | TBAAStructTagNode Tag(this); |
| 460 | TBAAStructTypeNode AccessType(Tag.getAccessType()); |
| 461 | if(auto *Id = dyn_cast<MDString>(Val: AccessType.getId())) |
| 462 | if (Id->getString() == "vtable pointer" ) |
| 463 | return true; |
| 464 | return false; |
| 465 | } |
| 466 | |
| 467 | static bool matchAccessTags(const MDNode *A, const MDNode *B, |
| 468 | const MDNode **GenericTag = nullptr); |
| 469 | |
| 470 | MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { |
| 471 | const MDNode *GenericTag; |
| 472 | matchAccessTags(A, B, GenericTag: &GenericTag); |
| 473 | return const_cast<MDNode*>(GenericTag); |
| 474 | } |
| 475 | |
| 476 | static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) { |
| 477 | if (!A || !B) |
| 478 | return nullptr; |
| 479 | |
| 480 | if (A == B) |
| 481 | return A; |
| 482 | |
| 483 | SmallSetVector<const MDNode *, 4> PathA; |
| 484 | TBAANode TA(A); |
| 485 | while (TA.getNode()) { |
| 486 | if (!PathA.insert(X: TA.getNode())) |
| 487 | report_fatal_error(reason: "Cycle found in TBAA metadata." ); |
| 488 | TA = TA.getParent(); |
| 489 | } |
| 490 | |
| 491 | SmallSetVector<const MDNode *, 4> PathB; |
| 492 | TBAANode TB(B); |
| 493 | while (TB.getNode()) { |
| 494 | if (!PathB.insert(X: TB.getNode())) |
| 495 | report_fatal_error(reason: "Cycle found in TBAA metadata." ); |
| 496 | TB = TB.getParent(); |
| 497 | } |
| 498 | |
| 499 | int IA = PathA.size() - 1; |
| 500 | int IB = PathB.size() - 1; |
| 501 | |
| 502 | const MDNode *Ret = nullptr; |
| 503 | while (IA >= 0 && IB >= 0) { |
| 504 | if (PathA[IA] == PathB[IB]) |
| 505 | Ret = PathA[IA]; |
| 506 | else |
| 507 | break; |
| 508 | --IA; |
| 509 | --IB; |
| 510 | } |
| 511 | |
| 512 | return Ret; |
| 513 | } |
| 514 | |
| 515 | AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const { |
| 516 | AAMDNodes Result; |
| 517 | Result.TBAA = MDNode::getMostGenericTBAA(A: TBAA, B: Other.TBAA); |
| 518 | Result.TBAAStruct = nullptr; |
| 519 | Result.Scope = MDNode::getMostGenericAliasScope(A: Scope, B: Other.Scope); |
| 520 | Result.NoAlias = MDNode::intersect(A: NoAlias, B: Other.NoAlias); |
| 521 | Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace( |
| 522 | A: NoAliasAddrSpace, B: Other.NoAliasAddrSpace); |
| 523 | return Result; |
| 524 | } |
| 525 | |
| 526 | AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const { |
| 527 | AAMDNodes Result; |
| 528 | Result.TBAA = Result.TBAAStruct = nullptr; |
| 529 | Result.Scope = MDNode::getMostGenericAliasScope(A: Scope, B: Other.Scope); |
| 530 | Result.NoAlias = MDNode::intersect(A: NoAlias, B: Other.NoAlias); |
| 531 | Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace( |
| 532 | A: NoAliasAddrSpace, B: Other.NoAliasAddrSpace); |
| 533 | return Result; |
| 534 | } |
| 535 | |
| 536 | static const MDNode *createAccessTag(const MDNode *AccessType) { |
| 537 | // If there is no access type or the access type is the root node, then |
| 538 | // we don't have any useful access tag to return. |
| 539 | if (!AccessType || AccessType->getNumOperands() < 2) |
| 540 | return nullptr; |
| 541 | |
| 542 | Type *Int64 = IntegerType::get(C&: AccessType->getContext(), NumBits: 64); |
| 543 | auto *OffsetNode = ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int64, V: 0)); |
| 544 | |
| 545 | if (TBAAStructTypeNode(AccessType).isNewFormat()) { |
| 546 | // TODO: Take access ranges into account when matching access tags and |
| 547 | // fix this code to generate actual access sizes for generic tags. |
| 548 | uint64_t AccessSize = UINT64_MAX; |
| 549 | auto *SizeNode = |
| 550 | ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int64, V: AccessSize)); |
| 551 | Metadata *Ops[] = {const_cast<MDNode*>(AccessType), |
| 552 | const_cast<MDNode*>(AccessType), |
| 553 | OffsetNode, SizeNode}; |
| 554 | return MDNode::get(Context&: AccessType->getContext(), MDs: Ops); |
| 555 | } |
| 556 | |
| 557 | Metadata *Ops[] = {const_cast<MDNode*>(AccessType), |
| 558 | const_cast<MDNode*>(AccessType), |
| 559 | OffsetNode}; |
| 560 | return MDNode::get(Context&: AccessType->getContext(), MDs: Ops); |
| 561 | } |
| 562 | |
| 563 | static bool hasField(TBAAStructTypeNode BaseType, |
| 564 | TBAAStructTypeNode FieldType) { |
| 565 | for (unsigned I = 0, E = BaseType.getNumFields(); I != E; ++I) { |
| 566 | TBAAStructTypeNode T = BaseType.getFieldType(FieldIndex: I); |
| 567 | if (T == FieldType || hasField(BaseType: T, FieldType)) |
| 568 | return true; |
| 569 | } |
| 570 | return false; |
| 571 | } |
| 572 | |
| 573 | /// Return true if for two given accesses, one of the accessed objects may be a |
| 574 | /// subobject of the other. The \p BaseTag and \p SubobjectTag parameters |
| 575 | /// describe the accesses to the base object and the subobject respectively. |
| 576 | /// \p CommonType must be the metadata node describing the common type of the |
| 577 | /// accessed objects. On return, \p MayAlias is set to true iff these accesses |
| 578 | /// may alias and \p Generic, if not null, points to the most generic access |
| 579 | /// tag for the given two. |
| 580 | static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag, |
| 581 | TBAAStructTagNode SubobjectTag, |
| 582 | const MDNode *CommonType, |
| 583 | const MDNode **GenericTag, |
| 584 | bool &MayAlias) { |
| 585 | // If the base object is of the least common type, then this may be an access |
| 586 | // to its subobject. |
| 587 | if (BaseTag.getAccessType() == BaseTag.getBaseType() && |
| 588 | BaseTag.getAccessType() == CommonType) { |
| 589 | if (GenericTag) |
| 590 | *GenericTag = createAccessTag(AccessType: CommonType); |
| 591 | MayAlias = true; |
| 592 | return true; |
| 593 | } |
| 594 | |
| 595 | // If the access to the base object is through a field of the subobject's |
| 596 | // type, then this may be an access to that field. To check for that we start |
| 597 | // from the base type, follow the edge with the correct offset in the type DAG |
| 598 | // and adjust the offset until we reach the field type or until we reach the |
| 599 | // access type. |
| 600 | bool NewFormat = BaseTag.isNewFormat(); |
| 601 | TBAAStructTypeNode BaseType(BaseTag.getBaseType()); |
| 602 | uint64_t OffsetInBase = BaseTag.getOffset(); |
| 603 | |
| 604 | for (;;) { |
| 605 | // In the old format there is no distinction between fields and parent |
| 606 | // types, so in this case we consider all nodes up to the root. |
| 607 | if (!BaseType.getNode()) { |
| 608 | assert(!NewFormat && "Did not see access type in access path!" ); |
| 609 | break; |
| 610 | } |
| 611 | |
| 612 | if (BaseType.getNode() == SubobjectTag.getBaseType()) { |
| 613 | MayAlias = OffsetInBase == SubobjectTag.getOffset() || |
| 614 | BaseType.getNode() == BaseTag.getAccessType() || |
| 615 | SubobjectTag.getBaseType() == SubobjectTag.getAccessType(); |
| 616 | if (GenericTag) { |
| 617 | *GenericTag = |
| 618 | MayAlias ? SubobjectTag.getNode() : createAccessTag(AccessType: CommonType); |
| 619 | } |
| 620 | return true; |
| 621 | } |
| 622 | |
| 623 | // With new-format nodes we stop at the access type. |
| 624 | if (NewFormat && BaseType.getNode() == BaseTag.getAccessType()) |
| 625 | break; |
| 626 | |
| 627 | // Follow the edge with the correct offset. Offset will be adjusted to |
| 628 | // be relative to the field type. |
| 629 | BaseType = BaseType.getField(Offset&: OffsetInBase); |
| 630 | } |
| 631 | |
| 632 | // If the base object has a direct or indirect field of the subobject's type, |
| 633 | // then this may be an access to that field. We need this to check now that |
| 634 | // we support aggregates as access types. |
| 635 | if (NewFormat) { |
| 636 | // TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType()); |
| 637 | TBAAStructTypeNode FieldType(SubobjectTag.getBaseType()); |
| 638 | if (hasField(BaseType, FieldType)) { |
| 639 | if (GenericTag) |
| 640 | *GenericTag = createAccessTag(AccessType: CommonType); |
| 641 | MayAlias = true; |
| 642 | return true; |
| 643 | } |
| 644 | } |
| 645 | |
| 646 | return false; |
| 647 | } |
| 648 | |
| 649 | /// matchTags - Return true if the given couple of accesses are allowed to |
| 650 | /// overlap. If \arg GenericTag is not null, then on return it points to the |
| 651 | /// most generic access descriptor for the given two. |
| 652 | static bool matchAccessTags(const MDNode *A, const MDNode *B, |
| 653 | const MDNode **GenericTag) { |
| 654 | if (A == B) { |
| 655 | if (GenericTag) |
| 656 | *GenericTag = A; |
| 657 | return true; |
| 658 | } |
| 659 | |
| 660 | // Accesses with no TBAA information may alias with any other accesses. |
| 661 | if (!A || !B) { |
| 662 | if (GenericTag) |
| 663 | *GenericTag = nullptr; |
| 664 | return true; |
| 665 | } |
| 666 | |
| 667 | TBAAStructTagNode TagA(A), TagB(B); |
| 668 | const MDNode *CommonType = getLeastCommonType(A: TagA.getAccessType(), |
| 669 | B: TagB.getAccessType()); |
| 670 | |
| 671 | // If the final access types have different roots, they're part of different |
| 672 | // potentially unrelated type systems, so we must be conservative. |
| 673 | if (!CommonType) { |
| 674 | if (GenericTag) |
| 675 | *GenericTag = nullptr; |
| 676 | return true; |
| 677 | } |
| 678 | |
| 679 | // If one of the accessed objects may be a subobject of the other, then such |
| 680 | // accesses may alias. |
| 681 | bool MayAlias; |
| 682 | if (mayBeAccessToSubobjectOf(/* BaseTag= */ TagA, /* SubobjectTag= */ TagB, |
| 683 | CommonType, GenericTag, MayAlias) || |
| 684 | mayBeAccessToSubobjectOf(/* BaseTag= */ TagB, /* SubobjectTag= */ TagA, |
| 685 | CommonType, GenericTag, MayAlias)) |
| 686 | return MayAlias; |
| 687 | |
| 688 | // Otherwise, we've proved there's no alias. |
| 689 | if (GenericTag) |
| 690 | *GenericTag = createAccessTag(AccessType: CommonType); |
| 691 | return false; |
| 692 | } |
| 693 | |
| 694 | /// Aliases - Test whether the access represented by tag A may alias the |
| 695 | /// access represented by tag B. |
| 696 | bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { |
| 697 | return matchAccessTags(A, B); |
| 698 | } |
| 699 | |
| 700 | bool TypeBasedAAResult::shouldUseTBAA() const { |
| 701 | return EnableTBAA && !UsingTypeSanitizer; |
| 702 | } |
| 703 | |
| 704 | AnalysisKey TypeBasedAA::Key; |
| 705 | |
| 706 | TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) { |
| 707 | return TypeBasedAAResult(F.hasFnAttribute(Kind: Attribute::SanitizeType)); |
| 708 | } |
| 709 | |
| 710 | char TypeBasedAAWrapperPass::ID = 0; |
| 711 | INITIALIZE_PASS(TypeBasedAAWrapperPass, "tbaa" , "Type-Based Alias Analysis" , |
| 712 | false, true) |
| 713 | |
| 714 | ImmutablePass *llvm::createTypeBasedAAWrapperPass() { |
| 715 | return new TypeBasedAAWrapperPass(); |
| 716 | } |
| 717 | |
| 718 | TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {} |
| 719 | |
| 720 | bool TypeBasedAAWrapperPass::doInitialization(Module &M) { |
| 721 | Result.reset(p: new TypeBasedAAResult(/*UsingTypeSanitizer=*/false)); |
| 722 | return false; |
| 723 | } |
| 724 | |
| 725 | bool TypeBasedAAWrapperPass::doFinalization(Module &M) { |
| 726 | Result.reset(); |
| 727 | return false; |
| 728 | } |
| 729 | |
| 730 | void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
| 731 | AU.setPreservesAll(); |
| 732 | } |
| 733 | |
| 734 | MDNode *AAMDNodes::shiftTBAA(MDNode *MD, size_t Offset) { |
| 735 | // Fast path if there's no offset |
| 736 | if (Offset == 0) |
| 737 | return MD; |
| 738 | |
| 739 | // The correct behavior here is to add the offset into the TBAA |
| 740 | // struct node offset. The base type, however may not have defined |
| 741 | // a type at this additional offset, resulting in errors. Since |
| 742 | // this method is only used within a given load/store access |
| 743 | // the offset provided is only used to subdivide the previous load |
| 744 | // maintaining the validity of the previous TBAA. |
| 745 | // |
| 746 | // This, however, should be revisited in the future. |
| 747 | return MD; |
| 748 | } |
| 749 | |
| 750 | MDNode *AAMDNodes::shiftTBAAStruct(MDNode *MD, size_t Offset) { |
| 751 | // Fast path if there's no offset |
| 752 | if (Offset == 0) |
| 753 | return MD; |
| 754 | SmallVector<Metadata *, 3> Sub; |
| 755 | for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) { |
| 756 | ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i)); |
| 757 | ConstantInt *InnerSize = |
| 758 | mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i + 1)); |
| 759 | // Don't include any triples that aren't in bounds |
| 760 | if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) |
| 761 | continue; |
| 762 | |
| 763 | uint64_t NewSize = InnerSize->getZExtValue(); |
| 764 | uint64_t NewOffset = InnerOffset->getZExtValue() - Offset; |
| 765 | if (InnerOffset->getZExtValue() < Offset) { |
| 766 | NewOffset = 0; |
| 767 | NewSize -= Offset - InnerOffset->getZExtValue(); |
| 768 | } |
| 769 | |
| 770 | // Shift the offset of the triple |
| 771 | Sub.push_back(Elt: ConstantAsMetadata::get( |
| 772 | C: ConstantInt::get(Ty: InnerOffset->getType(), V: NewOffset))); |
| 773 | Sub.push_back(Elt: ConstantAsMetadata::get( |
| 774 | C: ConstantInt::get(Ty: InnerSize->getType(), V: NewSize))); |
| 775 | Sub.push_back(Elt: MD->getOperand(I: i + 2)); |
| 776 | } |
| 777 | return MDNode::get(Context&: MD->getContext(), MDs: Sub); |
| 778 | } |
| 779 | |
| 780 | MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { |
| 781 | // Fast path if 0-length |
| 782 | if (Len == 0) |
| 783 | return nullptr; |
| 784 | |
| 785 | TBAAStructTagNode Tag(MD); |
| 786 | |
| 787 | // Only new format TBAA has a size |
| 788 | if (!Tag.isNewFormat()) |
| 789 | return MD; |
| 790 | |
| 791 | // If unknown size, drop the TBAA. |
| 792 | if (Len == -1) |
| 793 | return nullptr; |
| 794 | |
| 795 | // Otherwise, create TBAA with the new Len |
| 796 | ArrayRef<MDOperand> MDOperands = MD->operands(); |
| 797 | SmallVector<Metadata *, 4> NextNodes(MDOperands); |
| 798 | ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(MD&: NextNodes[3]); |
| 799 | |
| 800 | // Don't create a new MDNode if it is the same length. |
| 801 | if (PreviousSize->equalsInt(V: Len)) |
| 802 | return MD; |
| 803 | |
| 804 | NextNodes[3] = |
| 805 | ConstantAsMetadata::get(C: ConstantInt::get(Ty: PreviousSize->getType(), V: Len)); |
| 806 | return MDNode::get(Context&: MD->getContext(), MDs: NextNodes); |
| 807 | } |
| 808 | |
| 809 | AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { |
| 810 | AAMDNodes New = *this; |
| 811 | MDNode *M = New.TBAAStruct; |
| 812 | if (!New.TBAA && M && M->getNumOperands() >= 3 && M->getOperand(I: 0) && |
| 813 | mdconst::hasa<ConstantInt>(MD: M->getOperand(I: 0)) && |
| 814 | mdconst::extract<ConstantInt>(MD: M->getOperand(I: 0))->isZero() && |
| 815 | M->getOperand(I: 1) && mdconst::hasa<ConstantInt>(MD: M->getOperand(I: 1)) && |
| 816 | mdconst::extract<ConstantInt>(MD: M->getOperand(I: 1))->getValue() == |
| 817 | AccessSize && |
| 818 | M->getOperand(I: 2) && isa<MDNode>(Val: M->getOperand(I: 2))) |
| 819 | New.TBAA = cast<MDNode>(Val: M->getOperand(I: 2)); |
| 820 | |
| 821 | New.TBAAStruct = nullptr; |
| 822 | return New; |
| 823 | } |
| 824 | |
| 825 | AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, |
| 826 | const DataLayout &DL) { |
| 827 | AAMDNodes New = shift(Offset); |
| 828 | if (!DL.typeSizeEqualsStoreSize(Ty: AccessTy)) |
| 829 | return New; |
| 830 | TypeSize Size = DL.getTypeStoreSize(Ty: AccessTy); |
| 831 | if (Size.isScalable()) |
| 832 | return New; |
| 833 | |
| 834 | return New.adjustForAccess(AccessSize: Size.getKnownMinValue()); |
| 835 | } |
| 836 | |
| 837 | AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, unsigned AccessSize) { |
| 838 | AAMDNodes New = shift(Offset); |
| 839 | return New.adjustForAccess(AccessSize); |
| 840 | } |
| 841 | |