| 1 | //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the TypeBasedAliasAnalysis pass, which implements |
| 10 | // metadata-based TBAA. |
| 11 | // |
| 12 | // In LLVM IR, memory does not have types, so LLVM's own type system is not |
| 13 | // suitable for doing TBAA. Instead, metadata is added to the IR to describe |
| 14 | // a type system of a higher level language. This can be used to implement |
| 15 | // typical C/C++ TBAA, but it can also be used to implement custom alias |
| 16 | // analysis behavior for other languages. |
| 17 | // |
| 18 | // We now support two types of metadata format: scalar TBAA and struct-path |
| 19 | // aware TBAA. After all testing cases are upgraded to use struct-path aware |
| 20 | // TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA |
| 21 | // can be dropped. |
| 22 | // |
| 23 | // The scalar TBAA metadata format is very simple. TBAA MDNodes have up to |
| 24 | // three fields, e.g.: |
| 25 | // !0 = !{ !"an example type tree" } |
| 26 | // !1 = !{ !"int", !0 } |
| 27 | // !2 = !{ !"float", !0 } |
| 28 | // !3 = !{ !"const float", !2, i64 1 } |
| 29 | // |
| 30 | // The first field is an identity field. It can be any value, usually |
| 31 | // an MDString, which uniquely identifies the type. The most important |
| 32 | // name in the tree is the name of the root node. Two trees with |
| 33 | // different root node names are entirely disjoint, even if they |
| 34 | // have leaves with common names. |
| 35 | // |
| 36 | // The second field identifies the type's parent node in the tree, or |
| 37 | // is null or omitted for a root node. A type is considered to alias |
| 38 | // all of its descendants and all of its ancestors in the tree. Also, |
| 39 | // a type is considered to alias all types in other trees, so that |
| 40 | // bitcode produced from multiple front-ends is handled conservatively. |
| 41 | // |
| 42 | // If the third field is present, it's an integer which if equal to 1 |
| 43 | // indicates that the type is "constant" (meaning pointsToConstantMemory |
| 44 | // should return true; see |
| 45 | // http://llvm.org/docs/AliasAnalysis.html#OtherItfs). |
| 46 | // |
| 47 | // With struct-path aware TBAA, the MDNodes attached to an instruction using |
| 48 | // "!tbaa" are called path tag nodes. |
| 49 | // |
| 50 | // The path tag node has 4 fields with the last field being optional. |
| 51 | // |
| 52 | // The first field is the base type node, it can be a struct type node |
| 53 | // or a scalar type node. The second field is the access type node, it |
| 54 | // must be a scalar type node. The third field is the offset into the base type. |
| 55 | // The last field has the same meaning as the last field of our scalar TBAA: |
| 56 | // it's an integer which if equal to 1 indicates that the access is "constant". |
| 57 | // |
| 58 | // The struct type node has a name and a list of pairs, one pair for each member |
| 59 | // of the struct. The first element of each pair is a type node (a struct type |
| 60 | // node or a scalar type node), specifying the type of the member, the second |
| 61 | // element of each pair is the offset of the member. |
| 62 | // |
| 63 | // Given an example |
| 64 | // typedef struct { |
| 65 | // short s; |
| 66 | // } A; |
| 67 | // typedef struct { |
| 68 | // uint16_t s; |
| 69 | // A a; |
| 70 | // } B; |
| 71 | // |
| 72 | // For an access to B.a.s, we attach !5 (a path tag node) to the load/store |
| 73 | // instruction. The base type is !4 (struct B), the access type is !2 (scalar |
| 74 | // type short) and the offset is 4. |
| 75 | // |
| 76 | // !0 = !{!"Simple C/C++ TBAA"} |
| 77 | // !1 = !{!"omnipotent char", !0} // Scalar type node |
| 78 | // !2 = !{!"short", !1} // Scalar type node |
| 79 | // !3 = !{!"A", !2, i64 0} // Struct type node |
| 80 | // !4 = !{!"B", !2, i64 0, !3, i64 4} |
| 81 | // // Struct type node |
| 82 | // !5 = !{!4, !2, i64 4} // Path tag node |
| 83 | // |
| 84 | // The struct type nodes and the scalar type nodes form a type DAG. |
| 85 | // Root (!0) |
| 86 | // char (!1) -- edge to Root |
| 87 | // short (!2) -- edge to char |
| 88 | // A (!3) -- edge with offset 0 to short |
| 89 | // B (!4) -- edge with offset 0 to short and edge with offset 4 to A |
| 90 | // |
| 91 | // To check if two tags (tagX and tagY) can alias, we start from the base type |
| 92 | // of tagX, follow the edge with the correct offset in the type DAG and adjust |
| 93 | // the offset until we reach the base type of tagY or until we reach the Root |
| 94 | // node. |
| 95 | // If we reach the base type of tagY, compare the adjusted offset with |
| 96 | // offset of tagY, return Alias if the offsets are the same, return NoAlias |
| 97 | // otherwise. |
| 98 | // If we reach the Root node, perform the above starting from base type of tagY |
| 99 | // to see if we reach base type of tagX. |
| 100 | // |
| 101 | // If they have different roots, they're part of different potentially |
| 102 | // unrelated type systems, so we return Alias to be conservative. |
| 103 | // If neither node is an ancestor of the other and they have the same root, |
| 104 | // then we say NoAlias. |
| 105 | // |
| 106 | //===----------------------------------------------------------------------===// |
| 107 | |
| 108 | #include "llvm/Analysis/TypeBasedAliasAnalysis.h" |
| 109 | #include "llvm/ADT/SetVector.h" |
| 110 | #include "llvm/Analysis/AliasAnalysis.h" |
| 111 | #include "llvm/Analysis/MemoryLocation.h" |
| 112 | #include "llvm/IR/Constants.h" |
| 113 | #include "llvm/IR/DataLayout.h" |
| 114 | #include "llvm/IR/DerivedTypes.h" |
| 115 | #include "llvm/IR/InstrTypes.h" |
| 116 | #include "llvm/IR/LLVMContext.h" |
| 117 | #include "llvm/IR/Metadata.h" |
| 118 | #include "llvm/InitializePasses.h" |
| 119 | #include "llvm/Pass.h" |
| 120 | #include "llvm/Support/Casting.h" |
| 121 | #include "llvm/Support/CommandLine.h" |
| 122 | #include "llvm/Support/ErrorHandling.h" |
| 123 | #include <cassert> |
| 124 | #include <cstdint> |
| 125 | |
| 126 | using namespace llvm; |
| 127 | |
| 128 | // A handy option for disabling TBAA functionality. The same effect can also be |
| 129 | // achieved by stripping the !tbaa tags from IR, but this option is sometimes |
| 130 | // more convenient. |
| 131 | static cl::opt<bool> EnableTBAA("enable-tbaa" , cl::init(Val: true), cl::Hidden); |
| 132 | |
| 133 | namespace { |
| 134 | |
| 135 | /// isNewFormatTypeNode - Return true iff the given type node is in the new |
| 136 | /// size-aware format. |
| 137 | static bool isNewFormatTypeNode(const MDNode *N) { |
| 138 | if (N->getNumOperands() < 3) |
| 139 | return false; |
| 140 | // In the old format the first operand is a string. |
| 141 | if (!isa<MDNode>(Val: N->getOperand(I: 0))) |
| 142 | return false; |
| 143 | return true; |
| 144 | } |
| 145 | |
| 146 | /// This is a simple wrapper around an MDNode which provides a higher-level |
| 147 | /// interface by hiding the details of how alias analysis information is encoded |
| 148 | /// in its operands. |
| 149 | template<typename MDNodeTy> |
| 150 | class TBAANodeImpl { |
| 151 | MDNodeTy *Node = nullptr; |
| 152 | |
| 153 | public: |
| 154 | TBAANodeImpl() = default; |
| 155 | explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {} |
| 156 | |
| 157 | /// getNode - Get the MDNode for this TBAANode. |
| 158 | MDNodeTy *getNode() const { return Node; } |
| 159 | |
| 160 | /// isNewFormat - Return true iff the wrapped type node is in the new |
| 161 | /// size-aware format. |
| 162 | bool isNewFormat() const { return isNewFormatTypeNode(Node); } |
| 163 | |
| 164 | /// getParent - Get this TBAANode's Alias tree parent. |
| 165 | TBAANodeImpl<MDNodeTy> getParent() const { |
| 166 | if (isNewFormat()) |
| 167 | return TBAANodeImpl(cast<MDNodeTy>(Node->getOperand(0))); |
| 168 | |
| 169 | if (Node->getNumOperands() < 2) |
| 170 | return TBAANodeImpl<MDNodeTy>(); |
| 171 | MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1)); |
| 172 | if (!P) |
| 173 | return TBAANodeImpl<MDNodeTy>(); |
| 174 | // Ok, this node has a valid parent. Return it. |
| 175 | return TBAANodeImpl<MDNodeTy>(P); |
| 176 | } |
| 177 | |
| 178 | /// Test if this TBAANode represents a type for objects which are |
| 179 | /// not modified (by any means) in the context where this |
| 180 | /// AliasAnalysis is relevant. |
| 181 | bool isTypeImmutable() const { |
| 182 | if (Node->getNumOperands() < 3) |
| 183 | return false; |
| 184 | ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); |
| 185 | if (!CI) |
| 186 | return false; |
| 187 | return CI->getValue()[0]; |
| 188 | } |
| 189 | }; |
| 190 | |
| 191 | /// \name Specializations of \c TBAANodeImpl for const and non const qualified |
| 192 | /// \c MDNode. |
| 193 | /// @{ |
| 194 | using TBAANode = TBAANodeImpl<const MDNode>; |
| 195 | using MutableTBAANode = TBAANodeImpl<MDNode>; |
| 196 | /// @} |
| 197 | |
| 198 | /// This is a simple wrapper around an MDNode which provides a |
| 199 | /// higher-level interface by hiding the details of how alias analysis |
| 200 | /// information is encoded in its operands. |
| 201 | template<typename MDNodeTy> |
| 202 | class TBAAStructTagNodeImpl { |
| 203 | /// This node should be created with createTBAAAccessTag(). |
| 204 | MDNodeTy *Node; |
| 205 | |
| 206 | public: |
| 207 | explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {} |
| 208 | |
| 209 | /// Get the MDNode for this TBAAStructTagNode. |
| 210 | MDNodeTy *getNode() const { return Node; } |
| 211 | |
| 212 | /// isNewFormat - Return true iff the wrapped access tag is in the new |
| 213 | /// size-aware format. |
| 214 | bool isNewFormat() const { |
| 215 | if (Node->getNumOperands() < 4) |
| 216 | return false; |
| 217 | if (MDNodeTy *AccessType = getAccessType()) |
| 218 | if (!TBAANodeImpl<MDNodeTy>(AccessType).isNewFormat()) |
| 219 | return false; |
| 220 | return true; |
| 221 | } |
| 222 | |
| 223 | MDNodeTy *getBaseType() const { |
| 224 | return dyn_cast_or_null<MDNode>(Node->getOperand(0)); |
| 225 | } |
| 226 | |
| 227 | MDNodeTy *getAccessType() const { |
| 228 | return dyn_cast_or_null<MDNode>(Node->getOperand(1)); |
| 229 | } |
| 230 | |
| 231 | uint64_t getOffset() const { |
| 232 | return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); |
| 233 | } |
| 234 | |
| 235 | uint64_t getSize() const { |
| 236 | if (!isNewFormat()) |
| 237 | return UINT64_MAX; |
| 238 | return mdconst::extract<ConstantInt>(Node->getOperand(3))->getZExtValue(); |
| 239 | } |
| 240 | |
| 241 | /// Test if this TBAAStructTagNode represents a type for objects |
| 242 | /// which are not modified (by any means) in the context where this |
| 243 | /// AliasAnalysis is relevant. |
| 244 | bool isTypeImmutable() const { |
| 245 | unsigned OpNo = isNewFormat() ? 4 : 3; |
| 246 | if (Node->getNumOperands() < OpNo + 1) |
| 247 | return false; |
| 248 | ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(OpNo)); |
| 249 | if (!CI) |
| 250 | return false; |
| 251 | return CI->getValue()[0]; |
| 252 | } |
| 253 | }; |
| 254 | |
| 255 | /// \name Specializations of \c TBAAStructTagNodeImpl for const and non const |
| 256 | /// qualified \c MDNods. |
| 257 | /// @{ |
| 258 | using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>; |
| 259 | using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>; |
| 260 | /// @} |
| 261 | |
| 262 | /// This is a simple wrapper around an MDNode which provides a |
| 263 | /// higher-level interface by hiding the details of how alias analysis |
| 264 | /// information is encoded in its operands. |
| 265 | class TBAAStructTypeNode { |
| 266 | /// This node should be created with createTBAATypeNode(). |
| 267 | const MDNode *Node = nullptr; |
| 268 | |
| 269 | public: |
| 270 | TBAAStructTypeNode() = default; |
| 271 | explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} |
| 272 | |
| 273 | /// Get the MDNode for this TBAAStructTypeNode. |
| 274 | const MDNode *getNode() const { return Node; } |
| 275 | |
| 276 | /// isNewFormat - Return true iff the wrapped type node is in the new |
| 277 | /// size-aware format. |
| 278 | bool isNewFormat() const { return isNewFormatTypeNode(N: Node); } |
| 279 | |
| 280 | bool operator==(const TBAAStructTypeNode &Other) const { |
| 281 | return getNode() == Other.getNode(); |
| 282 | } |
| 283 | |
| 284 | /// getId - Return type identifier. |
| 285 | Metadata *getId() const { |
| 286 | return Node->getOperand(I: isNewFormat() ? 2 : 0); |
| 287 | } |
| 288 | |
| 289 | unsigned getNumFields() const { |
| 290 | unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1; |
| 291 | unsigned NumOpsPerField = isNewFormat() ? 3 : 2; |
| 292 | return (getNode()->getNumOperands() - FirstFieldOpNo) / NumOpsPerField; |
| 293 | } |
| 294 | |
| 295 | TBAAStructTypeNode getFieldType(unsigned FieldIndex) const { |
| 296 | unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1; |
| 297 | unsigned NumOpsPerField = isNewFormat() ? 3 : 2; |
| 298 | unsigned OpIndex = FirstFieldOpNo + FieldIndex * NumOpsPerField; |
| 299 | auto *TypeNode = cast<MDNode>(Val: getNode()->getOperand(I: OpIndex)); |
| 300 | return TBAAStructTypeNode(TypeNode); |
| 301 | } |
| 302 | |
| 303 | /// Get this TBAAStructTypeNode's field in the type DAG with |
| 304 | /// given offset. Update the offset to be relative to the field type. |
| 305 | TBAAStructTypeNode getField(uint64_t &Offset) const { |
| 306 | bool NewFormat = isNewFormat(); |
| 307 | const ArrayRef<MDOperand> Operands = Node->operands(); |
| 308 | const unsigned NumOperands = Operands.size(); |
| 309 | |
| 310 | if (NewFormat) { |
| 311 | // New-format root and scalar type nodes have no fields. |
| 312 | if (NumOperands < 6) |
| 313 | return TBAAStructTypeNode(); |
| 314 | } else { |
| 315 | // Parent can be omitted for the root node. |
| 316 | if (NumOperands < 2) |
| 317 | return TBAAStructTypeNode(); |
| 318 | |
| 319 | // Fast path for a scalar type node and a struct type node with a single |
| 320 | // field. |
| 321 | if (NumOperands <= 3) { |
| 322 | uint64_t Cur = |
| 323 | NumOperands == 2 |
| 324 | ? 0 |
| 325 | : mdconst::extract<ConstantInt>(MD: Operands[2])->getZExtValue(); |
| 326 | Offset -= Cur; |
| 327 | MDNode *P = dyn_cast_or_null<MDNode>(Val: Operands[1]); |
| 328 | if (!P) |
| 329 | return TBAAStructTypeNode(); |
| 330 | return TBAAStructTypeNode(P); |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | // Assume the offsets are in order. We return the previous field if |
| 335 | // the current offset is bigger than the given offset. |
| 336 | unsigned FirstFieldOpNo = NewFormat ? 3 : 1; |
| 337 | unsigned NumOpsPerField = NewFormat ? 3 : 2; |
| 338 | unsigned TheIdx = 0; |
| 339 | |
| 340 | for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands; |
| 341 | Idx += NumOpsPerField) { |
| 342 | uint64_t Cur = |
| 343 | mdconst::extract<ConstantInt>(MD: Operands[Idx + 1])->getZExtValue(); |
| 344 | if (Cur > Offset) { |
| 345 | assert(Idx >= FirstFieldOpNo + NumOpsPerField && |
| 346 | "TBAAStructTypeNode::getField should have an offset match!" ); |
| 347 | TheIdx = Idx - NumOpsPerField; |
| 348 | break; |
| 349 | } |
| 350 | } |
| 351 | // Move along the last field. |
| 352 | if (TheIdx == 0) |
| 353 | TheIdx = NumOperands - NumOpsPerField; |
| 354 | uint64_t Cur = |
| 355 | mdconst::extract<ConstantInt>(MD: Operands[TheIdx + 1])->getZExtValue(); |
| 356 | Offset -= Cur; |
| 357 | MDNode *P = dyn_cast_or_null<MDNode>(Val: Operands[TheIdx]); |
| 358 | if (!P) |
| 359 | return TBAAStructTypeNode(); |
| 360 | return TBAAStructTypeNode(P); |
| 361 | } |
| 362 | }; |
| 363 | |
| 364 | } // end anonymous namespace |
| 365 | |
| 366 | /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat |
| 367 | /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA |
| 368 | /// format. |
| 369 | static bool isStructPathTBAA(const MDNode *MD) { |
| 370 | // Anonymous TBAA root starts with a MDNode and dragonegg uses it as |
| 371 | // a TBAA tag. |
| 372 | return isa<MDNode>(Val: MD->getOperand(I: 0)) && MD->getNumOperands() >= 3; |
| 373 | } |
| 374 | |
| 375 | AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, |
| 376 | const MemoryLocation &LocB, |
| 377 | AAQueryInfo &AAQI, const Instruction *) { |
| 378 | if (!shouldUseTBAA()) |
| 379 | return AliasResult::MayAlias; |
| 380 | |
| 381 | if (Aliases(A: LocA.AATags.TBAA, B: LocB.AATags.TBAA)) |
| 382 | return AliasResult::MayAlias; |
| 383 | |
| 384 | // Otherwise return a definitive result. |
| 385 | return AliasResult::NoAlias; |
| 386 | } |
| 387 | |
| 388 | ModRefInfo TypeBasedAAResult::getModRefInfoMask(const MemoryLocation &Loc, |
| 389 | AAQueryInfo &AAQI, |
| 390 | bool IgnoreLocals) { |
| 391 | if (!shouldUseTBAA()) |
| 392 | return ModRefInfo::ModRef; |
| 393 | |
| 394 | const MDNode *M = Loc.AATags.TBAA; |
| 395 | if (!M) |
| 396 | return ModRefInfo::ModRef; |
| 397 | |
| 398 | // If this is an "immutable" type, we can assume the pointer is pointing |
| 399 | // to constant memory. |
| 400 | if ((!isStructPathTBAA(MD: M) && TBAANode(M).isTypeImmutable()) || |
| 401 | (isStructPathTBAA(MD: M) && TBAAStructTagNode(M).isTypeImmutable())) |
| 402 | return ModRefInfo::NoModRef; |
| 403 | |
| 404 | return ModRefInfo::ModRef; |
| 405 | } |
| 406 | |
| 407 | MemoryEffects TypeBasedAAResult::getMemoryEffects(const CallBase *Call, |
| 408 | AAQueryInfo &AAQI) { |
| 409 | if (!shouldUseTBAA()) |
| 410 | return MemoryEffects::unknown(); |
| 411 | |
| 412 | // If this is an "immutable" type, the access is not observable. |
| 413 | if (const MDNode *M = Call->getMetadata(KindID: LLVMContext::MD_tbaa)) |
| 414 | if ((!isStructPathTBAA(MD: M) && TBAANode(M).isTypeImmutable()) || |
| 415 | (isStructPathTBAA(MD: M) && TBAAStructTagNode(M).isTypeImmutable())) |
| 416 | return MemoryEffects::none(); |
| 417 | |
| 418 | return MemoryEffects::unknown(); |
| 419 | } |
| 420 | |
| 421 | MemoryEffects TypeBasedAAResult::getMemoryEffects(const Function *F) { |
| 422 | // Functions don't have metadata. |
| 423 | return MemoryEffects::unknown(); |
| 424 | } |
| 425 | |
| 426 | ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call, |
| 427 | const MemoryLocation &Loc, |
| 428 | AAQueryInfo &AAQI) { |
| 429 | if (!shouldUseTBAA()) |
| 430 | return ModRefInfo::ModRef; |
| 431 | |
| 432 | if (const MDNode *L = Loc.AATags.TBAA) |
| 433 | if (const MDNode *M = Call->getMetadata(KindID: LLVMContext::MD_tbaa)) |
| 434 | if (!Aliases(A: L, B: M)) |
| 435 | return ModRefInfo::NoModRef; |
| 436 | |
| 437 | return ModRefInfo::ModRef; |
| 438 | } |
| 439 | |
| 440 | ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1, |
| 441 | const CallBase *Call2, |
| 442 | AAQueryInfo &AAQI) { |
| 443 | if (!shouldUseTBAA()) |
| 444 | return ModRefInfo::ModRef; |
| 445 | |
| 446 | if (const MDNode *M1 = Call1->getMetadata(KindID: LLVMContext::MD_tbaa)) |
| 447 | if (const MDNode *M2 = Call2->getMetadata(KindID: LLVMContext::MD_tbaa)) |
| 448 | if (!Aliases(A: M1, B: M2)) |
| 449 | return ModRefInfo::NoModRef; |
| 450 | |
| 451 | return ModRefInfo::ModRef; |
| 452 | } |
| 453 | |
| 454 | bool MDNode::isTBAAVtableAccess() const { |
| 455 | if (!isStructPathTBAA(MD: this)) { |
| 456 | if (getNumOperands() < 1) |
| 457 | return false; |
| 458 | if (MDString *Tag1 = dyn_cast<MDString>(Val: getOperand(I: 0))) { |
| 459 | if (Tag1->getString() == "vtable pointer" ) |
| 460 | return true; |
| 461 | } |
| 462 | return false; |
| 463 | } |
| 464 | |
| 465 | // For struct-path aware TBAA, we use the access type of the tag. |
| 466 | TBAAStructTagNode Tag(this); |
| 467 | TBAAStructTypeNode AccessType(Tag.getAccessType()); |
| 468 | if(auto *Id = dyn_cast<MDString>(Val: AccessType.getId())) |
| 469 | if (Id->getString() == "vtable pointer" ) |
| 470 | return true; |
| 471 | return false; |
| 472 | } |
| 473 | |
| 474 | static bool matchAccessTags(const MDNode *A, const MDNode *B, |
| 475 | const MDNode **GenericTag = nullptr); |
| 476 | |
| 477 | MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { |
| 478 | const MDNode *GenericTag; |
| 479 | matchAccessTags(A, B, GenericTag: &GenericTag); |
| 480 | return const_cast<MDNode*>(GenericTag); |
| 481 | } |
| 482 | |
| 483 | static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) { |
| 484 | if (!A || !B) |
| 485 | return nullptr; |
| 486 | |
| 487 | if (A == B) |
| 488 | return A; |
| 489 | |
| 490 | SmallSetVector<const MDNode *, 4> PathA; |
| 491 | TBAANode TA(A); |
| 492 | while (TA.getNode()) { |
| 493 | if (!PathA.insert(X: TA.getNode())) |
| 494 | report_fatal_error(reason: "Cycle found in TBAA metadata." ); |
| 495 | TA = TA.getParent(); |
| 496 | } |
| 497 | |
| 498 | SmallSetVector<const MDNode *, 4> PathB; |
| 499 | TBAANode TB(B); |
| 500 | while (TB.getNode()) { |
| 501 | if (!PathB.insert(X: TB.getNode())) |
| 502 | report_fatal_error(reason: "Cycle found in TBAA metadata." ); |
| 503 | TB = TB.getParent(); |
| 504 | } |
| 505 | |
| 506 | int IA = PathA.size() - 1; |
| 507 | int IB = PathB.size() - 1; |
| 508 | |
| 509 | const MDNode *Ret = nullptr; |
| 510 | while (IA >= 0 && IB >= 0) { |
| 511 | if (PathA[IA] == PathB[IB]) |
| 512 | Ret = PathA[IA]; |
| 513 | else |
| 514 | break; |
| 515 | --IA; |
| 516 | --IB; |
| 517 | } |
| 518 | |
| 519 | return Ret; |
| 520 | } |
| 521 | |
| 522 | AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const { |
| 523 | AAMDNodes Result; |
| 524 | Result.TBAA = MDNode::getMostGenericTBAA(A: TBAA, B: Other.TBAA); |
| 525 | Result.TBAAStruct = nullptr; |
| 526 | Result.Scope = MDNode::getMostGenericAliasScope(A: Scope, B: Other.Scope); |
| 527 | Result.NoAlias = MDNode::intersect(A: NoAlias, B: Other.NoAlias); |
| 528 | return Result; |
| 529 | } |
| 530 | |
| 531 | AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const { |
| 532 | AAMDNodes Result; |
| 533 | Result.TBAA = Result.TBAAStruct = nullptr; |
| 534 | Result.Scope = MDNode::getMostGenericAliasScope(A: Scope, B: Other.Scope); |
| 535 | Result.NoAlias = MDNode::intersect(A: NoAlias, B: Other.NoAlias); |
| 536 | return Result; |
| 537 | } |
| 538 | |
| 539 | static const MDNode *createAccessTag(const MDNode *AccessType) { |
| 540 | // If there is no access type or the access type is the root node, then |
| 541 | // we don't have any useful access tag to return. |
| 542 | if (!AccessType || AccessType->getNumOperands() < 2) |
| 543 | return nullptr; |
| 544 | |
| 545 | Type *Int64 = IntegerType::get(C&: AccessType->getContext(), NumBits: 64); |
| 546 | auto *OffsetNode = ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int64, V: 0)); |
| 547 | |
| 548 | if (TBAAStructTypeNode(AccessType).isNewFormat()) { |
| 549 | // TODO: Take access ranges into account when matching access tags and |
| 550 | // fix this code to generate actual access sizes for generic tags. |
| 551 | uint64_t AccessSize = UINT64_MAX; |
| 552 | auto *SizeNode = |
| 553 | ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int64, V: AccessSize)); |
| 554 | Metadata *Ops[] = {const_cast<MDNode*>(AccessType), |
| 555 | const_cast<MDNode*>(AccessType), |
| 556 | OffsetNode, SizeNode}; |
| 557 | return MDNode::get(Context&: AccessType->getContext(), MDs: Ops); |
| 558 | } |
| 559 | |
| 560 | Metadata *Ops[] = {const_cast<MDNode*>(AccessType), |
| 561 | const_cast<MDNode*>(AccessType), |
| 562 | OffsetNode}; |
| 563 | return MDNode::get(Context&: AccessType->getContext(), MDs: Ops); |
| 564 | } |
| 565 | |
| 566 | static bool hasField(TBAAStructTypeNode BaseType, |
| 567 | TBAAStructTypeNode FieldType) { |
| 568 | for (unsigned I = 0, E = BaseType.getNumFields(); I != E; ++I) { |
| 569 | TBAAStructTypeNode T = BaseType.getFieldType(FieldIndex: I); |
| 570 | if (T == FieldType || hasField(BaseType: T, FieldType)) |
| 571 | return true; |
| 572 | } |
| 573 | return false; |
| 574 | } |
| 575 | |
| 576 | /// Return true if for two given accesses, one of the accessed objects may be a |
| 577 | /// subobject of the other. The \p BaseTag and \p SubobjectTag parameters |
| 578 | /// describe the accesses to the base object and the subobject respectively. |
| 579 | /// \p CommonType must be the metadata node describing the common type of the |
| 580 | /// accessed objects. On return, \p MayAlias is set to true iff these accesses |
| 581 | /// may alias and \p Generic, if not null, points to the most generic access |
| 582 | /// tag for the given two. |
| 583 | static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag, |
| 584 | TBAAStructTagNode SubobjectTag, |
| 585 | const MDNode *CommonType, |
| 586 | const MDNode **GenericTag, |
| 587 | bool &MayAlias) { |
| 588 | // If the base object is of the least common type, then this may be an access |
| 589 | // to its subobject. |
| 590 | if (BaseTag.getAccessType() == BaseTag.getBaseType() && |
| 591 | BaseTag.getAccessType() == CommonType) { |
| 592 | if (GenericTag) |
| 593 | *GenericTag = createAccessTag(AccessType: CommonType); |
| 594 | MayAlias = true; |
| 595 | return true; |
| 596 | } |
| 597 | |
| 598 | // If the access to the base object is through a field of the subobject's |
| 599 | // type, then this may be an access to that field. To check for that we start |
| 600 | // from the base type, follow the edge with the correct offset in the type DAG |
| 601 | // and adjust the offset until we reach the field type or until we reach the |
| 602 | // access type. |
| 603 | bool NewFormat = BaseTag.isNewFormat(); |
| 604 | TBAAStructTypeNode BaseType(BaseTag.getBaseType()); |
| 605 | uint64_t OffsetInBase = BaseTag.getOffset(); |
| 606 | |
| 607 | for (;;) { |
| 608 | // In the old format there is no distinction between fields and parent |
| 609 | // types, so in this case we consider all nodes up to the root. |
| 610 | if (!BaseType.getNode()) { |
| 611 | assert(!NewFormat && "Did not see access type in access path!" ); |
| 612 | break; |
| 613 | } |
| 614 | |
| 615 | if (BaseType.getNode() == SubobjectTag.getBaseType()) { |
| 616 | MayAlias = OffsetInBase == SubobjectTag.getOffset() || |
| 617 | BaseType.getNode() == BaseTag.getAccessType() || |
| 618 | SubobjectTag.getBaseType() == SubobjectTag.getAccessType(); |
| 619 | if (GenericTag) { |
| 620 | *GenericTag = |
| 621 | MayAlias ? SubobjectTag.getNode() : createAccessTag(AccessType: CommonType); |
| 622 | } |
| 623 | return true; |
| 624 | } |
| 625 | |
| 626 | // With new-format nodes we stop at the access type. |
| 627 | if (NewFormat && BaseType.getNode() == BaseTag.getAccessType()) |
| 628 | break; |
| 629 | |
| 630 | // Follow the edge with the correct offset. Offset will be adjusted to |
| 631 | // be relative to the field type. |
| 632 | BaseType = BaseType.getField(Offset&: OffsetInBase); |
| 633 | } |
| 634 | |
| 635 | // If the base object has a direct or indirect field of the subobject's type, |
| 636 | // then this may be an access to that field. We need this to check now that |
| 637 | // we support aggregates as access types. |
| 638 | if (NewFormat) { |
| 639 | // TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType()); |
| 640 | TBAAStructTypeNode FieldType(SubobjectTag.getBaseType()); |
| 641 | if (hasField(BaseType, FieldType)) { |
| 642 | if (GenericTag) |
| 643 | *GenericTag = createAccessTag(AccessType: CommonType); |
| 644 | MayAlias = true; |
| 645 | return true; |
| 646 | } |
| 647 | } |
| 648 | |
| 649 | return false; |
| 650 | } |
| 651 | |
| 652 | /// matchTags - Return true if the given couple of accesses are allowed to |
| 653 | /// overlap. If \arg GenericTag is not null, then on return it points to the |
| 654 | /// most generic access descriptor for the given two. |
| 655 | static bool matchAccessTags(const MDNode *A, const MDNode *B, |
| 656 | const MDNode **GenericTag) { |
| 657 | if (A == B) { |
| 658 | if (GenericTag) |
| 659 | *GenericTag = A; |
| 660 | return true; |
| 661 | } |
| 662 | |
| 663 | // Accesses with no TBAA information may alias with any other accesses. |
| 664 | if (!A || !B) { |
| 665 | if (GenericTag) |
| 666 | *GenericTag = nullptr; |
| 667 | return true; |
| 668 | } |
| 669 | |
| 670 | // Verify that both input nodes are struct-path aware. Auto-upgrade should |
| 671 | // have taken care of this. |
| 672 | assert(isStructPathTBAA(A) && "Access A is not struct-path aware!" ); |
| 673 | assert(isStructPathTBAA(B) && "Access B is not struct-path aware!" ); |
| 674 | |
| 675 | TBAAStructTagNode TagA(A), TagB(B); |
| 676 | const MDNode *CommonType = getLeastCommonType(A: TagA.getAccessType(), |
| 677 | B: TagB.getAccessType()); |
| 678 | |
| 679 | // If the final access types have different roots, they're part of different |
| 680 | // potentially unrelated type systems, so we must be conservative. |
| 681 | if (!CommonType) { |
| 682 | if (GenericTag) |
| 683 | *GenericTag = nullptr; |
| 684 | return true; |
| 685 | } |
| 686 | |
| 687 | // If one of the accessed objects may be a subobject of the other, then such |
| 688 | // accesses may alias. |
| 689 | bool MayAlias; |
| 690 | if (mayBeAccessToSubobjectOf(/* BaseTag= */ TagA, /* SubobjectTag= */ TagB, |
| 691 | CommonType, GenericTag, MayAlias) || |
| 692 | mayBeAccessToSubobjectOf(/* BaseTag= */ TagB, /* SubobjectTag= */ TagA, |
| 693 | CommonType, GenericTag, MayAlias)) |
| 694 | return MayAlias; |
| 695 | |
| 696 | // Otherwise, we've proved there's no alias. |
| 697 | if (GenericTag) |
| 698 | *GenericTag = createAccessTag(AccessType: CommonType); |
| 699 | return false; |
| 700 | } |
| 701 | |
| 702 | /// Aliases - Test whether the access represented by tag A may alias the |
| 703 | /// access represented by tag B. |
| 704 | bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { |
| 705 | return matchAccessTags(A, B); |
| 706 | } |
| 707 | |
| 708 | bool TypeBasedAAResult::shouldUseTBAA() const { |
| 709 | return EnableTBAA && !UsingTypeSanitizer; |
| 710 | } |
| 711 | |
| 712 | AnalysisKey TypeBasedAA::Key; |
| 713 | |
| 714 | TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) { |
| 715 | return TypeBasedAAResult(F.hasFnAttribute(Kind: Attribute::SanitizeType)); |
| 716 | } |
| 717 | |
| 718 | char TypeBasedAAWrapperPass::ID = 0; |
| 719 | INITIALIZE_PASS(TypeBasedAAWrapperPass, "tbaa" , "Type-Based Alias Analysis" , |
| 720 | false, true) |
| 721 | |
| 722 | ImmutablePass *llvm::createTypeBasedAAWrapperPass() { |
| 723 | return new TypeBasedAAWrapperPass(); |
| 724 | } |
| 725 | |
| 726 | TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {} |
| 727 | |
| 728 | bool TypeBasedAAWrapperPass::doInitialization(Module &M) { |
| 729 | Result.reset(p: new TypeBasedAAResult(/*UsingTypeSanitizer=*/false)); |
| 730 | return false; |
| 731 | } |
| 732 | |
| 733 | bool TypeBasedAAWrapperPass::doFinalization(Module &M) { |
| 734 | Result.reset(); |
| 735 | return false; |
| 736 | } |
| 737 | |
| 738 | void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
| 739 | AU.setPreservesAll(); |
| 740 | } |
| 741 | |
| 742 | MDNode *AAMDNodes::shiftTBAA(MDNode *MD, size_t Offset) { |
| 743 | // Fast path if there's no offset |
| 744 | if (Offset == 0) |
| 745 | return MD; |
| 746 | // Fast path if there's no path tbaa node (and thus scalar) |
| 747 | if (!isStructPathTBAA(MD)) |
| 748 | return MD; |
| 749 | |
| 750 | // The correct behavior here is to add the offset into the TBAA |
| 751 | // struct node offset. The base type, however may not have defined |
| 752 | // a type at this additional offset, resulting in errors. Since |
| 753 | // this method is only used within a given load/store access |
| 754 | // the offset provided is only used to subdivide the previous load |
| 755 | // maintaining the validity of the previous TBAA. |
| 756 | // |
| 757 | // This, however, should be revisited in the future. |
| 758 | return MD; |
| 759 | } |
| 760 | |
| 761 | MDNode *AAMDNodes::shiftTBAAStruct(MDNode *MD, size_t Offset) { |
| 762 | // Fast path if there's no offset |
| 763 | if (Offset == 0) |
| 764 | return MD; |
| 765 | SmallVector<Metadata *, 3> Sub; |
| 766 | for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) { |
| 767 | ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i)); |
| 768 | ConstantInt *InnerSize = |
| 769 | mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i + 1)); |
| 770 | // Don't include any triples that aren't in bounds |
| 771 | if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) |
| 772 | continue; |
| 773 | |
| 774 | uint64_t NewSize = InnerSize->getZExtValue(); |
| 775 | uint64_t NewOffset = InnerOffset->getZExtValue() - Offset; |
| 776 | if (InnerOffset->getZExtValue() < Offset) { |
| 777 | NewOffset = 0; |
| 778 | NewSize -= Offset - InnerOffset->getZExtValue(); |
| 779 | } |
| 780 | |
| 781 | // Shift the offset of the triple |
| 782 | Sub.push_back(Elt: ConstantAsMetadata::get( |
| 783 | C: ConstantInt::get(Ty: InnerOffset->getType(), V: NewOffset))); |
| 784 | Sub.push_back(Elt: ConstantAsMetadata::get( |
| 785 | C: ConstantInt::get(Ty: InnerSize->getType(), V: NewSize))); |
| 786 | Sub.push_back(Elt: MD->getOperand(I: i + 2)); |
| 787 | } |
| 788 | return MDNode::get(Context&: MD->getContext(), MDs: Sub); |
| 789 | } |
| 790 | |
| 791 | MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { |
| 792 | // Fast path if 0-length |
| 793 | if (Len == 0) |
| 794 | return nullptr; |
| 795 | |
| 796 | // Regular TBAA is invariant of length, so we only need to consider |
| 797 | // struct-path TBAA. |
| 798 | if (!isStructPathTBAA(MD)) |
| 799 | return MD; |
| 800 | |
| 801 | TBAAStructTagNode Tag(MD); |
| 802 | |
| 803 | // Only new format TBAA has a size |
| 804 | if (!Tag.isNewFormat()) |
| 805 | return MD; |
| 806 | |
| 807 | // If unknown size, drop the TBAA. |
| 808 | if (Len == -1) |
| 809 | return nullptr; |
| 810 | |
| 811 | // Otherwise, create TBAA with the new Len |
| 812 | ArrayRef<MDOperand> MDOperands = MD->operands(); |
| 813 | SmallVector<Metadata *, 4> NextNodes(MDOperands); |
| 814 | ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(MD&: NextNodes[3]); |
| 815 | |
| 816 | // Don't create a new MDNode if it is the same length. |
| 817 | if (PreviousSize->equalsInt(V: Len)) |
| 818 | return MD; |
| 819 | |
| 820 | NextNodes[3] = |
| 821 | ConstantAsMetadata::get(C: ConstantInt::get(Ty: PreviousSize->getType(), V: Len)); |
| 822 | return MDNode::get(Context&: MD->getContext(), MDs: NextNodes); |
| 823 | } |
| 824 | |
| 825 | AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { |
| 826 | AAMDNodes New = *this; |
| 827 | MDNode *M = New.TBAAStruct; |
| 828 | if (!New.TBAA && M && M->getNumOperands() >= 3 && M->getOperand(I: 0) && |
| 829 | mdconst::hasa<ConstantInt>(MD: M->getOperand(I: 0)) && |
| 830 | mdconst::extract<ConstantInt>(MD: M->getOperand(I: 0))->isZero() && |
| 831 | M->getOperand(I: 1) && mdconst::hasa<ConstantInt>(MD: M->getOperand(I: 1)) && |
| 832 | mdconst::extract<ConstantInt>(MD: M->getOperand(I: 1))->getValue() == |
| 833 | AccessSize && |
| 834 | M->getOperand(I: 2) && isa<MDNode>(Val: M->getOperand(I: 2))) |
| 835 | New.TBAA = cast<MDNode>(Val: M->getOperand(I: 2)); |
| 836 | |
| 837 | New.TBAAStruct = nullptr; |
| 838 | return New; |
| 839 | } |
| 840 | |
| 841 | AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, |
| 842 | const DataLayout &DL) { |
| 843 | AAMDNodes New = shift(Offset); |
| 844 | if (!DL.typeSizeEqualsStoreSize(Ty: AccessTy)) |
| 845 | return New; |
| 846 | TypeSize Size = DL.getTypeStoreSize(Ty: AccessTy); |
| 847 | if (Size.isScalable()) |
| 848 | return New; |
| 849 | |
| 850 | return New.adjustForAccess(AccessSize: Size.getKnownMinValue()); |
| 851 | } |
| 852 | |
| 853 | AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, unsigned AccessSize) { |
| 854 | AAMDNodes New = shift(Offset); |
| 855 | return New.adjustForAccess(AccessSize); |
| 856 | } |
| 857 | |