| 1 | //===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This is the code that manages TBAA information and defines the TBAA policy |
| 10 | // for the optimizer to use. Relevant standards text includes: |
| 11 | // |
| 12 | // C99 6.5p7 |
| 13 | // C++ [basic.lval] (p10 in n3126, p15 in some earlier versions) |
| 14 | // |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #include "CodeGenTBAA.h" |
| 18 | #include "ABIInfoImpl.h" |
| 19 | #include "CGCXXABI.h" |
| 20 | #include "CGRecordLayout.h" |
| 21 | #include "CodeGenTypes.h" |
| 22 | #include "clang/AST/ASTContext.h" |
| 23 | #include "clang/AST/Attr.h" |
| 24 | #include "clang/AST/Mangle.h" |
| 25 | #include "clang/AST/RecordLayout.h" |
| 26 | #include "clang/Basic/CodeGenOptions.h" |
| 27 | #include "clang/Basic/TargetInfo.h" |
| 28 | #include "llvm/IR/LLVMContext.h" |
| 29 | #include "llvm/IR/Metadata.h" |
| 30 | #include "llvm/IR/Module.h" |
| 31 | #include "llvm/IR/Type.h" |
| 32 | #include "llvm/Support/Debug.h" |
| 33 | using namespace clang; |
| 34 | using namespace CodeGen; |
| 35 | |
| 36 | CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes, |
| 37 | llvm::Module &M, const CodeGenOptions &CGO, |
| 38 | const LangOptions &Features) |
| 39 | : Context(Ctx), CGTypes(CGTypes), Module(M), CodeGenOpts(CGO), |
| 40 | Features(Features), |
| 41 | MangleCtx(ItaniumMangleContext::create(Context&: Ctx, Diags&: Ctx.getDiagnostics())), |
| 42 | MDHelper(M.getContext()), Root(nullptr), Char(nullptr) {} |
| 43 | |
| 44 | CodeGenTBAA::~CodeGenTBAA() { |
| 45 | } |
| 46 | |
| 47 | llvm::MDNode *CodeGenTBAA::getRoot() { |
| 48 | // Define the root of the tree. This identifies the tree, so that |
| 49 | // if our LLVM IR is linked with LLVM IR from a different front-end |
| 50 | // (or a different version of this front-end), their TBAA trees will |
| 51 | // remain distinct, and the optimizer will treat them conservatively. |
| 52 | if (!Root) { |
| 53 | if (Features.CPlusPlus) |
| 54 | Root = MDHelper.createTBAARoot(Name: "Simple C++ TBAA" ); |
| 55 | else |
| 56 | Root = MDHelper.createTBAARoot(Name: "Simple C/C++ TBAA" ); |
| 57 | } |
| 58 | |
| 59 | return Root; |
| 60 | } |
| 61 | |
| 62 | llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name, |
| 63 | llvm::MDNode *Parent, |
| 64 | uint64_t Size) { |
| 65 | if (CodeGenOpts.NewStructPathTBAA) { |
| 66 | llvm::Metadata *Id = MDHelper.createString(Str: Name); |
| 67 | return MDHelper.createTBAATypeNode(Parent, Size, Id); |
| 68 | } |
| 69 | return MDHelper.createTBAAScalarTypeNode(Name, Parent); |
| 70 | } |
| 71 | |
| 72 | llvm::MDNode *CodeGenTBAA::getChar() { |
| 73 | // Define the root of the tree for user-accessible memory. C and C++ |
| 74 | // give special powers to char and certain similar types. However, |
| 75 | // these special powers only cover user-accessible memory, and doesn't |
| 76 | // include things like vtables. |
| 77 | if (!Char) |
| 78 | Char = createScalarTypeNode(Name: "omnipotent char" , Parent: getRoot(), /* Size= */ 1); |
| 79 | |
| 80 | return Char; |
| 81 | } |
| 82 | |
| 83 | llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) { |
| 84 | assert(PtrDepth >= 1 && "Pointer must have some depth" ); |
| 85 | |
| 86 | // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes |
| 87 | // for "any" pointers of increasing pointer depth, and are organized in the |
| 88 | // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ... |
| 89 | // |
| 90 | // Note that AnyPtrs[Idx] is actually the node for pointer depth (Idx+1), |
| 91 | // since there is no node for pointer depth 0. |
| 92 | // |
| 93 | // These "any" pointer type nodes are used in pointer TBAA. The type node of |
| 94 | // a concrete pointer type has the "any" pointer type node of appropriate |
| 95 | // pointer depth as its parent. The "any" pointer type nodes are also used |
| 96 | // directly for accesses to void pointers, or to specific pointers that we |
| 97 | // conservatively do not distinguish in pointer TBAA (e.g. pointers to |
| 98 | // members). Essentially, this establishes that e.g. void** can alias with |
| 99 | // any type that can unify with T**, ignoring things like qualifiers. Here, T |
| 100 | // is a variable that represents an arbitrary type, including pointer types. |
| 101 | // As such, each depth is naturally a subtype of the previous depth, and thus |
| 102 | // transitively of all previous depths. |
| 103 | if (AnyPtrs.size() < PtrDepth) { |
| 104 | AnyPtrs.reserve(N: PtrDepth); |
| 105 | auto Size = Module.getDataLayout().getPointerSize(); |
| 106 | // Populate first element. |
| 107 | if (AnyPtrs.empty()) |
| 108 | AnyPtrs.push_back(Elt: createScalarTypeNode(Name: "any pointer" , Parent: getChar(), Size)); |
| 109 | // Populate further elements. |
| 110 | for (size_t Idx = AnyPtrs.size(); Idx < PtrDepth; ++Idx) { |
| 111 | auto Name = ("any p" + llvm::Twine(Idx + 1) + " pointer" ).str(); |
| 112 | AnyPtrs.push_back(Elt: createScalarTypeNode(Name, Parent: AnyPtrs[Idx - 1], Size)); |
| 113 | } |
| 114 | } |
| 115 | |
| 116 | return AnyPtrs[PtrDepth - 1]; |
| 117 | } |
| 118 | |
| 119 | static bool TypeHasMayAlias(QualType QTy) { |
| 120 | // Tagged types have declarations, and therefore may have attributes. |
| 121 | if (auto *TD = QTy->getAsTagDecl()) |
| 122 | if (TD->hasAttr<MayAliasAttr>()) |
| 123 | return true; |
| 124 | |
| 125 | // Also look for may_alias as a declaration attribute on a typedef. |
| 126 | // FIXME: We should follow GCC and model may_alias as a type attribute |
| 127 | // rather than as a declaration attribute. |
| 128 | while (auto *TT = QTy->getAs<TypedefType>()) { |
| 129 | if (TT->getDecl()->hasAttr<MayAliasAttr>()) |
| 130 | return true; |
| 131 | QTy = TT->desugar(); |
| 132 | } |
| 133 | |
| 134 | // Also consider an array type as may_alias when its element type (at |
| 135 | // any level) is marked as such. |
| 136 | if (auto *ArrayTy = QTy->getAsArrayTypeUnsafe()) |
| 137 | if (TypeHasMayAlias(QTy: ArrayTy->getElementType())) |
| 138 | return true; |
| 139 | |
| 140 | return false; |
| 141 | } |
| 142 | |
| 143 | /// Check if the given type is a valid base type to be used in access tags. |
| 144 | static bool isValidBaseType(QualType QTy) { |
| 145 | if (const RecordType *TTy = QTy->getAs<RecordType>()) { |
| 146 | const RecordDecl *RD = TTy->getDecl()->getDefinition(); |
| 147 | // Incomplete types are not valid base access types. |
| 148 | if (!RD) |
| 149 | return false; |
| 150 | if (RD->hasFlexibleArrayMember()) |
| 151 | return false; |
| 152 | // RD can be struct, union, class, interface or enum. |
| 153 | // For now, we only handle struct and class. |
| 154 | if (RD->isStruct() || RD->isClass()) |
| 155 | return true; |
| 156 | } |
| 157 | return false; |
| 158 | } |
| 159 | |
| 160 | llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { |
| 161 | uint64_t Size = Context.getTypeSizeInChars(T: Ty).getQuantity(); |
| 162 | |
| 163 | // Handle builtin types. |
| 164 | if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Val: Ty)) { |
| 165 | switch (BTy->getKind()) { |
| 166 | // Character types are special and can alias anything. |
| 167 | // In C++, this technically only includes "char" and "unsigned char", |
| 168 | // and not "signed char". In C, it includes all three. For now, |
| 169 | // the risk of exploiting this detail in C++ seems likely to outweigh |
| 170 | // the benefit. |
| 171 | case BuiltinType::Char_U: |
| 172 | case BuiltinType::Char_S: |
| 173 | case BuiltinType::UChar: |
| 174 | case BuiltinType::SChar: |
| 175 | return getChar(); |
| 176 | |
| 177 | // Unsigned types can alias their corresponding signed types. |
| 178 | case BuiltinType::UShort: |
| 179 | return getTypeInfo(QTy: Context.ShortTy); |
| 180 | case BuiltinType::UInt: |
| 181 | return getTypeInfo(QTy: Context.IntTy); |
| 182 | case BuiltinType::ULong: |
| 183 | return getTypeInfo(QTy: Context.LongTy); |
| 184 | case BuiltinType::ULongLong: |
| 185 | return getTypeInfo(QTy: Context.LongLongTy); |
| 186 | case BuiltinType::UInt128: |
| 187 | return getTypeInfo(QTy: Context.Int128Ty); |
| 188 | |
| 189 | case BuiltinType::UShortFract: |
| 190 | return getTypeInfo(QTy: Context.ShortFractTy); |
| 191 | case BuiltinType::UFract: |
| 192 | return getTypeInfo(QTy: Context.FractTy); |
| 193 | case BuiltinType::ULongFract: |
| 194 | return getTypeInfo(QTy: Context.LongFractTy); |
| 195 | |
| 196 | case BuiltinType::SatUShortFract: |
| 197 | return getTypeInfo(QTy: Context.SatShortFractTy); |
| 198 | case BuiltinType::SatUFract: |
| 199 | return getTypeInfo(QTy: Context.SatFractTy); |
| 200 | case BuiltinType::SatULongFract: |
| 201 | return getTypeInfo(QTy: Context.SatLongFractTy); |
| 202 | |
| 203 | case BuiltinType::UShortAccum: |
| 204 | return getTypeInfo(QTy: Context.ShortAccumTy); |
| 205 | case BuiltinType::UAccum: |
| 206 | return getTypeInfo(QTy: Context.AccumTy); |
| 207 | case BuiltinType::ULongAccum: |
| 208 | return getTypeInfo(QTy: Context.LongAccumTy); |
| 209 | |
| 210 | case BuiltinType::SatUShortAccum: |
| 211 | return getTypeInfo(QTy: Context.SatShortAccumTy); |
| 212 | case BuiltinType::SatUAccum: |
| 213 | return getTypeInfo(QTy: Context.SatAccumTy); |
| 214 | case BuiltinType::SatULongAccum: |
| 215 | return getTypeInfo(QTy: Context.SatLongAccumTy); |
| 216 | |
| 217 | // Treat all other builtin types as distinct types. This includes |
| 218 | // treating wchar_t, char16_t, and char32_t as distinct from their |
| 219 | // "underlying types". |
| 220 | default: |
| 221 | return createScalarTypeNode(Name: BTy->getName(Policy: Features), Parent: getChar(), Size); |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | // C++1z [basic.lval]p10: "If a program attempts to access the stored value of |
| 226 | // an object through a glvalue of other than one of the following types the |
| 227 | // behavior is undefined: [...] a char, unsigned char, or std::byte type." |
| 228 | if (Ty->isStdByteType()) |
| 229 | return getChar(); |
| 230 | |
| 231 | // Handle pointers and references. |
| 232 | // |
| 233 | // C has a very strict rule for pointer aliasing. C23 6.7.6.1p2: |
| 234 | // For two pointer types to be compatible, both shall be identically |
| 235 | // qualified and both shall be pointers to compatible types. |
| 236 | // |
| 237 | // This rule is impractically strict; we want to at least ignore CVR |
| 238 | // qualifiers. Distinguishing by CVR qualifiers would make it UB to |
| 239 | // e.g. cast a `char **` to `const char * const *` and dereference it, |
| 240 | // which is too common and useful to invalidate. C++'s similar types |
| 241 | // rule permits qualifier differences in these nested positions; in fact, |
| 242 | // C++ even allows that cast as an implicit conversion. |
| 243 | // |
| 244 | // Other qualifiers could theoretically be distinguished, especially if |
| 245 | // they involve a significant representation difference. We don't |
| 246 | // currently do so, however. |
| 247 | if (Ty->isPointerType() || Ty->isReferenceType()) { |
| 248 | if (!CodeGenOpts.PointerTBAA) |
| 249 | return getAnyPtr(); |
| 250 | // C++ [basic.lval]p11 permits objects to accessed through an l-value of |
| 251 | // similar type. Two types are similar under C++ [conv.qual]p2 if the |
| 252 | // decomposition of the types into pointers, member pointers, and arrays has |
| 253 | // the same structure when ignoring cv-qualifiers at each level of the |
| 254 | // decomposition. Meanwhile, C makes T(*)[] and T(*)[N] compatible, which |
| 255 | // would really complicate any attempt to distinguish pointers to arrays by |
| 256 | // their bounds. It's simpler, and much easier to explain to users, to |
| 257 | // simply treat all pointers to arrays as pointers to their element type for |
| 258 | // aliasing purposes. So when creating a TBAA tag for a pointer type, we |
| 259 | // recursively ignore both qualifiers and array types when decomposing the |
| 260 | // pointee type. The only meaningful remaining structure is the number of |
| 261 | // pointer types we encountered along the way, so we just produce the tag |
| 262 | // "p<depth> <base type tag>". If we do find a member pointer type, for now |
| 263 | // we just conservatively bail out with AnyPtr (below) rather than trying to |
| 264 | // create a tag that honors the similar-type rules while still |
| 265 | // distinguishing different kinds of member pointer. |
| 266 | unsigned PtrDepth = 0; |
| 267 | do { |
| 268 | PtrDepth++; |
| 269 | Ty = Ty->getPointeeType()->getBaseElementTypeUnsafe(); |
| 270 | } while (Ty->isPointerType()); |
| 271 | |
| 272 | // While there are no special rules in the standards regarding void pointers |
| 273 | // and strict aliasing, emitting distinct tags for void pointers break some |
| 274 | // common idioms and there is no good alternative to re-write the code |
| 275 | // without strict-aliasing violations. |
| 276 | if (Ty->isVoidType()) |
| 277 | return getAnyPtr(PtrDepth); |
| 278 | |
| 279 | assert(!isa<VariableArrayType>(Ty)); |
| 280 | // When the underlying type is a builtin type, we compute the pointee type |
| 281 | // string recursively, which is implicitly more forgiving than the standards |
| 282 | // require. Effectively, we are turning the question "are these types |
| 283 | // compatible/similar" into "are accesses to these types allowed to alias". |
| 284 | // In both C and C++, the latter question has special carve-outs for |
| 285 | // signedness mismatches that only apply at the top level. As a result, we |
| 286 | // are allowing e.g. `int *` l-values to access `unsigned *` objects. |
| 287 | SmallString<256> TyName; |
| 288 | if (isa<BuiltinType>(Val: Ty)) { |
| 289 | llvm::MDNode *ScalarMD = getTypeInfoHelper(Ty); |
| 290 | StringRef Name = |
| 291 | cast<llvm::MDString>( |
| 292 | Val: ScalarMD->getOperand(I: CodeGenOpts.NewStructPathTBAA ? 2 : 0)) |
| 293 | ->getString(); |
| 294 | TyName = Name; |
| 295 | } else { |
| 296 | // Be conservative if the type isn't a RecordType. We are specifically |
| 297 | // required to do this for member pointers until we implement the |
| 298 | // similar-types rule. |
| 299 | const auto *RT = Ty->getAs<RecordType>(); |
| 300 | if (!RT) |
| 301 | return getAnyPtr(PtrDepth); |
| 302 | |
| 303 | // For unnamed structs or unions C's compatible types rule applies. Two |
| 304 | // compatible types in different compilation units can have different |
| 305 | // mangled names, meaning the metadata emitted below would incorrectly |
| 306 | // mark them as no-alias. Use AnyPtr for such types in both C and C++, as |
| 307 | // C and C++ types may be visible when doing LTO. |
| 308 | // |
| 309 | // Note that using AnyPtr is overly conservative. We could summarize the |
| 310 | // members of the type, as per the C compatibility rule in the future. |
| 311 | // This also covers anonymous structs and unions, which have a different |
| 312 | // compatibility rule, but it doesn't matter because you can never have a |
| 313 | // pointer to an anonymous struct or union. |
| 314 | if (!RT->getDecl()->getDeclName()) |
| 315 | return getAnyPtr(PtrDepth); |
| 316 | |
| 317 | // For non-builtin types use the mangled name of the canonical type. |
| 318 | llvm::raw_svector_ostream TyOut(TyName); |
| 319 | MangleCtx->mangleCanonicalTypeName(T: QualType(Ty, 0), TyOut); |
| 320 | } |
| 321 | |
| 322 | SmallString<256> OutName("p" ); |
| 323 | OutName += std::to_string(val: PtrDepth); |
| 324 | OutName += " " ; |
| 325 | OutName += TyName; |
| 326 | return createScalarTypeNode(Name: OutName, Parent: getAnyPtr(PtrDepth), Size); |
| 327 | } |
| 328 | |
| 329 | // Accesses to arrays are accesses to objects of their element types. |
| 330 | if (CodeGenOpts.NewStructPathTBAA && Ty->isArrayType()) |
| 331 | return getTypeInfo(QTy: cast<ArrayType>(Val: Ty)->getElementType()); |
| 332 | |
| 333 | // Enum types are distinct types. In C++ they have "underlying types", |
| 334 | // however they aren't related for TBAA. |
| 335 | if (const EnumType *ETy = dyn_cast<EnumType>(Val: Ty)) { |
| 336 | if (!Features.CPlusPlus) |
| 337 | return getTypeInfo(QTy: ETy->getDecl()->getIntegerType()); |
| 338 | |
| 339 | // In C++ mode, types have linkage, so we can rely on the ODR and |
| 340 | // on their mangled names, if they're external. |
| 341 | // TODO: Is there a way to get a program-wide unique name for a |
| 342 | // decl with local linkage or no linkage? |
| 343 | if (!ETy->getDecl()->isExternallyVisible()) |
| 344 | return getChar(); |
| 345 | |
| 346 | SmallString<256> OutName; |
| 347 | llvm::raw_svector_ostream Out(OutName); |
| 348 | CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName( |
| 349 | T: QualType(ETy, 0), Out); |
| 350 | return createScalarTypeNode(Name: OutName, Parent: getChar(), Size); |
| 351 | } |
| 352 | |
| 353 | if (const auto *EIT = dyn_cast<BitIntType>(Val: Ty)) { |
| 354 | SmallString<256> OutName; |
| 355 | llvm::raw_svector_ostream Out(OutName); |
| 356 | // Don't specify signed/unsigned since integer types can alias despite sign |
| 357 | // differences. |
| 358 | Out << "_BitInt(" << EIT->getNumBits() << ')'; |
| 359 | return createScalarTypeNode(Name: OutName, Parent: getChar(), Size); |
| 360 | } |
| 361 | |
| 362 | // For now, handle any other kind of type conservatively. |
| 363 | return getChar(); |
| 364 | } |
| 365 | |
| 366 | llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) { |
| 367 | // At -O0 or relaxed aliasing, TBAA is not emitted for regular types (unless |
| 368 | // we're running TypeSanitizer). |
| 369 | if (!Features.Sanitize.has(K: SanitizerKind::Type) && |
| 370 | (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)) |
| 371 | return nullptr; |
| 372 | |
| 373 | // If the type has the may_alias attribute (even on a typedef), it is |
| 374 | // effectively in the general char alias class. |
| 375 | if (TypeHasMayAlias(QTy)) |
| 376 | return getChar(); |
| 377 | |
| 378 | // We need this function to not fall back to returning the "omnipotent char" |
| 379 | // type node for aggregate and union types. Otherwise, any dereference of an |
| 380 | // aggregate will result into the may-alias access descriptor, meaning all |
| 381 | // subsequent accesses to direct and indirect members of that aggregate will |
| 382 | // be considered may-alias too. |
| 383 | // TODO: Combine getTypeInfo() and getValidBaseTypeInfo() into a single |
| 384 | // function. |
| 385 | if (isValidBaseType(QTy)) |
| 386 | return getValidBaseTypeInfo(QTy); |
| 387 | |
| 388 | const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr(); |
| 389 | if (llvm::MDNode *N = MetadataCache[Ty]) |
| 390 | return N; |
| 391 | |
| 392 | // Note that the following helper call is allowed to add new nodes to the |
| 393 | // cache, which invalidates all its previously obtained iterators. So we |
| 394 | // first generate the node for the type and then add that node to the cache. |
| 395 | llvm::MDNode *TypeNode = getTypeInfoHelper(Ty); |
| 396 | return MetadataCache[Ty] = TypeNode; |
| 397 | } |
| 398 | |
| 399 | TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) { |
| 400 | // Pointee values may have incomplete types, but they shall never be |
| 401 | // dereferenced. |
| 402 | if (AccessType->isIncompleteType()) |
| 403 | return TBAAAccessInfo::getIncompleteInfo(); |
| 404 | |
| 405 | if (TypeHasMayAlias(QTy: AccessType)) |
| 406 | return TBAAAccessInfo::getMayAliasInfo(); |
| 407 | |
| 408 | uint64_t Size = Context.getTypeSizeInChars(T: AccessType).getQuantity(); |
| 409 | return TBAAAccessInfo(getTypeInfo(QTy: AccessType), Size); |
| 410 | } |
| 411 | |
| 412 | TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) { |
| 413 | const llvm::DataLayout &DL = Module.getDataLayout(); |
| 414 | unsigned Size = DL.getPointerTypeSize(Ty: VTablePtrType); |
| 415 | return TBAAAccessInfo(createScalarTypeNode(Name: "vtable pointer" , Parent: getRoot(), Size), |
| 416 | Size); |
| 417 | } |
| 418 | |
| 419 | bool |
| 420 | CodeGenTBAA::CollectFields(uint64_t BaseOffset, |
| 421 | QualType QTy, |
| 422 | SmallVectorImpl<llvm::MDBuilder::TBAAStructField> & |
| 423 | Fields, |
| 424 | bool MayAlias) { |
| 425 | /* Things not handled yet include: C++ base classes, bitfields, */ |
| 426 | |
| 427 | if (const RecordType *TTy = QTy->getAs<RecordType>()) { |
| 428 | if (TTy->isUnionType()) { |
| 429 | uint64_t Size = Context.getTypeSizeInChars(T: QTy).getQuantity(); |
| 430 | llvm::MDNode *TBAAType = getChar(); |
| 431 | llvm::MDNode *TBAATag = getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size)); |
| 432 | Fields.push_back( |
| 433 | Elt: llvm::MDBuilder::TBAAStructField(BaseOffset, Size, TBAATag)); |
| 434 | return true; |
| 435 | } |
| 436 | const RecordDecl *RD = TTy->getDecl()->getDefinition(); |
| 437 | if (RD->hasFlexibleArrayMember()) |
| 438 | return false; |
| 439 | |
| 440 | // TODO: Handle C++ base classes. |
| 441 | if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(Val: RD)) |
| 442 | if (Decl->bases_begin() != Decl->bases_end()) |
| 443 | return false; |
| 444 | |
| 445 | const ASTRecordLayout &Layout = Context.getASTRecordLayout(D: RD); |
| 446 | const CGRecordLayout &CGRL = CGTypes.getCGRecordLayout(RD); |
| 447 | |
| 448 | unsigned idx = 0; |
| 449 | for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); |
| 450 | i != e; ++i, ++idx) { |
| 451 | if (isEmptyFieldForLayout(Context, FD: *i)) |
| 452 | continue; |
| 453 | |
| 454 | uint64_t Offset = |
| 455 | BaseOffset + Layout.getFieldOffset(FieldNo: idx) / Context.getCharWidth(); |
| 456 | |
| 457 | // Create a single field for consecutive named bitfields using char as |
| 458 | // base type. |
| 459 | if ((*i)->isBitField()) { |
| 460 | const CGBitFieldInfo &Info = CGRL.getBitFieldInfo(FD: *i); |
| 461 | // For big endian targets the first bitfield in the consecutive run is |
| 462 | // at the most-significant end; see CGRecordLowering::setBitFieldInfo |
| 463 | // for more information. |
| 464 | bool IsBE = Context.getTargetInfo().isBigEndian(); |
| 465 | bool IsFirst = IsBE ? Info.StorageSize - (Info.Offset + Info.Size) == 0 |
| 466 | : Info.Offset == 0; |
| 467 | if (!IsFirst) |
| 468 | continue; |
| 469 | unsigned CurrentBitFieldSize = Info.StorageSize; |
| 470 | uint64_t Size = |
| 471 | llvm::divideCeil(Numerator: CurrentBitFieldSize, Denominator: Context.getCharWidth()); |
| 472 | llvm::MDNode *TBAAType = getChar(); |
| 473 | llvm::MDNode *TBAATag = |
| 474 | getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size)); |
| 475 | Fields.push_back( |
| 476 | Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); |
| 477 | continue; |
| 478 | } |
| 479 | |
| 480 | QualType FieldQTy = i->getType(); |
| 481 | if (!CollectFields(BaseOffset: Offset, QTy: FieldQTy, Fields, |
| 482 | MayAlias: MayAlias || TypeHasMayAlias(QTy: FieldQTy))) |
| 483 | return false; |
| 484 | } |
| 485 | return true; |
| 486 | } |
| 487 | |
| 488 | /* Otherwise, treat whatever it is as a field. */ |
| 489 | uint64_t Offset = BaseOffset; |
| 490 | uint64_t Size = Context.getTypeSizeInChars(T: QTy).getQuantity(); |
| 491 | llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy); |
| 492 | llvm::MDNode *TBAATag = getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size)); |
| 493 | Fields.push_back(Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); |
| 494 | return true; |
| 495 | } |
| 496 | |
| 497 | llvm::MDNode * |
| 498 | CodeGenTBAA::getTBAAStructInfo(QualType QTy) { |
| 499 | if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) |
| 500 | return nullptr; |
| 501 | |
| 502 | const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr(); |
| 503 | |
| 504 | if (llvm::MDNode *N = StructMetadataCache[Ty]) |
| 505 | return N; |
| 506 | |
| 507 | SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; |
| 508 | if (CollectFields(BaseOffset: 0, QTy, Fields, MayAlias: TypeHasMayAlias(QTy))) |
| 509 | return MDHelper.createTBAAStructNode(Fields); |
| 510 | |
| 511 | // For now, handle any other kind of type conservatively. |
| 512 | return StructMetadataCache[Ty] = nullptr; |
| 513 | } |
| 514 | |
| 515 | llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { |
| 516 | if (auto *TTy = dyn_cast<RecordType>(Val: Ty)) { |
| 517 | const RecordDecl *RD = TTy->getDecl()->getDefinition(); |
| 518 | const ASTRecordLayout &Layout = Context.getASTRecordLayout(D: RD); |
| 519 | using TBAAStructField = llvm::MDBuilder::TBAAStructField; |
| 520 | SmallVector<TBAAStructField, 4> Fields; |
| 521 | if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) { |
| 522 | // Handle C++ base classes. Non-virtual bases can treated a kind of |
| 523 | // field. Virtual bases are more complex and omitted, but avoid an |
| 524 | // incomplete view for NewStructPathTBAA. |
| 525 | if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0) |
| 526 | return nullptr; |
| 527 | for (const CXXBaseSpecifier &B : CXXRD->bases()) { |
| 528 | if (B.isVirtual()) |
| 529 | continue; |
| 530 | QualType BaseQTy = B.getType(); |
| 531 | const CXXRecordDecl *BaseRD = BaseQTy->getAsCXXRecordDecl(); |
| 532 | if (BaseRD->isEmpty()) |
| 533 | continue; |
| 534 | llvm::MDNode *TypeNode = isValidBaseType(QTy: BaseQTy) |
| 535 | ? getValidBaseTypeInfo(QTy: BaseQTy) |
| 536 | : getTypeInfo(QTy: BaseQTy); |
| 537 | if (!TypeNode) |
| 538 | return nullptr; |
| 539 | uint64_t Offset = Layout.getBaseClassOffset(Base: BaseRD).getQuantity(); |
| 540 | uint64_t Size = |
| 541 | Context.getASTRecordLayout(D: BaseRD).getDataSize().getQuantity(); |
| 542 | Fields.push_back( |
| 543 | Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TypeNode)); |
| 544 | } |
| 545 | // The order in which base class subobjects are allocated is unspecified, |
| 546 | // so may differ from declaration order. In particular, Itanium ABI will |
| 547 | // allocate a primary base first. |
| 548 | // Since we exclude empty subobjects, the objects are not overlapping and |
| 549 | // their offsets are unique. |
| 550 | llvm::sort(C&: Fields, |
| 551 | Comp: [](const TBAAStructField &A, const TBAAStructField &B) { |
| 552 | return A.Offset < B.Offset; |
| 553 | }); |
| 554 | } |
| 555 | for (FieldDecl *Field : RD->fields()) { |
| 556 | if (Field->isZeroSize(Ctx: Context) || Field->isUnnamedBitField()) |
| 557 | continue; |
| 558 | QualType FieldQTy = Field->getType(); |
| 559 | llvm::MDNode *TypeNode = isValidBaseType(QTy: FieldQTy) |
| 560 | ? getValidBaseTypeInfo(QTy: FieldQTy) |
| 561 | : getTypeInfo(QTy: FieldQTy); |
| 562 | if (!TypeNode) |
| 563 | return nullptr; |
| 564 | |
| 565 | uint64_t BitOffset = Layout.getFieldOffset(FieldNo: Field->getFieldIndex()); |
| 566 | uint64_t Offset = Context.toCharUnitsFromBits(BitSize: BitOffset).getQuantity(); |
| 567 | uint64_t Size = Context.getTypeSizeInChars(T: FieldQTy).getQuantity(); |
| 568 | Fields.push_back(Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, |
| 569 | TypeNode)); |
| 570 | } |
| 571 | |
| 572 | SmallString<256> OutName; |
| 573 | if (Features.CPlusPlus) { |
| 574 | // Don't use the mangler for C code. |
| 575 | llvm::raw_svector_ostream Out(OutName); |
| 576 | CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName( |
| 577 | T: QualType(Ty, 0), Out); |
| 578 | } else { |
| 579 | OutName = RD->getName(); |
| 580 | } |
| 581 | |
| 582 | if (CodeGenOpts.NewStructPathTBAA) { |
| 583 | llvm::MDNode *Parent = getChar(); |
| 584 | uint64_t Size = Context.getTypeSizeInChars(T: Ty).getQuantity(); |
| 585 | llvm::Metadata *Id = MDHelper.createString(Str: OutName); |
| 586 | return MDHelper.createTBAATypeNode(Parent, Size, Id, Fields); |
| 587 | } |
| 588 | |
| 589 | // Create the struct type node with a vector of pairs (offset, type). |
| 590 | SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes; |
| 591 | for (const auto &Field : Fields) |
| 592 | OffsetsAndTypes.push_back(Elt: std::make_pair(x: Field.Type, y: Field.Offset)); |
| 593 | return MDHelper.createTBAAStructTypeNode(Name: OutName, Fields: OffsetsAndTypes); |
| 594 | } |
| 595 | |
| 596 | return nullptr; |
| 597 | } |
| 598 | |
| 599 | llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) { |
| 600 | assert(isValidBaseType(QTy) && "Must be a valid base type" ); |
| 601 | |
| 602 | const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr(); |
| 603 | |
| 604 | // nullptr is a valid value in the cache, so use find rather than [] |
| 605 | auto I = BaseTypeMetadataCache.find(Val: Ty); |
| 606 | if (I != BaseTypeMetadataCache.end()) |
| 607 | return I->second; |
| 608 | |
| 609 | // First calculate the metadata, before recomputing the insertion point, as |
| 610 | // the helper can recursively call us. |
| 611 | llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty); |
| 612 | LLVM_ATTRIBUTE_UNUSED auto inserted = |
| 613 | BaseTypeMetadataCache.insert(KV: {Ty, TypeNode}); |
| 614 | assert(inserted.second && "BaseType metadata was already inserted" ); |
| 615 | |
| 616 | return TypeNode; |
| 617 | } |
| 618 | |
| 619 | llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) { |
| 620 | return isValidBaseType(QTy) ? getValidBaseTypeInfo(QTy) : nullptr; |
| 621 | } |
| 622 | |
| 623 | llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) { |
| 624 | assert(!Info.isIncomplete() && "Access to an object of an incomplete type!" ); |
| 625 | |
| 626 | if (Info.isMayAlias()) |
| 627 | Info = TBAAAccessInfo(getChar(), Info.Size); |
| 628 | |
| 629 | if (!Info.AccessType) |
| 630 | return nullptr; |
| 631 | |
| 632 | if (!CodeGenOpts.StructPathTBAA) |
| 633 | Info = TBAAAccessInfo(Info.AccessType, Info.Size); |
| 634 | |
| 635 | llvm::MDNode *&N = AccessTagMetadataCache[Info]; |
| 636 | if (N) |
| 637 | return N; |
| 638 | |
| 639 | if (!Info.BaseType) { |
| 640 | Info.BaseType = Info.AccessType; |
| 641 | assert(!Info.Offset && "Nonzero offset for an access with no base type!" ); |
| 642 | } |
| 643 | if (CodeGenOpts.NewStructPathTBAA) { |
| 644 | return N = MDHelper.createTBAAAccessTag(BaseType: Info.BaseType, AccessType: Info.AccessType, |
| 645 | Offset: Info.Offset, Size: Info.Size); |
| 646 | } |
| 647 | return N = MDHelper.createTBAAStructTagNode(BaseType: Info.BaseType, AccessType: Info.AccessType, |
| 648 | Offset: Info.Offset); |
| 649 | } |
| 650 | |
| 651 | TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, |
| 652 | TBAAAccessInfo TargetInfo) { |
| 653 | if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias()) |
| 654 | return TBAAAccessInfo::getMayAliasInfo(); |
| 655 | return TargetInfo; |
| 656 | } |
| 657 | |
| 658 | TBAAAccessInfo |
| 659 | CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, |
| 660 | TBAAAccessInfo InfoB) { |
| 661 | if (InfoA == InfoB) |
| 662 | return InfoA; |
| 663 | |
| 664 | if (!InfoA || !InfoB) |
| 665 | return TBAAAccessInfo(); |
| 666 | |
| 667 | if (InfoA.isMayAlias() || InfoB.isMayAlias()) |
| 668 | return TBAAAccessInfo::getMayAliasInfo(); |
| 669 | |
| 670 | // TODO: Implement the rest of the logic here. For example, two accesses |
| 671 | // with same final access types result in an access to an object of that final |
| 672 | // access type regardless of their base types. |
| 673 | return TBAAAccessInfo::getMayAliasInfo(); |
| 674 | } |
| 675 | |
| 676 | TBAAAccessInfo |
| 677 | CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, |
| 678 | TBAAAccessInfo SrcInfo) { |
| 679 | if (DestInfo == SrcInfo) |
| 680 | return DestInfo; |
| 681 | |
| 682 | if (!DestInfo || !SrcInfo) |
| 683 | return TBAAAccessInfo(); |
| 684 | |
| 685 | if (DestInfo.isMayAlias() || SrcInfo.isMayAlias()) |
| 686 | return TBAAAccessInfo::getMayAliasInfo(); |
| 687 | |
| 688 | // TODO: Implement the rest of the logic here. For example, two accesses |
| 689 | // with same final access types result in an access to an object of that final |
| 690 | // access type regardless of their base types. |
| 691 | return TBAAAccessInfo::getMayAliasInfo(); |
| 692 | } |
| 693 | |