| 1 | //===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // This is the code that manages TBAA information and defines the TBAA policy | 
|---|
| 10 | // for the optimizer to use. Relevant standards text includes: | 
|---|
| 11 | // | 
|---|
| 12 | //   C99 6.5p7 | 
|---|
| 13 | //   C++ [basic.lval] (p10 in n3126, p15 in some earlier versions) | 
|---|
| 14 | // | 
|---|
| 15 | //===----------------------------------------------------------------------===// | 
|---|
| 16 |  | 
|---|
| 17 | #include "CodeGenTBAA.h" | 
|---|
| 18 | #include "ABIInfoImpl.h" | 
|---|
| 19 | #include "CGCXXABI.h" | 
|---|
| 20 | #include "CGRecordLayout.h" | 
|---|
| 21 | #include "CodeGenTypes.h" | 
|---|
| 22 | #include "clang/AST/ASTContext.h" | 
|---|
| 23 | #include "clang/AST/Attr.h" | 
|---|
| 24 | #include "clang/AST/Mangle.h" | 
|---|
| 25 | #include "clang/AST/RecordLayout.h" | 
|---|
| 26 | #include "clang/Basic/CodeGenOptions.h" | 
|---|
| 27 | #include "clang/Basic/TargetInfo.h" | 
|---|
| 28 | #include "llvm/IR/LLVMContext.h" | 
|---|
| 29 | #include "llvm/IR/Metadata.h" | 
|---|
| 30 | #include "llvm/IR/Module.h" | 
|---|
| 31 | #include "llvm/IR/Type.h" | 
|---|
| 32 | #include "llvm/Support/Debug.h" | 
|---|
| 33 | using namespace clang; | 
|---|
| 34 | using namespace CodeGen; | 
|---|
| 35 |  | 
|---|
| 36 | CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes, | 
|---|
| 37 | llvm::Module &M, const CodeGenOptions &CGO, | 
|---|
| 38 | const LangOptions &Features) | 
|---|
| 39 | : Context(Ctx), CGTypes(CGTypes), Module(M), CodeGenOpts(CGO), | 
|---|
| 40 | Features(Features), | 
|---|
| 41 | MangleCtx(ItaniumMangleContext::create(Context&: Ctx, Diags&: Ctx.getDiagnostics())), | 
|---|
| 42 | MDHelper(M.getContext()), Root(nullptr), Char(nullptr) {} | 
|---|
| 43 |  | 
|---|
| 44 | CodeGenTBAA::~CodeGenTBAA() { | 
|---|
| 45 | } | 
|---|
| 46 |  | 
|---|
| 47 | llvm::MDNode *CodeGenTBAA::getRoot() { | 
|---|
| 48 | // Define the root of the tree. This identifies the tree, so that | 
|---|
| 49 | // if our LLVM IR is linked with LLVM IR from a different front-end | 
|---|
| 50 | // (or a different version of this front-end), their TBAA trees will | 
|---|
| 51 | // remain distinct, and the optimizer will treat them conservatively. | 
|---|
| 52 | if (!Root) { | 
|---|
| 53 | if (Features.CPlusPlus) | 
|---|
| 54 | Root = MDHelper.createTBAARoot(Name: "Simple C++ TBAA"); | 
|---|
| 55 | else | 
|---|
| 56 | Root = MDHelper.createTBAARoot(Name: "Simple C/C++ TBAA"); | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 | return Root; | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 | llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name, | 
|---|
| 63 | llvm::MDNode *Parent, | 
|---|
| 64 | uint64_t Size) { | 
|---|
| 65 | if (CodeGenOpts.NewStructPathTBAA) { | 
|---|
| 66 | llvm::Metadata *Id = MDHelper.createString(Str: Name); | 
|---|
| 67 | return MDHelper.createTBAATypeNode(Parent, Size, Id); | 
|---|
| 68 | } | 
|---|
| 69 | return MDHelper.createTBAAScalarTypeNode(Name, Parent); | 
|---|
| 70 | } | 
|---|
| 71 |  | 
|---|
| 72 | llvm::MDNode *CodeGenTBAA::getChar() { | 
|---|
| 73 | // Define the root of the tree for user-accessible memory. C and C++ | 
|---|
| 74 | // give special powers to char and certain similar types. However, | 
|---|
| 75 | // these special powers only cover user-accessible memory, and doesn't | 
|---|
| 76 | // include things like vtables. | 
|---|
| 77 | if (!Char) | 
|---|
| 78 | Char = createScalarTypeNode(Name: "omnipotent char", Parent: getRoot(), /* Size= */ 1); | 
|---|
| 79 |  | 
|---|
| 80 | return Char; | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) { | 
|---|
| 84 | assert(PtrDepth >= 1 && "Pointer must have some depth"); | 
|---|
| 85 |  | 
|---|
| 86 | // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes | 
|---|
| 87 | // for "any" pointers of increasing pointer depth, and are organized in the | 
|---|
| 88 | // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ... | 
|---|
| 89 | // | 
|---|
| 90 | // Note that AnyPtrs[Idx] is actually the node for pointer depth (Idx+1), | 
|---|
| 91 | // since there is no node for pointer depth 0. | 
|---|
| 92 | // | 
|---|
| 93 | // These "any" pointer type nodes are used in pointer TBAA. The type node of | 
|---|
| 94 | // a concrete pointer type has the "any" pointer type node of appropriate | 
|---|
| 95 | // pointer depth as its parent. The "any" pointer type nodes are also used | 
|---|
| 96 | // directly for accesses to void pointers, or to specific pointers that we | 
|---|
| 97 | // conservatively do not distinguish in pointer TBAA (e.g. pointers to | 
|---|
| 98 | // members). Essentially, this establishes that e.g. void** can alias with | 
|---|
| 99 | // any type that can unify with T**, ignoring things like qualifiers. Here, T | 
|---|
| 100 | // is a variable that represents an arbitrary type, including pointer types. | 
|---|
| 101 | // As such, each depth is naturally a subtype of the previous depth, and thus | 
|---|
| 102 | // transitively of all previous depths. | 
|---|
| 103 | if (AnyPtrs.size() < PtrDepth) { | 
|---|
| 104 | AnyPtrs.reserve(N: PtrDepth); | 
|---|
| 105 | auto Size = Module.getDataLayout().getPointerSize(); | 
|---|
| 106 | // Populate first element. | 
|---|
| 107 | if (AnyPtrs.empty()) | 
|---|
| 108 | AnyPtrs.push_back(Elt: createScalarTypeNode(Name: "any pointer", Parent: getChar(), Size)); | 
|---|
| 109 | // Populate further elements. | 
|---|
| 110 | for (size_t Idx = AnyPtrs.size(); Idx < PtrDepth; ++Idx) { | 
|---|
| 111 | auto Name = ( "any p"+ llvm::Twine(Idx + 1) + " pointer").str(); | 
|---|
| 112 | AnyPtrs.push_back(Elt: createScalarTypeNode(Name, Parent: AnyPtrs[Idx - 1], Size)); | 
|---|
| 113 | } | 
|---|
| 114 | } | 
|---|
| 115 |  | 
|---|
| 116 | return AnyPtrs[PtrDepth - 1]; | 
|---|
| 117 | } | 
|---|
| 118 |  | 
|---|
| 119 | static bool TypeHasMayAlias(QualType QTy) { | 
|---|
| 120 | // Tagged types have declarations, and therefore may have attributes. | 
|---|
| 121 | if (auto *TD = QTy->getAsTagDecl()) | 
|---|
| 122 | if (TD->hasAttr<MayAliasAttr>()) | 
|---|
| 123 | return true; | 
|---|
| 124 |  | 
|---|
| 125 | // Also look for may_alias as a declaration attribute on a typedef. | 
|---|
| 126 | // FIXME: We should follow GCC and model may_alias as a type attribute | 
|---|
| 127 | // rather than as a declaration attribute. | 
|---|
| 128 | while (auto *TT = QTy->getAs<TypedefType>()) { | 
|---|
| 129 | if (TT->getDecl()->hasAttr<MayAliasAttr>()) | 
|---|
| 130 | return true; | 
|---|
| 131 | QTy = TT->desugar(); | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | // Also consider an array type as may_alias when its element type (at | 
|---|
| 135 | // any level) is marked as such. | 
|---|
| 136 | if (auto *ArrayTy = QTy->getAsArrayTypeUnsafe()) | 
|---|
| 137 | if (TypeHasMayAlias(QTy: ArrayTy->getElementType())) | 
|---|
| 138 | return true; | 
|---|
| 139 |  | 
|---|
| 140 | return false; | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | /// Check if the given type is a valid base type to be used in access tags. | 
|---|
| 144 | static bool isValidBaseType(QualType QTy) { | 
|---|
| 145 | if (const RecordType *TTy = QTy->getAs<RecordType>()) { | 
|---|
| 146 | const RecordDecl *RD = TTy->getDecl()->getDefinition(); | 
|---|
| 147 | // Incomplete types are not valid base access types. | 
|---|
| 148 | if (!RD) | 
|---|
| 149 | return false; | 
|---|
| 150 | if (RD->hasFlexibleArrayMember()) | 
|---|
| 151 | return false; | 
|---|
| 152 | // RD can be struct, union, class, interface or enum. | 
|---|
| 153 | // For now, we only handle struct and class. | 
|---|
| 154 | if (RD->isStruct() || RD->isClass()) | 
|---|
| 155 | return true; | 
|---|
| 156 | } | 
|---|
| 157 | return false; | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { | 
|---|
| 161 | uint64_t Size = Context.getTypeSizeInChars(T: Ty).getQuantity(); | 
|---|
| 162 |  | 
|---|
| 163 | // Handle builtin types. | 
|---|
| 164 | if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Val: Ty)) { | 
|---|
| 165 | switch (BTy->getKind()) { | 
|---|
| 166 | // Character types are special and can alias anything. | 
|---|
| 167 | // In C++, this technically only includes "char" and "unsigned char", | 
|---|
| 168 | // and not "signed char". In C, it includes all three. For now, | 
|---|
| 169 | // the risk of exploiting this detail in C++ seems likely to outweigh | 
|---|
| 170 | // the benefit. | 
|---|
| 171 | case BuiltinType::Char_U: | 
|---|
| 172 | case BuiltinType::Char_S: | 
|---|
| 173 | case BuiltinType::UChar: | 
|---|
| 174 | case BuiltinType::SChar: | 
|---|
| 175 | return getChar(); | 
|---|
| 176 |  | 
|---|
| 177 | // Unsigned types can alias their corresponding signed types. | 
|---|
| 178 | case BuiltinType::UShort: | 
|---|
| 179 | return getTypeInfo(QTy: Context.ShortTy); | 
|---|
| 180 | case BuiltinType::UInt: | 
|---|
| 181 | return getTypeInfo(QTy: Context.IntTy); | 
|---|
| 182 | case BuiltinType::ULong: | 
|---|
| 183 | return getTypeInfo(QTy: Context.LongTy); | 
|---|
| 184 | case BuiltinType::ULongLong: | 
|---|
| 185 | return getTypeInfo(QTy: Context.LongLongTy); | 
|---|
| 186 | case BuiltinType::UInt128: | 
|---|
| 187 | return getTypeInfo(QTy: Context.Int128Ty); | 
|---|
| 188 |  | 
|---|
| 189 | case BuiltinType::UShortFract: | 
|---|
| 190 | return getTypeInfo(QTy: Context.ShortFractTy); | 
|---|
| 191 | case BuiltinType::UFract: | 
|---|
| 192 | return getTypeInfo(QTy: Context.FractTy); | 
|---|
| 193 | case BuiltinType::ULongFract: | 
|---|
| 194 | return getTypeInfo(QTy: Context.LongFractTy); | 
|---|
| 195 |  | 
|---|
| 196 | case BuiltinType::SatUShortFract: | 
|---|
| 197 | return getTypeInfo(QTy: Context.SatShortFractTy); | 
|---|
| 198 | case BuiltinType::SatUFract: | 
|---|
| 199 | return getTypeInfo(QTy: Context.SatFractTy); | 
|---|
| 200 | case BuiltinType::SatULongFract: | 
|---|
| 201 | return getTypeInfo(QTy: Context.SatLongFractTy); | 
|---|
| 202 |  | 
|---|
| 203 | case BuiltinType::UShortAccum: | 
|---|
| 204 | return getTypeInfo(QTy: Context.ShortAccumTy); | 
|---|
| 205 | case BuiltinType::UAccum: | 
|---|
| 206 | return getTypeInfo(QTy: Context.AccumTy); | 
|---|
| 207 | case BuiltinType::ULongAccum: | 
|---|
| 208 | return getTypeInfo(QTy: Context.LongAccumTy); | 
|---|
| 209 |  | 
|---|
| 210 | case BuiltinType::SatUShortAccum: | 
|---|
| 211 | return getTypeInfo(QTy: Context.SatShortAccumTy); | 
|---|
| 212 | case BuiltinType::SatUAccum: | 
|---|
| 213 | return getTypeInfo(QTy: Context.SatAccumTy); | 
|---|
| 214 | case BuiltinType::SatULongAccum: | 
|---|
| 215 | return getTypeInfo(QTy: Context.SatLongAccumTy); | 
|---|
| 216 |  | 
|---|
| 217 | // Treat all other builtin types as distinct types. This includes | 
|---|
| 218 | // treating wchar_t, char16_t, and char32_t as distinct from their | 
|---|
| 219 | // "underlying types". | 
|---|
| 220 | default: | 
|---|
| 221 | return createScalarTypeNode(Name: BTy->getName(Policy: Features), Parent: getChar(), Size); | 
|---|
| 222 | } | 
|---|
| 223 | } | 
|---|
| 224 |  | 
|---|
| 225 | // C++1z [basic.lval]p10: "If a program attempts to access the stored value of | 
|---|
| 226 | // an object through a glvalue of other than one of the following types the | 
|---|
| 227 | // behavior is undefined: [...] a char, unsigned char, or std::byte type." | 
|---|
| 228 | if (Ty->isStdByteType()) | 
|---|
| 229 | return getChar(); | 
|---|
| 230 |  | 
|---|
| 231 | // Handle pointers and references. | 
|---|
| 232 | // | 
|---|
| 233 | // C has a very strict rule for pointer aliasing. C23 6.7.6.1p2: | 
|---|
| 234 | //     For two pointer types to be compatible, both shall be identically | 
|---|
| 235 | //     qualified and both shall be pointers to compatible types. | 
|---|
| 236 | // | 
|---|
| 237 | // This rule is impractically strict; we want to at least ignore CVR | 
|---|
| 238 | // qualifiers. Distinguishing by CVR qualifiers would make it UB to | 
|---|
| 239 | // e.g. cast a `char **` to `const char * const *` and dereference it, | 
|---|
| 240 | // which is too common and useful to invalidate. C++'s similar types | 
|---|
| 241 | // rule permits qualifier differences in these nested positions; in fact, | 
|---|
| 242 | // C++ even allows that cast as an implicit conversion. | 
|---|
| 243 | // | 
|---|
| 244 | // Other qualifiers could theoretically be distinguished, especially if | 
|---|
| 245 | // they involve a significant representation difference.  We don't | 
|---|
| 246 | // currently do so, however. | 
|---|
| 247 | if (Ty->isPointerType() || Ty->isReferenceType()) { | 
|---|
| 248 | if (!CodeGenOpts.PointerTBAA) | 
|---|
| 249 | return getAnyPtr(); | 
|---|
| 250 | // C++ [basic.lval]p11 permits objects to accessed through an l-value of | 
|---|
| 251 | // similar type. Two types are similar under C++ [conv.qual]p2 if the | 
|---|
| 252 | // decomposition of the types into pointers, member pointers, and arrays has | 
|---|
| 253 | // the same structure when ignoring cv-qualifiers at each level of the | 
|---|
| 254 | // decomposition. Meanwhile, C makes T(*)[] and T(*)[N] compatible, which | 
|---|
| 255 | // would really complicate any attempt to distinguish pointers to arrays by | 
|---|
| 256 | // their bounds. It's simpler, and much easier to explain to users, to | 
|---|
| 257 | // simply treat all pointers to arrays as pointers to their element type for | 
|---|
| 258 | // aliasing purposes. So when creating a TBAA tag for a pointer type, we | 
|---|
| 259 | // recursively ignore both qualifiers and array types when decomposing the | 
|---|
| 260 | // pointee type. The only meaningful remaining structure is the number of | 
|---|
| 261 | // pointer types we encountered along the way, so we just produce the tag | 
|---|
| 262 | // "p<depth> <base type tag>". If we do find a member pointer type, for now | 
|---|
| 263 | // we just conservatively bail out with AnyPtr (below) rather than trying to | 
|---|
| 264 | // create a tag that honors the similar-type rules while still | 
|---|
| 265 | // distinguishing different kinds of member pointer. | 
|---|
| 266 | unsigned PtrDepth = 0; | 
|---|
| 267 | do { | 
|---|
| 268 | PtrDepth++; | 
|---|
| 269 | Ty = Ty->getPointeeType()->getBaseElementTypeUnsafe(); | 
|---|
| 270 | } while (Ty->isPointerType()); | 
|---|
| 271 |  | 
|---|
| 272 | // While there are no special rules in the standards regarding void pointers | 
|---|
| 273 | // and strict aliasing, emitting distinct tags for void pointers break some | 
|---|
| 274 | // common idioms and there is no good alternative to re-write the code | 
|---|
| 275 | // without strict-aliasing violations. | 
|---|
| 276 | if (Ty->isVoidType()) | 
|---|
| 277 | return getAnyPtr(PtrDepth); | 
|---|
| 278 |  | 
|---|
| 279 | assert(!isa<VariableArrayType>(Ty)); | 
|---|
| 280 | // When the underlying type is a builtin type, we compute the pointee type | 
|---|
| 281 | // string recursively, which is implicitly more forgiving than the standards | 
|---|
| 282 | // require.  Effectively, we are turning the question "are these types | 
|---|
| 283 | // compatible/similar" into "are accesses to these types allowed to alias". | 
|---|
| 284 | // In both C and C++, the latter question has special carve-outs for | 
|---|
| 285 | // signedness mismatches that only apply at the top level.  As a result, we | 
|---|
| 286 | // are allowing e.g. `int *` l-values to access `unsigned *` objects. | 
|---|
| 287 | SmallString<256> TyName; | 
|---|
| 288 | if (isa<BuiltinType>(Val: Ty)) { | 
|---|
| 289 | llvm::MDNode *ScalarMD = getTypeInfoHelper(Ty); | 
|---|
| 290 | StringRef Name = | 
|---|
| 291 | cast<llvm::MDString>( | 
|---|
| 292 | Val: ScalarMD->getOperand(I: CodeGenOpts.NewStructPathTBAA ? 2 : 0)) | 
|---|
| 293 | ->getString(); | 
|---|
| 294 | TyName = Name; | 
|---|
| 295 | } else { | 
|---|
| 296 | // Be conservative if the type isn't a RecordType. We are specifically | 
|---|
| 297 | // required to do this for member pointers until we implement the | 
|---|
| 298 | // similar-types rule. | 
|---|
| 299 | const auto *RT = Ty->getAs<RecordType>(); | 
|---|
| 300 | if (!RT) | 
|---|
| 301 | return getAnyPtr(PtrDepth); | 
|---|
| 302 |  | 
|---|
| 303 | // For unnamed structs or unions C's compatible types rule applies. Two | 
|---|
| 304 | // compatible types in different compilation units can have different | 
|---|
| 305 | // mangled names, meaning the metadata emitted below would incorrectly | 
|---|
| 306 | // mark them as no-alias. Use AnyPtr for such types in both C and C++, as | 
|---|
| 307 | // C and C++ types may be visible when doing LTO. | 
|---|
| 308 | // | 
|---|
| 309 | // Note that using AnyPtr is overly conservative. We could summarize the | 
|---|
| 310 | // members of the type, as per the C compatibility rule in the future. | 
|---|
| 311 | // This also covers anonymous structs and unions, which have a different | 
|---|
| 312 | // compatibility rule, but it doesn't matter because you can never have a | 
|---|
| 313 | // pointer to an anonymous struct or union. | 
|---|
| 314 | if (!RT->getDecl()->getDeclName()) | 
|---|
| 315 | return getAnyPtr(PtrDepth); | 
|---|
| 316 |  | 
|---|
| 317 | // For non-builtin types use the mangled name of the canonical type. | 
|---|
| 318 | llvm::raw_svector_ostream TyOut(TyName); | 
|---|
| 319 | MangleCtx->mangleCanonicalTypeName(T: QualType(Ty, 0), TyOut); | 
|---|
| 320 | } | 
|---|
| 321 |  | 
|---|
| 322 | SmallString<256> OutName( "p"); | 
|---|
| 323 | OutName += std::to_string(val: PtrDepth); | 
|---|
| 324 | OutName += " "; | 
|---|
| 325 | OutName += TyName; | 
|---|
| 326 | return createScalarTypeNode(Name: OutName, Parent: getAnyPtr(PtrDepth), Size); | 
|---|
| 327 | } | 
|---|
| 328 |  | 
|---|
| 329 | // Accesses to arrays are accesses to objects of their element types. | 
|---|
| 330 | if (CodeGenOpts.NewStructPathTBAA && Ty->isArrayType()) | 
|---|
| 331 | return getTypeInfo(QTy: cast<ArrayType>(Val: Ty)->getElementType()); | 
|---|
| 332 |  | 
|---|
| 333 | // Enum types are distinct types. In C++ they have "underlying types", | 
|---|
| 334 | // however they aren't related for TBAA. | 
|---|
| 335 | if (const EnumType *ETy = dyn_cast<EnumType>(Val: Ty)) { | 
|---|
| 336 | if (!Features.CPlusPlus) | 
|---|
| 337 | return getTypeInfo(QTy: ETy->getDecl()->getIntegerType()); | 
|---|
| 338 |  | 
|---|
| 339 | // In C++ mode, types have linkage, so we can rely on the ODR and | 
|---|
| 340 | // on their mangled names, if they're external. | 
|---|
| 341 | // TODO: Is there a way to get a program-wide unique name for a | 
|---|
| 342 | // decl with local linkage or no linkage? | 
|---|
| 343 | if (!ETy->getDecl()->isExternallyVisible()) | 
|---|
| 344 | return getChar(); | 
|---|
| 345 |  | 
|---|
| 346 | SmallString<256> OutName; | 
|---|
| 347 | llvm::raw_svector_ostream Out(OutName); | 
|---|
| 348 | CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName( | 
|---|
| 349 | T: QualType(ETy, 0), Out); | 
|---|
| 350 | return createScalarTypeNode(Name: OutName, Parent: getChar(), Size); | 
|---|
| 351 | } | 
|---|
| 352 |  | 
|---|
| 353 | if (const auto *EIT = dyn_cast<BitIntType>(Val: Ty)) { | 
|---|
| 354 | SmallString<256> OutName; | 
|---|
| 355 | llvm::raw_svector_ostream Out(OutName); | 
|---|
| 356 | // Don't specify signed/unsigned since integer types can alias despite sign | 
|---|
| 357 | // differences. | 
|---|
| 358 | Out << "_BitInt("<< EIT->getNumBits() << ')'; | 
|---|
| 359 | return createScalarTypeNode(Name: OutName, Parent: getChar(), Size); | 
|---|
| 360 | } | 
|---|
| 361 |  | 
|---|
| 362 | // For now, handle any other kind of type conservatively. | 
|---|
| 363 | return getChar(); | 
|---|
| 364 | } | 
|---|
| 365 |  | 
|---|
| 366 | llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) { | 
|---|
| 367 | // At -O0 or relaxed aliasing, TBAA is not emitted for regular types (unless | 
|---|
| 368 | // we're running TypeSanitizer). | 
|---|
| 369 | if (!Features.Sanitize.has(K: SanitizerKind::Type) && | 
|---|
| 370 | (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)) | 
|---|
| 371 | return nullptr; | 
|---|
| 372 |  | 
|---|
| 373 | // If the type has the may_alias attribute (even on a typedef), it is | 
|---|
| 374 | // effectively in the general char alias class. | 
|---|
| 375 | if (TypeHasMayAlias(QTy)) | 
|---|
| 376 | return getChar(); | 
|---|
| 377 |  | 
|---|
| 378 | // We need this function to not fall back to returning the "omnipotent char" | 
|---|
| 379 | // type node for aggregate and union types. Otherwise, any dereference of an | 
|---|
| 380 | // aggregate will result into the may-alias access descriptor, meaning all | 
|---|
| 381 | // subsequent accesses to direct and indirect members of that aggregate will | 
|---|
| 382 | // be considered may-alias too. | 
|---|
| 383 | // TODO: Combine getTypeInfo() and getValidBaseTypeInfo() into a single | 
|---|
| 384 | // function. | 
|---|
| 385 | if (isValidBaseType(QTy)) | 
|---|
| 386 | return getValidBaseTypeInfo(QTy); | 
|---|
| 387 |  | 
|---|
| 388 | const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr(); | 
|---|
| 389 | if (llvm::MDNode *N = MetadataCache[Ty]) | 
|---|
| 390 | return N; | 
|---|
| 391 |  | 
|---|
| 392 | // Note that the following helper call is allowed to add new nodes to the | 
|---|
| 393 | // cache, which invalidates all its previously obtained iterators. So we | 
|---|
| 394 | // first generate the node for the type and then add that node to the cache. | 
|---|
| 395 | llvm::MDNode *TypeNode = getTypeInfoHelper(Ty); | 
|---|
| 396 | return MetadataCache[Ty] = TypeNode; | 
|---|
| 397 | } | 
|---|
| 398 |  | 
|---|
| 399 | TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) { | 
|---|
| 400 | // Pointee values may have incomplete types, but they shall never be | 
|---|
| 401 | // dereferenced. | 
|---|
| 402 | if (AccessType->isIncompleteType()) | 
|---|
| 403 | return TBAAAccessInfo::getIncompleteInfo(); | 
|---|
| 404 |  | 
|---|
| 405 | if (TypeHasMayAlias(QTy: AccessType)) | 
|---|
| 406 | return TBAAAccessInfo::getMayAliasInfo(); | 
|---|
| 407 |  | 
|---|
| 408 | uint64_t Size = Context.getTypeSizeInChars(T: AccessType).getQuantity(); | 
|---|
| 409 | return TBAAAccessInfo(getTypeInfo(QTy: AccessType), Size); | 
|---|
| 410 | } | 
|---|
| 411 |  | 
|---|
| 412 | TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) { | 
|---|
| 413 | const llvm::DataLayout &DL = Module.getDataLayout(); | 
|---|
| 414 | unsigned Size = DL.getPointerTypeSize(Ty: VTablePtrType); | 
|---|
| 415 | return TBAAAccessInfo(createScalarTypeNode(Name: "vtable pointer", Parent: getRoot(), Size), | 
|---|
| 416 | Size); | 
|---|
| 417 | } | 
|---|
| 418 |  | 
|---|
| 419 | bool | 
|---|
| 420 | CodeGenTBAA::CollectFields(uint64_t BaseOffset, | 
|---|
| 421 | QualType QTy, | 
|---|
| 422 | SmallVectorImpl<llvm::MDBuilder::TBAAStructField> & | 
|---|
| 423 | Fields, | 
|---|
| 424 | bool MayAlias) { | 
|---|
| 425 | /* Things not handled yet include: C++ base classes, bitfields, */ | 
|---|
| 426 |  | 
|---|
| 427 | if (const RecordType *TTy = QTy->getAs<RecordType>()) { | 
|---|
| 428 | if (TTy->isUnionType()) { | 
|---|
| 429 | uint64_t Size = Context.getTypeSizeInChars(T: QTy).getQuantity(); | 
|---|
| 430 | llvm::MDNode *TBAAType = getChar(); | 
|---|
| 431 | llvm::MDNode *TBAATag = getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size)); | 
|---|
| 432 | Fields.push_back( | 
|---|
| 433 | Elt: llvm::MDBuilder::TBAAStructField(BaseOffset, Size, TBAATag)); | 
|---|
| 434 | return true; | 
|---|
| 435 | } | 
|---|
| 436 | const RecordDecl *RD = TTy->getDecl()->getDefinition(); | 
|---|
| 437 | if (RD->hasFlexibleArrayMember()) | 
|---|
| 438 | return false; | 
|---|
| 439 |  | 
|---|
| 440 | // TODO: Handle C++ base classes. | 
|---|
| 441 | if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(Val: RD)) | 
|---|
| 442 | if (Decl->bases_begin() != Decl->bases_end()) | 
|---|
| 443 | return false; | 
|---|
| 444 |  | 
|---|
| 445 | const ASTRecordLayout &Layout = Context.getASTRecordLayout(D: RD); | 
|---|
| 446 | const CGRecordLayout &CGRL = CGTypes.getCGRecordLayout(RD); | 
|---|
| 447 |  | 
|---|
| 448 | unsigned idx = 0; | 
|---|
| 449 | for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); | 
|---|
| 450 | i != e; ++i, ++idx) { | 
|---|
| 451 | if (isEmptyFieldForLayout(Context, FD: *i)) | 
|---|
| 452 | continue; | 
|---|
| 453 |  | 
|---|
| 454 | uint64_t Offset = | 
|---|
| 455 | BaseOffset + Layout.getFieldOffset(FieldNo: idx) / Context.getCharWidth(); | 
|---|
| 456 |  | 
|---|
| 457 | // Create a single field for consecutive named bitfields using char as | 
|---|
| 458 | // base type. | 
|---|
| 459 | if ((*i)->isBitField()) { | 
|---|
| 460 | const CGBitFieldInfo &Info = CGRL.getBitFieldInfo(FD: *i); | 
|---|
| 461 | // For big endian targets the first bitfield in the consecutive run is | 
|---|
| 462 | // at the most-significant end; see CGRecordLowering::setBitFieldInfo | 
|---|
| 463 | // for more information. | 
|---|
| 464 | bool IsBE = Context.getTargetInfo().isBigEndian(); | 
|---|
| 465 | bool IsFirst = IsBE ? Info.StorageSize - (Info.Offset + Info.Size) == 0 | 
|---|
| 466 | : Info.Offset == 0; | 
|---|
| 467 | if (!IsFirst) | 
|---|
| 468 | continue; | 
|---|
| 469 | unsigned CurrentBitFieldSize = Info.StorageSize; | 
|---|
| 470 | uint64_t Size = | 
|---|
| 471 | llvm::divideCeil(Numerator: CurrentBitFieldSize, Denominator: Context.getCharWidth()); | 
|---|
| 472 | llvm::MDNode *TBAAType = getChar(); | 
|---|
| 473 | llvm::MDNode *TBAATag = | 
|---|
| 474 | getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size)); | 
|---|
| 475 | Fields.push_back( | 
|---|
| 476 | Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); | 
|---|
| 477 | continue; | 
|---|
| 478 | } | 
|---|
| 479 |  | 
|---|
| 480 | QualType FieldQTy = i->getType(); | 
|---|
| 481 | if (!CollectFields(BaseOffset: Offset, QTy: FieldQTy, Fields, | 
|---|
| 482 | MayAlias: MayAlias || TypeHasMayAlias(QTy: FieldQTy))) | 
|---|
| 483 | return false; | 
|---|
| 484 | } | 
|---|
| 485 | return true; | 
|---|
| 486 | } | 
|---|
| 487 |  | 
|---|
| 488 | /* Otherwise, treat whatever it is as a field. */ | 
|---|
| 489 | uint64_t Offset = BaseOffset; | 
|---|
| 490 | uint64_t Size = Context.getTypeSizeInChars(T: QTy).getQuantity(); | 
|---|
| 491 | llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy); | 
|---|
| 492 | llvm::MDNode *TBAATag = getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size)); | 
|---|
| 493 | Fields.push_back(Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); | 
|---|
| 494 | return true; | 
|---|
| 495 | } | 
|---|
| 496 |  | 
|---|
| 497 | llvm::MDNode * | 
|---|
| 498 | CodeGenTBAA::getTBAAStructInfo(QualType QTy) { | 
|---|
| 499 | if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) | 
|---|
| 500 | return nullptr; | 
|---|
| 501 |  | 
|---|
| 502 | const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr(); | 
|---|
| 503 |  | 
|---|
| 504 | if (llvm::MDNode *N = StructMetadataCache[Ty]) | 
|---|
| 505 | return N; | 
|---|
| 506 |  | 
|---|
| 507 | SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; | 
|---|
| 508 | if (CollectFields(BaseOffset: 0, QTy, Fields, MayAlias: TypeHasMayAlias(QTy))) | 
|---|
| 509 | return MDHelper.createTBAAStructNode(Fields); | 
|---|
| 510 |  | 
|---|
| 511 | // For now, handle any other kind of type conservatively. | 
|---|
| 512 | return StructMetadataCache[Ty] = nullptr; | 
|---|
| 513 | } | 
|---|
| 514 |  | 
|---|
| 515 | llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { | 
|---|
| 516 | if (auto *TTy = dyn_cast<RecordType>(Val: Ty)) { | 
|---|
| 517 | const RecordDecl *RD = TTy->getDecl()->getDefinition(); | 
|---|
| 518 | const ASTRecordLayout &Layout = Context.getASTRecordLayout(D: RD); | 
|---|
| 519 | using TBAAStructField = llvm::MDBuilder::TBAAStructField; | 
|---|
| 520 | SmallVector<TBAAStructField, 4> Fields; | 
|---|
| 521 | if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) { | 
|---|
| 522 | // Handle C++ base classes. Non-virtual bases can treated a kind of | 
|---|
| 523 | // field. Virtual bases are more complex and omitted, but avoid an | 
|---|
| 524 | // incomplete view for NewStructPathTBAA. | 
|---|
| 525 | if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0) | 
|---|
| 526 | return nullptr; | 
|---|
| 527 | for (const CXXBaseSpecifier &B : CXXRD->bases()) { | 
|---|
| 528 | if (B.isVirtual()) | 
|---|
| 529 | continue; | 
|---|
| 530 | QualType BaseQTy = B.getType(); | 
|---|
| 531 | const CXXRecordDecl *BaseRD = BaseQTy->getAsCXXRecordDecl(); | 
|---|
| 532 | if (BaseRD->isEmpty()) | 
|---|
| 533 | continue; | 
|---|
| 534 | llvm::MDNode *TypeNode = isValidBaseType(QTy: BaseQTy) | 
|---|
| 535 | ? getValidBaseTypeInfo(QTy: BaseQTy) | 
|---|
| 536 | : getTypeInfo(QTy: BaseQTy); | 
|---|
| 537 | if (!TypeNode) | 
|---|
| 538 | return nullptr; | 
|---|
| 539 | uint64_t Offset = Layout.getBaseClassOffset(Base: BaseRD).getQuantity(); | 
|---|
| 540 | uint64_t Size = | 
|---|
| 541 | Context.getASTRecordLayout(D: BaseRD).getDataSize().getQuantity(); | 
|---|
| 542 | Fields.push_back( | 
|---|
| 543 | Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TypeNode)); | 
|---|
| 544 | } | 
|---|
| 545 | // The order in which base class subobjects are allocated is unspecified, | 
|---|
| 546 | // so may differ from declaration order. In particular, Itanium ABI will | 
|---|
| 547 | // allocate a primary base first. | 
|---|
| 548 | // Since we exclude empty subobjects, the objects are not overlapping and | 
|---|
| 549 | // their offsets are unique. | 
|---|
| 550 | llvm::sort(C&: Fields, | 
|---|
| 551 | Comp: [](const TBAAStructField &A, const TBAAStructField &B) { | 
|---|
| 552 | return A.Offset < B.Offset; | 
|---|
| 553 | }); | 
|---|
| 554 | } | 
|---|
| 555 | for (FieldDecl *Field : RD->fields()) { | 
|---|
| 556 | if (Field->isZeroSize(Ctx: Context) || Field->isUnnamedBitField()) | 
|---|
| 557 | continue; | 
|---|
| 558 | QualType FieldQTy = Field->getType(); | 
|---|
| 559 | llvm::MDNode *TypeNode = isValidBaseType(QTy: FieldQTy) | 
|---|
| 560 | ? getValidBaseTypeInfo(QTy: FieldQTy) | 
|---|
| 561 | : getTypeInfo(QTy: FieldQTy); | 
|---|
| 562 | if (!TypeNode) | 
|---|
| 563 | return nullptr; | 
|---|
| 564 |  | 
|---|
| 565 | uint64_t BitOffset = Layout.getFieldOffset(FieldNo: Field->getFieldIndex()); | 
|---|
| 566 | uint64_t Offset = Context.toCharUnitsFromBits(BitSize: BitOffset).getQuantity(); | 
|---|
| 567 | uint64_t Size = Context.getTypeSizeInChars(T: FieldQTy).getQuantity(); | 
|---|
| 568 | Fields.push_back(Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, | 
|---|
| 569 | TypeNode)); | 
|---|
| 570 | } | 
|---|
| 571 |  | 
|---|
| 572 | SmallString<256> OutName; | 
|---|
| 573 | if (Features.CPlusPlus) { | 
|---|
| 574 | // Don't use the mangler for C code. | 
|---|
| 575 | llvm::raw_svector_ostream Out(OutName); | 
|---|
| 576 | CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName( | 
|---|
| 577 | T: QualType(Ty, 0), Out); | 
|---|
| 578 | } else { | 
|---|
| 579 | OutName = RD->getName(); | 
|---|
| 580 | } | 
|---|
| 581 |  | 
|---|
| 582 | if (CodeGenOpts.NewStructPathTBAA) { | 
|---|
| 583 | llvm::MDNode *Parent = getChar(); | 
|---|
| 584 | uint64_t Size = Context.getTypeSizeInChars(T: Ty).getQuantity(); | 
|---|
| 585 | llvm::Metadata *Id = MDHelper.createString(Str: OutName); | 
|---|
| 586 | return MDHelper.createTBAATypeNode(Parent, Size, Id, Fields); | 
|---|
| 587 | } | 
|---|
| 588 |  | 
|---|
| 589 | // Create the struct type node with a vector of pairs (offset, type). | 
|---|
| 590 | SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes; | 
|---|
| 591 | for (const auto &Field : Fields) | 
|---|
| 592 | OffsetsAndTypes.push_back(Elt: std::make_pair(x: Field.Type, y: Field.Offset)); | 
|---|
| 593 | return MDHelper.createTBAAStructTypeNode(Name: OutName, Fields: OffsetsAndTypes); | 
|---|
| 594 | } | 
|---|
| 595 |  | 
|---|
| 596 | return nullptr; | 
|---|
| 597 | } | 
|---|
| 598 |  | 
|---|
| 599 | llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) { | 
|---|
| 600 | assert(isValidBaseType(QTy) && "Must be a valid base type"); | 
|---|
| 601 |  | 
|---|
| 602 | const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr(); | 
|---|
| 603 |  | 
|---|
| 604 | // nullptr is a valid value in the cache, so use find rather than [] | 
|---|
| 605 | auto I = BaseTypeMetadataCache.find(Val: Ty); | 
|---|
| 606 | if (I != BaseTypeMetadataCache.end()) | 
|---|
| 607 | return I->second; | 
|---|
| 608 |  | 
|---|
| 609 | // First calculate the metadata, before recomputing the insertion point, as | 
|---|
| 610 | // the helper can recursively call us. | 
|---|
| 611 | llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty); | 
|---|
| 612 | LLVM_ATTRIBUTE_UNUSED auto inserted = | 
|---|
| 613 | BaseTypeMetadataCache.insert(KV: {Ty, TypeNode}); | 
|---|
| 614 | assert(inserted.second && "BaseType metadata was already inserted"); | 
|---|
| 615 |  | 
|---|
| 616 | return TypeNode; | 
|---|
| 617 | } | 
|---|
| 618 |  | 
|---|
| 619 | llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) { | 
|---|
| 620 | return isValidBaseType(QTy) ? getValidBaseTypeInfo(QTy) : nullptr; | 
|---|
| 621 | } | 
|---|
| 622 |  | 
|---|
| 623 | llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) { | 
|---|
| 624 | assert(!Info.isIncomplete() && "Access to an object of an incomplete type!"); | 
|---|
| 625 |  | 
|---|
| 626 | if (Info.isMayAlias()) | 
|---|
| 627 | Info = TBAAAccessInfo(getChar(), Info.Size); | 
|---|
| 628 |  | 
|---|
| 629 | if (!Info.AccessType) | 
|---|
| 630 | return nullptr; | 
|---|
| 631 |  | 
|---|
| 632 | if (!CodeGenOpts.StructPathTBAA) | 
|---|
| 633 | Info = TBAAAccessInfo(Info.AccessType, Info.Size); | 
|---|
| 634 |  | 
|---|
| 635 | llvm::MDNode *&N = AccessTagMetadataCache[Info]; | 
|---|
| 636 | if (N) | 
|---|
| 637 | return N; | 
|---|
| 638 |  | 
|---|
| 639 | if (!Info.BaseType) { | 
|---|
| 640 | Info.BaseType = Info.AccessType; | 
|---|
| 641 | assert(!Info.Offset && "Nonzero offset for an access with no base type!"); | 
|---|
| 642 | } | 
|---|
| 643 | if (CodeGenOpts.NewStructPathTBAA) { | 
|---|
| 644 | return N = MDHelper.createTBAAAccessTag(BaseType: Info.BaseType, AccessType: Info.AccessType, | 
|---|
| 645 | Offset: Info.Offset, Size: Info.Size); | 
|---|
| 646 | } | 
|---|
| 647 | return N = MDHelper.createTBAAStructTagNode(BaseType: Info.BaseType, AccessType: Info.AccessType, | 
|---|
| 648 | Offset: Info.Offset); | 
|---|
| 649 | } | 
|---|
| 650 |  | 
|---|
| 651 | TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, | 
|---|
| 652 | TBAAAccessInfo TargetInfo) { | 
|---|
| 653 | if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias()) | 
|---|
| 654 | return TBAAAccessInfo::getMayAliasInfo(); | 
|---|
| 655 | return TargetInfo; | 
|---|
| 656 | } | 
|---|
| 657 |  | 
|---|
| 658 | TBAAAccessInfo | 
|---|
| 659 | CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, | 
|---|
| 660 | TBAAAccessInfo InfoB) { | 
|---|
| 661 | if (InfoA == InfoB) | 
|---|
| 662 | return InfoA; | 
|---|
| 663 |  | 
|---|
| 664 | if (!InfoA || !InfoB) | 
|---|
| 665 | return TBAAAccessInfo(); | 
|---|
| 666 |  | 
|---|
| 667 | if (InfoA.isMayAlias() || InfoB.isMayAlias()) | 
|---|
| 668 | return TBAAAccessInfo::getMayAliasInfo(); | 
|---|
| 669 |  | 
|---|
| 670 | // TODO: Implement the rest of the logic here. For example, two accesses | 
|---|
| 671 | // with same final access types result in an access to an object of that final | 
|---|
| 672 | // access type regardless of their base types. | 
|---|
| 673 | return TBAAAccessInfo::getMayAliasInfo(); | 
|---|
| 674 | } | 
|---|
| 675 |  | 
|---|
| 676 | TBAAAccessInfo | 
|---|
| 677 | CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, | 
|---|
| 678 | TBAAAccessInfo SrcInfo) { | 
|---|
| 679 | if (DestInfo == SrcInfo) | 
|---|
| 680 | return DestInfo; | 
|---|
| 681 |  | 
|---|
| 682 | if (!DestInfo || !SrcInfo) | 
|---|
| 683 | return TBAAAccessInfo(); | 
|---|
| 684 |  | 
|---|
| 685 | if (DestInfo.isMayAlias() || SrcInfo.isMayAlias()) | 
|---|
| 686 | return TBAAAccessInfo::getMayAliasInfo(); | 
|---|
| 687 |  | 
|---|
| 688 | // TODO: Implement the rest of the logic here. For example, two accesses | 
|---|
| 689 | // with same final access types result in an access to an object of that final | 
|---|
| 690 | // access type regardless of their base types. | 
|---|
| 691 | return TBAAAccessInfo::getMayAliasInfo(); | 
|---|
| 692 | } | 
|---|
| 693 |  | 
|---|