1//===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is the code that manages TBAA information and defines the TBAA policy
10// for the optimizer to use. Relevant standards text includes:
11//
12// C99 6.5p7
13// C++ [basic.lval] (p10 in n3126, p15 in some earlier versions)
14//
15//===----------------------------------------------------------------------===//
16
17#include "CodeGenTBAA.h"
18#include "ABIInfoImpl.h"
19#include "CGCXXABI.h"
20#include "CGRecordLayout.h"
21#include "CodeGenTypes.h"
22#include "clang/AST/ASTContext.h"
23#include "clang/AST/Attr.h"
24#include "clang/AST/Mangle.h"
25#include "clang/AST/RecordLayout.h"
26#include "clang/Basic/CodeGenOptions.h"
27#include "clang/Basic/TargetInfo.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/IR/Metadata.h"
30#include "llvm/IR/Module.h"
31#include "llvm/IR/Type.h"
32#include "llvm/Support/Debug.h"
33using namespace clang;
34using namespace CodeGen;
35
// Construct the TBAA helper for a module. Root and Char nodes are created
// lazily (see getRoot()/getChar()). An Itanium mangle context is created
// unconditionally here; it is used below to produce stable type-name strings
// for pointer TBAA — NOTE(review): presumably Itanium manglings are used even
// on non-Itanium targets for cross-target stability; confirm.
CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes,
                         llvm::Module &M, const CodeGenOptions &CGO,
                         const LangOptions &Features)
    : Context(Ctx), CGTypes(CGTypes), Module(M), CodeGenOpts(CGO),
      Features(Features),
      MangleCtx(ItaniumMangleContext::create(Context&: Ctx, Diags&: Ctx.getDiagnostics())),
      MDHelper(M.getContext()), Root(nullptr), Char(nullptr) {}
43
44CodeGenTBAA::~CodeGenTBAA() {
45}
46
47llvm::MDNode *CodeGenTBAA::getRoot() {
48 // Define the root of the tree. This identifies the tree, so that
49 // if our LLVM IR is linked with LLVM IR from a different front-end
50 // (or a different version of this front-end), their TBAA trees will
51 // remain distinct, and the optimizer will treat them conservatively.
52 if (!Root) {
53 if (Features.CPlusPlus)
54 Root = MDHelper.createTBAARoot(Name: "Simple C++ TBAA");
55 else
56 Root = MDHelper.createTBAARoot(Name: "Simple C/C++ TBAA");
57 }
58
59 return Root;
60}
61
62llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name,
63 llvm::MDNode *Parent,
64 uint64_t Size) {
65 if (CodeGenOpts.NewStructPathTBAA) {
66 llvm::Metadata *Id = MDHelper.createString(Str: Name);
67 return MDHelper.createTBAATypeNode(Parent, Size, Id);
68 }
69 return MDHelper.createTBAAScalarTypeNode(Name, Parent);
70}
71
72llvm::MDNode *CodeGenTBAA::getChar() {
73 // Define the root of the tree for user-accessible memory. C and C++
74 // give special powers to char and certain similar types. However,
75 // these special powers only cover user-accessible memory, and doesn't
76 // include things like vtables.
77 if (!Char)
78 Char = createScalarTypeNode(Name: "omnipotent char", Parent: getRoot(), /* Size= */ 1);
79
80 return Char;
81}
82
83llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) {
84 assert(PtrDepth >= 1 && "Pointer must have some depth");
85
86 // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes
87 // for "any" pointers of increasing pointer depth, and are organized in the
88 // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ...
89 //
90 // Note that AnyPtrs[Idx] is actually the node for pointer depth (Idx+1),
91 // since there is no node for pointer depth 0.
92 //
93 // These "any" pointer type nodes are used in pointer TBAA. The type node of
94 // a concrete pointer type has the "any" pointer type node of appropriate
95 // pointer depth as its parent. The "any" pointer type nodes are also used
96 // directly for accesses to void pointers, or to specific pointers that we
97 // conservatively do not distinguish in pointer TBAA (e.g. pointers to
98 // members). Essentially, this establishes that e.g. void** can alias with
99 // any type that can unify with T**, ignoring things like qualifiers. Here, T
100 // is a variable that represents an arbitrary type, including pointer types.
101 // As such, each depth is naturally a subtype of the previous depth, and thus
102 // transitively of all previous depths.
103 if (AnyPtrs.size() < PtrDepth) {
104 AnyPtrs.reserve(N: PtrDepth);
105 auto Size = Module.getDataLayout().getPointerSize();
106 // Populate first element.
107 if (AnyPtrs.empty())
108 AnyPtrs.push_back(Elt: createScalarTypeNode(Name: "any pointer", Parent: getChar(), Size));
109 // Populate further elements.
110 for (size_t Idx = AnyPtrs.size(); Idx < PtrDepth; ++Idx) {
111 auto Name = ("any p" + llvm::Twine(Idx + 1) + " pointer").str();
112 AnyPtrs.push_back(Elt: createScalarTypeNode(Name, Parent: AnyPtrs[Idx - 1], Size));
113 }
114 }
115
116 return AnyPtrs[PtrDepth - 1];
117}
118
119static bool TypeHasMayAlias(QualType QTy) {
120 // Tagged types have declarations, and therefore may have attributes.
121 if (auto *TD = QTy->getAsTagDecl())
122 if (TD->hasAttr<MayAliasAttr>())
123 return true;
124
125 // Also look for may_alias as a declaration attribute on a typedef.
126 // FIXME: We should follow GCC and model may_alias as a type attribute
127 // rather than as a declaration attribute.
128 while (auto *TT = QTy->getAs<TypedefType>()) {
129 if (TT->getDecl()->hasAttr<MayAliasAttr>())
130 return true;
131 QTy = TT->desugar();
132 }
133
134 // Also consider an array type as may_alias when its element type (at
135 // any level) is marked as such.
136 if (auto *ArrayTy = QTy->getAsArrayTypeUnsafe())
137 if (TypeHasMayAlias(QTy: ArrayTy->getElementType()))
138 return true;
139
140 return false;
141}
142
143/// Check if the given type is a valid base type to be used in access tags.
144static bool isValidBaseType(QualType QTy) {
145 if (const auto *RD = QTy->getAsRecordDecl()) {
146 // Incomplete types are not valid base access types.
147 if (!RD->isCompleteDefinition())
148 return false;
149 if (RD->hasFlexibleArrayMember())
150 return false;
151 // RD can be struct, union, class, interface or enum.
152 // For now, we only handle struct and class.
153 if (RD->isStruct() || RD->isClass())
154 return true;
155 }
156 return false;
157}
158
/// Compute the scalar TBAA type node for \p Ty (expected to be canonical;
/// callers pass Context.getCanonicalType() results). Types this function
/// cannot classify fall back to the "omnipotent char" node, i.e. they may
/// alias anything in user-accessible memory.
llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
  uint64_t Size = Context.getTypeSizeInChars(T: Ty).getQuantity();

  // Handle builtin types.
  if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Val: Ty)) {
    switch (BTy->getKind()) {
    // Character types are special and can alias anything.
    // In C++, this technically only includes "char" and "unsigned char",
    // and not "signed char". In C, it includes all three. For now,
    // the risk of exploiting this detail in C++ seems likely to outweigh
    // the benefit.
    case BuiltinType::Char_U:
    case BuiltinType::Char_S:
    case BuiltinType::UChar:
    case BuiltinType::SChar:
      return getChar();

    // Unsigned types can alias their corresponding signed types.
    case BuiltinType::UShort:
      return getTypeInfo(QTy: Context.ShortTy);
    case BuiltinType::UInt:
      return getTypeInfo(QTy: Context.IntTy);
    case BuiltinType::ULong:
      return getTypeInfo(QTy: Context.LongTy);
    case BuiltinType::ULongLong:
      return getTypeInfo(QTy: Context.LongLongTy);
    case BuiltinType::UInt128:
      return getTypeInfo(QTy: Context.Int128Ty);

    // Likewise for the unsigned fixed-point types (plain and saturated):
    // each maps to the node of its signed counterpart.
    case BuiltinType::UShortFract:
      return getTypeInfo(QTy: Context.ShortFractTy);
    case BuiltinType::UFract:
      return getTypeInfo(QTy: Context.FractTy);
    case BuiltinType::ULongFract:
      return getTypeInfo(QTy: Context.LongFractTy);

    case BuiltinType::SatUShortFract:
      return getTypeInfo(QTy: Context.SatShortFractTy);
    case BuiltinType::SatUFract:
      return getTypeInfo(QTy: Context.SatFractTy);
    case BuiltinType::SatULongFract:
      return getTypeInfo(QTy: Context.SatLongFractTy);

    case BuiltinType::UShortAccum:
      return getTypeInfo(QTy: Context.ShortAccumTy);
    case BuiltinType::UAccum:
      return getTypeInfo(QTy: Context.AccumTy);
    case BuiltinType::ULongAccum:
      return getTypeInfo(QTy: Context.LongAccumTy);

    case BuiltinType::SatUShortAccum:
      return getTypeInfo(QTy: Context.SatShortAccumTy);
    case BuiltinType::SatUAccum:
      return getTypeInfo(QTy: Context.SatAccumTy);
    case BuiltinType::SatULongAccum:
      return getTypeInfo(QTy: Context.SatLongAccumTy);

    // Treat all other builtin types as distinct types. This includes
    // treating wchar_t, char16_t, and char32_t as distinct from their
    // "underlying types".
    default:
      return createScalarTypeNode(Name: BTy->getName(Policy: Features), Parent: getChar(), Size);
    }
  }

  // C++1z [basic.lval]p10: "If a program attempts to access the stored value of
  // an object through a glvalue of other than one of the following types the
  // behavior is undefined: [...] a char, unsigned char, or std::byte type."
  if (Ty->isStdByteType())
    return getChar();

  // Handle pointers and references.
  //
  // C has a very strict rule for pointer aliasing. C23 6.7.6.1p2:
  //   For two pointer types to be compatible, both shall be identically
  //   qualified and both shall be pointers to compatible types.
  //
  // This rule is impractically strict; we want to at least ignore CVR
  // qualifiers. Distinguishing by CVR qualifiers would make it UB to
  // e.g. cast a `char **` to `const char * const *` and dereference it,
  // which is too common and useful to invalidate. C++'s similar types
  // rule permits qualifier differences in these nested positions; in fact,
  // C++ even allows that cast as an implicit conversion.
  //
  // Other qualifiers could theoretically be distinguished, especially if
  // they involve a significant representation difference. We don't
  // currently do so, however.
  if (Ty->isPointerType() || Ty->isReferenceType()) {
    // Without -fpointer-tbaa all pointers collapse into a single node.
    if (!CodeGenOpts.PointerTBAA)
      return getAnyPtr();
    // C++ [basic.lval]p11 permits objects to accessed through an l-value of
    // similar type. Two types are similar under C++ [conv.qual]p2 if the
    // decomposition of the types into pointers, member pointers, and arrays has
    // the same structure when ignoring cv-qualifiers at each level of the
    // decomposition. Meanwhile, C makes T(*)[] and T(*)[N] compatible, which
    // would really complicate any attempt to distinguish pointers to arrays by
    // their bounds. It's simpler, and much easier to explain to users, to
    // simply treat all pointers to arrays as pointers to their element type for
    // aliasing purposes. So when creating a TBAA tag for a pointer type, we
    // recursively ignore both qualifiers and array types when decomposing the
    // pointee type. The only meaningful remaining structure is the number of
    // pointer types we encountered along the way, so we just produce the tag
    // "p<depth> <base type tag>". If we do find a member pointer type, for now
    // we just conservatively bail out with AnyPtr (below) rather than trying to
    // create a tag that honors the similar-type rules while still
    // distinguishing different kinds of member pointer.
    unsigned PtrDepth = 0;
    do {
      PtrDepth++;
      Ty = Ty->getPointeeType()->getBaseElementTypeUnsafe();
    } while (Ty->isPointerType());

    // While there are no special rules in the standards regarding void pointers
    // and strict aliasing, emitting distinct tags for void pointers break some
    // common idioms and there is no good alternative to re-write the code
    // without strict-aliasing violations.
    if (Ty->isVoidType())
      return getAnyPtr(PtrDepth);

    // VLAs were decomposed away by getBaseElementTypeUnsafe() above.
    assert(!isa<VariableArrayType>(Ty));
    // When the underlying type is a builtin type, we compute the pointee type
    // string recursively, which is implicitly more forgiving than the standards
    // require. Effectively, we are turning the question "are these types
    // compatible/similar" into "are accesses to these types allowed to alias".
    // In both C and C++, the latter question has special carve-outs for
    // signedness mismatches that only apply at the top level. As a result, we
    // are allowing e.g. `int *` l-values to access `unsigned *` objects.
    SmallString<256> TyName;
    if (isa<BuiltinType>(Val: Ty)) {
      // Reuse the scalar node's own name string; its operand index differs
      // between the classic and new struct-path formats.
      llvm::MDNode *ScalarMD = getTypeInfoHelper(Ty);
      StringRef Name =
          cast<llvm::MDString>(
              Val: ScalarMD->getOperand(I: CodeGenOpts.NewStructPathTBAA ? 2 : 0))
              ->getString();
      TyName = Name;
    } else {
      // Be conservative if the type isn't a RecordType. We are specifically
      // required to do this for member pointers until we implement the
      // similar-types rule.
      const auto *RT = Ty->getAsCanonical<RecordType>();
      if (!RT)
        return getAnyPtr(PtrDepth);

      // For unnamed structs or unions C's compatible types rule applies. Two
      // compatible types in different compilation units can have different
      // mangled names, meaning the metadata emitted below would incorrectly
      // mark them as no-alias. Use AnyPtr for such types in both C and C++, as
      // C and C++ types may be visible when doing LTO.
      //
      // Note that using AnyPtr is overly conservative. We could summarize the
      // members of the type, as per the C compatibility rule in the future.
      // This also covers anonymous structs and unions, which have a different
      // compatibility rule, but it doesn't matter because you can never have a
      // pointer to an anonymous struct or union.
      if (!RT->getDecl()->getDeclName())
        return getAnyPtr(PtrDepth);

      // For non-builtin types use the mangled name of the canonical type.
      llvm::raw_svector_ostream TyOut(TyName);
      MangleCtx->mangleCanonicalTypeName(T: QualType(Ty, 0), TyOut);
    }

    // Final node name is "p<depth> <pointee name>", parented on the "any"
    // pointer node of the same depth.
    SmallString<256> OutName("p");
    OutName += std::to_string(val: PtrDepth);
    OutName += " ";
    OutName += TyName;
    return createScalarTypeNode(Name: OutName, Parent: getAnyPtr(PtrDepth), Size);
  }

  // Accesses to arrays are accesses to objects of their element types.
  if (CodeGenOpts.NewStructPathTBAA && Ty->isArrayType())
    return getTypeInfo(QTy: cast<ArrayType>(Val: Ty)->getElementType());

  // Accesses to matrix types are accesses to objects of their element types.
  if (const auto *MTy = dyn_cast<MatrixType>(Val: Ty)) {
    assert(isa<ConstantMatrixType>(Ty) &&
           "only ConstantMatrixType should reach CodeGen");
    return getTypeInfo(QTy: MTy->getElementType());
  }

  // Enum types are distinct types. In C++ they have "underlying types",
  // however they aren't related for TBAA.
  if (const EnumType *ETy = dyn_cast<EnumType>(Val: Ty)) {
    const EnumDecl *ED = ETy->getDecl()->getDefinitionOrSelf();
    if (!Features.CPlusPlus)
      return getTypeInfo(QTy: ED->getIntegerType());

    // In C++ mode, types have linkage, so we can rely on the ODR and
    // on their mangled names, if they're external.
    // TODO: Is there a way to get a program-wide unique name for a
    // decl with local linkage or no linkage?
    if (!ED->isExternallyVisible())
      return getChar();

    SmallString<256> OutName;
    llvm::raw_svector_ostream Out(OutName);
    CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName(
        T: QualType(ETy, 0), Out);
    return createScalarTypeNode(Name: OutName, Parent: getChar(), Size);
  }

  if (const auto *EIT = dyn_cast<BitIntType>(Val: Ty)) {
    SmallString<256> OutName;
    llvm::raw_svector_ostream Out(OutName);
    // Don't specify signed/unsigned since integer types can alias despite sign
    // differences.
    Out << "_BitInt(" << EIT->getNumBits() << ')';
    return createScalarTypeNode(Name: OutName, Parent: getChar(), Size);
  }

  // For now, handle any other kind of type conservatively.
  return getChar();
}
372
/// Return the TBAA type node for \p QTy, or nullptr when TBAA metadata
/// should not be emitted at all. Results are memoized per canonical type in
/// MetadataCache.
llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
  // At -O0 or relaxed aliasing, TBAA is not emitted for regular types (unless
  // we're running TypeSanitizer).
  if (!Features.Sanitize.has(K: SanitizerKind::Type) &&
      (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing))
    return nullptr;

  // If the type has the may_alias attribute (even on a typedef), it is
  // effectively in the general char alias class.
  if (TypeHasMayAlias(QTy))
    return getChar();

  // We need this function to not fall back to returning the "omnipotent char"
  // type node for aggregate and union types. Otherwise, any dereference of an
  // aggregate will result into the may-alias access descriptor, meaning all
  // subsequent accesses to direct and indirect members of that aggregate will
  // be considered may-alias too.
  // TODO: Combine getTypeInfo() and getValidBaseTypeInfo() into a single
  // function.
  if (isValidBaseType(QTy))
    return getValidBaseTypeInfo(QTy);

  // Key the cache on the canonical type so sugar variants share one node.
  const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr();
  if (llvm::MDNode *N = MetadataCache[Ty])
    return N;

  // Note that the following helper call is allowed to add new nodes to the
  // cache, which invalidates all its previously obtained iterators. So we
  // first generate the node for the type and then add that node to the cache.
  llvm::MDNode *TypeNode = getTypeInfoHelper(Ty);
  return MetadataCache[Ty] = TypeNode;
}
405
406TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) {
407 // Pointee values may have incomplete types, but they shall never be
408 // dereferenced.
409 if (AccessType->isIncompleteType())
410 return TBAAAccessInfo::getIncompleteInfo();
411
412 if (TypeHasMayAlias(QTy: AccessType))
413 return TBAAAccessInfo::getMayAliasInfo();
414
415 uint64_t Size = Context.getTypeSizeInChars(T: AccessType).getQuantity();
416 return TBAAAccessInfo(getTypeInfo(QTy: AccessType), Size);
417}
418
419TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
420 const llvm::DataLayout &DL = Module.getDataLayout();
421 unsigned Size = DL.getPointerTypeSize(Ty: VTablePtrType);
422 return TBAAAccessInfo(createScalarTypeNode(Name: "vtable pointer", Parent: getRoot(), Size),
423 Size);
424}
425
/// Collect the byte-level field layout of \p QTy into \p Fields for a
/// !tbaa.struct descriptor, with every offset shifted by \p BaseOffset.
/// Returns false when the type cannot be summarized (flexible array member,
/// C++ base classes); in that case the caller must discard Fields. When
/// \p MayAlias is set, all leaf fields are tagged as "omnipotent char".
bool
CodeGenTBAA::CollectFields(uint64_t BaseOffset,
                           QualType QTy,
                           SmallVectorImpl<llvm::MDBuilder::TBAAStructField> &
                               Fields,
                           bool MayAlias) {
  /* Things not handled yet include: C++ base classes, bitfields, */

  if (const auto *TTy = QTy->getAsCanonical<RecordType>()) {
    // Union members overlap, so a union is summarized as a single
    // char-typed region instead of per-member fields.
    if (TTy->isUnionType()) {
      uint64_t Size = Context.getTypeSizeInChars(T: QTy).getQuantity();
      llvm::MDNode *TBAAType = getChar();
      llvm::MDNode *TBAATag = getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size));
      Fields.push_back(
          Elt: llvm::MDBuilder::TBAAStructField(BaseOffset, Size, TBAATag));
      return true;
    }
    const RecordDecl *RD = TTy->getDecl()->getDefinition();
    if (RD->hasFlexibleArrayMember())
      return false;

    // TODO: Handle C++ base classes.
    if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(Val: RD))
      if (!Decl->bases().empty())
        return false;

    const ASTRecordLayout &Layout = Context.getASTRecordLayout(D: RD);
    const CGRecordLayout &CGRL = CGTypes.getCGRecordLayout(RD);

    // Walk fields in declaration order; idx tracks the layout field number.
    unsigned idx = 0;
    for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
         i != e; ++i, ++idx) {
      if (isEmptyFieldForLayout(Context, FD: *i))
        continue;

      uint64_t Offset =
          BaseOffset + Layout.getFieldOffset(FieldNo: idx) / Context.getCharWidth();

      // Create a single field for consecutive named bitfields using char as
      // base type.
      if ((*i)->isBitField()) {
        const CGBitFieldInfo &Info = CGRL.getBitFieldInfo(FD: *i);
        // For big endian targets the first bitfield in the consecutive run is
        // at the most-significant end; see CGRecordLowering::setBitFieldInfo
        // for more information.
        bool IsBE = Context.getTargetInfo().isBigEndian();
        bool IsFirst = IsBE ? Info.StorageSize - (Info.Offset + Info.Size) == 0
                            : Info.Offset == 0;
        // Later bitfields in the run are already covered by the first one's
        // storage-unit-sized entry, so skip them.
        if (!IsFirst)
          continue;
        unsigned CurrentBitFieldSize = Info.StorageSize;
        uint64_t Size =
            llvm::divideCeil(Numerator: CurrentBitFieldSize, Denominator: Context.getCharWidth());
        llvm::MDNode *TBAAType = getChar();
        llvm::MDNode *TBAATag =
            getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size));
        Fields.push_back(
            Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
        continue;
      }

      // Recurse into non-bitfield members; may_alias is sticky downward.
      QualType FieldQTy = i->getType();
      if (!CollectFields(BaseOffset: Offset, QTy: FieldQTy, Fields,
                         MayAlias: MayAlias || TypeHasMayAlias(QTy: FieldQTy)))
        return false;
    }
    return true;
  }

  /* Otherwise, treat whatever it is as a field. */
  uint64_t Offset = BaseOffset;
  uint64_t Size = Context.getTypeSizeInChars(T: QTy).getQuantity();
  llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy);
  llvm::MDNode *TBAATag = getAccessTagInfo(Info: TBAAAccessInfo(TBAAType, Size));
  Fields.push_back(Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
  return true;
}
503
504llvm::MDNode *
505CodeGenTBAA::getTBAAStructInfo(QualType QTy) {
506 if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
507 return nullptr;
508
509 const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr();
510
511 if (llvm::MDNode *N = StructMetadataCache[Ty])
512 return N;
513
514 SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields;
515 if (CollectFields(BaseOffset: 0, QTy, Fields, MayAlias: TypeHasMayAlias(QTy)))
516 return MDHelper.createTBAAStructNode(Fields);
517
518 // For now, handle any other kind of type conservatively.
519 return StructMetadataCache[Ty] = nullptr;
520}
521
/// Build the struct-path TBAA base-type node for \p Ty (a canonical record
/// type): a named node listing (offset, type) entries for the non-virtual
/// bases and the named, non-zero-size fields. Returns nullptr when any
/// constituent lacks a type node, or for non-record types.
llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
  if (auto *TTy = dyn_cast<RecordType>(Val: Ty)) {
    const RecordDecl *RD = TTy->getDecl()->getDefinition();
    const ASTRecordLayout &Layout = Context.getASTRecordLayout(D: RD);
    using TBAAStructField = llvm::MDBuilder::TBAAStructField;
    SmallVector<TBAAStructField, 4> Fields;
    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
      // Handle C++ base classes. Non-virtual bases can treated a kind of
      // field. Virtual bases are more complex and omitted, but avoid an
      // incomplete view for NewStructPathTBAA.
      if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0)
        return nullptr;
      for (const CXXBaseSpecifier &B : CXXRD->bases()) {
        if (B.isVirtual())
          continue;
        QualType BaseQTy = B.getType();
        const CXXRecordDecl *BaseRD = BaseQTy->getAsCXXRecordDecl();
        if (BaseRD->isEmpty())
          continue;
        llvm::MDNode *TypeNode = isValidBaseType(QTy: BaseQTy)
                                     ? getValidBaseTypeInfo(QTy: BaseQTy)
                                     : getTypeInfo(QTy: BaseQTy);
        if (!TypeNode)
          return nullptr;
        uint64_t Offset = Layout.getBaseClassOffset(Base: BaseRD).getQuantity();
        // Use the data size (not the full size) so tail padding of the base
        // is not claimed by this entry.
        uint64_t Size =
            Context.getASTRecordLayout(D: BaseRD).getDataSize().getQuantity();
        Fields.push_back(
            Elt: llvm::MDBuilder::TBAAStructField(Offset, Size, TypeNode));
      }
      // The order in which base class subobjects are allocated is unspecified,
      // so may differ from declaration order. In particular, Itanium ABI will
      // allocate a primary base first.
      // Since we exclude empty subobjects, the objects are not overlapping and
      // their offsets are unique.
      llvm::sort(C&: Fields,
                 Comp: [](const TBAAStructField &A, const TBAAStructField &B) {
                   return A.Offset < B.Offset;
                 });
    }
    for (FieldDecl *Field : RD->fields()) {
      // Zero-size and unnamed-bitfield members occupy no distinct storage.
      if (Field->isZeroSize(Ctx: Context) || Field->isUnnamedBitField())
        continue;
      QualType FieldQTy = Field->getType();
      llvm::MDNode *TypeNode = isValidBaseType(QTy: FieldQTy)
                                   ? getValidBaseTypeInfo(QTy: FieldQTy)
                                   : getTypeInfo(QTy: FieldQTy);
      if (!TypeNode)
        return nullptr;

      uint64_t BitOffset = Layout.getFieldOffset(FieldNo: Field->getFieldIndex());
      uint64_t Offset = Context.toCharUnitsFromBits(BitSize: BitOffset).getQuantity();
      uint64_t Size = Context.getTypeSizeInChars(T: FieldQTy).getQuantity();
      Fields.push_back(Elt: llvm::MDBuilder::TBAAStructField(Offset, Size,
                                                        TypeNode));
    }

    // Name the node: mangled canonical type name in C++, plain record name
    // in C (no reliable mangling there).
    SmallString<256> OutName;
    if (Features.CPlusPlus) {
      // Don't use the mangler for C code.
      llvm::raw_svector_ostream Out(OutName);
      CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName(
          T: QualType(Ty, 0), Out);
    } else {
      OutName = RD->getName();
    }

    if (CodeGenOpts.NewStructPathTBAA) {
      llvm::MDNode *Parent = getChar();
      uint64_t Size = Context.getTypeSizeInChars(T: Ty).getQuantity();
      llvm::Metadata *Id = MDHelper.createString(Str: OutName);
      return MDHelper.createTBAATypeNode(Parent, Size, Id, Fields);
    }

    // Create the struct type node with a vector of pairs (offset, type).
    SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes;
    for (const auto &Field : Fields)
      OffsetsAndTypes.push_back(Elt: std::make_pair(x: Field.Type, y: Field.Offset));
    return MDHelper.createTBAAStructTypeNode(Name: OutName, Fields: OffsetsAndTypes);
  }

  return nullptr;
}
605
606llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) {
607 assert(isValidBaseType(QTy) && "Must be a valid base type");
608
609 const Type *Ty = Context.getCanonicalType(T: QTy).getTypePtr();
610
611 // nullptr is a valid value in the cache, so use find rather than []
612 auto I = BaseTypeMetadataCache.find(Val: Ty);
613 if (I != BaseTypeMetadataCache.end())
614 return I->second;
615
616 // First calculate the metadata, before recomputing the insertion point, as
617 // the helper can recursively call us.
618 llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
619 [[maybe_unused]] auto inserted = BaseTypeMetadataCache.insert(KV: {Ty, TypeNode});
620 assert(inserted.second && "BaseType metadata was already inserted");
621
622 return TypeNode;
623}
624
625llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
626 return isValidBaseType(QTy) ? getValidBaseTypeInfo(QTy) : nullptr;
627}
628
/// Produce the metadata node to attach as !tbaa for an access described by
/// \p Info. Results are memoized in AccessTagMetadataCache.
llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
  assert(!Info.isIncomplete() && "Access to an object of an incomplete type!");

  // May-alias accesses become plain "omnipotent char" accesses.
  if (Info.isMayAlias())
    Info = TBAAAccessInfo(getChar(), Info.Size);

  if (!Info.AccessType)
    return nullptr;

  // Without struct-path TBAA, drop the base type and offset so the tag
  // describes a direct scalar access.
  if (!CodeGenOpts.StructPathTBAA)
    Info = TBAAAccessInfo(Info.AccessType, Info.Size);

  // NOTE: the cache entry is keyed on Info *before* the BaseType defaulting
  // below; N remains a valid reference because nothing between here and the
  // assignments inserts into AccessTagMetadataCache.
  llvm::MDNode *&N = AccessTagMetadataCache[Info];
  if (N)
    return N;

  // A tag with no base type describes a direct access; default the base to
  // the access type itself at offset zero.
  if (!Info.BaseType) {
    Info.BaseType = Info.AccessType;
    assert(!Info.Offset && "Nonzero offset for an access with no base type!");
  }
  if (CodeGenOpts.NewStructPathTBAA) {
    return N = MDHelper.createTBAAAccessTag(BaseType: Info.BaseType, AccessType: Info.AccessType,
                                            Offset: Info.Offset, Size: Info.Size);
  }
  return N = MDHelper.createTBAAStructTagNode(BaseType: Info.BaseType, AccessType: Info.AccessType,
                                              Offset: Info.Offset);
}
656
657TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
658 TBAAAccessInfo TargetInfo) {
659 if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias())
660 return TBAAAccessInfo::getMayAliasInfo();
661 return TargetInfo;
662}
663
664TBAAAccessInfo
665CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
666 TBAAAccessInfo InfoB) {
667 if (InfoA == InfoB)
668 return InfoA;
669
670 if (!InfoA || !InfoB)
671 return TBAAAccessInfo();
672
673 if (InfoA.isMayAlias() || InfoB.isMayAlias())
674 return TBAAAccessInfo::getMayAliasInfo();
675
676 // TODO: Implement the rest of the logic here. For example, two accesses
677 // with same final access types result in an access to an object of that final
678 // access type regardless of their base types.
679 return TBAAAccessInfo::getMayAliasInfo();
680}
681
682TBAAAccessInfo
683CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
684 TBAAAccessInfo SrcInfo) {
685 if (DestInfo == SrcInfo)
686 return DestInfo;
687
688 if (!DestInfo || !SrcInfo)
689 return TBAAAccessInfo();
690
691 if (DestInfo.isMayAlias() || SrcInfo.isMayAlias())
692 return TBAAAccessInfo::getMayAliasInfo();
693
694 // TODO: Implement the rest of the logic here. For example, two accesses
695 // with same final access types result in an access to an object of that final
696 // access type regardless of their base types.
697 return TBAAAccessInfo::getMayAliasInfo();
698}
699