//===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is the code that manages TBAA information and defines the TBAA policy
// for the optimizer to use. Relevant standards text includes:
//
//   C99 6.5p7
//   C++ [basic.lval] (p10 in n3126, p15 in some earlier versions)
//
//===----------------------------------------------------------------------===//

#include "CodeGenTBAA.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGRecordLayout.h"
#include "CodeGenTypes.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
using namespace clang;
using namespace CodeGen;

CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes,
                         llvm::Module &M, const CodeGenOptions &CGO,
                         const LangOptions &Features)
    : Context(Ctx), CGTypes(CGTypes), Module(M), CodeGenOpts(CGO),
      Features(Features),
      MangleCtx(ItaniumMangleContext::create(Ctx, Ctx.getDiagnostics())),
      MDHelper(M.getContext()), Root(nullptr), Char(nullptr) {}

CodeGenTBAA::~CodeGenTBAA() {
}

llvm::MDNode *CodeGenTBAA::getRoot() {
  // Define the root of the tree. This identifies the tree, so that
  // if our LLVM IR is linked with LLVM IR from a different front-end
  // (or a different version of this front-end), their TBAA trees will
  // remain distinct, and the optimizer will treat them conservatively.
  if (!Root) {
    if (Features.CPlusPlus)
      Root = MDHelper.createTBAARoot("Simple C++ TBAA");
    else
      Root = MDHelper.createTBAARoot("Simple C/C++ TBAA");
  }

  return Root;
}

llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name,
                                                llvm::MDNode *Parent,
                                                uint64_t Size) {
  if (CodeGenOpts.NewStructPathTBAA) {
    llvm::Metadata *Id = MDHelper.createString(Name);
    return MDHelper.createTBAATypeNode(Parent, Size, Id);
  }
  return MDHelper.createTBAAScalarTypeNode(Name, Parent);
}

llvm::MDNode *CodeGenTBAA::getChar() {
  // Define the root of the tree for user-accessible memory. C and C++
  // give special powers to char and certain similar types. However,
  // these special powers only cover user-accessible memory and don't
  // include things like vtables.
  if (!Char)
    Char = createScalarTypeNode("omnipotent char", getRoot(), /* Size= */ 1);

  return Char;
}

llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) {
  assert(PtrDepth >= 1 && "Pointer must have some depth");

  // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes
  // for "any" pointers of increasing pointer depth, and are organized in the
  // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ...
  //
  // Note that AnyPtrs[Idx] is actually the node for pointer depth (Idx+1),
  // since there is no node for pointer depth 0.
  //
  // These "any" pointer type nodes are used in pointer TBAA. The type node of
  // a concrete pointer type has the "any" pointer type node of appropriate
  // pointer depth as its parent. The "any" pointer type nodes are also used
  // directly for accesses to void pointers, or to specific pointers that we
  // conservatively do not distinguish in pointer TBAA (e.g. pointers to
  // members). Essentially, this establishes that e.g. void** can alias with
  // any type that can unify with T**, ignoring things like qualifiers. Here, T
  // is a variable that represents an arbitrary type, including pointer types.
  // As such, each depth is naturally a subtype of the previous depth, and thus
  // transitively of all previous depths.
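  // For example, with pointer TBAA enabled, an access through `void **` uses
  // the "any p2 pointer" node (AnyPtrs[1]), whose parent is the depth-1
  // "any pointer" node.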
  if (AnyPtrs.size() < PtrDepth) {
    AnyPtrs.reserve(PtrDepth);
    auto Size = Module.getDataLayout().getPointerSize();
    // Populate first element.
    if (AnyPtrs.empty())
      AnyPtrs.push_back(createScalarTypeNode("any pointer", getChar(), Size));
    // Populate further elements.
    for (size_t Idx = AnyPtrs.size(); Idx < PtrDepth; ++Idx) {
      auto Name = ("any p" + llvm::Twine(Idx + 1) + " pointer").str();
      AnyPtrs.push_back(createScalarTypeNode(Name, AnyPtrs[Idx - 1], Size));
    }
  }

  return AnyPtrs[PtrDepth - 1];
}

static bool TypeHasMayAlias(QualType QTy) {
  // Tagged types have declarations, and therefore may have attributes.
  if (auto *TD = QTy->getAsTagDecl())
    if (TD->hasAttr<MayAliasAttr>())
      return true;

  // Also look for may_alias as a declaration attribute on a typedef.
  // FIXME: We should follow GCC and model may_alias as a type attribute
  // rather than as a declaration attribute.
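  // For example, `typedef int __attribute__((may_alias)) aliasing_int;`
  // attaches the attribute to the typedef declaration rather than to the
  // underlying type, so we have to walk the typedef sugar here.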
  while (auto *TT = QTy->getAs<TypedefType>()) {
    if (TT->getDecl()->hasAttr<MayAliasAttr>())
      return true;
    QTy = TT->desugar();
  }

  // Also consider an array type as may_alias when its element type (at
  // any level) is marked as such.
  if (auto *ArrayTy = QTy->getAsArrayTypeUnsafe())
    if (TypeHasMayAlias(ArrayTy->getElementType()))
      return true;

  return false;
}

/// Check if the given type is a valid base type to be used in access tags.
static bool isValidBaseType(QualType QTy) {
  if (const RecordType *TTy = QTy->getAs<RecordType>()) {
    const RecordDecl *RD = TTy->getDecl()->getDefinition();
    // Incomplete types are not valid base access types.
    if (!RD)
      return false;
    if (RD->hasFlexibleArrayMember())
      return false;
    // RD can be struct, union, class, interface or enum.
    // For now, we only handle struct and class.
    if (RD->isStruct() || RD->isClass())
      return true;
  }
  return false;
}

llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
  uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();

  // Handle builtin types.
  if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) {
    switch (BTy->getKind()) {
    // Character types are special and can alias anything.
    // In C++, this technically only includes "char" and "unsigned char",
    // and not "signed char". In C, it includes all three. For now,
    // the risk of exploiting this detail in C++ seems likely to outweigh
    // the benefit.
    case BuiltinType::Char_U:
    case BuiltinType::Char_S:
    case BuiltinType::UChar:
    case BuiltinType::SChar:
      return getChar();

    // Unsigned types can alias their corresponding signed types.
    case BuiltinType::UShort:
      return getTypeInfo(Context.ShortTy);
    case BuiltinType::UInt:
      return getTypeInfo(Context.IntTy);
    case BuiltinType::ULong:
      return getTypeInfo(Context.LongTy);
    case BuiltinType::ULongLong:
      return getTypeInfo(Context.LongLongTy);
    case BuiltinType::UInt128:
      return getTypeInfo(Context.Int128Ty);

    case BuiltinType::UShortFract:
      return getTypeInfo(Context.ShortFractTy);
    case BuiltinType::UFract:
      return getTypeInfo(Context.FractTy);
    case BuiltinType::ULongFract:
      return getTypeInfo(Context.LongFractTy);

    case BuiltinType::SatUShortFract:
      return getTypeInfo(Context.SatShortFractTy);
    case BuiltinType::SatUFract:
      return getTypeInfo(Context.SatFractTy);
    case BuiltinType::SatULongFract:
      return getTypeInfo(Context.SatLongFractTy);

    case BuiltinType::UShortAccum:
      return getTypeInfo(Context.ShortAccumTy);
    case BuiltinType::UAccum:
      return getTypeInfo(Context.AccumTy);
    case BuiltinType::ULongAccum:
      return getTypeInfo(Context.LongAccumTy);

    case BuiltinType::SatUShortAccum:
      return getTypeInfo(Context.SatShortAccumTy);
    case BuiltinType::SatUAccum:
      return getTypeInfo(Context.SatAccumTy);
    case BuiltinType::SatULongAccum:
      return getTypeInfo(Context.SatLongAccumTy);

    // Treat all other builtin types as distinct types. This includes
    // treating wchar_t, char16_t, and char32_t as distinct from their
    // "underlying types".
    default:
      return createScalarTypeNode(BTy->getName(Features), getChar(), Size);
    }
  }

  // C++1z [basic.lval]p10: "If a program attempts to access the stored value of
  // an object through a glvalue of other than one of the following types the
  // behavior is undefined: [...] a char, unsigned char, or std::byte type."
  if (Ty->isStdByteType())
    return getChar();

  // Handle pointers and references.
  //
  // C has a very strict rule for pointer aliasing. C23 6.7.6.1p2:
  //   For two pointer types to be compatible, both shall be identically
  //   qualified and both shall be pointers to compatible types.
  //
  // This rule is impractically strict; we want to at least ignore CVR
  // qualifiers. Distinguishing by CVR qualifiers would make it UB to
  // e.g. cast a `char **` to `const char * const *` and dereference it,
  // which is too common and useful to invalidate. C++'s similar types
  // rule permits qualifier differences in these nested positions; in fact,
  // C++ even allows that cast as an implicit conversion.
  //
  // Other qualifiers could theoretically be distinguished, especially if
  // they involve a significant representation difference. We don't
  // currently do so, however.
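  // Hence both the `char **` and `const char *const *` accesses mentioned
  // above end up with identical TBAA tags, whether or not pointer TBAA is
  // enabled.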
  if (Ty->isPointerType() || Ty->isReferenceType()) {
    if (!CodeGenOpts.PointerTBAA)
      return getAnyPtr();
    // C++ [basic.lval]p11 permits objects to be accessed through an l-value
    // of similar type. Two types are similar under C++ [conv.qual]p2 if the
    // decomposition of the types into pointers, member pointers, and arrays
    // has the same structure when ignoring cv-qualifiers at each level of the
    // decomposition. Meanwhile, C makes T(*)[] and T(*)[N] compatible, which
    // would really complicate any attempt to distinguish pointers to arrays by
    // their bounds. It's simpler, and much easier to explain to users, to
    // simply treat all pointers to arrays as pointers to their element type
    // for aliasing purposes. So when creating a TBAA tag for a pointer type,
    // we recursively ignore both qualifiers and array types when decomposing
    // the pointee type. The only meaningful remaining structure is the number
    // of pointer types we encountered along the way, so we just produce the
    // tag "p<depth> <base type tag>". If we do find a member pointer type, for
    // now we just conservatively bail out with AnyPtr (below) rather than
    // trying to create a tag that honors the similar-type rules while still
    // distinguishing different kinds of member pointer.
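    // For instance, an access through `int **` is tagged "p2 int" with the
    // "any p2 pointer" node as its parent, and `int *(*)[4]` collapses to the
    // same "p2 int" tag because the array level is ignored.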
    unsigned PtrDepth = 0;
    do {
      PtrDepth++;
      Ty = Ty->getPointeeType()->getBaseElementTypeUnsafe();
    } while (Ty->isPointerType());

    // While there are no special rules in the standards regarding void
    // pointers and strict aliasing, emitting distinct tags for void pointers
    // breaks some common idioms, and there is no good way to rewrite such
    // code without strict-aliasing violations.
    if (Ty->isVoidType())
      return getAnyPtr(PtrDepth);

    assert(!isa<VariableArrayType>(Ty));
    // When the underlying type is a builtin type, we compute the pointee type
    // string recursively, which is implicitly more forgiving than the
    // standards require. Effectively, we are turning the question "are these
    // types compatible/similar" into "are accesses to these types allowed to
    // alias". In both C and C++, the latter question has special carve-outs
    // for signedness mismatches that only apply at the top level. As a result,
    // we are allowing e.g. `int *` l-values to access `unsigned *` objects.
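    // E.g. an access through `unsigned *` produces the tag "p1 int", the same
    // tag as an access through `int *`.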
    SmallString<256> TyName;
    if (isa<BuiltinType>(Ty)) {
      llvm::MDNode *ScalarMD = getTypeInfoHelper(Ty);
      StringRef Name =
          cast<llvm::MDString>(
              ScalarMD->getOperand(CodeGenOpts.NewStructPathTBAA ? 2 : 0))
              ->getString();
      TyName = Name;
    } else {
      // Be conservative if the type isn't a RecordType. We are specifically
      // required to do this for member pointers until we implement the
      // similar-types rule.
      const auto *RT = Ty->getAs<RecordType>();
      if (!RT)
        return getAnyPtr(PtrDepth);

      // For unnamed structs or unions C's compatible types rule applies. Two
      // compatible types in different compilation units can have different
      // mangled names, meaning the metadata emitted below would incorrectly
      // mark them as no-alias. Use AnyPtr for such types in both C and C++, as
      // C and C++ types may be visible when doing LTO.
      //
      // Note that using AnyPtr is overly conservative. We could summarize the
      // members of the type, as per the C compatibility rule in the future.
      // This also covers anonymous structs and unions, which have a different
      // compatibility rule, but it doesn't matter because you can never have a
      // pointer to an anonymous struct or union.
      if (!RT->getDecl()->getDeclName())
        return getAnyPtr(PtrDepth);

      // For non-builtin types use the mangled name of the canonical type.
      llvm::raw_svector_ostream TyOut(TyName);
      MangleCtx->mangleCanonicalTypeName(QualType(Ty, 0), TyOut);
    }

    SmallString<256> OutName("p");
    OutName += std::to_string(PtrDepth);
    OutName += " ";
    OutName += TyName;
    return createScalarTypeNode(OutName, getAnyPtr(PtrDepth), Size);
  }

  // Accesses to arrays are accesses to objects of their element types.
  if (CodeGenOpts.NewStructPathTBAA && Ty->isArrayType())
    return getTypeInfo(cast<ArrayType>(Ty)->getElementType());

  // Enum types are distinct types. In C++ they have "underlying types",
  // however they aren't related for TBAA.
  if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) {
    if (!Features.CPlusPlus)
      return getTypeInfo(ETy->getDecl()->getIntegerType());

    // In C++ mode, types have linkage, so we can rely on the ODR and
    // on their mangled names, if they're external.
    // TODO: Is there a way to get a program-wide unique name for a
    // decl with local linkage or no linkage?
    if (!ETy->getDecl()->isExternallyVisible())
      return getChar();

    SmallString<256> OutName;
    llvm::raw_svector_ostream Out(OutName);
    CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName(
        QualType(ETy, 0), Out);
    return createScalarTypeNode(OutName, getChar(), Size);
  }

  if (const auto *EIT = dyn_cast<BitIntType>(Ty)) {
    SmallString<256> OutName;
    llvm::raw_svector_ostream Out(OutName);
    // Don't specify signed/unsigned since integer types can alias despite sign
    // differences.
    Out << "_BitInt(" << EIT->getNumBits() << ')';
    return createScalarTypeNode(OutName, getChar(), Size);
  }

  // For now, handle any other kind of type conservatively.
  return getChar();
}

llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
  // At -O0 or relaxed aliasing, TBAA is not emitted for regular types (unless
  // we're running TypeSanitizer).
  if (!Features.Sanitize.has(SanitizerKind::Type) &&
      (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing))
    return nullptr;

  // If the type has the may_alias attribute (even on a typedef), it is
  // effectively in the general char alias class.
  if (TypeHasMayAlias(QTy))
    return getChar();

  // We need this function to not fall back to returning the "omnipotent char"
  // type node for aggregate and union types. Otherwise, any dereference of an
  // aggregate will result in the may-alias access descriptor, meaning all
  // subsequent accesses to direct and indirect members of that aggregate will
  // be considered may-alias too.
  // TODO: Combine getTypeInfo() and getValidBaseTypeInfo() into a single
  // function.
  if (isValidBaseType(QTy))
    return getValidBaseTypeInfo(QTy);

  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
  if (llvm::MDNode *N = MetadataCache[Ty])
    return N;

  // Note that the following helper call is allowed to add new nodes to the
  // cache, which invalidates all its previously obtained iterators. So we
  // first generate the node for the type and then add that node to the cache.
  llvm::MDNode *TypeNode = getTypeInfoHelper(Ty);
  return MetadataCache[Ty] = TypeNode;
}

TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) {
  // Pointee values may have incomplete types, but they shall never be
  // dereferenced.
  if (AccessType->isIncompleteType())
    return TBAAAccessInfo::getIncompleteInfo();

  if (TypeHasMayAlias(AccessType))
    return TBAAAccessInfo::getMayAliasInfo();

  uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity();
  return TBAAAccessInfo(getTypeInfo(AccessType), Size);
}

TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
  const llvm::DataLayout &DL = Module.getDataLayout();
  unsigned Size = DL.getPointerTypeSize(VTablePtrType);
  return TBAAAccessInfo(createScalarTypeNode("vtable pointer", getRoot(), Size),
                        Size);
}

bool
CodeGenTBAA::CollectFields(uint64_t BaseOffset,
                           QualType QTy,
                           SmallVectorImpl<llvm::MDBuilder::TBAAStructField> &
                               Fields,
                           bool MayAlias) {
  /* Things not handled yet include: C++ base classes, bitfields, */

  if (const RecordType *TTy = QTy->getAs<RecordType>()) {
    if (TTy->isUnionType()) {
      uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity();
      llvm::MDNode *TBAAType = getChar();
      llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
      Fields.push_back(
          llvm::MDBuilder::TBAAStructField(BaseOffset, Size, TBAATag));
      return true;
    }
    const RecordDecl *RD = TTy->getDecl()->getDefinition();
    if (RD->hasFlexibleArrayMember())
      return false;

    // TODO: Handle C++ base classes.
    if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(RD))
      if (Decl->bases_begin() != Decl->bases_end())
        return false;

    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
    const CGRecordLayout &CGRL = CGTypes.getCGRecordLayout(RD);

    unsigned idx = 0;
    for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
         i != e; ++i, ++idx) {
      if (isEmptyFieldForLayout(Context, *i))
        continue;

      uint64_t Offset =
          BaseOffset + Layout.getFieldOffset(idx) / Context.getCharWidth();

      // Create a single field for consecutive named bitfields using char as
      // base type.
      if ((*i)->isBitField()) {
        const CGBitFieldInfo &Info = CGRL.getBitFieldInfo(*i);
        // For big endian targets the first bitfield in the consecutive run is
        // at the most-significant end; see CGRecordLowering::setBitFieldInfo
        // for more information.
        bool IsBE = Context.getTargetInfo().isBigEndian();
        bool IsFirst = IsBE ? Info.StorageSize - (Info.Offset + Info.Size) == 0
                            : Info.Offset == 0;
        if (!IsFirst)
          continue;
        unsigned CurrentBitFieldSize = Info.StorageSize;
        uint64_t Size =
            llvm::divideCeil(CurrentBitFieldSize, Context.getCharWidth());
        llvm::MDNode *TBAAType = getChar();
        llvm::MDNode *TBAATag =
            getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
        Fields.push_back(
            llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
        continue;
      }

      QualType FieldQTy = i->getType();
      if (!CollectFields(Offset, FieldQTy, Fields,
                         MayAlias || TypeHasMayAlias(FieldQTy)))
        return false;
    }
    return true;
  }

  /* Otherwise, treat whatever it is as a field. */
  uint64_t Offset = BaseOffset;
  uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity();
  llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy);
  llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
  Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
  return true;
}

llvm::MDNode *
CodeGenTBAA::getTBAAStructInfo(QualType QTy) {
  if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
    return nullptr;

  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();

  if (llvm::MDNode *N = StructMetadataCache[Ty])
    return N;

  SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields;
  if (CollectFields(0, QTy, Fields, TypeHasMayAlias(QTy)))
    return MDHelper.createTBAAStructNode(Fields);

  // For now, handle any other kind of type conservatively.
  return StructMetadataCache[Ty] = nullptr;
}

llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
  if (auto *TTy = dyn_cast<RecordType>(Ty)) {
    const RecordDecl *RD = TTy->getDecl()->getDefinition();
    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
    using TBAAStructField = llvm::MDBuilder::TBAAStructField;
    SmallVector<TBAAStructField, 4> Fields;
    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      // Handle C++ base classes. Non-virtual bases can be treated as a kind
      // of field. Virtual bases are more complex and are omitted, but avoid
      // an incomplete view for NewStructPathTBAA.
      if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0)
        return nullptr;
      for (const CXXBaseSpecifier &B : CXXRD->bases()) {
        if (B.isVirtual())
          continue;
        QualType BaseQTy = B.getType();
        const CXXRecordDecl *BaseRD = BaseQTy->getAsCXXRecordDecl();
        if (BaseRD->isEmpty())
          continue;
        llvm::MDNode *TypeNode = isValidBaseType(BaseQTy)
                                     ? getValidBaseTypeInfo(BaseQTy)
                                     : getTypeInfo(BaseQTy);
        if (!TypeNode)
          return nullptr;
        uint64_t Offset = Layout.getBaseClassOffset(BaseRD).getQuantity();
        uint64_t Size =
            Context.getASTRecordLayout(BaseRD).getDataSize().getQuantity();
        Fields.push_back(
            llvm::MDBuilder::TBAAStructField(Offset, Size, TypeNode));
      }
      // The order in which base class subobjects are allocated is unspecified,
      // so may differ from declaration order. In particular, Itanium ABI will
      // allocate a primary base first.
      // Since we exclude empty subobjects, the objects are not overlapping and
      // their offsets are unique.
      llvm::sort(Fields,
                 [](const TBAAStructField &A, const TBAAStructField &B) {
                   return A.Offset < B.Offset;
                 });
    }
    for (FieldDecl *Field : RD->fields()) {
      if (Field->isZeroSize(Context) || Field->isUnnamedBitField())
        continue;
      QualType FieldQTy = Field->getType();
      llvm::MDNode *TypeNode = isValidBaseType(FieldQTy)
                                   ? getValidBaseTypeInfo(FieldQTy)
                                   : getTypeInfo(FieldQTy);
      if (!TypeNode)
        return nullptr;

      uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex());
      uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity();
      uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity();
      Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size,
                                                        TypeNode));
    }

    SmallString<256> OutName;
    if (Features.CPlusPlus) {
      // Don't use the mangler for C code.
      llvm::raw_svector_ostream Out(OutName);
      CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName(
          QualType(Ty, 0), Out);
    } else {
      OutName = RD->getName();
    }

    if (CodeGenOpts.NewStructPathTBAA) {
      llvm::MDNode *Parent = getChar();
      uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
      llvm::Metadata *Id = MDHelper.createString(OutName);
      return MDHelper.createTBAATypeNode(Parent, Size, Id, Fields);
    }

    // Create the struct type node with a vector of pairs (offset, type).
    SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes;
    for (const auto &Field : Fields)
      OffsetsAndTypes.push_back(std::make_pair(Field.Type, Field.Offset));
    return MDHelper.createTBAAStructTypeNode(OutName, OffsetsAndTypes);
  }

  return nullptr;
}

llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) {
  assert(isValidBaseType(QTy) && "Must be a valid base type");

  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();

  // nullptr is a valid value in the cache, so use find rather than []
  auto I = BaseTypeMetadataCache.find(Ty);
  if (I != BaseTypeMetadataCache.end())
    return I->second;

  // First calculate the metadata, before recomputing the insertion point, as
  // the helper can recursively call us.
  llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
  LLVM_ATTRIBUTE_UNUSED auto inserted =
      BaseTypeMetadataCache.insert({Ty, TypeNode});
  assert(inserted.second && "BaseType metadata was already inserted");

  return TypeNode;
}

llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
  return isValidBaseType(QTy) ? getValidBaseTypeInfo(QTy) : nullptr;
}

llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
  assert(!Info.isIncomplete() && "Access to an object of an incomplete type!");

  if (Info.isMayAlias())
    Info = TBAAAccessInfo(getChar(), Info.Size);

  if (!Info.AccessType)
    return nullptr;

  if (!CodeGenOpts.StructPathTBAA)
    Info = TBAAAccessInfo(Info.AccessType, Info.Size);

  llvm::MDNode *&N = AccessTagMetadataCache[Info];
  if (N)
    return N;

  if (!Info.BaseType) {
    Info.BaseType = Info.AccessType;
    assert(!Info.Offset && "Nonzero offset for an access with no base type!");
  }
  if (CodeGenOpts.NewStructPathTBAA) {
    return N = MDHelper.createTBAAAccessTag(Info.BaseType, Info.AccessType,
                                            Info.Offset, Info.Size);
  }
  return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType,
                                              Info.Offset);
}

TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
                                                 TBAAAccessInfo TargetInfo) {
  if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias())
    return TBAAAccessInfo::getMayAliasInfo();
  return TargetInfo;
}

TBAAAccessInfo
CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
                                                 TBAAAccessInfo InfoB) {
  if (InfoA == InfoB)
    return InfoA;

  if (!InfoA || !InfoB)
    return TBAAAccessInfo();

  if (InfoA.isMayAlias() || InfoB.isMayAlias())
    return TBAAAccessInfo::getMayAliasInfo();

  // TODO: Implement the rest of the logic here. For example, two accesses
  // with the same final access types result in an access to an object of
  // that final access type regardless of their base types.
  return TBAAAccessInfo::getMayAliasInfo();
}

TBAAAccessInfo
CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
                                            TBAAAccessInfo SrcInfo) {
  if (DestInfo == SrcInfo)
    return DestInfo;

  if (!DestInfo || !SrcInfo)
    return TBAAAccessInfo();

  if (DestInfo.isMayAlias() || SrcInfo.isMayAlias())
    return TBAAAccessInfo::getMayAliasInfo();

  // TODO: Implement the rest of the logic here. For example, two accesses
  // with the same final access types result in an access to an object of
  // that final access type regardless of their base types.
  return TBAAAccessInfo::getMayAliasInfo();
}