//===- LowerTypeTests.cpp - type metadata lowering pass ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers type metadata and calls to the llvm.type.test intrinsic.
// It also ensures that globals are properly laid out for the
// llvm.icall.branch.funnel intrinsic.
// See http://llvm.org/docs/TypeMetadata.html for more information.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <set>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

using namespace llvm;
using namespace lowertypetests;

#define DEBUG_TYPE "lowertypetests"

STATISTIC(ByteArraySizeBits, "Byte array size in bits");
STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");

static cl::opt<bool> AvoidReuse(
    "lowertypetests-avoid-reuse",
    cl::desc("Try to avoid reuse of byte array addresses using aliases"),
    cl::Hidden, cl::init(true));

static cl::opt<PassSummaryAction> ClSummaryAction(
    "lowertypetests-summary-action",
    cl::desc("What to do with the summary when running this pass"),
    cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
               clEnumValN(PassSummaryAction::Import, "import",
                          "Import typeid resolutions from summary and globals"),
               clEnumValN(PassSummaryAction::Export, "export",
                          "Export typeid resolutions to summary and globals")),
    cl::Hidden);

static cl::opt<std::string> ClReadSummary(
    "lowertypetests-read-summary",
    cl::desc("Read summary from given YAML file before running pass"),
    cl::Hidden);

static cl::opt<std::string> ClWriteSummary(
    "lowertypetests-write-summary",
    cl::desc("Write summary to given YAML file after running pass"),
    cl::Hidden);

static cl::opt<DropTestKind>
    ClDropTypeTests("lowertypetests-drop-type-tests",
                    cl::desc("Simply drop type test sequences"),
                    cl::values(clEnumValN(DropTestKind::None, "none",
                                          "Do not drop any type tests"),
                               clEnumValN(DropTestKind::Assume, "assume",
                                          "Drop type test assume sequences"),
                               clEnumValN(DropTestKind::All, "all",
                                          "Drop all type test sequences")),
                    cl::Hidden, cl::init(DropTestKind::None));

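// Illustrative example (numbers chosen for this comment, not taken from the
// source): a bit set built from global offsets {8, 16, 24} has ByteOffset 8,
// AlignLog2 3 and BitSize 3, so containsGlobalOffset(16) computes
// BitOffset (16 - 8) >> 3 == 1 and tests stored bit
// BitSize - 1 - BitOffset == 1.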
bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
  if (Offset < ByteOffset)
    return false;

  if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
    return false;

  uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
  if (BitOffset >= BitSize)
    return false;

  return Bits.count(BitSize - 1 - BitOffset);
}

void BitSetInfo::print(raw_ostream &OS) const {
  OS << "offset " << ByteOffset << " size " << BitSize << " align "
     << (1 << AlignLog2);

  if (isAllOnes()) {
    OS << " all-ones\n";
    return;
  }

  OS << " { ";
  for (uint64_t B : Bits)
    OS << B << ' ';
  OS << "}\n";
}

BitSetInfo BitSetBuilder::build() {
  if (Min > Max)
    Min = 0;

  // Normalize each offset against the minimum observed offset, and compute
  // the bitwise OR of each of the offsets. The number of trailing zeros
  // in the mask gives us the log2 of the alignment of all offsets, which
  // allows us to compress the bitset by only storing one bit per aligned
  // address.
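  // For example, the offsets {8, 16, 24} normalize to {0, 8, 16}, giving
  // Mask = 0b11000; its three trailing zeros yield AlignLog2 = 3, so only
  // ((24 - 8) >> 3) + 1 == 3 bits need to be stored.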
  uint64_t Mask = 0;
  for (uint64_t &Offset : Offsets) {
    Offset -= Min;
    Mask |= Offset;
  }

  BitSetInfo BSI;
  BSI.ByteOffset = Min;

  BSI.AlignLog2 = 0;
  if (Mask != 0)
    BSI.AlignLog2 = llvm::countr_zero(Mask);

  // Build the compressed bitset while normalizing the offsets against the
  // computed alignment.
  BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
  for (uint64_t Offset : Offsets) {
    Offset >>= BSI.AlignLog2;
    // We invert the order of bits when adding them to the bitset. This is
    // because the offset that we test against is computed by subtracting the
    // address that we are testing from the global's address, which means that
    // the offset increases as the tested address decreases.
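    // For example, with BitSize == 3, the normalized offset 0 is stored as
    // bit 2 and the normalized offset 2 as bit 0.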
    BSI.Bits.insert(BSI.BitSize - 1 - Offset);
  }

  return BSI;
}

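// Illustrative example (the builder reserves fragment index 0 as a sentinel,
// so a FragmentMap value of 0 below means "not yet placed"): calling
// addFragment({0, 1}) creates fragment 1 = [0, 1], and a subsequent
// addFragment({1, 2}) creates fragment 2 = [0, 1, 2], merging in the old
// fragment because object 1 was already placed.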
void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
  // Create a new fragment to hold the layout for F.
  Fragments.emplace_back();
  std::vector<uint64_t> &Fragment = Fragments.back();
  uint64_t FragmentIndex = Fragments.size() - 1;

  for (auto ObjIndex : F) {
    uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
    if (OldFragmentIndex == 0) {
      // We haven't seen this object index before, so just add it to the
      // current fragment.
      Fragment.push_back(ObjIndex);
    } else {
      // This index belongs to an existing fragment. Copy the elements of the
      // old fragment into this one and clear the old fragment. We don't update
      // the fragment map just yet; this ensures that any further references to
      // indices from the old fragment in this fragment do not insert any more
      // indices.
      std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
      llvm::append_range(Fragment, OldFragment);
      OldFragment.clear();
    }
  }

  // Update the fragment map to point our object indices to this fragment.
  for (uint64_t ObjIndex : Fragment)
    FragmentMap[ObjIndex] = FragmentIndex;
}

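// A sketch of the allocation strategy with example values: each of the 8 bit
// positions within a byte acts as an independent arena, so up to eight byte
// arrays can share the same bytes. Allocating bit sets of 16 and 8 bits into
// an empty builder places both at byte offset 0, with masks 1 and 2
// respectively, using 16 bytes in total rather than 24.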
void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
                                uint64_t BitSize, uint64_t &AllocByteOffset,
                                uint8_t &AllocMask) {
  // Find the smallest current allocation.
  unsigned Bit = 0;
  for (unsigned I = 1; I != BitsPerByte; ++I)
    if (BitAllocs[I] < BitAllocs[Bit])
      Bit = I;

  AllocByteOffset = BitAllocs[Bit];

  // Add our size to it.
  unsigned ReqSize = AllocByteOffset + BitSize;
  BitAllocs[Bit] = ReqSize;
  if (Bytes.size() < ReqSize)
    Bytes.resize(ReqSize);

  // Set our bits.
  AllocMask = 1 << Bit;
  for (uint64_t B : Bits)
    Bytes[AllocByteOffset + B] |= AllocMask;
}

bool lowertypetests::isJumpTableCanonical(Function *F) {
  if (F->isDeclarationForLinker())
    return false;
  auto *CI = mdconst::extract_or_null<ConstantInt>(
      F->getParent()->getModuleFlag("CFI Canonical Jump Tables"));
  if (!CI || !CI->isZero())
    return true;
  return F->hasFnAttribute("cfi-canonical-jump-table");
}

namespace {

struct ByteArrayInfo {
  std::set<uint64_t> Bits;
  uint64_t BitSize;
  GlobalVariable *ByteArray;
  GlobalVariable *MaskGlobal;
  uint8_t *MaskPtr = nullptr;
};

/// A POD-like structure that we use to store a global reference together with
/// its metadata types. In this pass we frequently need to query the set of
/// metadata types referenced by a global, which at the IR level is an
/// expensive operation involving a map lookup; this data structure helps to
/// reduce the number of times we need to do this lookup.
class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
  friend TrailingObjects;

  GlobalObject *GO;
  size_t NTypes;

  // For functions: true if the jump table is canonical. This essentially means
  // whether the canonical address (i.e. the symbol table entry) of the
  // function is provided by the local jump table. This is normally the same as
  // whether the function is defined locally, but if canonical jump tables are
  // disabled by the user then the jump table never provides a canonical
  // definition.
  bool IsJumpTableCanonical;

  // For functions: true if this function is either defined or used in a
  // ThinLTO module and its jump table entry needs to be exported to ThinLTO
  // backends.
  bool IsExported;

public:
  static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
                                  bool IsJumpTableCanonical, bool IsExported,
                                  ArrayRef<MDNode *> Types) {
    auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
        totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
    GTM->GO = GO;
    GTM->NTypes = Types.size();
    GTM->IsJumpTableCanonical = IsJumpTableCanonical;
    GTM->IsExported = IsExported;
    llvm::copy(Types, GTM->getTrailingObjects());
    return GTM;
  }

  GlobalObject *getGlobal() const {
    return GO;
  }

  bool isJumpTableCanonical() const {
    return IsJumpTableCanonical;
  }

  bool isExported() const {
    return IsExported;
  }

  ArrayRef<MDNode *> types() const { return getTrailingObjects(NTypes); }
};

struct ICallBranchFunnel final
    : TrailingObjects<ICallBranchFunnel, GlobalTypeMember *> {
  static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI,
                                   ArrayRef<GlobalTypeMember *> Targets,
                                   unsigned UniqueId) {
    auto *Call = static_cast<ICallBranchFunnel *>(
        Alloc.Allocate(totalSizeToAlloc<GlobalTypeMember *>(Targets.size()),
                       alignof(ICallBranchFunnel)));
    Call->CI = CI;
    Call->UniqueId = UniqueId;
    Call->NTargets = Targets.size();
    llvm::copy(Targets, Call->getTrailingObjects());
    return Call;
  }

  CallInst *CI;
  ArrayRef<GlobalTypeMember *> targets() const {
    return getTrailingObjects(NTargets);
  }

  unsigned UniqueId;

private:
  size_t NTargets;
};

struct ScopedSaveAliaseesAndUsed {
  Module &M;
  SmallVector<GlobalValue *, 4> Used, CompilerUsed;
  std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases;
  std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs;

  // This function only removes functions from llvm.used and llvm.compiler.used.
  // We cannot remove global variables because they need to follow RAUW, as
  // they may be deleted by buildBitSetsFromGlobalVariables.
  void collectAndEraseUsedFunctions(Module &M,
                                    SmallVectorImpl<GlobalValue *> &Vec,
                                    bool CompilerUsed) {
    auto *GV = collectUsedGlobalVariables(M, Vec, CompilerUsed);
    if (!GV)
      return;
    // There's no API to only remove certain array elements from
    // llvm.used/llvm.compiler.used, so we remove all of them and add back only
    // the non-functions.
    GV->eraseFromParent();
    auto NonFuncBegin =
        std::stable_partition(Vec.begin(), Vec.end(), [](GlobalValue *GV) {
          return isa<Function>(GV);
        });
    if (CompilerUsed)
      appendToCompilerUsed(M, {NonFuncBegin, Vec.end()});
    else
      appendToUsed(M, {NonFuncBegin, Vec.end()});
    Vec.resize(NonFuncBegin - Vec.begin());
  }

  ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
    // The users of this class want to replace all function references except
    // for aliases and llvm.used/llvm.compiler.used with references to a jump
    // table. We avoid replacing aliases in order to avoid introducing a double
    // indirection (or an alias pointing to a declaration in ThinLTO mode), and
    // we avoid replacing llvm.used/llvm.compiler.used because these global
    // variables describe properties of the global, not the jump table (besides,
    // offset references to the jump table in llvm.used are invalid).
    // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly
    // indirect) users", so what we do is save the list of globals referenced by
    // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW
    // replace the aliasees and then set them back to their original values at
    // the end.
    collectAndEraseUsedFunctions(M, Used, false);
    collectAndEraseUsedFunctions(M, CompilerUsed, true);

    for (auto &GA : M.aliases()) {
      // FIXME: This should look past all aliases not just interposable ones,
      // see discussion on D65118.
      if (auto *F = dyn_cast<Function>(GA.getAliasee()->stripPointerCasts()))
        FunctionAliases.push_back({&GA, F});
    }

    for (auto &GI : M.ifuncs())
      if (auto *F = dyn_cast<Function>(GI.getResolver()->stripPointerCasts()))
        ResolverIFuncs.push_back({&GI, F});
  }

  ~ScopedSaveAliaseesAndUsed() {
    appendToUsed(M, Used);
    appendToCompilerUsed(M, CompilerUsed);

    for (auto P : FunctionAliases)
      P.first->setAliasee(P.second);

    for (auto P : ResolverIFuncs) {
      // This does not preserve pointer casts that may have been stripped by the
      // constructor, but the resolver's type is different from that of the
      // ifunc anyway.
      P.first->setResolver(P.second);
    }
  }
};

class LowerTypeTestsModule {
  Module &M;

  ModuleSummaryIndex *ExportSummary;
  const ModuleSummaryIndex *ImportSummary;
  // Set when the client has invoked this to simply drop all type test assume
  // sequences.
  DropTestKind DropTypeTests;

  Triple::ArchType Arch;
  Triple::OSType OS;
  Triple::ObjectFormatType ObjectFormat;

  // Determines which kind of Thumb jump table we generate. If arch is
  // either 'arm' or 'thumb' we need to find this out, because
  // selectJumpTableArmEncoding may decide to use Thumb in either case.
  bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;

  // Cache variable used by hasBranchTargetEnforcement().
  int HasBranchTargetEnforcement = -1;

  IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
  IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
  PointerType *PtrTy = PointerType::getUnqual(M.getContext());
  ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0);
  IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
  IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
  IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0);

  // Indirect function call index assignment counter for WebAssembly.
  uint64_t IndirectIndex = 1;

  // Mapping from type identifiers to the call sites that test them, as well as
  // whether the type identifier needs to be exported to ThinLTO backends as
  // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
  struct TypeIdUserInfo {
    std::vector<CallInst *> CallSites;
    bool IsExported = false;
  };
  DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;

  /// This structure describes how to lower type tests for a particular type
  /// identifier. It is either built directly from the global analysis (during
  /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
  /// identifier summaries and external symbol references (in ThinLTO backends).
  struct TypeIdLowering {
    TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;

    /// All except Unsat: the address of the last element within the combined
    /// global.
    Constant *OffsetedGlobal;

    /// ByteArray, Inline, AllOnes: log2 of the required global alignment
    /// relative to the start address.
    Constant *AlignLog2;

    /// ByteArray, Inline, AllOnes: one less than the size of the memory region
    /// covering members of this type identifier as a multiple of 2^AlignLog2.
    Constant *SizeM1;

    /// ByteArray: the byte array to test the address against.
    Constant *TheByteArray;

    /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
    Constant *BitMask;

    /// Inline: the bit mask to test the address against.
    Constant *InlineBits;
  };

  std::vector<ByteArrayInfo> ByteArrayInfos;

  Function *WeakInitializerFn = nullptr;

  GlobalVariable *GlobalAnnotation;
  DenseSet<Value *> FunctionAnnotations;

  bool shouldExportConstantsAsAbsoluteSymbols();
  uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
  TypeIdLowering importTypeId(StringRef TypeId);
  void importTypeTest(CallInst *CI);
  void importFunction(Function *F, bool isJumpTableCanonical);

  ByteArrayInfo *createByteArray(const BitSetInfo &BSI);
  void allocateByteArrays();
  Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
                          Value *BitOffset);
  void lowerTypeTestCalls(
      ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
      const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
  Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
                           const TypeIdLowering &TIL);

  void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
                                       ArrayRef<GlobalTypeMember *> Globals);
  Triple::ArchType
  selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
  bool hasBranchTargetEnforcement();
  unsigned getJumpTableEntrySize(Triple::ArchType JumpTableArch);
  InlineAsm *createJumpTableEntryAsm(Triple::ArchType JumpTableArch);
  void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
  void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
                                 ArrayRef<GlobalTypeMember *> Functions);
  void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
                                       ArrayRef<GlobalTypeMember *> Functions);
  void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
                                     ArrayRef<GlobalTypeMember *> Functions);
  void
  buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
                              ArrayRef<GlobalTypeMember *> Globals,
                              ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);

  void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT,
                                              bool IsJumpTableCanonical);
  void moveInitializerToModuleConstructor(GlobalVariable *GV);
  void findGlobalVariableUsersOf(Constant *C,
                                 SmallSetVector<GlobalVariable *, 8> &Out);

  void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions,
                       Triple::ArchType JumpTableArch);

  /// replaceCfiUses - Go through the uses list for this definition
  /// and make each use point to "V" instead of "this" when the use is outside
  /// the block. 'This's use list is expected to have at least one element.
  /// Unlike replaceAllUsesWith this function skips blockaddr and direct call
  /// uses.
  void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical);

  /// replaceDirectCalls - Go through the uses list for this definition and
  /// replace each use, which is a direct function call.
  void replaceDirectCalls(Value *Old, Value *New);

  bool isFunctionAnnotation(Value *V) const {
    return FunctionAnnotations.contains(V);
  }

  void maybeReplaceComdat(Function *F, StringRef OriginalName);

public:
  LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
                       ModuleSummaryIndex *ExportSummary,
                       const ModuleSummaryIndex *ImportSummary,
                       DropTestKind DropTypeTests);

  bool lower();

  // Lower the module using the action and summary passed as command line
  // arguments. For testing purposes only.
  static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
};
} // end anonymous namespace

/// Build a bit set for a list of offsets.
static BitSetInfo buildBitSet(ArrayRef<uint64_t> Offsets) {
  // Compute the byte offset of each address associated with this type
  // identifier.
  return BitSetBuilder(Offsets).build();
}

/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
/// Bits. This pattern matches to the bt instruction on x86.
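/// For example, if Bits is a 32-bit constant, a BitOffset of 35 wraps to bit
/// index 35 & 31 == 3, so the test masks with (1 << 3) and compares the
/// result against zero.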
static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
                                  Value *BitOffset) {
  auto BitsType = cast<IntegerType>(Bits->getType());
  unsigned BitWidth = BitsType->getBitWidth();

  BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
  Value *BitIndex =
      B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
  Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
  Value *MaskedBits = B.CreateAnd(Bits, BitMask);
  return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
}

ByteArrayInfo *LowerTypeTestsModule::createByteArray(const BitSetInfo &BSI) {
  // Create globals to stand in for byte arrays and masks. These never actually
  // get initialized; we RAUW and erase them later in allocateByteArrays() once
  // we know the offset and mask to use.
  auto ByteArrayGlobal = new GlobalVariable(
      M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
  auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true,
                                       GlobalValue::PrivateLinkage, nullptr);

  ByteArrayInfos.emplace_back();
  ByteArrayInfo *BAI = &ByteArrayInfos.back();

  BAI->Bits = BSI.Bits;
  BAI->BitSize = BSI.BitSize;
  BAI->ByteArray = ByteArrayGlobal;
  BAI->MaskGlobal = MaskGlobal;
  return BAI;
}

void LowerTypeTestsModule::allocateByteArrays() {
  llvm::stable_sort(ByteArrayInfos,
                    [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
                      return BAI1.BitSize > BAI2.BitSize;
                    });

  std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());

  ByteArrayBuilder BAB;
  for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
    ByteArrayInfo *BAI = &ByteArrayInfos[I];

    uint8_t Mask;
    BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);

    BAI->MaskGlobal->replaceAllUsesWith(
        ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), PtrTy));
    BAI->MaskGlobal->eraseFromParent();
    if (BAI->MaskPtr)
      *BAI->MaskPtr = Mask;
  }

  Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
  auto ByteArray =
      new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, ByteArrayConst);

  for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
    ByteArrayInfo *BAI = &ByteArrayInfos[I];

    Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
                        ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
    Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
        ByteArrayConst->getType(), ByteArray, Idxs);

    // Create an alias instead of RAUW'ing the gep directly. On x86 this
    // ensures that the pc-relative displacement is folded into the lea instead
    // of the test instruction getting another displacement.
    GlobalAlias *Alias = GlobalAlias::create(
        Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
    BAI->ByteArray->replaceAllUsesWith(Alias);
    BAI->ByteArray->eraseFromParent();
  }

  ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
                      BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
                      BAB.BitAllocs[6] + BAB.BitAllocs[7];
  ByteArraySizeBytes = BAB.Bytes.size();
}

/// Build a test that bit BitOffset is set in the type identifier that was
/// lowered to TIL, which must be either an Inline or a ByteArray.
Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
                                              const TypeIdLowering &TIL,
                                              Value *BitOffset) {
  if (TIL.TheKind == TypeTestResolution::Inline) {
    // If the bit set is sufficiently small, we can avoid a load by bit testing
    // a constant.
    return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
  } else {
    Constant *ByteArray = TIL.TheByteArray;
    if (AvoidReuse && !ImportSummary) {
      // Each use of the byte array uses a different alias. This makes the
      // backend less likely to reuse previously computed byte array addresses,
      // improving the security of the CFI mechanism based on this pass.
      // This won't work when importing because TheByteArray is external.
      ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
                                      "bits_use", ByteArray, &M);
    }

    Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
    Value *Byte = B.CreateLoad(Int8Ty, ByteAddr);

    Value *ByteAndMask =
        B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
    return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
  }
}

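// Returns true if V, viewed at constant offset COffset, is statically known
// to be a member of the type identifier TypeId. For example (illustrative),
// a direct reference to a global carrying type metadata !{i64 0, !"foo"} is
// a known member of "foo" at offset 0, as is a GEP into that global whose
// accumulated constant offset matches one of its annotated offsets.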
static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
                                Value *V, uint64_t COffset) {
  if (auto GV = dyn_cast<GlobalObject>(V)) {
    SmallVector<MDNode *, 2> Types;
    GV->getMetadata(LLVMContext::MD_type, Types);
    for (MDNode *Type : Types) {
      if (Type->getOperand(1) != TypeId)
        continue;
      uint64_t Offset =
          cast<ConstantInt>(
              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
              ->getZExtValue();
      if (COffset == Offset)
        return true;
    }
    return false;
  }

  if (auto GEP = dyn_cast<GEPOperator>(V)) {
    APInt APOffset(DL.getIndexSizeInBits(0), 0);
    bool Result = GEP->accumulateConstantOffset(DL, APOffset);
    if (!Result)
      return false;
    COffset += APOffset.getZExtValue();
    return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
  }

  if (auto Op = dyn_cast<Operator>(V)) {
    if (Op->getOpcode() == Instruction::BitCast)
      return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);

    if (Op->getOpcode() == Instruction::Select)
      return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
             isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
  }

  return false;
}

/// Lower an llvm.type.test call to its implementation. Returns the value to
/// replace the call with.
Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
                                               const TypeIdLowering &TIL) {
  // Delay lowering if the resolution is currently unknown.
  if (TIL.TheKind == TypeTestResolution::Unknown)
    return nullptr;
  if (TIL.TheKind == TypeTestResolution::Unsat)
    return ConstantInt::getFalse(M.getContext());

  Value *Ptr = CI->getArgOperand(0);
  const DataLayout &DL = M.getDataLayout();
  if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
    return ConstantInt::getTrue(M.getContext());

  BasicBlock *InitialBB = CI->getParent();

  IRBuilder<> B(CI);

  Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);

  Constant *OffsetedGlobalAsInt =
      ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
  if (TIL.TheKind == TypeTestResolution::Single)
    return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);

  // Here we compute `last element - address`. The reason why we do this
  // instead of computing `address - first element` is that it leads to a
  // slightly shorter instruction sequence on x86. Because it doesn't matter
  // how we do the subtraction on other architectures, we do so unconditionally.
  Value *PtrOffset = B.CreateSub(OffsetedGlobalAsInt, PtrAsInt);

  // We need to check that the offset both falls within our range and is
  // suitably aligned. We can check both properties at the same time by
  // performing a right rotate by log2(alignment) followed by an integer
  // comparison against the bitset size. The rotate will move the lower
  // order bits that need to be zero into the higher order bits of the
  // result, causing the comparison to fail if they are nonzero. The rotate
  // also conveniently gives us a bit offset to use during the load from
  // the bitset.
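  // For example, on a 64-bit target with AlignLog2 == 3, the aligned offset
  // 40 rotates right to 5 and passes whenever SizeM1 >= 5, while the
  // misaligned offset 41 rotates to 0x2000000000000005 and always fails the
  // unsigned comparison below.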
  Value *BitOffset = B.CreateIntrinsic(IntPtrTy, Intrinsic::fshr,
                                       {PtrOffset, PtrOffset, TIL.AlignLog2});

  Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1);

  // If the bit set is all ones, testing against it is unnecessary.
  if (TIL.TheKind == TypeTestResolution::AllOnes)
    return OffsetInRange;

  // See if the intrinsic is used in the following common pattern:
  //   br(llvm.type.test(...), thenbb, elsebb)
  // where nothing happens between the type test and the br.
  // If so, create slightly simpler IR.
  if (CI->hasOneUse())
    if (auto *Br = dyn_cast<BranchInst>(*CI->user_begin()))
      if (CI->getNextNode() == Br) {
        BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator());
        BasicBlock *Else = Br->getSuccessor(1);
        BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange);
        NewBr->setMetadata(LLVMContext::MD_prof,
                           Br->getMetadata(LLVMContext::MD_prof));
        ReplaceInstWithInst(InitialBB->getTerminator(), NewBr);

        // Update phis in Else resulting from InitialBB being split.
        for (auto &Phi : Else->phis())
          Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB);

        IRBuilder<> ThenB(CI);
        return createBitSetTest(ThenB, TIL, BitOffset);
      }

  MDBuilder MDB(M.getContext());
  IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false,
                                              MDB.createLikelyBranchWeights()));

  // Now that we know that the offset is in range and aligned, load the
  // appropriate bit from the bitset.
  Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);

  // The value we want is 0 if we came directly from the initial block
  // (having failed the range or alignment checks), or the loaded bit if
  // we came from the block in which we loaded it.
  B.SetInsertPoint(CI);
  PHINode *P = B.CreatePHI(Int1Ty, 2);
  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
  P->addIncoming(Bit, ThenB.GetInsertBlock());
  return P;
}

/// Given a disjoint set of type identifiers and globals, lay out the globals,
/// build the bit sets and lower the llvm.type.test calls.
void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
  // Build a new global with the combined contents of the referenced globals.
  // This global is a struct whose even-indexed elements contain the original
  // contents of the referenced globals and whose odd-indexed elements contain
  // any padding required to align the next element to the next power of 2 plus
  // any additional padding required to meet its alignment requirements.
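  // For example (illustrative sizes): a 4-byte global followed by an 8-byte
  // global with 8-byte alignment is laid out as { <init1>, [4 x i8]
  // zeroinitializer, <init2> }, placing the second global at offset 8.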
  std::vector<Constant *> GlobalInits;
  const DataLayout &DL = M.getDataLayout();
  DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
  Align MaxAlign;
  uint64_t CurOffset = 0;
  uint64_t DesiredPadding = 0;
  for (GlobalTypeMember *G : Globals) {
    auto *GV = cast<GlobalVariable>(G->getGlobal());
    Align Alignment =
        DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
    MaxAlign = std::max(MaxAlign, Alignment);
    uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, Alignment);
    GlobalLayout[G] = GVOffset;
    if (GVOffset != 0) {
      uint64_t Padding = GVOffset - CurOffset;
      GlobalInits.push_back(
          ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
    }

    GlobalInits.push_back(GV->getInitializer());
    uint64_t InitSize = GV->getGlobalSize(DL);
    CurOffset = GVOffset + InitSize;

    // Compute the amount of padding that we'd like for the next element.
    DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize;

    // Experiments of different caps with Chromium on both x64 and ARM64
    // have shown that the 32-byte cap generates the smallest binary on
    // both platforms while different caps yield similar performance.
    // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html)
    if (DesiredPadding > 32)
      DesiredPadding = alignTo(InitSize, 32) - InitSize;
  }

  Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
  auto *CombinedGlobal =
      new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, NewInit);
  CombinedGlobal->setAlignment(MaxAlign);

  StructType *NewTy = cast<StructType>(NewInit->getType());
  lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout);

  // Build aliases pointing to offsets into the combined global for each
  // global from which we built the combined global, and replace references
  // to the original globals with references to the aliases.
  for (unsigned I = 0; I != Globals.size(); ++I) {
    GlobalVariable *GV = cast<GlobalVariable>(Globals[I]->getGlobal());

    // Multiply by 2 to account for padding elements.
    Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
                                      ConstantInt::get(Int32Ty, I * 2)};
    Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr(
        NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
    assert(GV->getType()->getAddressSpace() == 0);
    GlobalAlias *GAlias =
        GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(),
                            "", CombinedGlobalElemPtr, &M);
    GAlias->setVisibility(GV->getVisibility());
    GAlias->takeName(GV);
    GV->replaceAllUsesWith(GAlias);
    GV->eraseFromParent();
  }
}

bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() {
  return (Arch == Triple::x86 || Arch == Triple::x86_64) &&
         ObjectFormat == Triple::ELF;
}

/// Export the given type identifier so that ThinLTO backends may import it.
/// Type identifiers are exported by adding coarse-grained information about how
/// to test the type identifier to the summary, and creating symbols in the
/// object file (aliases and absolute symbols) containing fine-grained
/// information about the type identifier.
///
/// Returns a pointer to the location in which to store the bitmask, if
/// applicable.
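/// For example, a ByteArray resolution for type identifier "foo" creates the
/// hidden aliases __typeid_foo_global_addr and __typeid_foo_byte_array; when
/// constants are exported as absolute symbols (x86 ELF), align, size_m1 and
/// bit_mask become absolute symbols as well, and otherwise they are stored
/// directly in the summary.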
uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId,
                                            const TypeIdLowering &TIL) {
  TypeTestResolution &TTRes =
      ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
  TTRes.TheKind = TIL.TheKind;

  auto ExportGlobal = [&](StringRef Name, Constant *C) {
    GlobalAlias *GA =
        GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
                            "__typeid_" + TypeId + "_" + Name, C, &M);
    GA->setVisibility(GlobalValue::HiddenVisibility);
  };

  auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) {
    if (shouldExportConstantsAsAbsoluteSymbols())
      ExportGlobal(Name, ConstantExpr::getIntToPtr(C, PtrTy));
    else
      Storage = cast<ConstantInt>(C)->getZExtValue();
  };

  if (TIL.TheKind != TypeTestResolution::Unsat)
    ExportGlobal("global_addr", TIL.OffsetedGlobal);

  if (TIL.TheKind == TypeTestResolution::ByteArray ||
      TIL.TheKind == TypeTestResolution::Inline ||
      TIL.TheKind == TypeTestResolution::AllOnes) {
    ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2);
    ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1);

    uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1;
    if (TIL.TheKind == TypeTestResolution::Inline)
      TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6;
    else
      TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32;
  }

  if (TIL.TheKind == TypeTestResolution::ByteArray) {
    ExportGlobal("byte_array", TIL.TheByteArray);
    if (shouldExportConstantsAsAbsoluteSymbols())
      ExportGlobal("bit_mask", TIL.BitMask);
    else
      return &TTRes.BitMask;
  }

  if (TIL.TheKind == TypeTestResolution::Inline)
    ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits);

  return nullptr;
}

LowerTypeTestsModule::TypeIdLowering
LowerTypeTestsModule::importTypeId(StringRef TypeId) {
  const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId);
  if (!TidSummary)
    return {}; // Unsat: no globals match this type id.
  const TypeTestResolution &TTRes = TidSummary->TTRes;

  TypeIdLowering TIL;
  TIL.TheKind = TTRes.TheKind;

  auto ImportGlobal = [&](StringRef Name) {
    // Give the global a type of length 0 so that it cannot be assumed not to
    // alias any other global.
    GlobalVariable *GV = M.getOrInsertGlobal(
        ("__typeid_" + TypeId + "_" + Name).str(), Int8Arr0Ty);
    GV->setVisibility(GlobalValue::HiddenVisibility);
    return GV;
  };

  auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth,
                            Type *Ty) {
    if (!shouldExportConstantsAsAbsoluteSymbols()) {
      Constant *C =
          ConstantInt::get(isa<IntegerType>(Ty) ? Ty : Int64Ty, Const);
      if (!isa<IntegerType>(Ty))
        C = ConstantExpr::getIntToPtr(C, Ty);
      return C;
    }

    Constant *C = ImportGlobal(Name);
    auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
    if (isa<IntegerType>(Ty))
      C = ConstantExpr::getPtrToInt(C, Ty);
    if (GV->getMetadata(LLVMContext::MD_absolute_symbol))
      return C;

    auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
      auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
      auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
      GV->setMetadata(LLVMContext::MD_absolute_symbol,
                      MDNode::get(M.getContext(), {MinC, MaxC}));
    };
    if (AbsWidth == IntPtrTy->getBitWidth()) {
      uint64_t AllOnes = IntPtrTy->getBitMask();
      SetAbsRange(AllOnes, AllOnes); // Full set.
    } else {
      SetAbsRange(0, 1ull << AbsWidth);
    }
    return C;
  };

  if (TIL.TheKind != TypeTestResolution::Unsat) {
    auto *GV = ImportGlobal("global_addr");
    // This is either a vtable (in .data.rel.ro) or a jump table (in .text).
    // Either way it's expected to be in the low 2 GiB, so set the small code
    // model.
    //
    // For .data.rel.ro, we currently place all such sections in the low 2 GiB
    // [1], and for .text the sections are expected to be in the low 2 GiB
    // under the small and medium code models [2] and this pass only supports
    // those code models (e.g. jump tables use jmp instead of movabs/jmp).
    //
    // [1] https://github.com/llvm/llvm-project/pull/137742
    // [2] https://maskray.me/blog/2023-05-14-relocation-overflow-and-code-models
    GV->setCodeModel(CodeModel::Small);
    TIL.OffsetedGlobal = GV;
  }

  if (TIL.TheKind == TypeTestResolution::ByteArray ||
      TIL.TheKind == TypeTestResolution::Inline ||
      TIL.TheKind == TypeTestResolution::AllOnes) {
    TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, IntPtrTy);
    TIL.SizeM1 =
        ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy);
  }

  if (TIL.TheKind == TypeTestResolution::ByteArray) {
    TIL.TheByteArray = ImportGlobal("byte_array");
    TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, PtrTy);
  }

  if (TIL.TheKind == TypeTestResolution::Inline)
    TIL.InlineBits = ImportConstant(
        "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth,
        TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);

  return TIL;
}

void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
  auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
  if (!TypeIdMDVal)
    report_fatal_error("Second argument of llvm.type.test must be metadata");

  auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
  // If this is a local unpromoted type, which doesn't have a metadata string,
  // treat as Unknown and delay lowering, so that we can still utilize it for
  // later optimizations.
  if (!TypeIdStr)
    return;

  TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
  Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
  if (Lowered) {
    CI->replaceAllUsesWith(Lowered);
    CI->eraseFromParent();
  }
}

void LowerTypeTestsModule::maybeReplaceComdat(Function *F,
                                              StringRef OriginalName) {
  // For COFF we should also rename the comdat if this function also
  // happens to be the key function. Even if the comdat name changes, this
  // should still be fine since comdat and symbol resolution happens
  // before LTO, so all symbols which would prevail have been selected.
  if (F->hasComdat() && ObjectFormat == Triple::COFF &&
      F->getComdat()->getName() == OriginalName) {
    Comdat *OldComdat = F->getComdat();
    Comdat *NewComdat = M.getOrInsertComdat(F->getName());
    for (GlobalObject &GO : M.global_objects()) {
      if (GO.getComdat() == OldComdat)
        GO.setComdat(NewComdat);
    }
  }
}

// ThinLTO backend: the function F has a jump table entry; update this module
// accordingly. isJumpTableCanonical describes the type of the jump table entry.
void LowerTypeTestsModule::importFunction(Function *F,
                                          bool isJumpTableCanonical) {
  assert(F->getType()->getAddressSpace() == 0);

  GlobalValue::VisibilityTypes Visibility = F->getVisibility();
  std::string Name = std::string(F->getName());

  if (F->isDeclarationForLinker() && isJumpTableCanonical) {
    // Non-dso_local functions may be overridden at run time,
    // so don't short-circuit them.
    if (F->isDSOLocal()) {
      Function *RealF = Function::Create(F->getFunctionType(),
                                         GlobalValue::ExternalLinkage,
                                         F->getAddressSpace(),
                                         Name + ".cfi", &M);
      RealF->setVisibility(GlobalVariable::HiddenVisibility);
      replaceDirectCalls(F, RealF);
    }
    return;
  }

  Function *FDecl;
  if (!isJumpTableCanonical) {
    // Either a declaration of an external function or a reference to a locally
    // defined jump table.
    FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
                             F->getAddressSpace(), Name + ".cfi_jt", &M);
    FDecl->setVisibility(GlobalValue::HiddenVisibility);
  } else {
    F->setName(Name + ".cfi");
    maybeReplaceComdat(F, Name);
    FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
                             F->getAddressSpace(), Name, &M);
    FDecl->setVisibility(Visibility);
    Visibility = GlobalValue::HiddenVisibility;
    // Update aliases pointing to this function to also include the ".cfi"
    // suffix. We expect the jump table entry to point either to the real
    // function or to an alias; redirect all other users to the jump table
    // entry.
    for (auto &U : F->uses()) {
      if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
        std::string AliasName = A->getName().str() + ".cfi";
        Function *AliasDecl = Function::Create(
            F->getFunctionType(), GlobalValue::ExternalLinkage,
            F->getAddressSpace(), "", &M);
        AliasDecl->takeName(A);
        A->replaceAllUsesWith(AliasDecl);
        A->setName(AliasName);
      }
    }
  }

  if (F->hasExternalWeakLinkage())
    replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical);
  else
    replaceCfiUses(F, FDecl, isJumpTableCanonical);

  // Set visibility late because it's used in replaceCfiUses() to determine
  // whether uses need to be replaced.
  F->setVisibility(Visibility);
}

static auto
buildBitSets(ArrayRef<Metadata *> TypeIds,
             const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
  DenseMap<Metadata *, SmallVector<uint64_t, 16>> OffsetsByTypeID;
  // Pre-populate the map with interesting type identifiers.
  for (Metadata *TypeId : TypeIds)
    OffsetsByTypeID[TypeId];
  for (const auto &[Mem, MemOff] : GlobalLayout) {
    for (MDNode *Type : Mem->types()) {
      auto It = OffsetsByTypeID.find(Type->getOperand(1));
      if (It == OffsetsByTypeID.end())
        continue;
      uint64_t Offset =
          cast<ConstantInt>(
              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
              ->getZExtValue();
      It->second.push_back(MemOff + Offset);
    }
  }

  SmallVector<std::pair<Metadata *, BitSetInfo>> BitSets;
  BitSets.reserve(TypeIds.size());
  for (Metadata *TypeId : TypeIds) {
    BitSets.emplace_back(TypeId, buildBitSet(OffsetsByTypeID[TypeId]));
    LLVM_DEBUG({
      if (auto MDS = dyn_cast<MDString>(TypeId))
        dbgs() << MDS->getString() << ": ";
      else
        dbgs() << "<unnamed>: ";
      BitSets.back().second.print(dbgs());
    });
  }

  return BitSets;
}

void LowerTypeTestsModule::lowerTypeTestCalls(
    ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
    const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
  // For each type identifier in this disjoint set...
  for (const auto &[TypeId, BSI] : buildBitSets(TypeIds, GlobalLayout)) {
    ByteArrayInfo *BAI = nullptr;
    TypeIdLowering TIL;

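    // OffsetedGlobal points at the last member of the region: e.g. for a bit
    // set with ByteOffset 8, AlignLog2 3 and BitSize 3, it addresses byte
    // offset 8 + ((3 - 1) << 3) == 24 within the combined global.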
    uint64_t GlobalOffset =
        BSI.ByteOffset + ((BSI.BitSize - 1) << BSI.AlignLog2);
    TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
        Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, GlobalOffset));
    TIL.AlignLog2 = ConstantInt::get(IntPtrTy, BSI.AlignLog2);
    TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
| 1202 | if (BSI.isAllOnes()) { |
| 1203 | TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single |
| 1204 | : TypeTestResolution::AllOnes; |
| 1205 | } else if (BSI.BitSize <= IntPtrTy->getBitWidth()) { |
| 1206 | TIL.TheKind = TypeTestResolution::Inline; |
| 1207 | uint64_t InlineBits = 0; |
| 1208 | for (auto Bit : BSI.Bits) |
| 1209 | InlineBits |= uint64_t(1) << Bit; |
| 1210 | if (InlineBits == 0) |
| 1211 | TIL.TheKind = TypeTestResolution::Unsat; |
| 1212 | else |
| 1213 | TIL.InlineBits = ConstantInt::get( |
| 1214 | Ty: (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, V: InlineBits); |
| 1215 | } else { |
| 1216 | TIL.TheKind = TypeTestResolution::ByteArray; |
| 1217 | ++NumByteArraysCreated; |
| 1218 | BAI = createByteArray(BSI); |
| 1219 | TIL.TheByteArray = BAI->ByteArray; |
| 1220 | TIL.BitMask = BAI->MaskGlobal; |
| 1221 | } |
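| | |
| | // A worked example (illustrative): member offsets {0, 16, 32} with 16-byte |
| | // alignment give AlignLog2 = 4 and bits {0, 1, 2}; BitSize is 3 and every |
| | // bit is set, which selects AllOnes. Offsets {0, 32} would instead give |
| | // bits {0, 2} and fall into the Inline case on a 64-bit target. |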
| 1222 | |
| 1223 | TypeIdUserInfo &TIUI = TypeIdUsers[TypeId]; |
| 1224 | |
| 1225 | if (TIUI.IsExported) { |
| 1226 | uint8_t *MaskPtr = exportTypeId(TypeId: cast<MDString>(Val: TypeId)->getString(), TIL); |
| 1227 | if (BAI) |
| 1228 | BAI->MaskPtr = MaskPtr; |
| 1229 | } |
| 1230 | |
| 1231 | // Lower each call to llvm.type.test for this type identifier. |
| 1232 | for (CallInst *CI : TIUI.CallSites) { |
| 1233 | ++NumTypeTestCallsLowered; |
| 1234 | Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); |
| 1235 | if (Lowered) { |
| 1236 | CI->replaceAllUsesWith(V: Lowered); |
| 1237 | CI->eraseFromParent(); |
| 1238 | } |
| 1239 | } |
| 1240 | } |
| 1241 | } |
| 1242 | |
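| | // Verify that a !type node attached to GO is well-formed. Each node has |
| | // the shape { offset, type identifier }, e.g. (illustrative IR): |
| | //   @vt = constant [3 x ptr] [...], !type !0 |
| | //   !0 = !{i64 16, !"_ZTS1A"} |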
| 1243 | void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { |
| 1244 | if (Type->getNumOperands() != 2) |
| 1245 | report_fatal_error(reason: "All operands of type metadata must have 2 elements" ); |
| 1246 | |
| 1247 | if (GO->isThreadLocal()) |
| 1248 | report_fatal_error(reason: "Bit set element may not be thread-local" ); |
| 1249 | if (isa<GlobalVariable>(Val: GO) && GO->hasSection()) |
| 1250 | report_fatal_error( |
| 1251 | reason: "A member of a type identifier may not have an explicit section" ); |
| 1252 | |
| 1253 | // FIXME: We previously checked that global var member of a type identifier |
| 1254 | // must be a definition, but the IR linker may leave type metadata on |
| 1255 | // declarations. We should restore this check after fixing PR31759. |
| 1256 | |
| 1257 | auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0)); |
| 1258 | if (!OffsetConstMD) |
| 1259 | report_fatal_error(reason: "Type offset must be a constant" ); |
| 1260 | auto OffsetInt = dyn_cast<ConstantInt>(Val: OffsetConstMD->getValue()); |
| 1261 | if (!OffsetInt) |
| 1262 | report_fatal_error(reason: "Type offset must be an integer constant" ); |
| 1263 | } |
| 1264 | |
| 1265 | static const unsigned kX86JumpTableEntrySize = 8; |
| 1266 | static const unsigned kX86IBTJumpTableEntrySize = 16; |
| 1267 | static const unsigned kARMJumpTableEntrySize = 4; |
| 1268 | static const unsigned kARMBTIJumpTableEntrySize = 8; |
| 1269 | static const unsigned kARMv6MJumpTableEntrySize = 16; |
| 1270 | static const unsigned kRISCVJumpTableEntrySize = 8; |
| 1271 | static const unsigned kLOONGARCH64JumpTableEntrySize = 8; |
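| | // Every entry size above is a power of two, which the bit-set math relies |
| | // on: e.g. on x86 a plain entry is a 5-byte jmp padded to 8 bytes with |
| | // int3, and with IBT an endbr is prepended and the entry is padded to 16 |
| | // (see createJumpTableEntryAsm below). |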
| 1272 | |
| 1273 | bool LowerTypeTestsModule::hasBranchTargetEnforcement() { |
| 1274 | if (HasBranchTargetEnforcement == -1) { |
| 1275 | // First time this query has been called. Find out the answer by checking |
| 1276 | // the module flags. |
| 1277 | if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( |
| 1278 | MD: M.getModuleFlag(Key: "branch-target-enforcement" ))) |
| 1279 | HasBranchTargetEnforcement = !BTE->isZero(); |
| 1280 | else |
| 1281 | HasBranchTargetEnforcement = 0; |
| 1282 | } |
| 1283 | return HasBranchTargetEnforcement; |
| 1284 | } |
| 1285 | |
| 1286 | unsigned |
| 1287 | LowerTypeTestsModule::getJumpTableEntrySize(Triple::ArchType JumpTableArch) { |
| 1288 | switch (JumpTableArch) { |
| 1289 | case Triple::x86: |
| 1290 | case Triple::x86_64: |
| 1291 | if (const auto *MD = mdconst::extract_or_null<ConstantInt>( |
| 1292 | MD: M.getModuleFlag(Key: "cf-protection-branch" ))) |
| 1293 | if (MD->getZExtValue()) |
| 1294 | return kX86IBTJumpTableEntrySize; |
| 1295 | return kX86JumpTableEntrySize; |
| 1296 | case Triple::arm: |
| 1297 | return kARMJumpTableEntrySize; |
| 1298 | case Triple::thumb: |
| 1299 | if (CanUseThumbBWJumpTable) { |
| 1300 | if (hasBranchTargetEnforcement()) |
| 1301 | return kARMBTIJumpTableEntrySize; |
| 1302 | return kARMJumpTableEntrySize; |
| 1303 | } else { |
| 1304 | return kARMv6MJumpTableEntrySize; |
| 1305 | } |
| 1306 | case Triple::aarch64: |
| 1307 | if (hasBranchTargetEnforcement()) |
| 1308 | return kARMBTIJumpTableEntrySize; |
| 1309 | return kARMJumpTableEntrySize; |
| 1310 | case Triple::riscv32: |
| 1311 | case Triple::riscv64: |
| 1312 | return kRISCVJumpTableEntrySize; |
| 1313 | case Triple::loongarch64: |
| 1314 | return kLOONGARCH64JumpTableEntrySize; |
| 1315 | default: |
| 1316 | report_fatal_error(reason: "Unsupported architecture for jump tables" ); |
| 1317 | } |
| 1318 | } |
| 1319 | |
| 1320 | // Create an inline asm constant representing a jump table entry for the |
| 1321 | // target: an instruction sequence containing a relative branch to the |
| 1322 | // entry's target function, which is passed to the asm as operand $0. |
| 1323 | InlineAsm * |
| 1324 | LowerTypeTestsModule::createJumpTableEntryAsm(Triple::ArchType JumpTableArch) { |
| 1325 | std::string Asm; |
| 1326 | raw_string_ostream AsmOS(Asm); |
| 1327 | |
| 1328 | if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) { |
| 1329 | bool Endbr = false; |
| 1330 | if (const auto *MD = mdconst::extract_or_null<ConstantInt>( |
| 1331 | MD: M.getModuleFlag(Key: "cf-protection-branch" ))) |
| 1332 | Endbr = !MD->isZero(); |
| 1333 | if (Endbr) |
| 1334 | AsmOS << (JumpTableArch == Triple::x86 ? "endbr32\n" : "endbr64\n" ); |
| 1335 | AsmOS << "jmp ${0:c}@plt\n" ; |
| 1336 | if (Endbr) |
| 1337 | AsmOS << ".balign 16, 0xcc\n" ; |
| 1338 | else |
| 1339 | AsmOS << "int3\nint3\nint3\n" ; |
| 1340 | } else if (JumpTableArch == Triple::arm) { |
| 1341 | AsmOS << "b $0\n" ; |
| 1342 | } else if (JumpTableArch == Triple::aarch64) { |
| 1343 | if (hasBranchTargetEnforcement()) |
| 1344 | AsmOS << "bti c\n" ; |
| 1345 | AsmOS << "b $0\n" ; |
| 1346 | } else if (JumpTableArch == Triple::thumb) { |
| 1347 | if (!CanUseThumbBWJumpTable) { |
| 1348 | // In Armv6-M, this sequence will generate a branch without corrupting |
| 1349 | // any registers. We use two stack words; in the second, we construct the |
| 1350 | // address we'll pop into pc, and the first is used to save and restore |
| 1351 | // r0 which we use as a temporary register. |
| 1352 | // |
| 1353 | // To support position-independent use cases, the offset of the target |
| 1354 | // function is stored as a relative offset (which will expand into an |
| 1355 | // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other |
| 1356 | // object file types), and added to pc after we load it. (The alternative |
| 1357 | // B.W is automatically pc-relative.) |
| 1358 | // |
| 1359 | // There are five 16-bit Thumb instructions here, so the .balign 4 adds a |
| 1360 | // sixth halfword of padding, and then the offset consumes a further 4 |
| 1361 | // bytes, for a total of 16, which is very convenient since entries in |
| 1362 | // this jump table need to have power-of-two size. |
| 1363 | AsmOS << "push {r0,r1}\n" |
| 1364 | << "ldr r0, 1f\n" |
| 1365 | << "0: add r0, r0, pc\n" |
| 1366 | << "str r0, [sp, #4]\n" |
| 1367 | << "pop {r0,pc}\n" |
| 1368 | << ".balign 4\n" |
| 1369 | << "1: .word $0 - (0b + 4)\n" ; |
| 1370 | } else { |
| 1371 | if (hasBranchTargetEnforcement()) |
| 1372 | AsmOS << "bti\n" ; |
| 1373 | AsmOS << "b.w $0\n" ; |
| 1374 | } |
| 1375 | } else if (JumpTableArch == Triple::riscv32 || |
| 1376 | JumpTableArch == Triple::riscv64) { |
| 1377 | AsmOS << "tail $0@plt\n" ; |
| 1378 | } else if (JumpTableArch == Triple::loongarch64) { |
| 1379 | AsmOS << "pcalau12i $$t0, %pc_hi20($0)\n" |
| 1380 | << "jirl $$r0, $$t0, %pc_lo12($0)\n" ; |
| 1381 | } else { |
| 1382 | report_fatal_error(reason: "Unsupported architecture for jump tables" ); |
| 1383 | } |
| 1384 | |
| 1385 | return InlineAsm::get( |
| 1386 | Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), Params: PtrTy, isVarArg: false), |
| 1387 | AsmString: AsmOS.str(), Constraints: "s" , |
| 1388 | /*hasSideEffects=*/true); |
| 1389 | } |
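| | |
| | // For example, on x86-64 without IBT each entry produced above is |
| | // materialized in createJumpTable as a call along the lines of this IR |
| | // sketch: |
| | //   call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", |
| | //       "s"(ptr @f.cfi) |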
| 1390 | |
| 1391 | /// Given a disjoint set of type identifiers and functions, build the bit sets |
| 1392 | /// and lower the llvm.type.test calls, architecture dependently. |
| 1393 | void LowerTypeTestsModule::buildBitSetsFromFunctions( |
| 1394 | ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { |
| 1395 | if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || |
| 1396 | Arch == Triple::thumb || Arch == Triple::aarch64 || |
| 1397 | Arch == Triple::riscv32 || Arch == Triple::riscv64 || |
| 1398 | Arch == Triple::loongarch64) |
| 1399 | buildBitSetsFromFunctionsNative(TypeIds, Functions); |
| 1400 | else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) |
| 1401 | buildBitSetsFromFunctionsWASM(TypeIds, Functions); |
| 1402 | else |
| 1403 | report_fatal_error(reason: "Unsupported architecture for jump tables" ); |
| 1404 | } |
| 1405 | |
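| | // Move GV's initializer into a store inside a module constructor, so that |
| | // later rewrites may make it refer to values (such as the F ? JT : 0 |
| | // select built in replaceWeakDeclarationWithJumpTablePtr) that cannot |
| | // appear in a constant initializer. Immediate effect (IR sketch): |
| | //   @gv = global ptr @weak_fn |
| | // becomes |
| | //   @gv = global ptr null |
| | // with "store ptr @weak_fn, ptr @gv" emitted into __cfi_global_var_init. |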
| 1406 | void LowerTypeTestsModule::moveInitializerToModuleConstructor( |
| 1407 | GlobalVariable *GV) { |
| 1408 | if (WeakInitializerFn == nullptr) { |
| 1409 | WeakInitializerFn = Function::Create( |
| 1410 | Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), |
| 1411 | /* IsVarArg */ isVarArg: false), |
| 1412 | Linkage: GlobalValue::InternalLinkage, |
| 1413 | AddrSpace: M.getDataLayout().getProgramAddressSpace(), |
| 1414 | N: "__cfi_global_var_init" , M: &M); |
| 1415 | BasicBlock *BB = |
| 1416 | BasicBlock::Create(Context&: M.getContext(), Name: "entry" , Parent: WeakInitializerFn); |
| 1417 | ReturnInst::Create(C&: M.getContext(), InsertAtEnd: BB); |
| 1418 | WeakInitializerFn->setSection( |
| 1419 | ObjectFormat == Triple::MachO |
| 1420 | ? "__TEXT,__StaticInit,regular,pure_instructions" |
| 1421 | : ".text.startup" ); |
| 1422 | // This code is equivalent to relocation application, and should run at the |
| 1423 | // earliest possible time (i.e. with the highest priority). |
| 1424 | appendToGlobalCtors(M, F: WeakInitializerFn, /* Priority */ 0); |
| 1425 | } |
| 1426 | |
| 1427 | IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator()); |
| 1428 | GV->setConstant(false); |
| 1429 | IRB.CreateAlignedStore(Val: GV->getInitializer(), Ptr: GV, Align: GV->getAlign()); |
| 1430 | GV->setInitializer(Constant::getNullValue(Ty: GV->getValueType())); |
| 1431 | } |
| 1432 | |
| 1433 | void LowerTypeTestsModule::findGlobalVariableUsersOf( |
| 1434 | Constant *C, SmallSetVector<GlobalVariable *, 8> &Out) { |
| 1435 | for (auto *U : C->users()) { |
| 1436 | if (auto *GV = dyn_cast<GlobalVariable>(Val: U)) |
| 1437 | Out.insert(X: GV); |
| 1438 | else if (auto *C2 = dyn_cast<Constant>(Val: U)) |
| 1439 | findGlobalVariableUsersOf(C: C2, Out); |
| 1440 | } |
| 1441 | } |
| 1442 | |
| 1443 | // Replace all uses of F with (F ? JT : 0). |
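| | // Conceptually, each use becomes (IR sketch): |
| | //   %live = icmp ne ptr @f, null |
| | //   %p = select i1 %live, ptr @f.cfi_jt, ptr null |
| | // so the jump table address is only produced when the extern_weak symbol |
| | // turns out to be defined at link time. |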
| 1444 | void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( |
| 1445 | Function *F, Constant *JT, bool IsJumpTableCanonical) { |
| 1446 | // The target expression cannot appear in a constant initializer on most |
| 1447 | // (all?) targets. Switch to a runtime initializer. |
| 1448 | SmallSetVector<GlobalVariable *, 8> GlobalVarUsers; |
| 1449 | findGlobalVariableUsersOf(C: F, Out&: GlobalVarUsers); |
| 1450 | for (auto *GV : GlobalVarUsers) { |
| 1451 | if (GV == GlobalAnnotation) |
| 1452 | continue; |
| 1453 | moveInitializerToModuleConstructor(GV); |
| 1454 | } |
| 1455 | |
| 1456 | // Cannot RAUW F with an expression that uses F. Replace it with a |
| 1457 | // temporary placeholder first. |
| 1458 | Function *PlaceholderFn = |
| 1459 | Function::Create(Ty: F->getFunctionType(), Linkage: GlobalValue::ExternalWeakLinkage, |
| 1460 | AddrSpace: F->getAddressSpace(), N: "" , M: &M); |
| 1461 | replaceCfiUses(Old: F, New: PlaceholderFn, IsJumpTableCanonical); |
| 1462 | |
| 1463 | convertUsersOfConstantsToInstructions(Consts: PlaceholderFn); |
| 1464 | // Don't use a range-based loop, because the use list will be modified. |
| 1465 | while (!PlaceholderFn->use_empty()) { |
| 1466 | Use &U = *PlaceholderFn->use_begin(); |
| 1467 | auto *InsertPt = dyn_cast<Instruction>(Val: U.getUser()); |
| 1468 | assert(InsertPt && "Non-instruction users should have been eliminated" ); |
| 1469 | auto *PN = dyn_cast<PHINode>(Val: InsertPt); |
| 1470 | if (PN) |
| 1471 | InsertPt = PN->getIncomingBlock(U)->getTerminator(); |
| 1472 | IRBuilder Builder(InsertPt); |
| 1473 | Value *ICmp = Builder.CreateICmp(P: CmpInst::ICMP_NE, LHS: F, |
| 1474 | RHS: Constant::getNullValue(Ty: F->getType())); |
| 1475 | Value *Select = Builder.CreateSelect(C: ICmp, True: JT, |
| 1476 | False: Constant::getNullValue(Ty: F->getType())); |
| 1477 | |
| 1478 | if (auto *SI = dyn_cast<SelectInst>(Val: Select)) |
| 1479 | setExplicitlyUnknownBranchWeightsIfProfiled(I&: *SI, DEBUG_TYPE); |
| 1480 | // For phi nodes, we need to update the incoming value for all operands |
| 1481 | // with the same predecessor. |
| 1482 | if (PN) |
| 1483 | PN->setIncomingValueForBlock(BB: InsertPt->getParent(), V: Select); |
| 1484 | else |
| 1485 | U.set(Select); |
| 1486 | } |
| 1487 | PlaceholderFn->eraseFromParent(); |
| 1488 | } |
| 1489 | |
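| | // Decide whether F is compiled as Thumb code: an explicit "+thumb-mode" or |
| | // "-thumb-mode" entry in the "target-features" attribute wins (e.g. |
| | // "target-features"="+thumb-mode" marks a Thumb function even in an |
| | // arm-triple module); otherwise fall back to the module architecture. |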
| 1490 | static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) { |
| 1491 | Attribute TFAttr = F->getFnAttribute(Kind: "target-features" ); |
| 1492 | if (TFAttr.isValid()) { |
| 1493 | SmallVector<StringRef, 6> Features; |
| 1494 | TFAttr.getValueAsString().split(A&: Features, Separator: ','); |
| 1495 | for (StringRef Feature : Features) { |
| 1496 | if (Feature == "-thumb-mode" ) |
| 1497 | return false; |
| 1498 | else if (Feature == "+thumb-mode" ) |
| 1499 | return true; |
| 1500 | } |
| 1501 | } |
| 1502 | |
| 1503 | return ModuleArch == Triple::thumb; |
| 1504 | } |
| 1505 | |
| 1506 | // Each jump table must be either ARM or Thumb as a whole for the bit-test math |
| 1507 | // to work. Pick one that matches the majority of members to minimize interop |
| 1508 | // veneers inserted by the linker. |
| 1509 | Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding( |
| 1510 | ArrayRef<GlobalTypeMember *> Functions) { |
| 1511 | if (Arch != Triple::arm && Arch != Triple::thumb) |
| 1512 | return Arch; |
| 1513 | |
| 1514 | if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) { |
| 1515 | // In architectures that provide Arm and Thumb-1 but not Thumb-2, |
| 1516 | // we should always prefer the Arm jump table format, because the |
| 1517 | // Thumb-1 one is larger and slower. |
| 1518 | return Triple::arm; |
| 1519 | } |
| 1520 | |
| 1521 | // Otherwise, go with majority vote. |
| 1522 | unsigned ArmCount = 0, ThumbCount = 0; |
| 1523 | for (const auto GTM : Functions) { |
| 1524 | if (!GTM->isJumpTableCanonical()) { |
| 1525 | // PLT stubs are always ARM. |
| 1526 | // FIXME: This is the wrong heuristic for non-canonical jump tables. |
| 1527 | ++ArmCount; |
| 1528 | continue; |
| 1529 | } |
| 1530 | |
| 1531 | Function *F = cast<Function>(Val: GTM->getGlobal()); |
| 1532 | ++(isThumbFunction(F, ModuleArch: Arch) ? ThumbCount : ArmCount); |
| 1533 | } |
| 1534 | |
| 1535 | return ArmCount > ThumbCount ? Triple::arm : Triple::thumb; |
| 1536 | } |
| 1537 | |
| 1538 | void LowerTypeTestsModule::createJumpTable( |
| 1539 | Function *F, ArrayRef<GlobalTypeMember *> Functions, |
| 1540 | Triple::ArchType JumpTableArch) { |
| 1541 | BasicBlock *BB = BasicBlock::Create(Context&: M.getContext(), Name: "entry" , Parent: F); |
| 1542 | IRBuilder<> IRB(BB); |
| 1543 | |
| 1544 | InlineAsm *JumpTableAsm = createJumpTableEntryAsm(JumpTableArch); |
| 1545 | |
| 1546 | // Check if all entries have the NoUnwind attribute. If all entries have |
| 1547 | // it, we can safely mark the cfi.jumptable as NoUnwind; otherwise, direct |
| 1548 | // calls to the jump table will not handle exceptions properly. |
| 1550 | bool areAllEntriesNounwind = true; |
| 1551 | for (GlobalTypeMember *GTM : Functions) { |
| 1552 | if (!llvm::cast<llvm::Function>(Val: GTM->getGlobal()) |
| 1553 | ->hasFnAttribute(Kind: llvm::Attribute::NoUnwind)) { |
| 1554 | areAllEntriesNounwind = false; |
| 1555 | } |
| 1556 | IRB.CreateCall(Callee: JumpTableAsm, Args: GTM->getGlobal()); |
| 1557 | } |
| 1558 | IRB.CreateUnreachable(); |
| 1559 | |
| 1560 | // Align the whole table by entry size. |
| 1561 | F->setAlignment(Align(getJumpTableEntrySize(JumpTableArch))); |
| 1562 | F->addFnAttr(Kind: Attribute::Naked); |
| 1563 | if (JumpTableArch == Triple::arm) |
| 1564 | F->addFnAttr(Kind: "target-features" , Val: "-thumb-mode" ); |
| 1565 | if (JumpTableArch == Triple::thumb) { |
| 1566 | if (hasBranchTargetEnforcement()) { |
| 1567 | // If we're generating a Thumb jump table with BTI, add a target-features |
| 1568 | // setting to ensure BTI can be assembled. |
| 1569 | F->addFnAttr(Kind: "target-features" , Val: "+thumb-mode,+pacbti" ); |
| 1570 | } else { |
| 1571 | F->addFnAttr(Kind: "target-features" , Val: "+thumb-mode" ); |
| 1572 | if (CanUseThumbBWJumpTable) { |
| 1573 | // Thumb jump table assembly needs Thumb2. The following attribute is |
| 1574 | // added by Clang for -march=armv7. |
| 1575 | F->addFnAttr(Kind: "target-cpu" , Val: "cortex-a8" ); |
| 1576 | } |
| 1577 | } |
| 1578 | } |
| 1579 | // When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI |
| 1580 | // for the function to avoid double BTI. This is a no-op without |
| 1581 | // -mbranch-protection=. |
| 1582 | if (JumpTableArch == Triple::aarch64 || JumpTableArch == Triple::thumb) { |
| 1583 | if (F->hasFnAttribute(Kind: "branch-target-enforcement" )) |
| 1584 | F->removeFnAttr(Kind: "branch-target-enforcement" ); |
| 1585 | if (F->hasFnAttribute(Kind: "sign-return-address" )) |
| 1586 | F->removeFnAttr(Kind: "sign-return-address" ); |
| 1587 | } |
| 1588 | if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) { |
| 1589 | // Make sure the jump table assembly is not modified by the assembler or |
| 1590 | // the linker. |
| 1591 | F->addFnAttr(Kind: "target-features" , Val: "-c,-relax" ); |
| 1592 | } |
| 1593 | // When -fcf-protection= is used, the inline asm adds an ENDBR. Suppress ENDBR |
| 1594 | // for the function to avoid double ENDBR. This is a no-op without |
| 1595 | // -fcf-protection=. |
| 1596 | if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) |
| 1597 | F->addFnAttr(Kind: Attribute::NoCfCheck); |
| 1598 | |
| 1599 | // Make sure we don't emit .eh_frame for this function if it isn't needed. |
| 1600 | if (areAllEntriesNounwind) |
| 1601 | F->addFnAttr(Kind: Attribute::NoUnwind); |
| 1602 | |
| 1603 | // Make sure we do not inline any calls to the cfi.jumptable. |
| 1604 | F->addFnAttr(Kind: Attribute::NoInline); |
| 1605 | } |
| 1606 | |
| 1607 | /// Given a disjoint set of type identifiers and functions, build a jump table |
| 1608 | /// for the functions, build the bit sets and lower the llvm.type.test calls. |
| 1609 | void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( |
| 1610 | ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { |
| 1611 | // Unlike the global bitset builder, the function bitset builder cannot |
| 1612 | // re-arrange functions in a particular order and base its calculations on the |
| 1613 | // layout of the functions' entry points, as we have no idea how large a |
| 1614 | // particular function will end up being (the size could even depend on what |
| 1615 | // this pass does!) Instead, we build a jump table, which is a block of code |
| 1616 | // consisting of one branch instruction for each of the functions in the bit |
| 1617 | // set that branches to the target function, and redirect any taken function |
| 1618 | // addresses to the corresponding jump table entry. In the object file's |
| 1619 | // symbol table, the symbols for the target functions also refer to the jump |
| 1620 | // table entries, so that addresses taken outside the module will pass any |
| 1621 | // verification done inside the module. |
| 1622 | // |
| 1623 | // In more concrete terms, suppose we have three functions f, g, h which are |
| 1624 | // of the same type, and a function foo that returns their addresses: |
| 1625 | // |
| 1626 | // f: |
| 1627 | // mov 0, %eax |
| 1628 | // ret |
| 1629 | // |
| 1630 | // g: |
| 1631 | // mov 1, %eax |
| 1632 | // ret |
| 1633 | // |
| 1634 | // h: |
| 1635 | // mov 2, %eax |
| 1636 | // ret |
| 1637 | // |
| 1638 | // foo: |
| 1639 | // mov f, %eax |
| 1640 | // mov g, %edx |
| 1641 | // mov h, %ecx |
| 1642 | // ret |
| 1643 | // |
| 1644 | // We output the jump table as module-level inline asm string. The end result |
| 1645 | // will (conceptually) look like this: |
| 1646 | // |
| 1647 | // f = .cfi.jumptable |
| 1648 | // g = .cfi.jumptable + 4 |
| 1649 | // h = .cfi.jumptable + 8 |
| 1650 | // .cfi.jumptable: |
| 1651 | // jmp f.cfi ; 5 bytes |
| 1652 | // int3 ; 1 byte |
| 1653 | // int3 ; 1 byte |
| 1654 | // int3 ; 1 byte |
| 1655 | // jmp g.cfi ; 5 bytes |
| 1656 | // int3 ; 1 byte |
| 1657 | // int3 ; 1 byte |
| 1658 | // int3 ; 1 byte |
| 1659 | // jmp h.cfi ; 5 bytes |
| 1660 | // int3 ; 1 byte |
| 1661 | // int3 ; 1 byte |
| 1662 | // int3 ; 1 byte |
| 1663 | // |
| 1664 | // f.cfi: |
| 1665 | // mov 0, %eax |
| 1666 | // ret |
| 1667 | // |
| 1668 | // g.cfi: |
| 1669 | // mov 1, %eax |
| 1670 | // ret |
| 1671 | // |
| 1672 | // h.cfi: |
| 1673 | // mov 2, %eax |
| 1674 | // ret |
| 1675 | // |
| 1676 | // foo: |
| 1677 | // mov f, %eax |
| 1678 | // mov g, %edx |
| 1679 | // mov h, %ecx |
| 1680 | // ret |
| 1681 | // |
| 1682 | // Because the addresses of f, g, h are evenly spaced at a power of 2, in the |
| 1683 | // normal case the check can be carried out using the same kind of simple |
| 1684 | // arithmetic that we normally use for globals. |
| 1685 | |
| 1686 | // FIXME: find a better way to represent the jumptable in the IR. |
| 1687 | assert(!Functions.empty()); |
| 1688 | |
| 1689 | // Decide on the jump table encoding, so that we know how big the |
| 1690 | // entries will be. |
| 1691 | Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions); |
| 1692 | |
| 1693 | // Build a simple layout based on the regular layout of jump tables. |
| 1694 | DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; |
| 1695 | unsigned EntrySize = getJumpTableEntrySize(JumpTableArch); |
| 1696 | for (unsigned I = 0; I != Functions.size(); ++I) |
| 1697 | GlobalLayout[Functions[I]] = I * EntrySize; |
| 1698 | |
| 1699 | Function *JumpTableFn = |
| 1700 | Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), |
| 1701 | /* IsVarArg */ isVarArg: false), |
| 1702 | Linkage: GlobalValue::PrivateLinkage, |
| 1703 | AddrSpace: M.getDataLayout().getProgramAddressSpace(), |
| 1704 | N: ".cfi.jumptable" , M: &M); |
| 1705 | ArrayType *JumpTableEntryType = ArrayType::get(ElementType: Int8Ty, NumElements: EntrySize); |
| 1706 | ArrayType *JumpTableType = |
| 1707 | ArrayType::get(ElementType: JumpTableEntryType, NumElements: Functions.size()); |
| 1708 | auto JumpTable = ConstantExpr::getPointerCast( |
| 1709 | C: JumpTableFn, Ty: PointerType::getUnqual(C&: M.getContext())); |
| 1710 | |
| 1711 | lowerTypeTestCalls(TypeIds, CombinedGlobalAddr: JumpTable, GlobalLayout); |
| 1712 | |
| 1713 | // Build aliases pointing to offsets into the jump table, and replace |
| 1714 | // references to the original functions with references to the aliases. |
| 1715 | for (unsigned I = 0; I != Functions.size(); ++I) { |
| 1716 | Function *F = cast<Function>(Val: Functions[I]->getGlobal()); |
| 1717 | bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical(); |
| 1718 | |
| 1719 | Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr( |
| 1720 | Ty: JumpTableType, C: JumpTable, |
| 1721 | IdxList: ArrayRef<Constant *>{ConstantInt::get(Ty: IntPtrTy, V: 0), |
| 1722 | ConstantInt::get(Ty: IntPtrTy, V: I)}); |
| 1723 | |
| 1724 | const bool IsExported = Functions[I]->isExported(); |
| 1725 | if (!IsJumpTableCanonical) { |
| 1726 | GlobalValue::LinkageTypes LT = IsExported ? GlobalValue::ExternalLinkage |
| 1727 | : GlobalValue::InternalLinkage; |
| 1728 | GlobalAlias *JtAlias = GlobalAlias::create(Ty: JumpTableEntryType, AddressSpace: 0, Linkage: LT, |
| 1729 | Name: F->getName() + ".cfi_jt" , |
| 1730 | Aliasee: CombinedGlobalElemPtr, Parent: &M); |
| 1731 | if (IsExported) |
| 1732 | JtAlias->setVisibility(GlobalValue::HiddenVisibility); |
| 1733 | else |
| 1734 | appendToUsed(M, Values: {JtAlias}); |
| 1735 | } |
| 1736 | |
| 1737 | if (IsExported) { |
| 1738 | if (IsJumpTableCanonical) |
| 1739 | ExportSummary->cfiFunctionDefs().emplace(A: F->getName()); |
| 1740 | else |
| 1741 | ExportSummary->cfiFunctionDecls().emplace(A: F->getName()); |
| 1742 | } |
| 1743 | |
| 1744 | if (!IsJumpTableCanonical) { |
| 1745 | if (F->hasExternalWeakLinkage()) |
| 1746 | replaceWeakDeclarationWithJumpTablePtr(F, JT: CombinedGlobalElemPtr, |
| 1747 | IsJumpTableCanonical); |
| 1748 | else |
| 1749 | replaceCfiUses(Old: F, New: CombinedGlobalElemPtr, IsJumpTableCanonical); |
| 1750 | } else { |
| 1751 | assert(F->getType()->getAddressSpace() == 0); |
| 1752 | |
| 1753 | GlobalAlias *FAlias = |
| 1754 | GlobalAlias::create(Ty: JumpTableEntryType, AddressSpace: 0, Linkage: F->getLinkage(), Name: "" , |
| 1755 | Aliasee: CombinedGlobalElemPtr, Parent: &M); |
| 1756 | FAlias->setVisibility(F->getVisibility()); |
| 1757 | FAlias->takeName(V: F); |
| 1758 | if (FAlias->hasName()) { |
| 1759 | F->setName(FAlias->getName() + ".cfi" ); |
| 1760 | maybeReplaceComdat(F, OriginalName: FAlias->getName()); |
| 1761 | } |
| 1762 | replaceCfiUses(Old: F, New: FAlias, IsJumpTableCanonical); |
| 1763 | if (!F->hasLocalLinkage()) |
| 1764 | F->setVisibility(GlobalVariable::HiddenVisibility); |
| 1765 | } |
| 1766 | } |
| 1767 | |
| 1768 | createJumpTable(F: JumpTableFn, Functions, JumpTableArch); |
| 1769 | } |
| 1770 | |
| 1771 | /// Assign a dummy layout using an incrementing counter, tag each function |
| 1772 | /// with its index represented as metadata, and lower each type test to an |
| 1773 | /// integer range comparison. During generation of the indirect function call |
| 1774 | /// table in the backend, it will assign the given indexes. |
| 1775 | /// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet |
| 1776 | /// been finalized. |
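| | /// For example (IR sketch), an address-taken member is tagged as |
| | ///   define void @f() !wasm.index !0 { ... } |
| | ///   !0 = !{i64 42} |
| | /// and its type tests lower to a range check on the indexes assigned to |
| | /// the type's members. |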
| 1777 | void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM( |
| 1778 | ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { |
| 1779 | assert(!Functions.empty()); |
| 1780 | |
| 1781 | // Build consecutive monotonic integer ranges for each call target set |
| 1782 | DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; |
| 1783 | |
| 1784 | for (GlobalTypeMember *GTM : Functions) { |
| 1785 | Function *F = cast<Function>(Val: GTM->getGlobal()); |
| 1786 | |
| 1787 | // Skip functions that are not address taken, to avoid bloating the table |
| 1788 | if (!F->hasAddressTaken()) |
| 1789 | continue; |
| 1790 | |
| 1791 | // Store metadata with the index for each function |
| 1792 | MDNode *MD = MDNode::get(Context&: F->getContext(), |
| 1793 | MDs: ArrayRef<Metadata *>(ConstantAsMetadata::get( |
| 1794 | C: ConstantInt::get(Ty: Int64Ty, V: IndirectIndex)))); |
| 1795 | F->setMetadata(Kind: "wasm.index" , Node: MD); |
| 1796 | |
| 1797 | // Assign the counter value |
| 1798 | GlobalLayout[GTM] = IndirectIndex++; |
| 1799 | } |
| 1800 | |
| 1801 | // The indirect function table index space starts at zero, so pass a NULL |
| 1802 | // pointer as the subtracted "jump table" offset. |
| 1803 | lowerTypeTestCalls(TypeIds, CombinedGlobalAddr: ConstantPointerNull::get(T: PtrTy), |
| 1804 | GlobalLayout); |
| 1805 | } |
| 1806 | |
| 1807 | void LowerTypeTestsModule::buildBitSetsFromDisjointSet( |
| 1808 | ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals, |
| 1809 | ArrayRef<ICallBranchFunnel *> ICallBranchFunnels) { |
| 1810 | DenseMap<Metadata *, uint64_t> TypeIdIndices; |
| 1811 | for (unsigned I = 0; I != TypeIds.size(); ++I) |
| 1812 | TypeIdIndices[TypeIds[I]] = I; |
| 1813 | |
| 1814 | // For each type identifier, build a set of indices that refer to members of |
| 1815 | // the type identifier. |
| 1816 | std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size()); |
| 1817 | unsigned GlobalIndex = 0; |
| 1818 | DenseMap<GlobalTypeMember *, uint64_t> GlobalIndices; |
| 1819 | for (GlobalTypeMember *GTM : Globals) { |
| 1820 | for (MDNode *Type : GTM->types()) { |
| 1821 | // Type = { offset, type identifier } |
| 1822 | auto I = TypeIdIndices.find(Val: Type->getOperand(I: 1)); |
| 1823 | if (I != TypeIdIndices.end()) |
| 1824 | TypeMembers[I->second].insert(x: GlobalIndex); |
| 1825 | } |
| 1826 | GlobalIndices[GTM] = GlobalIndex; |
| 1827 | GlobalIndex++; |
| 1828 | } |
| 1829 | |
| 1830 | for (ICallBranchFunnel *JT : ICallBranchFunnels) { |
| 1831 | TypeMembers.emplace_back(); |
| 1832 | std::set<uint64_t> &TMSet = TypeMembers.back(); |
| 1833 | for (GlobalTypeMember *T : JT->targets()) |
| 1834 | TMSet.insert(x: GlobalIndices[T]); |
| 1835 | } |
| 1836 | |
| 1837 | // Order the sets of indices by size. The GlobalLayoutBuilder works best |
| 1838 | // when given small index sets first. |
| 1839 | llvm::stable_sort(Range&: TypeMembers, C: [](const std::set<uint64_t> &O1, |
| 1840 | const std::set<uint64_t> &O2) { |
| 1841 | return O1.size() < O2.size(); |
| 1842 | }); |
| 1843 | |
| 1844 | // Create a GlobalLayoutBuilder and provide it with index sets as layout |
| 1845 | // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as |
| 1846 | // close together as possible. |
| 1847 | GlobalLayoutBuilder GLB(Globals.size()); |
| 1848 | for (auto &&MemSet : TypeMembers) |
| 1849 | GLB.addFragment(F: MemSet); |
| 1850 | |
| 1851 | // Build a vector of globals with the computed layout. |
| 1852 | bool IsGlobalSet = |
| 1853 | Globals.empty() || isa<GlobalVariable>(Val: Globals[0]->getGlobal()); |
| 1854 | std::vector<GlobalTypeMember *> OrderedGTMs(Globals.size()); |
| 1855 | auto OGTMI = OrderedGTMs.begin(); |
| 1856 | for (auto &&F : GLB.Fragments) { |
| 1857 | for (auto &&Offset : F) { |
| 1858 | if (IsGlobalSet != isa<GlobalVariable>(Val: Globals[Offset]->getGlobal())) |
| 1859 | report_fatal_error(reason: "Type identifier may not contain both global " |
| 1860 | "variables and functions" ); |
| 1861 | *OGTMI++ = Globals[Offset]; |
| 1862 | } |
| 1863 | } |
| 1864 | |
| 1865 | // Build the bitsets from this disjoint set. |
| 1866 | if (IsGlobalSet) |
| 1867 | buildBitSetsFromGlobalVariables(TypeIds, Globals: OrderedGTMs); |
| 1868 | else |
| 1869 | buildBitSetsFromFunctions(TypeIds, Functions: OrderedGTMs); |
| 1870 | } |
| 1871 | |
| 1872 | /// Lower all type tests in this module. |
| 1873 | LowerTypeTestsModule::LowerTypeTestsModule( |
| 1874 | Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary, |
| 1875 | const ModuleSummaryIndex *ImportSummary, DropTestKind DropTypeTests) |
| 1876 | : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary), |
| 1877 | DropTypeTests(ClDropTypeTests > DropTypeTests ? ClDropTypeTests |
| 1878 | : DropTypeTests) { |
| 1879 | assert(!(ExportSummary && ImportSummary)); |
| 1880 | Triple TargetTriple(M.getTargetTriple()); |
| 1881 | Arch = TargetTriple.getArch(); |
| 1882 | if (Arch == Triple::arm) |
| 1883 | CanUseArmJumpTable = true; |
| 1884 | if (Arch == Triple::arm || Arch == Triple::thumb) { |
| 1885 | auto &FAM = |
| 1886 | AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
| 1887 | for (Function &F : M) { |
| 1888 | // Skip declarations since we should not query the TTI for them. |
| 1889 | if (F.isDeclaration()) |
| 1890 | continue; |
| 1891 | auto &TTI = FAM.getResult<TargetIRAnalysis>(IR&: F); |
| 1892 | if (TTI.hasArmWideBranch(Thumb: false)) |
| 1893 | CanUseArmJumpTable = true; |
| 1894 | if (TTI.hasArmWideBranch(Thumb: true)) |
| 1895 | CanUseThumbBWJumpTable = true; |
| 1896 | } |
| 1897 | } |
| 1898 | OS = TargetTriple.getOS(); |
| 1899 | ObjectFormat = TargetTriple.getObjectFormat(); |
| 1900 | |
| 1901 | // A function annotation describes or applies to the function itself, so it |
| 1902 | // shouldn't be associated with the jump table thunk generated for CFI. |
| 1903 | GlobalAnnotation = M.getGlobalVariable(Name: "llvm.global.annotations" ); |
| 1904 | if (GlobalAnnotation && GlobalAnnotation->hasInitializer()) { |
| 1905 | const ConstantArray *CA = |
| 1906 | cast<ConstantArray>(Val: GlobalAnnotation->getInitializer()); |
| 1907 | FunctionAnnotations.insert_range(R: CA->operands()); |
| 1908 | } |
| 1909 | } |
| 1910 | |
| 1911 | bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) { |
| 1912 | ModuleSummaryIndex Summary(/*HaveGVs=*/false); |
| 1913 | |
| 1914 | // Handle the command-line summary arguments. This code is for testing |
| 1915 | // purposes only, so we handle errors directly. |
| 1916 | if (!ClReadSummary.empty()) { |
| 1917 | ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary + |
| 1918 | ": " ); |
| 1919 | auto ReadSummaryFile = ExitOnErr(errorOrToExpected( |
| 1920 | EO: MemoryBuffer::getFile(Filename: ClReadSummary, /*IsText=*/true))); |
| 1921 | |
| 1922 | yaml::Input In(ReadSummaryFile->getBuffer()); |
| 1923 | In >> Summary; |
| 1924 | ExitOnErr(errorCodeToError(EC: In.error())); |
| 1925 | } |
| 1926 | |
| 1927 | bool Changed = |
| 1928 | LowerTypeTestsModule( |
| 1929 | M, AM, |
| 1930 | ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr, |
| 1931 | ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr, |
| 1932 | /*DropTypeTests=*/DropTestKind::None) |
| 1933 | .lower(); |
| 1934 | |
| 1935 | if (!ClWriteSummary.empty()) { |
| 1936 | ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + |
| 1937 | ": " ); |
| 1938 | std::error_code EC; |
| 1939 | raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF); |
| 1940 | ExitOnErr(errorCodeToError(EC)); |
| 1941 | |
| 1942 | yaml::Output Out(OS); |
| 1943 | Out << Summary; |
| 1944 | } |
| 1945 | |
| 1946 | return Changed; |
| 1947 | } |
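| | |
| | // A typical testing invocation (a sketch; the read/write flag spellings |
| | // match the ExitOnError prefixes above, and -lowertypetests-summary-action |
| | // is assumed to be the cl::opt behind ClSummaryAction): |
| | //   opt -S -passes=lowertypetests \ |
| | //       -lowertypetests-summary-action=export \ |
| | //       -lowertypetests-write-summary=summary.yaml in.ll -o out.ll |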
| 1948 | |
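| | // Returns true if U is the callee operand of a direct call, e.g. the use |
| | // of @f in "call void @f()", but not in "call void @g(ptr @f)" or |
| | // "store ptr @f, ptr %p". |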
| 1949 | static bool isDirectCall(Use &U) { |
| 1950 | auto *CI = dyn_cast<CallInst>(Val: U.getUser()); |
| 1951 | return CI && CI->isCallee(U: &U); |
| 1952 | } |
| 1958 | |
| 1959 | void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, |
| 1960 | bool IsJumpTableCanonical) { |
| 1961 | SmallSetVector<Constant *, 4> Constants; |
| 1962 | for (Use &U : llvm::make_early_inc_range(Range: Old->uses())) { |
| 1963 | // Skip no_cfi values, which refer to the function body instead of the jump |
| 1964 | // table. |
| 1965 | if (isa<NoCFIValue>(Val: U.getUser())) |
| 1966 | continue; |
| 1967 | |
| 1968 | // Skip direct calls to externally defined or non-dso_local functions. |
| 1969 | if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical)) |
| 1970 | continue; |
| 1971 | |
| 1972 | // Skip function annotation. |
| 1973 | if (isFunctionAnnotation(V: U.getUser())) |
| 1974 | continue; |
| 1975 | |
| 1976 | // Must handle Constants specially, we cannot call replaceUsesOfWith on a |
| 1977 | // constant because they are uniqued. |
| 1978 | if (auto *C = dyn_cast<Constant>(Val: U.getUser())) { |
| 1979 | if (!isa<GlobalValue>(Val: C)) { |
| 1980 | // Save unique users to avoid processing operand replacement |
| 1981 | // more than once. |
| 1982 | Constants.insert(X: C); |
| 1983 | continue; |
| 1984 | } |
| 1985 | } |
| 1986 | |
| 1987 | U.set(New); |
| 1988 | } |
| 1989 | |
| 1990 | // Process operand replacement of saved constants. |
| 1991 | for (auto *C : Constants) |
| 1992 | C->handleOperandChange(Old, New); |
| 1993 | } |
| 1994 | |
| 1995 | void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { |
| 1996 | Old->replaceUsesWithIf(New, ShouldReplace: isDirectCall); |
| 1997 | } |
| 1998 | |
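| | // Remove every call to TypeTestFunc. For a pattern like (IR sketch) |
| | //   %t = call i1 @llvm.type.test(ptr %p, metadata !"_ZTS1A") |
| | //   call void @llvm.assume(i1 %t) |
| | // both instructions are erased; remaining uses of %t (phis from merged |
| | // assumes, or any use at all when ShouldDropAll is set) are replaced with |
| | // true. |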
| 1999 | static void dropTypeTests(Module &M, Function &TypeTestFunc, |
| 2000 | bool ShouldDropAll) { |
| 2001 | for (Use &U : llvm::make_early_inc_range(Range: TypeTestFunc.uses())) { |
| 2002 | auto *CI = cast<CallInst>(Val: U.getUser()); |
| 2003 | // Find and erase llvm.assume intrinsics for this llvm.type.test call. |
| 2004 | for (Use &CIU : llvm::make_early_inc_range(Range: CI->uses())) |
| 2005 | if (auto *Assume = dyn_cast<AssumeInst>(Val: CIU.getUser())) |
| 2006 | Assume->eraseFromParent(); |
| 2007 | // If the assume was merged with another assume, we might have a use on a |
| 2008 | // phi (which will feed the assume). Simply replace the use on the phi |
| 2009 | // with "true" and leave the merged assume. |
| 2010 | // |
| 2011 | // If ShouldDropAll is set, then we need to update any remaining uses, |
| 2012 | // regardless of the instruction type. |
| 2013 | if (!CI->use_empty()) { |
| 2014 | assert(ShouldDropAll || all_of(CI->users(), [](User *U) -> bool { |
| 2015 | return isa<PHINode>(U); |
| 2016 | })); |
| 2017 | CI->replaceAllUsesWith(V: ConstantInt::getTrue(Context&: M.getContext())); |
| 2018 | } |
| 2019 | CI->eraseFromParent(); |
| 2020 | } |
| 2021 | } |
| 2022 | |
| 2023 | bool LowerTypeTestsModule::lower() { |
| 2024 | Function *TypeTestFunc = |
| 2025 | Intrinsic::getDeclarationIfExists(M: &M, id: Intrinsic::type_test); |
| 2026 | |
| 2027 | if (DropTypeTests != DropTestKind::None) { |
| 2028 | bool ShouldDropAll = DropTypeTests == DropTestKind::All; |
| 2029 | if (TypeTestFunc) |
| 2030 | dropTypeTests(M, TypeTestFunc&: *TypeTestFunc, ShouldDropAll); |
| 2031 | // Normally we'd have already removed all @llvm.public.type.test calls, |
| 2032 | // except in the case where we originally were performing ThinLTO but |
| 2033 | // decided not to in the backend. |
| 2034 | Function *PublicTypeTestFunc = |
| 2035 | Intrinsic::getDeclarationIfExists(M: &M, id: Intrinsic::public_type_test); |
| 2036 | if (PublicTypeTestFunc) |
| 2037 | dropTypeTests(M, TypeTestFunc&: *PublicTypeTestFunc, ShouldDropAll); |
| 2038 | if (TypeTestFunc || PublicTypeTestFunc) { |
| 2039 | // We have deleted the type intrinsics, so we no longer have enough |
| 2040 | // information to reason about the liveness of virtual function pointers |
| 2041 | // in GlobalDCE. |
| 2042 | for (GlobalVariable &GV : M.globals()) |
| 2043 | GV.eraseMetadata(KindID: LLVMContext::MD_vcall_visibility); |
| 2044 | return true; |
| 2045 | } |
| 2046 | return false; |
| 2047 | } |
| 2048 | |
| 2049 | // If only some of the modules were split, we cannot correctly perform |
| 2050 | // this transformation. We already checked for the presence of type tests |
| 2051 | // with partially split modules during the thin link, and would have emitted |
| 2052 | // an error if any were found, so here we can simply return. |
| 2053 | if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) || |
| 2054 | (ImportSummary && ImportSummary->partiallySplitLTOUnits())) |
| 2055 | return false; |
| 2056 | |
| 2057 | Function *ICallBranchFunnelFunc = |
| 2058 | Intrinsic::getDeclarationIfExists(M: &M, id: Intrinsic::icall_branch_funnel); |
| 2059 | if ((!TypeTestFunc || TypeTestFunc->use_empty()) && |
| 2060 | (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) && |
| 2061 | !ExportSummary && !ImportSummary) |
| 2062 | return false; |
| 2063 | |
| 2064 | if (ImportSummary) { |
| 2065 | if (TypeTestFunc) |
| 2066 | for (Use &U : llvm::make_early_inc_range(Range: TypeTestFunc->uses())) |
| 2067 | importTypeTest(CI: cast<CallInst>(Val: U.getUser())); |
| 2068 | |
| 2069 | if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty()) |
| 2070 | report_fatal_error( |
| 2071 | reason: "unexpected call to llvm.icall.branch.funnel during import phase" ); |
| 2072 | |
| 2073 | SmallVector<Function *, 8> Defs; |
| 2074 | SmallVector<Function *, 8> Decls; |
| 2075 | for (auto &F : M) { |
| 2076 | // CFI functions are either external, or promoted. A local function may |
| 2077 | // have the same name, but it's not the one we are looking for. |
| 2078 | if (F.hasLocalLinkage()) |
| 2079 | continue; |
| 2080 | if (ImportSummary->cfiFunctionDefs().count(S: F.getName())) |
| 2081 | Defs.push_back(Elt: &F); |
| 2082 | else if (ImportSummary->cfiFunctionDecls().count(S: F.getName())) |
| 2083 | Decls.push_back(Elt: &F); |
| 2084 | } |
| 2085 | |
| 2086 | { |
| 2087 | ScopedSaveAliaseesAndUsed S(M); |
| 2088 | for (auto *F : Defs) |
| 2089 | importFunction(F, /*isJumpTableCanonical*/ true); |
| 2090 | for (auto *F : Decls) |
| 2091 | importFunction(F, /*isJumpTableCanonical*/ false); |
| 2092 | } |
| 2093 | |
| 2094 | return true; |
| 2095 | } |
| 2096 | |
| 2097 | // Equivalence class set containing type identifiers and the globals that |
| 2098 | // reference them. This is used to partition the set of type identifiers in |
| 2099 | // the module into disjoint sets. |
| 2100 | using GlobalClassesTy = EquivalenceClasses< |
| 2101 | PointerUnion<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>; |
| 2102 | GlobalClassesTy GlobalClasses; |
| 2103 | |
| 2104 | // Verify the type metadata and build a few data structures to let us |
| 2105 | // efficiently enumerate the type identifiers associated with a global: |
| 2106 | // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector |
| 2107 | // of associated type metadata) and a mapping from type identifiers to their |
| 2108 | // list of GlobalTypeMembers and last observed index in the list of globals. |
| 2109 | // The indices will be used later to deterministically order the list of type |
| 2110 | // identifiers. |
| 2111 | BumpPtrAllocator Alloc; |
| 2112 | struct TIInfo { |
| 2113 | unsigned UniqueId; |
| 2114 | std::vector<GlobalTypeMember *> RefGlobals; |
| 2115 | }; |
| 2116 | DenseMap<Metadata *, TIInfo> TypeIdInfo; |
| 2117 | unsigned CurUniqueId = 0; |
| 2118 | SmallVector<MDNode *, 2> Types; |
| 2119 | |
| 2120 | // Cross-DSO CFI emits jumptable entries for exported functions as well as |
| 2121 | // address-taken functions, in case they are address taken in other modules. |
| 2122 | const bool CrossDsoCfi = M.getModuleFlag(Key: "Cross-DSO CFI" ) != nullptr; |
| 2123 | |
| 2124 | struct ExportedFunctionInfo { |
| 2125 | CfiFunctionLinkage Linkage; |
| 2126 | MDNode *FuncMD; // {name, linkage, type[, type...]} |
| 2127 | }; |
| 2128 | MapVector<StringRef, ExportedFunctionInfo> ExportedFunctions; |
| 2129 | if (ExportSummary) { |
| 2130 | NamedMDNode *CfiFunctionsMD = M.getNamedMetadata(Name: "cfi.functions" ); |
| 2131 | if (CfiFunctionsMD) { |
| 2132 | // A set of all functions that are address taken by a live global object. |
| 2133 | DenseSet<GlobalValue::GUID> AddressTaken; |
| 2134 | for (auto &I : *ExportSummary) |
| 2135 | for (auto &GVS : I.second.getSummaryList()) |
| 2136 | if (GVS->isLive()) |
| 2137 | for (const auto &Ref : GVS->refs()) { |
| 2138 | AddressTaken.insert(V: Ref.getGUID()); |
| 2139 | for (auto &RefGVS : Ref.getSummaryList()) |
| 2140 | if (auto Alias = dyn_cast<AliasSummary>(Val: RefGVS.get())) |
| 2141 | AddressTaken.insert(V: Alias->getAliaseeGUID()); |
| 2142 | } |
| 2143 | auto IsAddressTaken = [&](GlobalValue::GUID GUID) { |
| 2144 | if (AddressTaken.count(V: GUID)) |
| 2145 | return true; |
| 2146 | auto VI = ExportSummary->getValueInfo(GUID); |
| 2147 | if (!VI) |
| 2148 | return false; |
| 2149 | for (auto &I : VI.getSummaryList()) |
| 2150 | if (auto Alias = dyn_cast<AliasSummary>(Val: I.get())) |
| 2151 | if (AddressTaken.count(V: Alias->getAliaseeGUID())) |
| 2152 | return true; |
| 2153 | return false; |
| 2154 | }; |
| 2155 | for (auto *FuncMD : CfiFunctionsMD->operands()) { |
| 2156 | assert(FuncMD->getNumOperands() >= 2); |
| 2157 | StringRef FunctionName = |
| 2158 | cast<MDString>(Val: FuncMD->getOperand(I: 0))->getString(); |
| 2159 | CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>( |
| 2160 | cast<ConstantAsMetadata>(Val: FuncMD->getOperand(I: 1)) |
| 2161 | ->getValue() |
| 2162 | ->getUniqueInteger() |
| 2163 | .getZExtValue()); |
| 2164 | const GlobalValue::GUID GUID = |
| 2165 | GlobalValue::getGUIDAssumingExternalLinkage( |
| 2166 | GlobalName: GlobalValue::dropLLVMManglingEscape(Name: FunctionName)); |
| 2167 | // Do not emit jumptable entries for functions that are not-live and |
| 2168 | // have no live references (and are not exported with cross-DSO CFI.) |
| 2169 | if (!ExportSummary->isGUIDLive(GUID)) |
| 2170 | continue; |
| 2171 | if (!IsAddressTaken(GUID)) { |
| 2172 | if (!CrossDsoCfi || Linkage != CFL_Definition) |
| 2173 | continue; |
| 2174 | |
| 2175 | bool Exported = false; |
| 2176 | if (auto VI = ExportSummary->getValueInfo(GUID)) |
| 2177 | for (const auto &GVS : VI.getSummaryList()) |
| 2178 | if (GVS->isLive() && !GlobalValue::isLocalLinkage(Linkage: GVS->linkage())) |
| 2179 | Exported = true; |
| 2180 | |
| 2181 | if (!Exported) |
| 2182 | continue; |
| 2183 | } |
| 2184 | auto P = ExportedFunctions.insert(KV: {FunctionName, {.Linkage: Linkage, .FuncMD: FuncMD}}); |
| 2185 | if (!P.second && P.first->second.Linkage != CFL_Definition) |
| 2186 | P.first->second = {.Linkage: Linkage, .FuncMD: FuncMD}; |
| 2187 | } |
| 2188 | |
| 2189 | for (const auto &P : ExportedFunctions) { |
| 2190 | StringRef FunctionName = P.first; |
| 2191 | CfiFunctionLinkage Linkage = P.second.Linkage; |
| 2192 | MDNode *FuncMD = P.second.FuncMD; |
| 2193 | Function *F = M.getFunction(Name: FunctionName); |
| 2194 | if (F && F->hasLocalLinkage()) { |
| 2195 | // Locally defined function that happens to have the same name as a |
| 2196 | // function defined in a ThinLTO module. Rename it to move it out of |
| 2197 | // the way of the external reference that we're about to create. |
| 2198 | // Note that setName will find a unique name for the function, so even |
| 2199 | // if there is an existing function with the suffix there won't be a |
| 2200 | // name collision. |
| 2201 | F->setName(F->getName() + ".1" ); |
| 2202 | F = nullptr; |
| 2203 | } |
| 2204 | |
| 2205 | if (!F) |
| 2206 | F = Function::Create( |
| 2207 | Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), isVarArg: false), |
| 2208 | Linkage: GlobalVariable::ExternalLinkage, |
| 2209 | AddrSpace: M.getDataLayout().getProgramAddressSpace(), N: FunctionName, M: &M); |
| 2210 | |
| 2211 | // If the function is available_externally, remove its definition so |
| 2212 | // that it is handled the same way as a declaration. Later we will try |
| 2213 | // to create an alias using this function's linkage, which will fail if |
| 2214 | // the linkage is available_externally. This will also result in us |
| 2215 | // following the code path below to replace the type metadata. |
| 2216 | if (F->hasAvailableExternallyLinkage()) { |
| 2217 | F->setLinkage(GlobalValue::ExternalLinkage); |
| 2218 | F->deleteBody(); |
| 2219 | F->setComdat(nullptr); |
| 2220 | F->clearMetadata(); |
| 2221 | } |
| 2222 | |
| 2223 | // Update the linkage for extern_weak declarations when a definition |
| 2224 | // exists. |
| 2225 | if (Linkage == CFL_Definition && F->hasExternalWeakLinkage()) |
| 2226 | F->setLinkage(GlobalValue::ExternalLinkage); |
| 2227 | |
| 2228 | // If the function in the full LTO module is a declaration, replace its |
| 2229 | // type metadata with the type metadata we found in cfi.functions. That |
| 2230 | // metadata is presumed to be more accurate than the metadata attached |
| 2231 | // to the declaration. |
| 2232 | if (F->isDeclaration()) { |
| 2233 | if (Linkage == CFL_WeakDeclaration) |
| 2234 | F->setLinkage(GlobalValue::ExternalWeakLinkage); |
| 2235 | |
| 2236 | F->eraseMetadata(KindID: LLVMContext::MD_type); |
| 2237 | for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I) |
| 2238 | F->addMetadata(KindID: LLVMContext::MD_type, |
| 2239 | MD&: *cast<MDNode>(Val: FuncMD->getOperand(I).get())); |
| 2240 | } |
| 2241 | } |
| 2242 | } |
| 2243 | } |
| 2244 | |
| 2245 | struct AliasToCreate { |
| 2246 | Function *Alias; |
| 2247 | std::string TargetName; |
| 2248 | }; |
| 2249 | std::vector<AliasToCreate> AliasesToCreate; |
| 2250 | |
| 2251 | // Parse alias data to replace stand-in function declarations for aliases |
| 2252 | // with an alias to the intended target. |
| 2253 | if (ExportSummary) { |
| 2254 | if (NamedMDNode *AliasesMD = M.getNamedMetadata(Name: "aliases" )) { |
| 2255 | for (auto *AliasMD : AliasesMD->operands()) { |
| 2256 | SmallVector<Function *> Aliases; |
| 2257 | for (Metadata *MD : AliasMD->operands()) { |
| 2258 | auto *MDS = dyn_cast<MDString>(Val: MD); |
| 2259 | if (!MDS) |
| 2260 | continue; |
| 2261 | StringRef AliasName = MDS->getString(); |
| 2262 | if (!ExportedFunctions.count(Key: AliasName)) |
| 2263 | continue; |
| 2264 | auto *AliasF = M.getFunction(Name: AliasName); |
| 2265 | if (AliasF) |
| 2266 | Aliases.push_back(Elt: AliasF); |
| 2267 | } |
| 2268 | |
| 2269 | if (Aliases.empty()) |
| 2270 | continue; |
| 2271 | |
| 2272 | for (unsigned I = 1; I != Aliases.size(); ++I) { |
| 2273 | auto *AliasF = Aliases[I]; |
| 2274 | ExportedFunctions.erase(Key: AliasF->getName()); |
| 2275 | AliasesToCreate.push_back( |
| 2276 | x: {.Alias: AliasF, .TargetName: std::string(Aliases[0]->getName())}); |
| 2277 | } |
| 2278 | } |
| 2279 | } |
| 2280 | } |
| 2281 | |
| 2282 | DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers; |
| 2283 | for (GlobalObject &GO : M.global_objects()) { |
| 2284 | if (isa<GlobalVariable>(Val: GO) && GO.isDeclarationForLinker()) |
| 2285 | continue; |
| 2286 | |
| 2287 | Types.clear(); |
| 2288 | GO.getMetadata(KindID: LLVMContext::MD_type, MDs&: Types); |
| 2289 | |
| 2290 | bool IsJumpTableCanonical = false; |
| 2291 | bool IsExported = false; |
| 2292 | if (Function *F = dyn_cast<Function>(Val: &GO)) { |
| 2293 | IsJumpTableCanonical = isJumpTableCanonical(F); |
| 2294 | if (auto It = ExportedFunctions.find(Key: F->getName()); |
| 2295 | It != ExportedFunctions.end()) { |
| 2296 | IsJumpTableCanonical |= It->second.Linkage == CFL_Definition; |
| 2297 | IsExported = true; |
| 2298 | // TODO: The logic here checks only that the function is address taken, |
| 2299 | // not that the address takers are live. This can be updated to check |
| 2300 | // their liveness and emit fewer jumptable entries once monolithic LTO |
| 2301 | // builds also emit summaries. |
| 2302 | } else if (!F->hasAddressTaken()) { |
| 2303 | if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage()) |
| 2304 | continue; |
| 2305 | } |
| 2306 | } |
| 2307 | |
| 2308 | auto *GTM = GlobalTypeMember::create(Alloc, GO: &GO, IsJumpTableCanonical, |
| 2309 | IsExported, Types); |
| 2310 | GlobalTypeMembers[&GO] = GTM; |
| 2311 | for (MDNode *Type : Types) { |
| 2312 | verifyTypeMDNode(GO: &GO, Type); |
| 2313 | auto &Info = TypeIdInfo[Type->getOperand(I: 1)]; |
| 2314 | Info.UniqueId = ++CurUniqueId; |
| 2315 | Info.RefGlobals.push_back(x: GTM); |
| 2316 | } |
| 2317 | } |
| 2318 | |
| 2319 | auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & { |
| 2320 | // Add the call site to the list of call sites for this type identifier. We |
| 2321 | // also use TypeIdUsers to keep track of whether we have seen this type |
| 2322 | // identifier before. If we have, we don't need to re-add the referenced |
| 2323 | // globals to the equivalence class. |
| 2324 | auto Ins = TypeIdUsers.insert(KV: {TypeId, {}}); |
| 2325 | if (Ins.second) { |
| 2326 | // Add the type identifier to the equivalence class. |
| 2327 | auto &GCI = GlobalClasses.insert(Data: TypeId); |
| 2328 | GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(ECV: GCI); |
| 2329 | |
| 2330 | // Add the referenced globals to the type identifier's equivalence class. |
| 2331 | for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals) |
| 2332 | CurSet = GlobalClasses.unionSets( |
| 2333 | L1: CurSet, L2: GlobalClasses.findLeader(ECV: GlobalClasses.insert(Data: GTM))); |
| 2334 | } |
| 2335 | |
| 2336 | return Ins.first->second; |
| 2337 | }; |
| 2338 | |
| 2339 | if (TypeTestFunc) { |
| 2340 | for (const Use &U : TypeTestFunc->uses()) { |
| 2341 | auto CI = cast<CallInst>(Val: U.getUser()); |
| 2342 | // If this type test is only used by llvm.assume instructions, it |
| 2343 | // was used for whole program devirtualization, and is being kept |
| 2344 | // for use by other optimization passes. We do not need or want to |
| 2345 | // lower it here. We also don't want to rewrite any associated globals |
| 2346 | // unnecessarily. These will be removed by a subsequent LTT invocation |
| 2347 | // with the DropTypeTests flag set. |
| 2348 | bool OnlyAssumeUses = !CI->use_empty(); |
| 2349 | for (const Use &CIU : CI->uses()) { |
| 2350 | if (isa<AssumeInst>(Val: CIU.getUser())) |
| 2351 | continue; |
| 2352 | OnlyAssumeUses = false; |
| 2353 | break; |
| 2354 | } |
| 2355 | if (OnlyAssumeUses) |
| 2356 | continue; |
| 2357 | |
| 2358 | auto TypeIdMDVal = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: 1)); |
| 2359 | if (!TypeIdMDVal) |
| 2360 | report_fatal_error(reason: "Second argument of llvm.type.test must be metadata" ); |
| 2361 | auto TypeId = TypeIdMDVal->getMetadata(); |
| 2362 | AddTypeIdUse(TypeId).CallSites.push_back(x: CI); |
| 2363 | } |
| 2364 | } |
| 2365 | |
| 2366 | if (ICallBranchFunnelFunc) { |
| 2367 | for (const Use &U : ICallBranchFunnelFunc->uses()) { |
| 2368 | if (Arch != Triple::x86_64) |
| 2369 | report_fatal_error( |
| 2370 | reason: "llvm.icall.branch.funnel not supported on this target" ); |
| 2371 | |
| 2372 | auto CI = cast<CallInst>(Val: U.getUser()); |
| 2373 | |
| 2374 | std::vector<GlobalTypeMember *> Targets; |
| 2375 | if (CI->arg_size() % 2 != 1) |
| 2376 | report_fatal_error(reason: "number of arguments should be odd" ); |
| 2377 | |
| 2378 | GlobalClassesTy::member_iterator CurSet; |
| 2379 | for (unsigned I = 1; I != CI->arg_size(); I += 2) { |
| 2380 | int64_t Offset; |
| 2381 | auto *Base = dyn_cast<GlobalObject>(Val: GetPointerBaseWithConstantOffset( |
| 2382 | Ptr: CI->getOperand(i_nocapture: I), Offset, DL: M.getDataLayout())); |
| 2383 | if (!Base) |
| 2384 | report_fatal_error( |
| 2385 | reason: "Expected branch funnel operand to be global value" ); |
| 2386 | |
| 2387 | GlobalTypeMember *GTM = GlobalTypeMembers[Base]; |
| 2388 | Targets.push_back(x: GTM); |
| 2389 | GlobalClassesTy::member_iterator NewSet = |
| 2390 | GlobalClasses.findLeader(ECV: GlobalClasses.insert(Data: GTM)); |
| 2391 | if (I == 1) |
| 2392 | CurSet = NewSet; |
| 2393 | else |
| 2394 | CurSet = GlobalClasses.unionSets(L1: CurSet, L2: NewSet); |
| 2395 | } |
| 2396 | |
| 2397 | GlobalClasses.unionSets( |
| 2398 | L1: CurSet, L2: GlobalClasses.findLeader( |
| 2399 | ECV: GlobalClasses.insert(Data: ICallBranchFunnel::create( |
| 2400 | Alloc, CI, Targets, UniqueId: ++CurUniqueId)))); |
| 2401 | } |
| 2402 | } |
| 2403 | |
| 2404 | if (ExportSummary) { |
| 2405 | DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID; |
| 2406 | for (auto &P : TypeIdInfo) { |
| 2407 | if (auto *TypeId = dyn_cast<MDString>(Val: P.first)) |
| 2408 | MetadataByGUID[GlobalValue::getGUIDAssumingExternalLinkage( |
| 2409 | GlobalName: TypeId->getString())] |
| 2410 | .push_back(NewVal: TypeId); |
| 2411 | } |
| 2412 | |
| 2413 | for (auto &P : *ExportSummary) { |
| 2414 | for (auto &S : P.second.getSummaryList()) { |
| 2415 | if (!ExportSummary->isGlobalValueLive(GVS: S.get())) |
| 2416 | continue; |
| 2417 | if (auto *FS = dyn_cast<FunctionSummary>(Val: S->getBaseObject())) |
| 2418 | for (GlobalValue::GUID G : FS->type_tests()) |
| 2419 | for (Metadata *MD : MetadataByGUID[G]) |
| 2420 | AddTypeIdUse(MD).IsExported = true; |
| 2421 | } |
| 2422 | } |
| 2423 | } |
| 2424 | |
| 2425 | if (GlobalClasses.empty()) |
| 2426 | return false; |
| 2427 | |
  {
    ScopedSaveAliaseesAndUsed S(M);
    // For each disjoint set we found...
    for (const auto &C : GlobalClasses) {
      if (!C->isLeader())
        continue;

      ++NumTypeIdDisjointSets;
      // Build the list of type identifiers in this disjoint set.
      std::vector<Metadata *> TypeIds;
      std::vector<GlobalTypeMember *> Globals;
      std::vector<ICallBranchFunnel *> ICallBranchFunnels;
      for (auto M : GlobalClasses.members(*C)) {
        if (isa<Metadata *>(M))
          TypeIds.push_back(cast<Metadata *>(M));
        else if (isa<GlobalTypeMember *>(M))
          Globals.push_back(cast<GlobalTypeMember *>(M));
        else
          ICallBranchFunnels.push_back(cast<ICallBranchFunnel *>(M));
      }

      // Order type identifiers by unique ID for determinism. This ordering is
      // stable as there is a one-to-one mapping between metadata and unique
      // IDs.
      llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
        return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
      });

      // Same for the branch funnels.
      llvm::sort(ICallBranchFunnels,
                 [&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
                   return F1->UniqueId < F2->UniqueId;
                 });

      // Build bitsets for this disjoint set.
      buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels);
    }
  }

  allocateByteArrays();

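  // Materialize aliases that were queued up during lowering. Only entries
  // whose target now resolves to a GlobalAlias (presumably a jump table
  // alias created above) are recreated; everything else is left untouched.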
  for (auto A : AliasesToCreate) {
    auto *Target = M.getNamedValue(A.TargetName);
    if (!isa<GlobalAlias>(Target))
      continue;
    auto *AliasGA = GlobalAlias::create("", Target);
    AliasGA->setVisibility(A.Alias->getVisibility());
    AliasGA->setLinkage(A.Alias->getLinkage());
    AliasGA->takeName(A.Alias);
    A.Alias->replaceAllUsesWith(AliasGA);
    A.Alias->eraseFromParent();
  }

  // Emit .symver directives for exported functions, if they exist.
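  // Each operand of the "symvers" named metadata is expected to hold a
  // (symbol, versioned alias) string pair, e.g. !{!"foo", !"foo@@VERS_1"}
  // (values illustrative), which becomes the module-level inline asm
  // ".symver foo, foo@@VERS_1" whenever foo is among the exported functions.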
  if (ExportSummary) {
    if (NamedMDNode *SymversMD = M.getNamedMetadata("symvers")) {
      for (auto *Symver : SymversMD->operands()) {
        assert(Symver->getNumOperands() >= 2);
        StringRef SymbolName =
            cast<MDString>(Symver->getOperand(0))->getString();
        StringRef Alias = cast<MDString>(Symver->getOperand(1))->getString();

        if (!ExportedFunctions.count(SymbolName))
          continue;

        M.appendModuleInlineAsm(
            (llvm::Twine(".symver ") + SymbolName + ", " + Alias).str());
      }
    }
  }

  return true;
}

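// Pass entry point. In the UseCommandLine ("testing") mode, runForTesting
// reads and/or writes the summary according to the pass's command-line
// options rather than using the summaries handed to the constructor.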
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {
  bool Changed;
  if (UseCommandLine)
    Changed = LowerTypeTestsModule::runForTesting(M, AM);
  else
    Changed =
        LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
            .lower();
  if (!Changed)
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}

PreservedAnalyses SimplifyTypeTestsPass::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  bool Changed = false;
  // Figure out whether inlining has exposed a constant address to a lowered
  // type test; if so, and the address is known to pass the test, remove the
  // test. Unfortunately this pass ends up needing to reverse engineer what
  // LowerTypeTests did; this is currently inherent to the design of ThinLTO
  // importing where LowerTypeTests needs to run at the start.
  //
  // We look for things like:
  //
  // sub (i64 ptrtoint (ptr @_Z2fpv to i64), i64 ptrtoint (ptr
  // @__typeid__ZTSFvvE_global_addr to i64))
  //
  // which gets replaced with 0 if _Z2fpv (more specifically _Z2fpv.cfi, the
  // function referred to by the jump table) is a member of the type _ZTSFvvE,
  // as well as things like
  //
  // icmp eq ptr @_Z2fpv, @__typeid__ZTSFvvE_global_addr
  //
  // which gets replaced with true if _Z2fpv is a member.
  for (auto &GV : M.globals()) {
    if (!GV.getName().starts_with("__typeid_") ||
        !GV.getName().ends_with("_global_addr"))
      continue;
    // __typeid_foo_global_addr -> foo
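    // substr(9, size - 21) strips the 9-character "__typeid_" prefix and the
    // 12-character "_global_addr" suffix (9 + 12 == 21), leaving the type id
    // string itself.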
    auto *MD = MDString::get(M.getContext(),
                             GV.getName().substr(9, GV.getName().size() - 21));
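    // If the named global has a ".cfi"-suffixed twin, membership must be
    // checked on the twin: the unsuffixed symbol refers to the jump table
    // entry, while the ".cfi" symbol is the actual function definition.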
    auto MaySimplifyPtr = [&](Value *Ptr) {
      if (auto *GV = dyn_cast<GlobalValue>(Ptr))
        if (auto *CFIGV = M.getNamedValue((GV->getName() + ".cfi").str()))
          Ptr = CFIGV;
      return isKnownTypeIdMember(MD, M.getDataLayout(), Ptr, 0);
    };
    auto MaySimplifyInt = [&](Value *Op) {
      auto *PtrAsInt = dyn_cast<ConstantExpr>(Op);
      if (!PtrAsInt || PtrAsInt->getOpcode() != Instruction::PtrToInt)
        return false;
      return MaySimplifyPtr(PtrAsInt->getOperand(0));
    };
    for (User *U : make_early_inc_range(GV.users())) {
      if (auto *CI = dyn_cast<ICmpInst>(U)) {
        if (CI->getPredicate() == CmpInst::ICMP_EQ &&
            MaySimplifyPtr(CI->getOperand(0))) {
          // This is an equality comparison (TypeTestResolution::Single case in
          // lowerTypeTestCall). In this case we just replace the comparison
          // with true.
          CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
          CI->eraseFromParent();
          Changed = true;
          continue;
        }
      }
      auto *CE = dyn_cast<ConstantExpr>(U);
      if (!CE || CE->getOpcode() != Instruction::PtrToInt)
        continue;
      for (Use &U : make_early_inc_range(CE->uses())) {
        auto *CE = dyn_cast<ConstantExpr>(U.getUser());
        if (U.getOperandNo() == 0 && CE &&
            CE->getOpcode() == Instruction::Sub &&
            MaySimplifyInt(CE->getOperand(1))) {
          // This is a computation of PtrOffset as generated by
          // LowerTypeTestsModule::lowerTypeTestCall above. If
          // isKnownTypeIdMember passes we just pretend it evaluated to 0. This
          // should cause later passes to remove the range and alignment checks.
          // The bitset checks won't be removed but those are uncommon.
          CE->replaceAllUsesWith(ConstantInt::get(CE->getType(), 0));
          Changed = true;
        }
        auto *CI = dyn_cast<ICmpInst>(U.getUser());
        if (U.getOperandNo() == 1 && CI &&
            CI->getPredicate() == CmpInst::ICMP_EQ &&
            MaySimplifyInt(CI->getOperand(0))) {
          // This is an equality comparison. Unlike in the case above it
          // remained as an integer compare.
          CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
          CI->eraseFromParent();
          Changed = true;
        }
      }
    }
  }

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<PostDominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}