| 1 | //===- GlobalSplit.cpp - global variable splitter -------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This pass uses inrange annotations on GEP indices to split globals where |
| 10 | // beneficial. Clang currently attaches these annotations to references to |
| 11 | // virtual table globals under the Itanium ABI for the benefit of the |
| 12 | // whole-program virtual call optimization and control flow integrity passes. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "llvm/Transforms/IPO/GlobalSplit.h" |
| 17 | #include "llvm/ADT/SmallVector.h" |
| 18 | #include "llvm/ADT/StringExtras.h" |
| 19 | #include "llvm/IR/Constant.h" |
| 20 | #include "llvm/IR/Constants.h" |
| 21 | #include "llvm/IR/DataLayout.h" |
| 22 | #include "llvm/IR/Function.h" |
| 23 | #include "llvm/IR/GlobalValue.h" |
| 24 | #include "llvm/IR/GlobalVariable.h" |
| 25 | #include "llvm/IR/Intrinsics.h" |
| 26 | #include "llvm/IR/LLVMContext.h" |
| 27 | #include "llvm/IR/Metadata.h" |
| 28 | #include "llvm/IR/Module.h" |
| 29 | #include "llvm/IR/Operator.h" |
| 30 | #include "llvm/IR/Type.h" |
| 31 | #include "llvm/IR/User.h" |
| 32 | #include "llvm/Support/Casting.h" |
| 33 | #include <cstdint> |
| 34 | #include <vector> |
| 35 | |
| 36 | using namespace llvm; |
| 37 | |
| 38 | static bool splitGlobal(GlobalVariable &GV) { |
| 39 | // If the address of the global is taken outside of the module, we cannot |
| 40 | // apply this transformation. |
| 41 | if (!GV.hasLocalLinkage()) |
| 42 | return false; |
| 43 | |
| 44 | // We currently only know how to split ConstantStructs. |
| 45 | auto *Init = dyn_cast_or_null<ConstantStruct>(Val: GV.getInitializer()); |
| 46 | if (!Init) |
| 47 | return false; |
| 48 | |
| 49 | const DataLayout &DL = GV.getDataLayout(); |
| 50 | const StructLayout *SL = DL.getStructLayout(Ty: Init->getType()); |
| 51 | ArrayRef<TypeSize> MemberOffsets = SL->getMemberOffsets(); |
| 52 | unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ty: GV.getType()); |
| 53 | |
| 54 | // Verify that each user of the global is an inrange getelementptr constant, |
| 55 | // and collect information on how it relates to the global. |
| 56 | struct GEPInfo { |
| 57 | GEPOperator *GEP; |
| 58 | unsigned MemberIndex; |
| 59 | APInt MemberRelativeOffset; |
| 60 | |
| 61 | GEPInfo(GEPOperator *GEP, unsigned MemberIndex, APInt MemberRelativeOffset) |
| 62 | : GEP(GEP), MemberIndex(MemberIndex), |
| 63 | MemberRelativeOffset(std::move(MemberRelativeOffset)) {} |
| 64 | }; |
| 65 | SmallVector<GEPInfo> Infos; |
| 66 | for (User *U : GV.users()) { |
| 67 | auto *GEP = dyn_cast<GEPOperator>(Val: U); |
| 68 | if (!GEP) |
| 69 | return false; |
| 70 | |
| 71 | std::optional<ConstantRange> InRange = GEP->getInRange(); |
| 72 | if (!InRange) |
| 73 | return false; |
| 74 | |
| 75 | APInt Offset(IndexWidth, 0); |
| 76 | if (!GEP->accumulateConstantOffset(DL, Offset)) |
| 77 | return false; |
| 78 | |
| 79 | // Determine source-relative inrange. |
| 80 | ConstantRange SrcInRange = InRange->sextOrTrunc(BitWidth: IndexWidth).add(Other: Offset); |
| 81 | |
| 82 | // Check that the GEP offset is in the range (treating upper bound as |
| 83 | // inclusive here). |
| 84 | if (!SrcInRange.contains(Val: Offset) && SrcInRange.getUpper() != Offset) |
| 85 | return false; |
| 86 | |
| 87 | // Find which struct member the range corresponds to. |
| 88 | if (SrcInRange.getLower().uge(RHS: SL->getSizeInBytes())) |
| 89 | return false; |
| 90 | |
| 91 | unsigned MemberIndex = |
| 92 | SL->getElementContainingOffset(FixedOffset: SrcInRange.getLower().getZExtValue()); |
| 93 | TypeSize MemberStart = MemberOffsets[MemberIndex]; |
| 94 | TypeSize MemberEnd = MemberIndex == MemberOffsets.size() - 1 |
| 95 | ? SL->getSizeInBytes() |
| 96 | : MemberOffsets[MemberIndex + 1]; |
| 97 | |
| 98 | // Verify that the range matches that struct member. |
| 99 | if (SrcInRange.getLower() != MemberStart || |
| 100 | SrcInRange.getUpper() != MemberEnd) |
| 101 | return false; |
| 102 | |
| 103 | Infos.emplace_back(Args&: GEP, Args&: MemberIndex, Args: Offset - MemberStart); |
| 104 | } |
| 105 | |
| 106 | SmallVector<MDNode *, 2> Types; |
| 107 | GV.getMetadata(KindID: LLVMContext::MD_type, MDs&: Types); |
| 108 | |
| 109 | IntegerType *Int32Ty = Type::getInt32Ty(C&: GV.getContext()); |
| 110 | |
| 111 | std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands()); |
| 112 | for (unsigned I = 0; I != Init->getNumOperands(); ++I) { |
| 113 | // Build a global representing this split piece. |
| 114 | auto *SplitGV = |
| 115 | new GlobalVariable(*GV.getParent(), Init->getOperand(i_nocapture: I)->getType(), |
| 116 | GV.isConstant(), GlobalValue::PrivateLinkage, |
| 117 | Init->getOperand(i_nocapture: I), GV.getName() + "." + utostr(X: I)); |
| 118 | SplitGlobals[I] = SplitGV; |
| 119 | |
| 120 | unsigned SplitBegin = SL->getElementOffset(Idx: I); |
| 121 | unsigned SplitEnd = (I == Init->getNumOperands() - 1) |
| 122 | ? SL->getSizeInBytes() |
| 123 | : SL->getElementOffset(Idx: I + 1); |
| 124 | |
| 125 | // Rebuild type metadata, adjusting by the split offset. |
| 126 | // FIXME: See if we can use DW_OP_piece to preserve debug metadata here. |
| 127 | for (MDNode *Type : Types) { |
| 128 | uint64_t ByteOffset = cast<ConstantInt>( |
| 129 | Val: cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0))->getValue()) |
| 130 | ->getZExtValue(); |
| 131 | // Type metadata may be attached one byte after the end of the vtable, for |
| 132 | // classes without virtual methods in Itanium ABI. AFAIK, it is never |
| 133 | // attached to the first byte of a vtable. Subtract one to get the right |
| 134 | // slice. |
| 135 | // This is making an assumption that vtable groups are the only kinds of |
| 136 | // global variables that !type metadata can be attached to, and that they |
| 137 | // are either Itanium ABI vtable groups or contain a single vtable (i.e. |
| 138 | // Microsoft ABI vtables). |
| 139 | uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1; |
| 140 | if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd) |
| 141 | continue; |
| 142 | SplitGV->addMetadata( |
| 143 | KindID: LLVMContext::MD_type, |
| 144 | MD&: *MDNode::get(Context&: GV.getContext(), |
| 145 | MDs: {ConstantAsMetadata::get( |
| 146 | C: ConstantInt::get(Ty: Int32Ty, V: ByteOffset - SplitBegin)), |
| 147 | Type->getOperand(I: 1)})); |
| 148 | } |
| 149 | |
| 150 | if (GV.hasMetadata(KindID: LLVMContext::MD_vcall_visibility)) |
| 151 | SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility()); |
| 152 | } |
| 153 | |
| 154 | for (const GEPInfo &Info : Infos) { |
| 155 | assert(Info.MemberIndex < SplitGlobals.size() && "Invalid member" ); |
| 156 | auto *NewGEP = ConstantExpr::getGetElementPtr( |
| 157 | Ty: Type::getInt8Ty(C&: GV.getContext()), C: SplitGlobals[Info.MemberIndex], |
| 158 | Idx: ConstantInt::get(Context&: GV.getContext(), V: Info.MemberRelativeOffset), |
| 159 | NW: Info.GEP->isInBounds()); |
| 160 | Info.GEP->replaceAllUsesWith(V: NewGEP); |
| 161 | } |
| 162 | |
| 163 | // Finally, remove the original global. Any remaining uses refer to invalid |
| 164 | // elements of the global, so replace with poison. |
| 165 | if (!GV.use_empty()) |
| 166 | GV.replaceAllUsesWith(V: PoisonValue::get(T: GV.getType())); |
| 167 | GV.eraseFromParent(); |
| 168 | return true; |
| 169 | } |
| 170 | |
| 171 | static bool splitGlobals(Module &M) { |
| 172 | // First, see if the module uses either of the llvm.type.test or |
| 173 | // llvm.type.checked.load intrinsics, which indicates that splitting globals |
| 174 | // may be beneficial. |
| 175 | Function *TypeTestFunc = |
| 176 | Intrinsic::getDeclarationIfExists(M: &M, id: Intrinsic::type_test); |
| 177 | Function *TypeCheckedLoadFunc = |
| 178 | Intrinsic::getDeclarationIfExists(M: &M, id: Intrinsic::type_checked_load); |
| 179 | Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists( |
| 180 | M: &M, id: Intrinsic::type_checked_load_relative); |
| 181 | if ((!TypeTestFunc || TypeTestFunc->use_empty()) && |
| 182 | (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) && |
| 183 | (!TypeCheckedLoadRelativeFunc || |
| 184 | TypeCheckedLoadRelativeFunc->use_empty())) |
| 185 | return false; |
| 186 | |
| 187 | bool Changed = false; |
| 188 | for (GlobalVariable &GV : llvm::make_early_inc_range(Range: M.globals())) |
| 189 | Changed |= splitGlobal(GV); |
| 190 | return Changed; |
| 191 | } |
| 192 | |
| 193 | PreservedAnalyses GlobalSplitPass::run(Module &M, ModuleAnalysisManager &AM) { |
| 194 | if (!splitGlobals(M)) |
| 195 | return PreservedAnalyses::all(); |
| 196 | return PreservedAnalyses::none(); |
| 197 | } |
| 198 | |