1//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This family of functions performs manipulations on Modules.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Transforms/Utils/ModuleUtils.h"
14#include "llvm/Analysis/VectorUtils.h"
15#include "llvm/ADT/SmallString.h"
16#include "llvm/IR/DerivedTypes.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/MDBuilder.h"
20#include "llvm/IR/Module.h"
21#include "llvm/Support/MD5.h"
22#include "llvm/Support/raw_ostream.h"
23#include "llvm/Support/xxhash.h"
24
25using namespace llvm;
26
27#define DEBUG_TYPE "moduleutils"
28
29static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
30 int Priority, Constant *Data) {
31 IRBuilder<> IRB(M.getContext());
32 FunctionType *FnTy = FunctionType::get(Result: IRB.getVoidTy(), isVarArg: false);
33
34 // Get the current set of static global constructors and add the new ctor
35 // to the list.
36 SmallVector<Constant *, 16> CurrentCtors;
37 StructType *EltTy;
38 if (GlobalVariable *GVCtor = M.getNamedGlobal(Name: ArrayName)) {
39 EltTy = cast<StructType>(Val: GVCtor->getValueType()->getArrayElementType());
40 if (Constant *Init = GVCtor->getInitializer()) {
41 unsigned n = Init->getNumOperands();
42 CurrentCtors.reserve(N: n + 1);
43 for (unsigned i = 0; i != n; ++i)
44 CurrentCtors.push_back(Elt: cast<Constant>(Val: Init->getOperand(i)));
45 }
46 GVCtor->eraseFromParent();
47 } else {
48 EltTy = StructType::get(elt1: IRB.getInt32Ty(),
49 elts: PointerType::get(ElementType: FnTy, AddressSpace: F->getAddressSpace()),
50 elts: IRB.getPtrTy());
51 }
52
53 // Build a 3 field global_ctor entry. We don't take a comdat key.
54 Constant *CSVals[3];
55 CSVals[0] = IRB.getInt32(C: Priority);
56 CSVals[1] = F;
57 CSVals[2] = Data ? ConstantExpr::getPointerCast(C: Data, Ty: IRB.getPtrTy())
58 : Constant::getNullValue(Ty: IRB.getPtrTy());
59 Constant *RuntimeCtorInit =
60 ConstantStruct::get(T: EltTy, V: ArrayRef(CSVals, EltTy->getNumElements()));
61
62 CurrentCtors.push_back(Elt: RuntimeCtorInit);
63
64 // Create a new initializer.
65 ArrayType *AT = ArrayType::get(ElementType: EltTy, NumElements: CurrentCtors.size());
66 Constant *NewInit = ConstantArray::get(T: AT, V: CurrentCtors);
67
68 // Create the new global variable and replace all uses of
69 // the old global variable with the new one.
70 (void)new GlobalVariable(M, NewInit->getType(), false,
71 GlobalValue::AppendingLinkage, NewInit, ArrayName);
72}
73
74void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
75 appendToGlobalArray(ArrayName: "llvm.global_ctors", M, F, Priority, Data);
76}
77
78void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
79 appendToGlobalArray(ArrayName: "llvm.global_dtors", M, F, Priority, Data);
80}
81
82static void collectUsedGlobals(GlobalVariable *GV,
83 SmallSetVector<Constant *, 16> &Init) {
84 if (!GV || !GV->hasInitializer())
85 return;
86
87 auto *CA = cast<ConstantArray>(Val: GV->getInitializer());
88 for (Use &Op : CA->operands())
89 Init.insert(X: cast<Constant>(Val&: Op));
90}
91
92static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
93 GlobalVariable *GV = M.getGlobalVariable(Name);
94
95 SmallSetVector<Constant *, 16> Init;
96 collectUsedGlobals(GV, Init);
97 if (GV)
98 GV->eraseFromParent();
99
100 Type *ArrayEltTy = llvm::PointerType::getUnqual(C&: M.getContext());
101 for (auto *V : Values)
102 Init.insert(X: ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: V, Ty: ArrayEltTy));
103
104 if (Init.empty())
105 return;
106
107 ArrayType *ATy = ArrayType::get(ElementType: ArrayEltTy, NumElements: Init.size());
108 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
109 ConstantArray::get(T: ATy, V: Init.getArrayRef()),
110 Name);
111 GV->setSection("llvm.metadata");
112}
113
114void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
115 appendToUsedList(M, Name: "llvm.used", Values);
116}
117
118void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
119 appendToUsedList(M, Name: "llvm.compiler.used", Values);
120}
121
122static void removeFromUsedList(Module &M, StringRef Name,
123 function_ref<bool(Constant *)> ShouldRemove) {
124 GlobalVariable *GV = M.getNamedGlobal(Name);
125 if (!GV)
126 return;
127
128 SmallSetVector<Constant *, 16> Init;
129 collectUsedGlobals(GV, Init);
130
131 Type *ArrayEltTy = cast<ArrayType>(Val: GV->getValueType())->getElementType();
132
133 SmallVector<Constant *, 16> NewInit;
134 for (Constant *MaybeRemoved : Init) {
135 if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
136 NewInit.push_back(Elt: MaybeRemoved);
137 }
138
139 if (!NewInit.empty()) {
140 ArrayType *ATy = ArrayType::get(ElementType: ArrayEltTy, NumElements: NewInit.size());
141 GlobalVariable *NewGV =
142 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
143 ConstantArray::get(T: ATy, V: NewInit), "", GV,
144 GV->getThreadLocalMode(), GV->getAddressSpace());
145 NewGV->setSection(GV->getSection());
146 NewGV->takeName(V: GV);
147 }
148
149 GV->eraseFromParent();
150}
151
152void llvm::removeFromUsedLists(Module &M,
153 function_ref<bool(Constant *)> ShouldRemove) {
154 removeFromUsedList(M, Name: "llvm.used", ShouldRemove);
155 removeFromUsedList(M, Name: "llvm.compiler.used", ShouldRemove);
156}
157
158void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
159 if (!M.getModuleFlag(Key: "kcfi"))
160 return;
161 // Matches CodeGenModule::CreateKCFITypeId in Clang.
162 LLVMContext &Ctx = M.getContext();
163 MDBuilder MDB(Ctx);
164 F.setMetadata(
165 KindID: LLVMContext::MD_kcfi_type,
166 Node: MDNode::get(Context&: Ctx, MDs: MDB.createConstant(C: ConstantInt::get(
167 Ty: Type::getInt32Ty(C&: Ctx),
168 V: static_cast<uint32_t>(xxHash64(Data: MangledType))))));
169 // If the module was compiled with -fpatchable-function-entry, ensure
170 // we use the same patchable-function-prefix.
171 if (auto *MD = mdconst::extract_or_null<ConstantInt>(
172 MD: M.getModuleFlag(Key: "kcfi-offset"))) {
173 if (unsigned Offset = MD->getZExtValue())
174 F.addFnAttr(Kind: "patchable-function-prefix", Val: std::to_string(val: Offset));
175 }
176}
177
178FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
179 ArrayRef<Type *> InitArgTypes,
180 bool Weak) {
181 assert(!InitName.empty() && "Expected init function name");
182 auto *VoidTy = Type::getVoidTy(C&: M.getContext());
183 auto *FnTy = FunctionType::get(Result: VoidTy, Params: InitArgTypes, isVarArg: false);
184 auto FnCallee = M.getOrInsertFunction(Name: InitName, T: FnTy);
185 auto *Fn = cast<Function>(Val: FnCallee.getCallee());
186 if (Weak && Fn->isDeclaration())
187 Fn->setLinkage(Function::ExternalWeakLinkage);
188 return FnCallee;
189}
190
191Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
192 Function *Ctor = Function::createWithDefaultAttr(
193 Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), isVarArg: false),
194 Linkage: GlobalValue::InternalLinkage, AddrSpace: M.getDataLayout().getProgramAddressSpace(),
195 N: CtorName, M: &M);
196 Ctor->addFnAttr(Kind: Attribute::NoUnwind);
197 setKCFIType(M, F&: *Ctor, MangledType: "_ZTSFvvE"); // void (*)(void)
198 BasicBlock *CtorBB = BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: Ctor);
199 ReturnInst::Create(C&: M.getContext(), InsertAtEnd: CtorBB);
200 // Ensure Ctor cannot be discarded, even if in a comdat.
201 appendToUsed(M, Values: {Ctor});
202 return Ctor;
203}
204
205std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
206 Module &M, StringRef CtorName, StringRef InitName,
207 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
208 StringRef VersionCheckName, bool Weak) {
209 assert(!InitName.empty() && "Expected init function name");
210 assert(InitArgs.size() == InitArgTypes.size() &&
211 "Sanitizer's init function expects different number of arguments");
212 FunctionCallee InitFunction =
213 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
214 Function *Ctor = createSanitizerCtor(M, CtorName);
215 IRBuilder<> IRB(M.getContext());
216
217 BasicBlock *RetBB = &Ctor->getEntryBlock();
218 if (Weak) {
219 RetBB->setName("ret");
220 auto *EntryBB = BasicBlock::Create(Context&: M.getContext(), Name: "entry", Parent: Ctor, InsertBefore: RetBB);
221 auto *CallInitBB =
222 BasicBlock::Create(Context&: M.getContext(), Name: "callfunc", Parent: Ctor, InsertBefore: RetBB);
223 auto *InitFn = cast<Function>(Val: InitFunction.getCallee());
224 auto *InitFnPtr =
225 PointerType::get(ElementType: InitFn->getType(), AddressSpace: InitFn->getAddressSpace());
226 IRB.SetInsertPoint(EntryBB);
227 Value *InitNotNull =
228 IRB.CreateICmpNE(LHS: InitFn, RHS: ConstantPointerNull::get(T: InitFnPtr));
229 IRB.CreateCondBr(Cond: InitNotNull, True: CallInitBB, False: RetBB);
230 IRB.SetInsertPoint(CallInitBB);
231 } else {
232 IRB.SetInsertPoint(RetBB->getTerminator());
233 }
234
235 IRB.CreateCall(Callee: InitFunction, Args: InitArgs);
236 if (!VersionCheckName.empty()) {
237 FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
238 Name: VersionCheckName, T: FunctionType::get(Result: IRB.getVoidTy(), Params: {}, isVarArg: false),
239 AttributeList: AttributeList());
240 IRB.CreateCall(Callee: VersionCheckFunction, Args: {});
241 }
242
243 if (Weak)
244 IRB.CreateBr(Dest: RetBB);
245
246 return std::make_pair(x&: Ctor, y&: InitFunction);
247}
248
249std::pair<Function *, FunctionCallee>
250llvm::getOrCreateSanitizerCtorAndInitFunctions(
251 Module &M, StringRef CtorName, StringRef InitName,
252 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
253 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
254 StringRef VersionCheckName, bool Weak) {
255 assert(!CtorName.empty() && "Expected ctor function name");
256
257 if (Function *Ctor = M.getFunction(Name: CtorName))
258 // FIXME: Sink this logic into the module, similar to the handling of
259 // globals. This will make moving to a concurrent model much easier.
260 if (Ctor->arg_empty() ||
261 Ctor->getReturnType() == Type::getVoidTy(C&: M.getContext()))
262 return {Ctor,
263 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
264
265 Function *Ctor;
266 FunctionCallee InitFunction;
267 std::tie(args&: Ctor, args&: InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
268 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
269 FunctionsCreatedCallback(Ctor, InitFunction);
270 return std::make_pair(x&: Ctor, y&: InitFunction);
271}
272
273void llvm::filterDeadComdatFunctions(
274 SmallVectorImpl<Function *> &DeadComdatFunctions) {
275 SmallPtrSet<Function *, 32> MaybeDeadFunctions;
276 SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
277 for (Function *F : DeadComdatFunctions) {
278 MaybeDeadFunctions.insert(Ptr: F);
279 if (Comdat *C = F->getComdat())
280 MaybeDeadComdats.insert(Ptr: C);
281 }
282
283 // Find comdats for which all users are dead now.
284 SmallPtrSet<Comdat *, 32> DeadComdats;
285 for (Comdat *C : MaybeDeadComdats) {
286 auto IsUserDead = [&](GlobalObject *GO) {
287 auto *F = dyn_cast<Function>(Val: GO);
288 return F && MaybeDeadFunctions.contains(Ptr: F);
289 };
290 if (all_of(Range: C->getUsers(), P: IsUserDead))
291 DeadComdats.insert(Ptr: C);
292 }
293
294 // Only keep functions which have no comdat or a dead comdat.
295 erase_if(C&: DeadComdatFunctions, P: [&](Function *F) {
296 Comdat *C = F->getComdat();
297 return C && !DeadComdats.contains(Ptr: C);
298 });
299}
300
301std::string llvm::getUniqueModuleId(Module *M) {
302 MD5 Md5;
303 bool ExportsSymbols = false;
304 auto AddGlobal = [&](GlobalValue &GV) {
305 if (GV.isDeclaration() || GV.getName().starts_with(Prefix: "llvm.") ||
306 !GV.hasExternalLinkage() || GV.hasComdat())
307 return;
308 ExportsSymbols = true;
309 Md5.update(Str: GV.getName());
310 Md5.update(Data: ArrayRef<uint8_t>{0});
311 };
312
313 for (auto &F : *M)
314 AddGlobal(F);
315 for (auto &GV : M->globals())
316 AddGlobal(GV);
317 for (auto &GA : M->aliases())
318 AddGlobal(GA);
319 for (auto &IF : M->ifuncs())
320 AddGlobal(IF);
321
322 if (!ExportsSymbols)
323 return "";
324
325 MD5::MD5Result R;
326 Md5.final(Result&: R);
327
328 SmallString<32> Str;
329 MD5::stringifyResult(Result&: R, Str);
330 return ("." + Str).str();
331}
332
333void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
334 StringRef SectionName, Align Alignment) {
335 // Embed the memory buffer into the module.
336 Constant *ModuleConstant = ConstantDataArray::get(
337 Context&: M.getContext(), Elts: ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
338 GlobalVariable *GV = new GlobalVariable(
339 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
340 ModuleConstant, "llvm.embedded.object");
341 GV->setSection(SectionName);
342 GV->setAlignment(Alignment);
343
344 LLVMContext &Ctx = M.getContext();
345 NamedMDNode *MD = M.getOrInsertNamedMetadata(Name: "llvm.embedded.objects");
346 Metadata *MDVals[] = {ConstantAsMetadata::get(C: GV),
347 MDString::get(Context&: Ctx, Str: SectionName)};
348
349 MD->addOperand(M: llvm::MDNode::get(Context&: Ctx, MDs: MDVals));
350 GV->setMetadata(KindID: LLVMContext::MD_exclude, Node: llvm::MDNode::get(Context&: Ctx, MDs: {}));
351
352 appendToCompilerUsed(M, Values: GV);
353}
354
355bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
356 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
357 SmallVector<GlobalIFunc *, 32> AllIFuncs;
358 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
359 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
360 for (GlobalIFunc &GI : M.ifuncs())
361 AllIFuncs.push_back(Elt: &GI);
362 IFuncsToLower = AllIFuncs;
363 }
364
365 bool UnhandledUsers = false;
366 LLVMContext &Ctx = M.getContext();
367 const DataLayout &DL = M.getDataLayout();
368
369 PointerType *TableEntryTy =
370 PointerType::get(C&: Ctx, AddressSpace: DL.getProgramAddressSpace());
371
372 ArrayType *FuncPtrTableTy =
373 ArrayType::get(ElementType: TableEntryTy, NumElements: IFuncsToLower.size());
374
375 Align PtrAlign = DL.getABITypeAlign(Ty: TableEntryTy);
376
377 // Create a global table of function pointers we'll initialize in a global
378 // constructor.
379 auto *FuncPtrTable = new GlobalVariable(
380 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
381 PoisonValue::get(T: FuncPtrTableTy), "", nullptr,
382 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
383 FuncPtrTable->setAlignment(PtrAlign);
384
385 // Create a function to initialize the function pointer table.
386 Function *NewCtor = Function::Create(
387 Ty: FunctionType::get(Result: Type::getVoidTy(C&: Ctx), isVarArg: false), Linkage: Function::InternalLinkage,
388 AddrSpace: DL.getProgramAddressSpace(), N: "", M: &M);
389
390 BasicBlock *BB = BasicBlock::Create(Context&: Ctx, Name: "", Parent: NewCtor);
391 IRBuilder<> InitBuilder(BB);
392
393 size_t TableIndex = 0;
394 for (GlobalIFunc *GI : IFuncsToLower) {
395 Function *ResolvedFunction = GI->getResolverFunction();
396
397 // We don't know what to pass to a resolver function taking arguments
398 //
399 // FIXME: Is this even valid? clang and gcc don't complain but this
400 // probably should be invalid IR. We could just pass through undef.
401 if (!std::empty(cont: ResolvedFunction->getFunctionType()->params())) {
402 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
403 << ResolvedFunction->getName() << " with parameters\n");
404 UnhandledUsers = true;
405 continue;
406 }
407
408 // Initialize the function pointer table.
409 CallInst *ResolvedFunc = InitBuilder.CreateCall(Callee: ResolvedFunction);
410 Value *Casted = InitBuilder.CreatePointerCast(V: ResolvedFunc, DestTy: TableEntryTy);
411 Constant *GEP = cast<Constant>(Val: InitBuilder.CreateConstInBoundsGEP2_32(
412 Ty: FuncPtrTableTy, Ptr: FuncPtrTable, Idx0: 0, Idx1: TableIndex++));
413 InitBuilder.CreateAlignedStore(Val: Casted, Ptr: GEP, Align: PtrAlign);
414
415 // Update all users to load a pointer from the global table.
416 for (User *User : make_early_inc_range(Range: GI->users())) {
417 Instruction *UserInst = dyn_cast<Instruction>(Val: User);
418 if (!UserInst) {
419 // TODO: Should handle constantexpr casts in user instructions. Probably
420 // can't do much about constant initializers.
421 UnhandledUsers = true;
422 continue;
423 }
424
425 IRBuilder<> UseBuilder(UserInst);
426 LoadInst *ResolvedTarget =
427 UseBuilder.CreateAlignedLoad(Ty: TableEntryTy, Ptr: GEP, Align: PtrAlign);
428 Value *ResolvedCast =
429 UseBuilder.CreatePointerCast(V: ResolvedTarget, DestTy: GI->getType());
430 UserInst->replaceUsesOfWith(From: GI, To: ResolvedCast);
431 }
432
433 // If we handled all users, erase the ifunc.
434 if (GI->use_empty())
435 GI->eraseFromParent();
436 }
437
438 InitBuilder.CreateRetVoid();
439
440 PointerType *ConstantDataTy = PointerType::get(C&: Ctx, AddressSpace: 0);
441
442 // TODO: Is this the right priority? Probably should be before any other
443 // constructors?
444 const int Priority = 10;
445 appendToGlobalCtors(M, F: NewCtor, Priority,
446 Data: ConstantPointerNull::get(T: ConstantDataTy));
447 return UnhandledUsers;
448}
449