1//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This family of functions performs manipulations on Modules.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Transforms/Utils/ModuleUtils.h"
14#include "llvm/ADT/SmallString.h"
15#include "llvm/Analysis/VectorUtils.h"
16#include "llvm/IR/DerivedTypes.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/MDBuilder.h"
20#include "llvm/IR/Module.h"
21#include "llvm/Support/Casting.h"
22#include "llvm/Support/Hash.h"
23#include "llvm/Support/MD5.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "moduleutils"
29
30static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
31 int Priority, Constant *Data) {
32 IRBuilder<> IRB(M.getContext());
33
34 // Get the current set of static global constructors and add the new ctor
35 // to the list.
36 SmallVector<Constant *, 16> CurrentCtors;
37 StructType *EltTy;
38 if (GlobalVariable *GVCtor = M.getNamedGlobal(Name: ArrayName)) {
39 EltTy = cast<StructType>(Val: GVCtor->getValueType()->getArrayElementType());
40 if (Constant *Init = GVCtor->getInitializer()) {
41 unsigned n = Init->getNumOperands();
42 CurrentCtors.reserve(N: n + 1);
43 for (unsigned i = 0; i != n; ++i)
44 CurrentCtors.push_back(Elt: cast<Constant>(Val: Init->getOperand(i)));
45 }
46 GVCtor->eraseFromParent();
47 } else {
48 EltTy = StructType::get(
49 elt1: IRB.getInt32Ty(),
50 elts: PointerType::get(C&: M.getContext(), AddressSpace: F->getAddressSpace()), elts: IRB.getPtrTy());
51 }
52
53 // Build a 3 field global_ctor entry. We don't take a comdat key.
54 Constant *CSVals[3];
55 CSVals[0] = IRB.getInt32(C: Priority);
56 CSVals[1] = F;
57 CSVals[2] = Data ? ConstantExpr::getPointerCast(C: Data, Ty: IRB.getPtrTy())
58 : Constant::getNullValue(Ty: IRB.getPtrTy());
59 Constant *RuntimeCtorInit =
60 ConstantStruct::get(T: EltTy, V: ArrayRef(CSVals, EltTy->getNumElements()));
61
62 CurrentCtors.push_back(Elt: RuntimeCtorInit);
63
64 // Create a new initializer.
65 ArrayType *AT = ArrayType::get(ElementType: EltTy, NumElements: CurrentCtors.size());
66 Constant *NewInit = ConstantArray::get(T: AT, V: CurrentCtors);
67
68 // Create the new global variable and replace all uses of
69 // the old global variable with the new one.
70 (void)new GlobalVariable(M, NewInit->getType(), false,
71 GlobalValue::AppendingLinkage, NewInit, ArrayName);
72}
73
74void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
75 appendToGlobalArray(ArrayName: "llvm.global_ctors", M, F, Priority, Data);
76}
77
78void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
79 appendToGlobalArray(ArrayName: "llvm.global_dtors", M, F, Priority, Data);
80}
81
82static void transformGlobalArray(StringRef ArrayName, Module &M,
83 const GlobalCtorTransformFn &Fn) {
84 GlobalVariable *GVCtor = M.getNamedGlobal(Name: ArrayName);
85 if (!GVCtor)
86 return;
87
88 IRBuilder<> IRB(M.getContext());
89 SmallVector<Constant *, 16> CurrentCtors;
90 bool Changed = false;
91 StructType *EltTy =
92 cast<StructType>(Val: GVCtor->getValueType()->getArrayElementType());
93 if (Constant *Init = GVCtor->getInitializer()) {
94 CurrentCtors.reserve(N: Init->getNumOperands());
95 for (Value *OP : Init->operands()) {
96 Constant *C = cast<Constant>(Val: OP);
97 Constant *NewC = Fn(C);
98 Changed |= (!NewC || NewC != C);
99 if (NewC)
100 CurrentCtors.push_back(Elt: NewC);
101 }
102 }
103 if (!Changed)
104 return;
105
106 GVCtor->eraseFromParent();
107
108 // Create a new initializer.
109 ArrayType *AT = ArrayType::get(ElementType: EltTy, NumElements: CurrentCtors.size());
110 Constant *NewInit = ConstantArray::get(T: AT, V: CurrentCtors);
111
112 // Create the new global variable and replace all uses of
113 // the old global variable with the new one.
114 (void)new GlobalVariable(M, NewInit->getType(), false,
115 GlobalValue::AppendingLinkage, NewInit, ArrayName);
116}
117
118void llvm::transformGlobalCtors(Module &M, const GlobalCtorTransformFn &Fn) {
119 transformGlobalArray(ArrayName: "llvm.global_ctors", M, Fn);
120}
121
122void llvm::transformGlobalDtors(Module &M, const GlobalCtorTransformFn &Fn) {
123 transformGlobalArray(ArrayName: "llvm.global_dtors", M, Fn);
124}
125
126static void collectUsedGlobals(GlobalVariable *GV,
127 SmallSetVector<Constant *, 16> &Init) {
128 if (!GV || !GV->hasInitializer())
129 return;
130
131 auto *CA = cast<ConstantArray>(Val: GV->getInitializer());
132 for (Use &Op : CA->operands())
133 Init.insert(X: cast<Constant>(Val&: Op));
134}
135
136static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
137 GlobalVariable *GV = M.getGlobalVariable(Name);
138
139 SmallSetVector<Constant *, 16> Init;
140 collectUsedGlobals(GV, Init);
141 if (GV)
142 GV->eraseFromParent();
143
144 Type *ArrayEltTy = llvm::PointerType::getUnqual(C&: M.getContext());
145 for (auto *V : Values)
146 Init.insert(X: ConstantExpr::getPointerBitCastOrAddrSpaceCast(C: V, Ty: ArrayEltTy));
147
148 if (Init.empty())
149 return;
150
151 ArrayType *ATy = ArrayType::get(ElementType: ArrayEltTy, NumElements: Init.size());
152 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
153 ConstantArray::get(T: ATy, V: Init.getArrayRef()),
154 Name);
155 GV->setSection("llvm.metadata");
156}
157
158void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
159 appendToUsedList(M, Name: "llvm.used", Values);
160}
161
162void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
163 appendToUsedList(M, Name: "llvm.compiler.used", Values);
164}
165
166static void removeFromUsedList(Module &M, StringRef Name,
167 function_ref<bool(Constant *)> ShouldRemove) {
168 GlobalVariable *GV = M.getNamedGlobal(Name);
169 if (!GV)
170 return;
171
172 SmallSetVector<Constant *, 16> Init;
173 collectUsedGlobals(GV, Init);
174
175 Type *ArrayEltTy = cast<ArrayType>(Val: GV->getValueType())->getElementType();
176
177 SmallVector<Constant *, 16> NewInit;
178 for (Constant *MaybeRemoved : Init) {
179 if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
180 NewInit.push_back(Elt: MaybeRemoved);
181 }
182
183 if (!NewInit.empty()) {
184 ArrayType *ATy = ArrayType::get(ElementType: ArrayEltTy, NumElements: NewInit.size());
185 GlobalVariable *NewGV =
186 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
187 ConstantArray::get(T: ATy, V: NewInit), "", GV,
188 GV->getThreadLocalMode(), GV->getAddressSpace());
189 NewGV->setSection(GV->getSection());
190 NewGV->takeName(V: GV);
191 }
192
193 GV->eraseFromParent();
194}
195
196void llvm::removeFromUsedLists(Module &M,
197 function_ref<bool(Constant *)> ShouldRemove) {
198 removeFromUsedList(M, Name: "llvm.used", ShouldRemove);
199 removeFromUsedList(M, Name: "llvm.compiler.used", ShouldRemove);
200}
201
202void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
203 if (!M.getModuleFlag(Key: "kcfi"))
204 return;
205 // Matches CodeGenModule::CreateKCFITypeId in Clang.
206 LLVMContext &Ctx = M.getContext();
207 MDBuilder MDB(Ctx);
208 std::string Type = MangledType.str();
209 if (M.getModuleFlag(Key: "cfi-normalize-integers"))
210 Type += ".normalized";
211
212 // Determine which hash algorithm to use
213 auto *MD = dyn_cast_or_null<MDString>(Val: M.getModuleFlag(Key: "kcfi-hash"));
214 KCFIHashAlgorithm Algorithm =
215 parseKCFIHashAlgorithm(Name: MD ? MD->getString() : "");
216
217 F.setMetadata(KindID: LLVMContext::MD_kcfi_type,
218 Node: MDNode::get(Context&: Ctx, MDs: MDB.createConstant(C: ConstantInt::get(
219 Ty: Type::getInt32Ty(C&: Ctx),
220 V: getKCFITypeID(MangledTypeName: Type, Algorithm)))));
221 // If the module was compiled with -fpatchable-function-entry, ensure
222 // we use the same patchable-function-prefix.
223 if (auto *MD = mdconst::extract_or_null<ConstantInt>(
224 MD: M.getModuleFlag(Key: "kcfi-offset"))) {
225 if (unsigned Offset = MD->getZExtValue())
226 F.addFnAttr(Kind: "patchable-function-prefix", Val: std::to_string(val: Offset));
227 }
228}
229
230FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
231 ArrayRef<Type *> InitArgTypes,
232 bool Weak) {
233 assert(!InitName.empty() && "Expected init function name");
234 auto *VoidTy = Type::getVoidTy(C&: M.getContext());
235 auto *FnTy = FunctionType::get(Result: VoidTy, Params: InitArgTypes, isVarArg: false);
236 auto FnCallee = M.getOrInsertFunction(Name: InitName, T: FnTy);
237 auto *Fn = cast<Function>(Val: FnCallee.getCallee());
238 if (Weak && Fn->isDeclaration())
239 Fn->setLinkage(Function::ExternalWeakLinkage);
240 return FnCallee;
241}
242
243Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
244 Function *Ctor = Function::createWithDefaultAttr(
245 Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()), isVarArg: false),
246 Linkage: GlobalValue::InternalLinkage, AddrSpace: M.getDataLayout().getProgramAddressSpace(),
247 N: CtorName, M: &M);
248 Ctor->addFnAttr(Kind: Attribute::NoUnwind);
249 setKCFIType(M, F&: *Ctor, MangledType: "_ZTSFvvE"); // void (*)(void)
250 BasicBlock *CtorBB = BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: Ctor);
251 ReturnInst::Create(C&: M.getContext(), InsertAtEnd: CtorBB);
252 // Ensure Ctor cannot be discarded, even if in a comdat.
253 appendToUsed(M, Values: {Ctor});
254 return Ctor;
255}
256
257std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
258 Module &M, StringRef CtorName, StringRef InitName,
259 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
260 StringRef VersionCheckName, bool Weak) {
261 assert(!InitName.empty() && "Expected init function name");
262 assert(InitArgs.size() == InitArgTypes.size() &&
263 "Sanitizer's init function expects different number of arguments");
264 FunctionCallee InitFunction =
265 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
266 Function *Ctor = createSanitizerCtor(M, CtorName);
267 IRBuilder<> IRB(M.getContext());
268
269 BasicBlock *RetBB = &Ctor->getEntryBlock();
270 if (Weak) {
271 RetBB->setName("ret");
272 auto *EntryBB = BasicBlock::Create(Context&: M.getContext(), Name: "entry", Parent: Ctor, InsertBefore: RetBB);
273 auto *CallInitBB =
274 BasicBlock::Create(Context&: M.getContext(), Name: "callfunc", Parent: Ctor, InsertBefore: RetBB);
275 auto *InitFn = cast<Function>(Val: InitFunction.getCallee());
276 auto *InitFnPtr =
277 PointerType::get(C&: M.getContext(), AddressSpace: InitFn->getAddressSpace());
278 IRB.SetInsertPoint(EntryBB);
279 Value *InitNotNull =
280 IRB.CreateICmpNE(LHS: InitFn, RHS: ConstantPointerNull::get(T: InitFnPtr));
281 IRB.CreateCondBr(Cond: InitNotNull, True: CallInitBB, False: RetBB);
282 IRB.SetInsertPoint(CallInitBB);
283 } else {
284 IRB.SetInsertPoint(RetBB->getTerminator());
285 }
286
287 IRB.CreateCall(Callee: InitFunction, Args: InitArgs);
288 if (!VersionCheckName.empty()) {
289 FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
290 Name: VersionCheckName, T: FunctionType::get(Result: IRB.getVoidTy(), Params: {}, isVarArg: false),
291 AttributeList: AttributeList());
292 IRB.CreateCall(Callee: VersionCheckFunction, Args: {});
293 }
294
295 if (Weak)
296 IRB.CreateBr(Dest: RetBB);
297
298 return std::make_pair(x&: Ctor, y&: InitFunction);
299}
300
301std::pair<Function *, FunctionCallee>
302llvm::getOrCreateSanitizerCtorAndInitFunctions(
303 Module &M, StringRef CtorName, StringRef InitName,
304 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
305 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
306 StringRef VersionCheckName, bool Weak) {
307 assert(!CtorName.empty() && "Expected ctor function name");
308
309 if (Function *Ctor = M.getFunction(Name: CtorName))
310 // FIXME: Sink this logic into the module, similar to the handling of
311 // globals. This will make moving to a concurrent model much easier.
312 if (Ctor->arg_empty() ||
313 Ctor->getReturnType() == Type::getVoidTy(C&: M.getContext()))
314 return {Ctor,
315 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
316
317 Function *Ctor;
318 FunctionCallee InitFunction;
319 std::tie(args&: Ctor, args&: InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
320 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
321 FunctionsCreatedCallback(Ctor, InitFunction);
322 return std::make_pair(x&: Ctor, y&: InitFunction);
323}
324
325void llvm::filterDeadComdatFunctions(
326 SmallVectorImpl<Function *> &DeadComdatFunctions) {
327 SmallPtrSet<Function *, 32> MaybeDeadFunctions;
328 SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
329 for (Function *F : DeadComdatFunctions) {
330 MaybeDeadFunctions.insert(Ptr: F);
331 if (Comdat *C = F->getComdat())
332 MaybeDeadComdats.insert(Ptr: C);
333 }
334
335 // Find comdats for which all users are dead now.
336 SmallPtrSet<Comdat *, 32> DeadComdats;
337 for (Comdat *C : MaybeDeadComdats) {
338 auto IsUserDead = [&](GlobalObject *GO) {
339 auto *F = dyn_cast<Function>(Val: GO);
340 return F && MaybeDeadFunctions.contains(Ptr: F);
341 };
342 if (all_of(Range: C->getUsers(), P: IsUserDead))
343 DeadComdats.insert(Ptr: C);
344 }
345
346 // Only keep functions which have no comdat or a dead comdat.
347 erase_if(C&: DeadComdatFunctions, P: [&](Function *F) {
348 Comdat *C = F->getComdat();
349 return C && !DeadComdats.contains(Ptr: C);
350 });
351}
352
353std::string llvm::getUniqueModuleId(Module *M) {
354 MD5 Md5;
355
356 auto *UniqueSourceFileIdentifier = dyn_cast_or_null<MDNode>(
357 Val: M->getModuleFlag(Key: "Unique Source File Identifier"));
358 if (UniqueSourceFileIdentifier) {
359 Md5.update(
360 Str: cast<MDString>(Val: UniqueSourceFileIdentifier->getOperand(I: 0))->getString());
361 } else {
362 bool ExportsSymbols = false;
363 for (auto &GV : M->global_values()) {
364 if (GV.isDeclaration() || GV.getName().starts_with(Prefix: "llvm.") ||
365 !GV.hasExternalLinkage() || GV.hasComdat())
366 continue;
367 ExportsSymbols = true;
368 Md5.update(Str: GV.getName());
369 Md5.update(Data: ArrayRef<uint8_t>{0});
370 }
371
372 if (!ExportsSymbols)
373 return "";
374 }
375
376 MD5::MD5Result R;
377 Md5.final(Result&: R);
378
379 SmallString<32> Str;
380 MD5::stringifyResult(Result&: R, Str);
381 return ("." + Str).str();
382}
383
384void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
385 StringRef SectionName, Align Alignment) {
386 // Embed the memory buffer into the module.
387 Constant *ModuleConstant = ConstantDataArray::get(
388 Context&: M.getContext(), Elts: ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
389 GlobalVariable *GV = new GlobalVariable(
390 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
391 ModuleConstant, "llvm.embedded.object");
392 GV->setSection(SectionName);
393 GV->setAlignment(Alignment);
394
395 LLVMContext &Ctx = M.getContext();
396 NamedMDNode *MD = M.getOrInsertNamedMetadata(Name: "llvm.embedded.objects");
397 Metadata *MDVals[] = {ConstantAsMetadata::get(C: GV),
398 MDString::get(Context&: Ctx, Str: SectionName)};
399
400 MD->addOperand(M: llvm::MDNode::get(Context&: Ctx, MDs: MDVals));
401 GV->setMetadata(KindID: LLVMContext::MD_exclude, Node: llvm::MDNode::get(Context&: Ctx, MDs: {}));
402
403 appendToCompilerUsed(M, Values: GV);
404}
405
406bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
407 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
408 SmallVector<GlobalIFunc *, 32> AllIFuncs;
409 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
410 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
411 for (GlobalIFunc &GI : M.ifuncs())
412 AllIFuncs.push_back(Elt: &GI);
413 IFuncsToLower = AllIFuncs;
414 }
415
416 bool UnhandledUsers = false;
417 LLVMContext &Ctx = M.getContext();
418 const DataLayout &DL = M.getDataLayout();
419
420 PointerType *TableEntryTy =
421 PointerType::get(C&: Ctx, AddressSpace: DL.getProgramAddressSpace());
422
423 ArrayType *FuncPtrTableTy =
424 ArrayType::get(ElementType: TableEntryTy, NumElements: IFuncsToLower.size());
425
426 Align PtrAlign = DL.getABITypeAlign(Ty: TableEntryTy);
427
428 // Create a global table of function pointers we'll initialize in a global
429 // constructor.
430 auto *FuncPtrTable = new GlobalVariable(
431 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
432 PoisonValue::get(T: FuncPtrTableTy), "", nullptr,
433 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
434 FuncPtrTable->setAlignment(PtrAlign);
435
436 // Create a function to initialize the function pointer table.
437 Function *NewCtor = Function::Create(
438 Ty: FunctionType::get(Result: Type::getVoidTy(C&: Ctx), isVarArg: false), Linkage: Function::InternalLinkage,
439 AddrSpace: DL.getProgramAddressSpace(), N: "", M: &M);
440
441 BasicBlock *BB = BasicBlock::Create(Context&: Ctx, Name: "", Parent: NewCtor);
442 IRBuilder<> InitBuilder(BB);
443
444 size_t TableIndex = 0;
445 for (GlobalIFunc *GI : IFuncsToLower) {
446 Function *ResolvedFunction = GI->getResolverFunction();
447
448 // We don't know what to pass to a resolver function taking arguments
449 //
450 // FIXME: Is this even valid? clang and gcc don't complain but this
451 // probably should be invalid IR. We could just pass through undef.
452 if (!std::empty(cont: ResolvedFunction->getFunctionType()->params())) {
453 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
454 << ResolvedFunction->getName() << " with parameters\n");
455 UnhandledUsers = true;
456 continue;
457 }
458
459 // Initialize the function pointer table.
460 CallInst *ResolvedFunc = InitBuilder.CreateCall(Callee: ResolvedFunction);
461 Value *Casted = InitBuilder.CreatePointerCast(V: ResolvedFunc, DestTy: TableEntryTy);
462 Constant *GEP = cast<Constant>(Val: InitBuilder.CreateConstInBoundsGEP2_32(
463 Ty: FuncPtrTableTy, Ptr: FuncPtrTable, Idx0: 0, Idx1: TableIndex++));
464 InitBuilder.CreateAlignedStore(Val: Casted, Ptr: GEP, Align: PtrAlign);
465
466 // Update all users to load a pointer from the global table.
467 for (User *User : make_early_inc_range(Range: GI->users())) {
468 Instruction *UserInst = dyn_cast<Instruction>(Val: User);
469 if (!UserInst) {
470 // TODO: Should handle constantexpr casts in user instructions. Probably
471 // can't do much about constant initializers.
472 UnhandledUsers = true;
473 continue;
474 }
475
476 IRBuilder<> UseBuilder(UserInst);
477 LoadInst *ResolvedTarget =
478 UseBuilder.CreateAlignedLoad(Ty: TableEntryTy, Ptr: GEP, Align: PtrAlign);
479 Value *ResolvedCast =
480 UseBuilder.CreatePointerCast(V: ResolvedTarget, DestTy: GI->getType());
481 UserInst->replaceUsesOfWith(From: GI, To: ResolvedCast);
482 }
483
484 // If we handled all users, erase the ifunc.
485 if (GI->use_empty())
486 GI->eraseFromParent();
487 }
488
489 InitBuilder.CreateRetVoid();
490
491 PointerType *ConstantDataTy = PointerType::get(C&: Ctx, AddressSpace: 0);
492
493 // TODO: Is this the right priority? Probably should be before any other
494 // constructors?
495 const int Priority = 10;
496 appendToGlobalCtors(M, F: NewCtor, Priority,
497 Data: ConstantPointerNull::get(T: ConstantDataTy));
498 return UnhandledUsers;
499}
500