1 | //===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interface to a pass that merges duplicate global |
10 | // constants together into a single constant that is shared. This is useful |
11 | // because some passes (ie TraceValues) insert a lot of string constants into |
12 | // the program, regardless of whether or not an existing string is available. |
13 | // |
14 | // Algorithm: ConstantMerge is designed to build up a map of available constants |
15 | // and eliminate duplicates when it is initialized. |
16 | // |
17 | //===----------------------------------------------------------------------===// |
18 | |
19 | #include "llvm/Transforms/IPO/ConstantMerge.h" |
20 | #include "llvm/ADT/DenseMap.h" |
21 | #include "llvm/ADT/SmallPtrSet.h" |
22 | #include "llvm/ADT/SmallVector.h" |
23 | #include "llvm/ADT/Statistic.h" |
24 | #include "llvm/IR/Constants.h" |
25 | #include "llvm/IR/DataLayout.h" |
26 | #include "llvm/IR/DerivedTypes.h" |
27 | #include "llvm/IR/GlobalValue.h" |
28 | #include "llvm/IR/GlobalVariable.h" |
29 | #include "llvm/IR/LLVMContext.h" |
30 | #include "llvm/IR/Module.h" |
31 | #include "llvm/Support/Casting.h" |
32 | #include "llvm/Support/Debug.h" |
33 | #include "llvm/Transforms/IPO.h" |
34 | #include <algorithm> |
35 | #include <cassert> |
36 | #include <utility> |
37 | |
38 | using namespace llvm; |
39 | |
40 | #define DEBUG_TYPE "constmerge" |
41 | |
42 | STATISTIC(NumIdenticalMerged, "Number of identical global constants merged" ); |
43 | |
44 | /// Find values that are marked as llvm.used. |
45 | static void FindUsedValues(GlobalVariable *LLVMUsed, |
46 | SmallPtrSetImpl<const GlobalValue*> &UsedValues) { |
47 | if (!LLVMUsed) return; |
48 | ConstantArray *Inits = cast<ConstantArray>(Val: LLVMUsed->getInitializer()); |
49 | |
50 | for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { |
51 | Value *Operand = Inits->getOperand(i_nocapture: i)->stripPointerCasts(); |
52 | GlobalValue *GV = cast<GlobalValue>(Val: Operand); |
53 | UsedValues.insert(Ptr: GV); |
54 | } |
55 | } |
56 | |
57 | // True if A is better than B. |
58 | static bool IsBetterCanonical(const GlobalVariable &A, |
59 | const GlobalVariable &B) { |
60 | if (!A.hasLocalLinkage() && B.hasLocalLinkage()) |
61 | return true; |
62 | |
63 | if (A.hasLocalLinkage() && !B.hasLocalLinkage()) |
64 | return false; |
65 | |
66 | return A.hasGlobalUnnamedAddr(); |
67 | } |
68 | |
69 | static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) { |
70 | SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; |
71 | GV->getAllMetadata(MDs); |
72 | for (const auto &V : MDs) |
73 | if (V.first != LLVMContext::MD_dbg) |
74 | return true; |
75 | return false; |
76 | } |
77 | |
78 | static void copyDebugLocMetadata(const GlobalVariable *From, |
79 | GlobalVariable *To) { |
80 | SmallVector<DIGlobalVariableExpression *, 1> MDs; |
81 | From->getDebugInfo(GVs&: MDs); |
82 | for (auto *MD : MDs) |
83 | To->addDebugInfo(GV: MD); |
84 | } |
85 | |
86 | static Align getAlign(GlobalVariable *GV) { |
87 | return GV->getAlign().value_or( |
88 | u: GV->getDataLayout().getPreferredAlign(GV)); |
89 | } |
90 | |
91 | static bool |
92 | isUnmergeableGlobal(GlobalVariable *GV, |
93 | const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) { |
94 | // Only process constants with initializers in the default address space. |
95 | return !GV->isConstant() || !GV->hasDefinitiveInitializer() || |
96 | GV->getType()->getAddressSpace() != 0 || GV->hasSection() || |
97 | // Don't touch thread-local variables. |
98 | GV->isThreadLocal() || |
99 | // Don't touch values marked with attribute(used). |
100 | UsedGlobals.count(Ptr: GV); |
101 | } |
102 | |
103 | enum class CanMerge { No, Yes }; |
104 | static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) { |
105 | if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr()) |
106 | return CanMerge::No; |
107 | if (hasMetadataOtherThanDebugLoc(GV: Old)) |
108 | return CanMerge::No; |
109 | assert(!hasMetadataOtherThanDebugLoc(New)); |
110 | if (!Old->hasGlobalUnnamedAddr()) |
111 | New->setUnnamedAddr(GlobalValue::UnnamedAddr::None); |
112 | return CanMerge::Yes; |
113 | } |
114 | |
115 | static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { |
116 | Constant *NewConstant = New; |
117 | |
118 | LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @" |
119 | << New->getName() << "\n" ); |
120 | |
121 | // Bump the alignment if necessary. |
122 | if (Old->getAlign() || New->getAlign()) |
123 | New->setAlignment(std::max(a: getAlign(GV: Old), b: getAlign(GV: New))); |
124 | |
125 | copyDebugLocMetadata(From: Old, To: New); |
126 | Old->replaceAllUsesWith(V: NewConstant); |
127 | |
128 | // Delete the global value from the module. |
129 | assert(Old->hasLocalLinkage() && |
130 | "Refusing to delete an externally visible global variable." ); |
131 | Old->eraseFromParent(); |
132 | } |
133 | |
134 | static bool mergeConstants(Module &M) { |
135 | // Find all the globals that are marked "used". These cannot be merged. |
136 | SmallPtrSet<const GlobalValue*, 8> UsedGlobals; |
137 | FindUsedValues(LLVMUsed: M.getGlobalVariable(Name: "llvm.used" ), UsedValues&: UsedGlobals); |
138 | FindUsedValues(LLVMUsed: M.getGlobalVariable(Name: "llvm.compiler.used" ), UsedValues&: UsedGlobals); |
139 | |
140 | // Map unique constants to globals. |
141 | DenseMap<Constant *, GlobalVariable *> CMap; |
142 | |
143 | SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32> |
144 | SameContentReplacements; |
145 | |
146 | size_t ChangesMade = 0; |
147 | size_t OldChangesMade = 0; |
148 | |
149 | // Iterate constant merging while we are still making progress. Merging two |
150 | // constants together may allow us to merge other constants together if the |
151 | // second level constants have initializers which point to the globals that |
152 | // were just merged. |
153 | while (true) { |
154 | // Find the canonical constants others will be merged with. |
155 | for (GlobalVariable &GV : llvm::make_early_inc_range(Range: M.globals())) { |
156 | // If this GV is dead, remove it. |
157 | GV.removeDeadConstantUsers(); |
158 | if (GV.use_empty() && GV.hasLocalLinkage()) { |
159 | GV.eraseFromParent(); |
160 | ++ChangesMade; |
161 | continue; |
162 | } |
163 | |
164 | if (isUnmergeableGlobal(GV: &GV, UsedGlobals)) |
165 | continue; |
166 | |
167 | // This transformation is legal for weak ODR globals in the sense it |
168 | // doesn't change semantics, but we really don't want to perform it |
169 | // anyway; it's likely to pessimize code generation, and some tools |
170 | // (like the Darwin linker in cases involving CFString) don't expect it. |
171 | if (GV.isWeakForLinker()) |
172 | continue; |
173 | |
174 | // Don't touch globals with metadata other then !dbg. |
175 | if (hasMetadataOtherThanDebugLoc(GV: &GV)) |
176 | continue; |
177 | |
178 | Constant *Init = GV.getInitializer(); |
179 | |
180 | // Check to see if the initializer is already known. |
181 | GlobalVariable *&Slot = CMap[Init]; |
182 | |
183 | // If this is the first constant we find or if the old one is local, |
184 | // replace with the current one. If the current is externally visible |
185 | // it cannot be replace, but can be the canonical constant we merge with. |
186 | bool FirstConstantFound = !Slot; |
187 | if (FirstConstantFound || IsBetterCanonical(A: GV, B: *Slot)) { |
188 | Slot = &GV; |
189 | LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName() |
190 | << (FirstConstantFound ? "\n" : " (updated)\n" )); |
191 | } |
192 | } |
193 | |
194 | // Identify all globals that can be merged together, filling in the |
195 | // SameContentReplacements vector. We cannot do the replacement in this pass |
196 | // because doing so may cause initializers of other globals to be rewritten, |
197 | // invalidating the Constant* pointers in CMap. |
198 | for (GlobalVariable &GV : llvm::make_early_inc_range(Range: M.globals())) { |
199 | if (isUnmergeableGlobal(GV: &GV, UsedGlobals)) |
200 | continue; |
201 | |
202 | // We can only replace constant with local linkage. |
203 | if (!GV.hasLocalLinkage()) |
204 | continue; |
205 | |
206 | Constant *Init = GV.getInitializer(); |
207 | |
208 | // Check to see if the initializer is already known. |
209 | auto Found = CMap.find(Val: Init); |
210 | if (Found == CMap.end()) |
211 | continue; |
212 | |
213 | GlobalVariable *Slot = Found->second; |
214 | if (Slot == &GV) |
215 | continue; |
216 | |
217 | if (makeMergeable(Old: &GV, New: Slot) == CanMerge::No) |
218 | continue; |
219 | |
220 | // Make all uses of the duplicate constant use the canonical version. |
221 | LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @" |
222 | << Slot->getName() << "\n" ); |
223 | SameContentReplacements.push_back(Elt: std::make_pair(x: &GV, y&: Slot)); |
224 | } |
225 | |
226 | // Now that we have figured out which replacements must be made, do them all |
227 | // now. This avoid invalidating the pointers in CMap, which are unneeded |
228 | // now. |
229 | for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) { |
230 | GlobalVariable *Old = SameContentReplacements[i].first; |
231 | GlobalVariable *New = SameContentReplacements[i].second; |
232 | replace(M, Old, New); |
233 | ++ChangesMade; |
234 | ++NumIdenticalMerged; |
235 | } |
236 | |
237 | if (ChangesMade == OldChangesMade) |
238 | break; |
239 | OldChangesMade = ChangesMade; |
240 | |
241 | SameContentReplacements.clear(); |
242 | CMap.clear(); |
243 | } |
244 | |
245 | return ChangesMade; |
246 | } |
247 | |
248 | PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) { |
249 | if (!mergeConstants(M)) |
250 | return PreservedAnalyses::all(); |
251 | return PreservedAnalyses::none(); |
252 | } |
253 | |