1 | #include "llvm/Transforms/Utils/VNCoercion.h" |
2 | #include "llvm/Analysis/ConstantFolding.h" |
3 | #include "llvm/Analysis/ValueTracking.h" |
4 | #include "llvm/IR/IRBuilder.h" |
5 | #include "llvm/IR/IntrinsicInst.h" |
6 | #include "llvm/Support/Debug.h" |
7 | |
8 | #define DEBUG_TYPE "vncoerce" |
9 | |
10 | namespace llvm { |
11 | namespace VNCoercion { |
12 | |
13 | static bool isFirstClassAggregateOrScalableType(Type *Ty) { |
14 | return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Val: Ty); |
15 | } |
16 | |
17 | /// Return true if coerceAvailableValueToLoadType will succeed. |
18 | bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, |
19 | const DataLayout &DL) { |
20 | Type *StoredTy = StoredVal->getType(); |
21 | |
22 | if (StoredTy == LoadTy) |
23 | return true; |
24 | |
25 | // If the loaded/stored value is a first class array/struct, or scalable type, |
26 | // don't try to transform them. We need to be able to bitcast to integer. |
27 | if (isFirstClassAggregateOrScalableType(Ty: LoadTy) || |
28 | isFirstClassAggregateOrScalableType(Ty: StoredTy)) |
29 | return false; |
30 | |
31 | uint64_t StoreSize = DL.getTypeSizeInBits(Ty: StoredTy).getFixedValue(); |
32 | |
33 | // The store size must be byte-aligned to support future type casts. |
34 | if (llvm::alignTo(Value: StoreSize, Align: 8) != StoreSize) |
35 | return false; |
36 | |
37 | // The store has to be at least as big as the load. |
38 | if (StoreSize < DL.getTypeSizeInBits(Ty: LoadTy).getFixedValue()) |
39 | return false; |
40 | |
41 | bool StoredNI = DL.isNonIntegralPointerType(Ty: StoredTy->getScalarType()); |
42 | bool LoadNI = DL.isNonIntegralPointerType(Ty: LoadTy->getScalarType()); |
43 | // Don't coerce non-integral pointers to integers or vice versa. |
44 | if (StoredNI != LoadNI) { |
45 | // As a special case, allow coercion of memset used to initialize |
46 | // an array w/null. Despite non-integral pointers not generally having a |
47 | // specific bit pattern, we do assume null is zero. |
48 | if (auto *CI = dyn_cast<Constant>(Val: StoredVal)) |
49 | return CI->isNullValue(); |
50 | return false; |
51 | } else if (StoredNI && LoadNI && |
52 | StoredTy->getPointerAddressSpace() != |
53 | LoadTy->getPointerAddressSpace()) { |
54 | return false; |
55 | } |
56 | |
57 | |
58 | // The implementation below uses inttoptr for vectors of unequal size; we |
59 | // can't allow this for non integral pointers. We could teach it to extract |
60 | // exact subvectors if desired. |
61 | if (StoredNI && StoreSize != DL.getTypeSizeInBits(Ty: LoadTy).getFixedValue()) |
62 | return false; |
63 | |
64 | if (StoredTy->isTargetExtTy() || LoadTy->isTargetExtTy()) |
65 | return false; |
66 | |
67 | return true; |
68 | } |
69 | |
70 | /// If we saw a store of a value to memory, and |
71 | /// then a load from a must-aliased pointer of a different type, try to coerce |
72 | /// the stored value. LoadedTy is the type of the load we want to replace. |
73 | /// IRB is IRBuilder used to insert new instructions. |
74 | /// |
75 | /// If we can't do it, return null. |
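///
/// For example, an i64 store feeding an i32 load is shifted right by 32 bits
/// on big-endian targets and then truncated; a float stored and reloaded as
/// an i32 becomes a plain bitcast.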
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
                                      IRBuilderBase &Helper,
                                      const DataLayout &DL) {
  assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
         "precondition violation - materialization can't fail");
  if (auto *C = dyn_cast<Constant>(StoredVal))
    StoredVal = ConstantFoldConstant(C, DL);

  // If this is already the right type, just return it.
  Type *StoredValTy = StoredVal->getType();

  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedValue();
  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedValue();

  // If the store and reload are the same size, we can always reuse it.
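  // For example, a stored float reloaded as an i32 becomes a single bitcast,
  // and a stored pointer reloaded as an integer of pointer width goes through
  // a ptrtoint.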
  if (StoredValSize == LoadedValSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
    } else {
      // Convert source pointers to integers, which can be bitcast.
      if (StoredValTy->isPtrOrPtrVectorTy()) {
        StoredValTy = DL.getIntPtrType(StoredValTy);
        StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
      }

      Type *TypeToCastTo = LoadedTy;
      if (TypeToCastTo->isPtrOrPtrVectorTy())
        TypeToCastTo = DL.getIntPtrType(TypeToCastTo);

      if (StoredValTy != TypeToCastTo)
        StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);

      // Cast to pointer if the load needs a pointer type.
      if (LoadedTy->isPtrOrPtrVectorTy())
        StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    }

    if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
      StoredVal = ConstantFoldConstant(C, DL);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoredValSize >= LoadedValSize &&
         "canCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPtrOrPtrVectorTy()) {
    StoredValTy = DL.getIntPtrType(StoredValTy);
    StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
    StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
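  // For example, when an i32 store feeds an i16 load on a big-endian target,
  // the two bytes the load reads hold the most significant half of the i32,
  // so shift right by 16 before truncating; on little-endian targets the
  // truncate alone is enough.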
  if (DL.isBigEndian()) {
    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedValue() -
                        DL.getTypeStoreSizeInBits(LoadedTy).getFixedValue();
    StoredVal = Helper.CreateLShr(
        StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
  }

  // Truncate the integer to the right size now.
  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
  StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);

  if (LoadedTy != NewIntTy) {
    // If the result is a pointer, inttoptr.
    if (LoadedTy->isPtrOrPtrVectorTy())
      StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    else
      // Otherwise, bitcast.
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
  }

  if (auto *C = dyn_cast<Constant>(StoredVal))
    StoredVal = ConstantFoldConstant(C, DL);

  return StoredVal;
}

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering memory write (store, memset, memcpy, memmove). This
/// means that the write *may* provide bits used by the load but we can't be
/// sure because the pointers don't must-alias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
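///
/// For example, if the write covers bytes [0, 16) of an object and the load
/// reads bytes [4, 8) of the same object, this returns 4.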
static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const DataLayout &DL) {
  // If the loaded/stored value is a first class array/struct, or scalable type,
  // don't try to transform them. We need to be able to bitcast to integer.
  if (isFirstClassAggregateOrScalableType(LoadTy))
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase =
      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
  if (StoreBase != LoadBase)
    return -1;

  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue();

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
  LoadSize /= 8;

  // If the load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize))
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset - StoreOffset;
}

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                   StoreInst *DepSI, const DataLayout &DL) {
  auto *StoredVal = DepSI->getValueOperand();

  // Cannot handle reading from store of first-class aggregate or scalable type.
  if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
    return -1;

  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =
      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedValue();
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
                                        DL);
}

/// This function is called when we have a memdep query of a load that ends up
/// being clobbered by another load. See if the other load can feed into the
/// second load.
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
                                  const DataLayout &DL) {
  // Cannot handle reading from a load of first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedValue();
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
}

int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                     MemIntrinsic *MI, const DataLayout &DL) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (!SizeCst)
    return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;

  // If this is a memset, we just need to see if the offset is valid within the
  // size of the memset.
  if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) {
    if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
      auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue());
      if (!CI || !CI->isZero())
        return -1;
    }
    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, DL);
  }

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
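  // For example, a load from P+4 clobbered by memcpy(P, @Global, 16), where
  // @Global is a constant global with a definitive initializer, can be folded
  // to a constant read from @Global+4.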
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (!Src)
    return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                              MemSizeInBits, DL);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
    return Offset;
  return -1;
}

static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
                                         Type *LoadTy, IRBuilderBase &Builder,
                                         const DataLayout &DL) {
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  // If two pointers are in the same address space, they have the same size,
  // so we don't need to do any truncation, etc. This avoids introducing
  // ptrtoint instructions for pointers that may be non-integral.
  if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
      cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
          cast<PointerType>(LoadTy)->getAddressSpace()) {
    return SrcVal;
  }

  uint64_t StoreSize =
      (DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPtrOrPtrVectorTy())
    SrcVal =
        Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal =
        Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));

  // Shift the bits to the least significant depending on endianness.
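  // For example, with StoreSize = 8, LoadSize = 2 and Offset = 2, the wanted
  // bytes sit 16 bits from the bottom on a little-endian target but 32 bits
  // from the bottom on a big-endian target.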
  unsigned ShiftAmt;
  if (DL.isLittleEndian())
    ShiftAmt = Offset * 8;
  else
    ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal,
                                ConstantInt::get(SrcVal->getType(), ShiftAmt));

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTruncOrBitCast(SrcVal,
                                          IntegerType::get(Ctx, LoadSize * 8));
  return SrcVal;
}

Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
                       Instruction *InsertPt, const DataLayout &DL) {

#ifndef NDEBUG
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
  assert(Offset + LoadSize <= SrcValSize);
#endif
  IRBuilder<> Builder(InsertPt);
  SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
  return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}

Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
                                  Type *LoadTy, const DataLayout &DL) {
#ifndef NDEBUG
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
  assert(Offset + LoadSize <= SrcValSize);
#endif
  return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
}

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering mem intrinsic.
Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                              Type *LoadTy, Instruction *InsertPt,
                              const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;
  IRBuilder<> Builder(InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val =
          Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
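    // For example, a 4-byte load of memset byte X is built up as
    // X -> X | (X << 8) -> (X | (X << 8)) | ((X | (X << 8)) << 16).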
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet * 2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(
            Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal =
          Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
                                      DL);
}

Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                         Type *LoadTy, const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
    if (!Val)
      return nullptr;

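    // Splat the byte across the width of the load; e.g. a memset value of
    // 0xAB read as an i32 folds to the constant 0xABABABAB.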
    Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue()));
    return ConstantFoldLoadFromConst(Val, LoadTy, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
                                      DL);
}
} // namespace VNCoercion
} // namespace llvm