1 | //===- InstCombineCalls.cpp -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the visitCall, visitInvoke, and visitCallBr functions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "InstCombineInternal.h" |
14 | #include "llvm/ADT/APFloat.h" |
15 | #include "llvm/ADT/APInt.h" |
16 | #include "llvm/ADT/APSInt.h" |
17 | #include "llvm/ADT/ArrayRef.h" |
18 | #include "llvm/ADT/STLFunctionalExtras.h" |
19 | #include "llvm/ADT/SmallBitVector.h" |
20 | #include "llvm/ADT/SmallVector.h" |
21 | #include "llvm/ADT/Statistic.h" |
22 | #include "llvm/Analysis/AliasAnalysis.h" |
23 | #include "llvm/Analysis/AssumeBundleQueries.h" |
24 | #include "llvm/Analysis/AssumptionCache.h" |
25 | #include "llvm/Analysis/InstructionSimplify.h" |
26 | #include "llvm/Analysis/Loads.h" |
27 | #include "llvm/Analysis/MemoryBuiltins.h" |
28 | #include "llvm/Analysis/ValueTracking.h" |
29 | #include "llvm/Analysis/VectorUtils.h" |
30 | #include "llvm/IR/AttributeMask.h" |
31 | #include "llvm/IR/Attributes.h" |
32 | #include "llvm/IR/BasicBlock.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/DebugInfo.h" |
37 | #include "llvm/IR/DerivedTypes.h" |
38 | #include "llvm/IR/Function.h" |
39 | #include "llvm/IR/GlobalVariable.h" |
40 | #include "llvm/IR/InlineAsm.h" |
41 | #include "llvm/IR/InstrTypes.h" |
42 | #include "llvm/IR/Instruction.h" |
43 | #include "llvm/IR/Instructions.h" |
44 | #include "llvm/IR/IntrinsicInst.h" |
45 | #include "llvm/IR/Intrinsics.h" |
46 | #include "llvm/IR/IntrinsicsAArch64.h" |
47 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
48 | #include "llvm/IR/IntrinsicsARM.h" |
49 | #include "llvm/IR/IntrinsicsHexagon.h" |
50 | #include "llvm/IR/LLVMContext.h" |
51 | #include "llvm/IR/Metadata.h" |
52 | #include "llvm/IR/PatternMatch.h" |
53 | #include "llvm/IR/Statepoint.h" |
54 | #include "llvm/IR/Type.h" |
55 | #include "llvm/IR/User.h" |
56 | #include "llvm/IR/Value.h" |
57 | #include "llvm/IR/ValueHandle.h" |
58 | #include "llvm/Support/AtomicOrdering.h" |
59 | #include "llvm/Support/Casting.h" |
60 | #include "llvm/Support/CommandLine.h" |
61 | #include "llvm/Support/Compiler.h" |
62 | #include "llvm/Support/Debug.h" |
63 | #include "llvm/Support/ErrorHandling.h" |
64 | #include "llvm/Support/KnownBits.h" |
65 | #include "llvm/Support/MathExtras.h" |
66 | #include "llvm/Support/raw_ostream.h" |
67 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
68 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
69 | #include "llvm/Transforms/Utils/Local.h" |
70 | #include "llvm/Transforms/Utils/SimplifyLibCalls.h" |
71 | #include <algorithm> |
72 | #include <cassert> |
73 | #include <cstdint> |
74 | #include <optional> |
75 | #include <utility> |
76 | #include <vector> |
77 | |
78 | #define DEBUG_TYPE "instcombine" |
79 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
80 | |
81 | using namespace llvm; |
82 | using namespace PatternMatch; |
83 | |
84 | STATISTIC(NumSimplified, "Number of library calls simplified" ); |
85 | |
86 | static cl::opt<unsigned> GuardWideningWindow( |
87 | "instcombine-guard-widening-window" , |
88 | cl::init(Val: 3), |
89 | cl::desc("How wide an instruction window to bypass looking for " |
90 | "another guard" )); |
91 | |
92 | /// Return the specified type promoted as it would be to pass through a va_arg |
93 | /// area. |
94 | static Type *getPromotedType(Type *Ty) { |
95 | if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) { |
96 | if (ITy->getBitWidth() < 32) |
97 | return Type::getInt32Ty(C&: Ty->getContext()); |
98 | } |
99 | return Ty; |
100 | } |
101 | |
102 | /// Recognize a memcpy/memmove from an otherwise trivially unused alloca. |
103 | /// TODO: This should probably be integrated with visitAllocSites, but that |
104 | /// requires a deeper change to allow either unread or unwritten objects. |
105 | static bool hasUndefSource(AnyMemTransferInst *MI) { |
106 | auto *Src = MI->getRawSource(); |
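// Look through single-use GEPs and bitcasts to find the underlying object of
// the source pointer.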
107 | while (isa<GetElementPtrInst>(Val: Src) || isa<BitCastInst>(Val: Src)) { |
108 | if (!Src->hasOneUse()) |
109 | return false; |
110 | Src = cast<Instruction>(Val: Src)->getOperand(i: 0); |
111 | } |
112 | return isa<AllocaInst>(Val: Src) && Src->hasOneUse(); |
113 | } |
114 | |
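/// Simplify a memcpy/memmove (possibly atomic): improve the known alignments,
/// drop provably no-op transfers, and turn small constant-length copies into an
/// integer load/store pair. For example (illustrative IR; the resulting
/// alignments come from the call's align attributes):
///   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 4, i1 false)
/// becomes
///   %v = load i32, ptr %s, align 1
///   store i32 %v, ptr %d, align 1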
115 | Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { |
116 | Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT); |
117 | MaybeAlign CopyDstAlign = MI->getDestAlign(); |
118 | if (!CopyDstAlign || *CopyDstAlign < DstAlign) { |
119 | MI->setDestAlignment(DstAlign); |
120 | return MI; |
121 | } |
122 | |
123 | Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT); |
124 | MaybeAlign CopySrcAlign = MI->getSourceAlign(); |
125 | if (!CopySrcAlign || *CopySrcAlign < SrcAlign) { |
126 | MI->setSourceAlignment(SrcAlign); |
127 | return MI; |
128 | } |
129 | |
130 | // If we have a store to a location which is known constant, we can conclude |
131 | // that the store must be storing the constant value (else the memory |
132 | // wouldn't be constant), and this must be a noop. |
133 | if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) { |
134 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
135 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
136 | return MI; |
137 | } |
138 | |
139 | // If the source is provably undef, the memcpy/memmove doesn't do anything |
140 | // (unless the transfer is volatile). |
141 | if (hasUndefSource(MI) && !MI->isVolatile()) { |
142 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
143 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
144 | return MI; |
145 | } |
146 | |
147 | // If the transfer length is a constant 1/2/4/8 bytes, replace the |
148 | // memcpy/memmove with a single load/store pair. |
149 | ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength()); |
150 | if (!MemOpLength) return nullptr; |
151 | |
152 | // Source and destination pointer types are always "i8*" for the intrinsic. See |
153 | // if the size is something we can handle with a single primitive load/store. |
154 | // A single load+store correctly handles overlapping memory in the memmove |
155 | // case. |
156 | uint64_t Size = MemOpLength->getLimitedValue(); |
157 | assert(Size && "0-sized memory transferring should be removed already." ); |
158 | |
159 | if (Size > 8 || (Size&(Size-1))) |
160 | return nullptr; // If not 1/2/4/8 bytes, exit. |
161 | |
162 | // If this is an atomic transfer and the alignment is less than the size, we |
163 | // would introduce an unaligned memory access, which CodeGen would later turn |
164 | // into a libcall. That is not an evident performance gain, so bail out for |
165 | // now. |
166 | if (isa<AtomicMemTransferInst>(Val: MI)) |
167 | if (*CopyDstAlign < Size || *CopySrcAlign < Size) |
168 | return nullptr; |
169 | |
170 | // Use an integer load+store unless we can find something better. |
171 | IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<3); |
172 | |
173 | // If the memcpy has metadata describing the members, see if we can get the |
174 | // TBAA, scope and noalias tags describing our copy. |
175 | AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size); |
176 | |
177 | Value *Src = MI->getArgOperand(i: 1); |
178 | Value *Dest = MI->getArgOperand(i: 0); |
179 | LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src); |
180 | // Alignment from the mem intrinsic will be better, so use it. |
181 | L->setAlignment(*CopySrcAlign); |
182 | L->setAAMetadata(AACopyMD); |
183 | MDNode *LoopMemParallelMD = |
184 | MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access); |
185 | if (LoopMemParallelMD) |
186 | L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD); |
187 | MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group); |
188 | if (AccessGroupMD) |
189 | L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD); |
190 | |
191 | StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest); |
192 | // Alignment from the mem intrinsic will be better, so use it. |
193 | S->setAlignment(*CopyDstAlign); |
194 | S->setAAMetadata(AACopyMD); |
195 | if (LoopMemParallelMD) |
196 | S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD); |
197 | if (AccessGroupMD) |
198 | S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD); |
199 | S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID); |
200 | |
201 | if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) { |
202 | // non-atomics can be volatile |
203 | L->setVolatile(MT->isVolatile()); |
204 | S->setVolatile(MT->isVolatile()); |
205 | } |
206 | if (isa<AtomicMemTransferInst>(Val: MI)) { |
207 | // atomics have to be unordered |
208 | L->setOrdering(AtomicOrdering::Unordered); |
209 | S->setOrdering(AtomicOrdering::Unordered); |
210 | } |
211 | |
212 | // Set the size of the copy to 0; it will be deleted on the next iteration. |
213 | MI->setLength(Constant::getNullValue(Ty: MemOpLength->getType())); |
214 | return MI; |
215 | } |
216 | |
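/// Simplify a memset (possibly atomic): improve the known destination
/// alignment, drop provably no-op sets, and turn small constant-length sets
/// into a single store of the splatted fill value, e.g. (illustrative IR)
///   call void @llvm.memset.p0.i64(ptr %d, i8 7, i64 4, i1 false)
/// becomes
///   store i32 117901063, ptr %d, align 1   ; 117901063 == 0x07070707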
217 | Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { |
218 | const Align KnownAlignment = |
219 | getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT); |
220 | MaybeAlign MemSetAlign = MI->getDestAlign(); |
221 | if (!MemSetAlign || *MemSetAlign < KnownAlignment) { |
222 | MI->setDestAlignment(KnownAlignment); |
223 | return MI; |
224 | } |
225 | |
226 | // If we have a store to a location which is known constant, we can conclude |
227 | // that the store must be storing the constant value (else the memory |
228 | // wouldn't be constant), and this must be a noop. |
229 | if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) { |
230 | // Set the size of the memset to 0; it will be deleted on the next iteration. |
231 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
232 | return MI; |
233 | } |
234 | |
235 | // Remove memset with an undef value. |
236 | // FIXME: This is technically incorrect because it might overwrite a poison |
237 | // value. Change to PoisonValue once #52930 is resolved. |
238 | if (isa<UndefValue>(Val: MI->getValue())) { |
239 | // Set the size of the memset to 0; it will be deleted on the next iteration. |
240 | MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType())); |
241 | return MI; |
242 | } |
243 | |
244 | // Extract the length, alignment, and fill value if they are constant. |
245 | ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength()); |
246 | ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue()); |
247 | if (!LenC || !FillC || !FillC->getType()->isIntegerTy(Bitwidth: 8)) |
248 | return nullptr; |
249 | const uint64_t Len = LenC->getLimitedValue(); |
250 | assert(Len && "0-sized memory setting should be removed already." ); |
251 | const Align Alignment = MI->getDestAlign().valueOrOne(); |
252 | |
253 | // If this is an atomic memset and the alignment is less than the length, we |
254 | // would introduce an unaligned memory access, which CodeGen would later turn |
255 | // into a libcall. That is not an evident performance gain, so bail out for |
256 | // now. |
257 | if (isa<AtomicMemSetInst>(Val: MI)) |
258 | if (Alignment < Len) |
259 | return nullptr; |
260 | |
261 | // memset(s,c,n) -> store s, c (for n=1,2,4,8) |
262 | if (Len <= 8 && isPowerOf2_32(Value: (uint32_t)Len)) { |
263 | Type *ITy = IntegerType::get(C&: MI->getContext(), NumBits: Len*8); // n=1 -> i8. |
264 | |
265 | Value *Dest = MI->getDest(); |
266 | |
267 | // Extract the fill value and store. |
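// Splat the i8 fill value across all bytes of the wider integer by
// multiplying by 0x0101010101010101.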
268 | const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; |
269 | Constant *FillVal = ConstantInt::get(Ty: ITy, V: Fill); |
270 | StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile()); |
271 | S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID); |
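// If any debug assignment markers referred to the original i8 fill constant,
// point them at the new splatted constant instead.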
272 | auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) { |
273 | if (llvm::is_contained(DbgAssign->location_ops(), FillC)) |
274 | DbgAssign->replaceVariableLocationOp(FillC, FillVal); |
275 | }; |
276 | for_each(Range: at::getAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers); |
277 | for_each(Range: at::getDVRAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers); |
278 | |
279 | S->setAlignment(Alignment); |
280 | if (isa<AtomicMemSetInst>(Val: MI)) |
281 | S->setOrdering(AtomicOrdering::Unordered); |
282 | |
283 | // Set the size of the memset to 0; it will be deleted on the next iteration. |
284 | MI->setLength(Constant::getNullValue(Ty: LenC->getType())); |
285 | return MI; |
286 | } |
287 | |
288 | return nullptr; |
289 | } |
290 | |
291 | // TODO: Obvious Missing Transforms: |
292 | // * Narrow width by halves excluding zero/undef lanes |
293 | Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { |
294 | Value *LoadPtr = II.getArgOperand(i: 0); |
295 | const Align Alignment = |
296 | cast<ConstantInt>(Val: II.getArgOperand(i: 1))->getAlignValue(); |
297 | |
298 | // If the mask is all ones or undefs, this is a plain vector load of the 1st |
299 | // argument. |
300 | if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: 2))) { |
301 | LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment, |
302 | Name: "unmaskedload" ); |
303 | L->copyMetadata(SrcInst: II); |
304 | return L; |
305 | } |
306 | |
307 | // If we can unconditionally load from this address, replace with a |
308 | // load/select idiom. TODO: use DT for context-sensitive query |
309 | if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(), |
310 | DL: II.getDataLayout(), CtxI: &II, AC: &AC)) { |
311 | LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment, |
312 | Name: "unmaskedload" ); |
313 | LI->copyMetadata(SrcInst: II); |
314 | return Builder.CreateSelect(C: II.getArgOperand(i: 2), True: LI, False: II.getArgOperand(i: 3)); |
315 | } |
316 | |
317 | return nullptr; |
318 | } |
319 | |
320 | // TODO: Obvious Missing Transforms: |
321 | // * Single constant active lane -> store |
322 | // * Narrow width by halves excluding zero/undef lanes |
323 | Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { |
324 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 3)); |
325 | if (!ConstMask) |
326 | return nullptr; |
327 | |
328 | // If the mask is all zeros, this instruction does nothing. |
329 | if (ConstMask->isNullValue()) |
330 | return eraseInstFromFunction(I&: II); |
331 | |
332 | // If the mask is all ones, this is a plain vector store of the 1st argument. |
333 | if (ConstMask->isAllOnesValue()) { |
334 | Value *StorePtr = II.getArgOperand(i: 1); |
335 | Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
336 | StoreInst *S = |
337 | new StoreInst(II.getArgOperand(i: 0), StorePtr, false, Alignment); |
338 | S->copyMetadata(SrcInst: II); |
339 | return S; |
340 | } |
341 | |
342 | if (isa<ScalableVectorType>(Val: ConstMask->getType())) |
343 | return nullptr; |
344 | |
345 | // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts |
346 | APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask); |
347 | APInt PoisonElts(DemandedElts.getBitWidth(), 0); |
348 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts, |
349 | PoisonElts)) |
350 | return replaceOperand(I&: II, OpNum: 0, V); |
351 | |
352 | return nullptr; |
353 | } |
354 | |
355 | // TODO: Obvious Missing Transforms: |
356 | // * Single constant active lane load -> load |
357 | // * Dereferenceable address & few lanes -> scalarize speculative load/selects |
358 | // * Adjacent vector addresses -> masked.load |
359 | // * Narrow width by halves excluding zero/undef lanes |
360 | // * Vector incrementing address -> vector masked load |
361 | Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { |
362 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2)); |
363 | if (!ConstMask) |
364 | return nullptr; |
365 | |
366 | // Vector splat address w/known mask -> scalar load |
367 | // Fold the gather to a load of the first lane of the source vector, since |
368 | // every lane reloads the same value. |
369 | if (ConstMask->isAllOnesValue()) |
370 | if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 0))) { |
371 | auto *VecTy = cast<VectorType>(Val: II.getType()); |
372 | const Align Alignment = |
373 | cast<ConstantInt>(Val: II.getArgOperand(i: 1))->getAlignValue(); |
374 | LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr, |
375 | Align: Alignment, Name: "load.scalar" ); |
376 | Value *Shuf = |
377 | Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast" ); |
378 | return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf)); |
379 | } |
380 | |
381 | return nullptr; |
382 | } |
383 | |
384 | // TODO: Obvious Missing Transforms: |
385 | // * Single constant active lane -> store |
386 | // * Adjacent vector addresses -> masked.store |
387 | // * Narrow store width by halves excluding zero/undef lanes |
388 | // * Vector incrementing address -> vector masked store |
389 | Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { |
390 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 3)); |
391 | if (!ConstMask) |
392 | return nullptr; |
393 | |
394 | // If the mask is all zeros, a scatter does nothing. |
395 | if (ConstMask->isNullValue()) |
396 | return eraseInstFromFunction(I&: II); |
397 | |
398 | // Vector splat address -> scalar store |
399 | if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 1))) { |
400 | // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr |
401 | if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: 0))) { |
402 | if (maskContainsAllOneOrUndef(Mask: ConstMask)) { |
403 | Align Alignment = |
404 | cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
405 | StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, |
406 | Alignment); |
407 | S->copyMetadata(SrcInst: II); |
408 | return S; |
409 | } |
410 | } |
411 | // scatter(vector, splat(ptr), splat(true)) -> store extract(vector, |
412 | // lastlane), ptr |
413 | if (ConstMask->isAllOnesValue()) { |
414 | Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: 2))->getAlignValue(); |
415 | VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: 1)->getType()); |
416 | ElementCount VF = WideLoadTy->getElementCount(); |
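// For scalable vectors the element count is only known at runtime, so
// materialize it to compute the index of the last lane.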
417 | Value *RunTimeVF = Builder.CreateElementCount(DstType: Builder.getInt32Ty(), EC: VF); |
418 | Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: 1)); |
419 | Value *Extract = |
420 | Builder.CreateExtractElement(Vec: II.getArgOperand(i: 0), Idx: LastLane); |
421 | StoreInst *S = |
422 | new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment); |
423 | S->copyMetadata(SrcInst: II); |
424 | return S; |
425 | } |
426 | } |
427 | if (isa<ScalableVectorType>(Val: ConstMask->getType())) |
428 | return nullptr; |
429 | |
430 | // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts |
431 | APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask); |
432 | APInt PoisonElts(DemandedElts.getBitWidth(), 0); |
433 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts, |
434 | PoisonElts)) |
435 | return replaceOperand(I&: II, OpNum: 0, V); |
436 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 1), DemandedElts, |
437 | PoisonElts)) |
438 | return replaceOperand(I&: II, OpNum: 1, V); |
439 | |
440 | return nullptr; |
441 | } |
442 | |
443 | /// This function transforms launder.invariant.group and strip.invariant.group |
444 | /// as follows: |
445 | /// launder(launder(%x)) -> launder(%x) (the result is not the argument) |
446 | /// launder(strip(%x)) -> launder(%x) |
447 | /// strip(strip(%x)) -> strip(%x) (the result is not the argument) |
448 | /// strip(launder(%x)) -> strip(%x) |
449 | /// This is legal because it preserves the most recent information about |
450 | /// the presence or absence of invariant.group. |
451 | static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II, |
452 | InstCombinerImpl &IC) { |
453 | auto *Arg = II.getArgOperand(i: 0); |
454 | auto *StrippedArg = Arg->stripPointerCasts(); |
455 | auto *StrippedInvariantGroupsArg = StrippedArg; |
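// Peel off any chain of launder/strip calls (and pointer casts) feeding the
// argument to find the underlying pointer.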
456 | while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) { |
457 | if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group && |
458 | Intr->getIntrinsicID() != Intrinsic::strip_invariant_group) |
459 | break; |
460 | StrippedInvariantGroupsArg = Intr->getArgOperand(i: 0)->stripPointerCasts(); |
461 | } |
462 | if (StrippedArg == StrippedInvariantGroupsArg) |
463 | return nullptr; // No launders/strips to remove. |
464 | |
465 | Value *Result = nullptr; |
466 | |
467 | if (II.getIntrinsicID() == Intrinsic::launder_invariant_group) |
468 | Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg); |
469 | else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group) |
470 | Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg); |
471 | else |
472 | llvm_unreachable( |
473 | "simplifyInvariantGroupIntrinsic only handles launder and strip" ); |
474 | if (Result->getType()->getPointerAddressSpace() != |
475 | II.getType()->getPointerAddressSpace()) |
476 | Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType()); |
477 | |
478 | return cast<Instruction>(Val: Result); |
479 | } |
480 | |
481 | static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { |
482 | assert((II.getIntrinsicID() == Intrinsic::cttz || |
483 | II.getIntrinsicID() == Intrinsic::ctlz) && |
484 | "Expected cttz or ctlz intrinsic" ); |
485 | bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; |
486 | Value *Op0 = II.getArgOperand(i: 0); |
487 | Value *Op1 = II.getArgOperand(i: 1); |
488 | Value *X; |
489 | // ctlz(bitreverse(x)) -> cttz(x) |
490 | // cttz(bitreverse(x)) -> ctlz(x) |
491 | if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) { |
492 | Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz; |
493 | Function *F = Intrinsic::getDeclaration(M: II.getModule(), id: ID, Tys: II.getType()); |
494 | return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: 1)}); |
495 | } |
496 | |
497 | if (II.getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
498 | // ctlz/cttz i1 Op0 --> not Op0 |
499 | if (match(V: Op1, P: m_Zero())) |
500 | return BinaryOperator::CreateNot(Op: Op0); |
501 | // If zero is poison, then the input can be assumed to be "true", so the |
502 | // instruction simplifies to "false". |
503 | assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1" ); |
504 | return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType())); |
505 | } |
506 | |
507 | // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true. |
508 | if (II.hasOneUse() && match(V: Op1, P: m_Zero()) && |
509 | match(V: II.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &II)))) |
510 | return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue()); |
511 | |
512 | Constant *C; |
513 | |
514 | if (IsTZ) { |
515 | // cttz(-x) -> cttz(x) |
516 | if (match(V: Op0, P: m_Neg(V: m_Value(V&: X)))) |
517 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
518 | |
519 | // cttz(-x & x) -> cttz(x) |
520 | if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X)))) |
521 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
522 | |
523 | // cttz(sext(x)) -> cttz(zext(x)) |
524 | if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) { |
525 | auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType()); |
526 | auto *CttzZext = |
527 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: Zext, RHS: Op1); |
528 | return IC.replaceInstUsesWith(I&: II, V: CttzZext); |
529 | } |
530 | |
531 | // Zext doesn't change the number of trailing zeros, so narrow: |
532 | // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'. |
533 | if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) { |
534 | auto *Cttz = IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: X, |
535 | RHS: IC.Builder.getTrue()); |
536 | auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType()); |
537 | return IC.replaceInstUsesWith(I&: II, V: ZextCttz); |
538 | } |
539 | |
540 | // cttz(abs(x)) -> cttz(x) |
541 | // cttz(nabs(x)) -> cttz(x) |
542 | Value *Y; |
543 | SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor; |
544 | if (SPF == SPF_ABS || SPF == SPF_NABS) |
545 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
546 | |
547 | if (match(V: Op0, P: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: X)))) |
548 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
549 | |
550 | // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val) |
551 | if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
552 | match(V: Op1, P: m_One())) { |
553 | Value *ConstCttz = |
554 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1); |
555 | return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X); |
556 | } |
557 | |
558 | // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val) |
559 | if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) && |
560 | match(V: Op1, P: m_One())) { |
561 | Value *ConstCttz = |
562 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1); |
563 | return BinaryOperator::CreateSub(V1: ConstCttz, V2: X); |
564 | } |
565 | |
566 | // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val) |
567 | if (match(V: Op0, P: m_Add(L: m_LShr(L: m_AllOnes(), R: m_Value(V&: X)), R: m_One()))) { |
568 | Value *Width = |
569 | ConstantInt::get(Ty: II.getType(), V: II.getType()->getScalarSizeInBits()); |
570 | return BinaryOperator::CreateSub(V1: Width, V2: X); |
571 | } |
572 | } else { |
573 | // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val) |
574 | if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
575 | match(V: Op1, P: m_One())) { |
576 | Value *ConstCtlz = |
577 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1); |
578 | return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X); |
579 | } |
580 | |
581 | // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val) |
582 | if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) && |
583 | match(V: Op1, P: m_One())) { |
584 | Value *ConstCtlz = |
585 | IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1); |
586 | return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X); |
587 | } |
588 | } |
589 | |
590 | KnownBits Known = IC.computeKnownBits(V: Op0, Depth: 0, CxtI: &II); |
591 | |
592 | // Create a mask for bits above (ctlz) or below (cttz) the first known one. |
593 | unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros() |
594 | : Known.countMaxLeadingZeros(); |
595 | unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros() |
596 | : Known.countMinLeadingZeros(); |
597 | |
598 | // If all bits above (ctlz) or below (cttz) the first known one are known |
599 | // zero, this value is constant. |
600 | // FIXME: This should be in InstSimplify because we're replacing an |
601 | // instruction with a constant. |
602 | if (PossibleZeros == DefiniteZeros) { |
603 | auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros); |
604 | return IC.replaceInstUsesWith(I&: II, V: C); |
605 | } |
606 | |
607 | // If the input to cttz/ctlz is known to be non-zero, |
608 | // then change the 'ZeroIsPoison' parameter to 'true' |
609 | // because we know the zero behavior can't affect the result. |
610 | if (!Known.One.isZero() || |
611 | isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) { |
612 | if (!match(V: II.getArgOperand(i: 1), P: m_One())) |
613 | return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue()); |
614 | } |
615 | |
616 | // Add range attribute since known bits can't completely reflect what we know. |
617 | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
618 | if (BitWidth != 1 && !II.hasRetAttr(Kind: Attribute::Range) && |
619 | !II.getMetadata(KindID: LLVMContext::MD_range)) { |
620 | ConstantRange Range(APInt(BitWidth, DefiniteZeros), |
621 | APInt(BitWidth, PossibleZeros + 1)); |
622 | II.addRangeRetAttr(CR: Range); |
623 | return &II; |
624 | } |
625 | |
626 | return nullptr; |
627 | } |
628 | |
629 | static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { |
630 | assert(II.getIntrinsicID() == Intrinsic::ctpop && |
631 | "Expected ctpop intrinsic" ); |
632 | Type *Ty = II.getType(); |
633 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
634 | Value *Op0 = II.getArgOperand(i: 0); |
635 | Value *X, *Y; |
636 | |
637 | // ctpop(bitreverse(x)) -> ctpop(x) |
638 | // ctpop(bswap(x)) -> ctpop(x) |
639 | if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) || match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X)))) |
640 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
641 | |
642 | // ctpop(rot(x)) -> ctpop(x) |
643 | if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) || |
644 | match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) && |
645 | X == Y) |
646 | return IC.replaceOperand(I&: II, OpNum: 0, V: X); |
647 | |
648 | // ctpop(x | -x) -> bitwidth - cttz(x, false) |
649 | if (Op0->hasOneUse() && |
650 | match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) { |
651 | Function *F = |
652 | Intrinsic::getDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty); |
653 | auto *Cttz = IC.Builder.CreateCall(Callee: F, Args: {X, IC.Builder.getFalse()}); |
654 | auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth)); |
655 | return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz)); |
656 | } |
657 | |
658 | // ctpop(~x & (x - 1)) -> cttz(x, false) |
659 | if (match(V: Op0, |
660 | P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) { |
661 | Function *F = |
662 | Intrinsic::getDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty); |
663 | return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()}); |
664 | } |
665 | |
666 | // Zext doesn't change the number of set bits, so narrow: |
667 | // ctpop (zext X) --> zext (ctpop X) |
668 | if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) { |
669 | Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: X); |
670 | return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty); |
671 | } |
672 | |
673 | KnownBits Known(BitWidth); |
674 | IC.computeKnownBits(V: Op0, Known, Depth: 0, CxtI: &II); |
675 | |
676 | // If all bits are zero except for exactly one fixed bit, then the result |
677 | // must be 0 or 1, and we can get that answer by shifting to LSB: |
678 | // ctpop (X & 32) --> (X & 32) >> 5 |
679 | // TODO: Investigate removing this, as it's likely unnecessary given the |
680 | // `isKnownToBeAPowerOfTwo` check below. |
681 | if ((~Known.Zero).isPowerOf2()) |
682 | return BinaryOperator::CreateLShr( |
683 | V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2())); |
684 | |
685 | // More generally we can also handle non-constant power of 2 patterns such as |
686 | // shl/shr(Pow2, X), (X & -X), etc... by transforming: |
687 | // ctpop(Pow2OrZero) --> icmp ne X, 0 |
688 | if (IC.isKnownToBeAPowerOfTwo(V: Op0, /* OrZero */ true)) |
689 | return CastInst::Create(Instruction::ZExt, |
690 | S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0, |
691 | RHS: Constant::getNullValue(Ty)), |
692 | Ty); |
693 | |
694 | // Add range attribute since known bits can't completely reflect what we know. |
695 | if (BitWidth != 1 && !II.hasRetAttr(Kind: Attribute::Range) && |
696 | !II.getMetadata(KindID: LLVMContext::MD_range)) { |
697 | ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()), |
698 | APInt(BitWidth, Known.countMaxPopulation() + 1)); |
699 | II.addRangeRetAttr(CR: Range); |
700 | return &II; |
701 | } |
702 | |
703 | return nullptr; |
704 | } |
705 | |
706 | /// Convert a table lookup to shufflevector if the mask is constant. |
707 | /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in |
708 | /// which case we could lower the shufflevector with rev64 instructions |
709 | /// as it's actually a byte reverse. |
710 | static Value *simplifyNeonTbl1(const IntrinsicInst &II, |
711 | InstCombiner::BuilderTy &Builder) { |
712 | // Bail out if the mask is not a constant. |
713 | auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: 1)); |
714 | if (!C) |
715 | return nullptr; |
716 | |
717 | auto *VecTy = cast<FixedVectorType>(Val: II.getType()); |
718 | unsigned NumElts = VecTy->getNumElements(); |
719 | |
720 | // Only perform this transformation for <8 x i8> vector types. |
721 | if (!VecTy->getElementType()->isIntegerTy(Bitwidth: 8) || NumElts != 8) |
722 | return nullptr; |
723 | |
724 | int Indexes[8]; |
725 | |
726 | for (unsigned I = 0; I < NumElts; ++I) { |
727 | Constant *COp = C->getAggregateElement(Elt: I); |
728 | |
729 | if (!COp || !isa<ConstantInt>(Val: COp)) |
730 | return nullptr; |
731 | |
732 | Indexes[I] = cast<ConstantInt>(Val: COp)->getLimitedValue(); |
733 | |
734 | // Make sure the mask indices are in range. |
735 | if ((unsigned)Indexes[I] >= NumElts) |
736 | return nullptr; |
737 | } |
738 | |
739 | auto *V1 = II.getArgOperand(i: 0); |
740 | auto *V2 = Constant::getNullValue(Ty: V1->getType()); |
741 | return Builder.CreateShuffleVector(V1, V2, Mask: ArrayRef(Indexes)); |
742 | } |
743 | |
744 | // Returns true iff the 2 intrinsics have the same operands, limiting the |
745 | // comparison to the first NumOperands. |
746 | static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, |
747 | unsigned NumOperands) { |
748 | assert(I.arg_size() >= NumOperands && "Not enough operands" ); |
749 | assert(E.arg_size() >= NumOperands && "Not enough operands" ); |
750 | for (unsigned i = 0; i < NumOperands; i++) |
751 | if (I.getArgOperand(i) != E.getArgOperand(i)) |
752 | return false; |
753 | return true; |
754 | } |
755 | |
756 | // Remove trivially empty start/end intrinsic ranges, i.e. a start |
757 | // immediately followed by an end (ignoring debuginfo or other |
758 | // start/end intrinsics in between). As this handles only the most trivial |
759 | // cases, tracking the nesting level is not needed: |
760 | // |
761 | // call @llvm.foo.start(i1 0) |
762 | // call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed |
763 | // call @llvm.foo.end(i1 0) |
764 | // call @llvm.foo.end(i1 0) ; &I |
765 | static bool |
766 | removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, |
767 | std::function<bool(const IntrinsicInst &)> IsStart) { |
768 | // We start from the end intrinsic and scan backwards, so that InstCombine |
769 | // has already processed (and potentially removed) all the instructions |
770 | // before the end intrinsic. |
771 | BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend()); |
772 | for (; BI != BE; ++BI) { |
773 | if (auto *I = dyn_cast<IntrinsicInst>(Val: &*BI)) { |
774 | if (I->isDebugOrPseudoInst() || |
775 | I->getIntrinsicID() == EndI.getIntrinsicID()) |
776 | continue; |
777 | if (IsStart(*I)) { |
778 | if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) { |
779 | IC.eraseInstFromFunction(I&: *I); |
780 | IC.eraseInstFromFunction(I&: EndI); |
781 | return true; |
782 | } |
783 | // Skip start intrinsics that don't pair with this end intrinsic. |
784 | continue; |
785 | } |
786 | } |
787 | break; |
788 | } |
789 | |
790 | return false; |
791 | } |
792 | |
793 | Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { |
794 | removeTriviallyEmptyRange(EndI&: I, IC&: *this, IsStart: [](const IntrinsicInst &I) { |
795 | return I.getIntrinsicID() == Intrinsic::vastart || |
796 | I.getIntrinsicID() == Intrinsic::vacopy; |
797 | }); |
798 | return nullptr; |
799 | } |
800 | |
801 | static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { |
802 | assert(Call.arg_size() > 1 && "Need at least 2 args to swap" ); |
803 | Value *Arg0 = Call.getArgOperand(i: 0), *Arg1 = Call.getArgOperand(i: 1); |
804 | if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) { |
805 | Call.setArgOperand(i: 0, v: Arg1); |
806 | Call.setArgOperand(i: 1, v: Arg0); |
807 | return &Call; |
808 | } |
809 | return nullptr; |
810 | } |
811 | |
812 | /// Creates a result tuple for an overflow intrinsic \p II with a given |
813 | /// \p Result and a constant \p Overflow value. |
814 | static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result, |
815 | Constant *Overflow) { |
816 | Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow}; |
817 | StructType *ST = cast<StructType>(Val: II->getType()); |
818 | Constant *Struct = ConstantStruct::get(T: ST, V); |
819 | return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: 0); |
820 | } |
821 | |
822 | Instruction * |
823 | InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { |
824 | WithOverflowInst *WO = cast<WithOverflowInst>(Val: II); |
825 | Value *OperationResult = nullptr; |
826 | Constant *OverflowResult = nullptr; |
827 | if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(), |
828 | RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult)) |
829 | return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult); |
830 | return nullptr; |
831 | } |
832 | |
833 | static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { |
834 | Ty = Ty->getScalarType(); |
835 | return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE; |
836 | } |
837 | |
838 | static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) { |
839 | Ty = Ty->getScalarType(); |
840 | return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero(); |
841 | } |
842 | |
843 | /// \returns the compare predicate type if the test performed by |
844 | /// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the |
845 | /// floating-point environment assumed for \p F for type \p Ty |
846 | static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, |
847 | const Function &F, Type *Ty) { |
848 | switch (static_cast<unsigned>(Mask)) { |
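// Whether a compare against zero also accepts subnormal inputs depends on the
// function's input denormal mode: if denormal inputs are flushed to zero
// (DAZ), an fcmp with 0.0 matches subnormals as well.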
849 | case fcZero: |
850 | if (inputDenormalIsIEEE(F, Ty)) |
851 | return FCmpInst::FCMP_OEQ; |
852 | break; |
853 | case fcZero | fcSubnormal: |
854 | if (inputDenormalIsDAZ(F, Ty)) |
855 | return FCmpInst::FCMP_OEQ; |
856 | break; |
857 | case fcPositive | fcNegZero: |
858 | if (inputDenormalIsIEEE(F, Ty)) |
859 | return FCmpInst::FCMP_OGE; |
860 | break; |
861 | case fcPositive | fcNegZero | fcNegSubnormal: |
862 | if (inputDenormalIsDAZ(F, Ty)) |
863 | return FCmpInst::FCMP_OGE; |
864 | break; |
865 | case fcPosSubnormal | fcPosNormal | fcPosInf: |
866 | if (inputDenormalIsIEEE(F, Ty)) |
867 | return FCmpInst::FCMP_OGT; |
868 | break; |
869 | case fcNegative | fcPosZero: |
870 | if (inputDenormalIsIEEE(F, Ty)) |
871 | return FCmpInst::FCMP_OLE; |
872 | break; |
873 | case fcNegative | fcPosZero | fcPosSubnormal: |
874 | if (inputDenormalIsDAZ(F, Ty)) |
875 | return FCmpInst::FCMP_OLE; |
876 | break; |
877 | case fcNegSubnormal | fcNegNormal | fcNegInf: |
878 | if (inputDenormalIsIEEE(F, Ty)) |
879 | return FCmpInst::FCMP_OLT; |
880 | break; |
881 | case fcPosNormal | fcPosInf: |
882 | if (inputDenormalIsDAZ(F, Ty)) |
883 | return FCmpInst::FCMP_OGT; |
884 | break; |
885 | case fcNegNormal | fcNegInf: |
886 | if (inputDenormalIsDAZ(F, Ty)) |
887 | return FCmpInst::FCMP_OLT; |
888 | break; |
889 | case ~fcZero & ~fcNan: |
890 | if (inputDenormalIsIEEE(F, Ty)) |
891 | return FCmpInst::FCMP_ONE; |
892 | break; |
893 | case ~(fcZero | fcSubnormal) & ~fcNan: |
894 | if (inputDenormalIsDAZ(F, Ty)) |
895 | return FCmpInst::FCMP_ONE; |
896 | break; |
897 | default: |
898 | break; |
899 | } |
900 | |
901 | return FCmpInst::BAD_FCMP_PREDICATE; |
902 | } |
903 | |
904 | Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) { |
905 | Value *Src0 = II.getArgOperand(i: 0); |
906 | Value *Src1 = II.getArgOperand(i: 1); |
907 | const ConstantInt *CMask = cast<ConstantInt>(Val: Src1); |
908 | FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue()); |
909 | const bool IsUnordered = (Mask & fcNan) == fcNan; |
910 | const bool IsOrdered = (Mask & fcNan) == fcNone; |
911 | const FPClassTest OrderedMask = Mask & ~fcNan; |
912 | const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan; |
913 | |
914 | const bool IsStrict = |
915 | II.getFunction()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP); |
916 | |
917 | Value *FNegSrc; |
918 | if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) { |
919 | // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask) |
920 | |
921 | II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask))); |
922 | return replaceOperand(I&: II, OpNum: 0, V: FNegSrc); |
923 | } |
924 | |
925 | Value *FAbsSrc; |
926 | if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) { |
927 | II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask))); |
928 | return replaceOperand(I&: II, OpNum: 0, V: FAbsSrc); |
929 | } |
930 | |
931 | if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) && |
932 | (IsOrdered || IsUnordered) && !IsStrict) { |
933 | // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf |
934 | // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf |
935 | // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf |
936 | // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf |
937 | Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType()); |
938 | FCmpInst::Predicate Pred = |
939 | IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ; |
940 | if (OrderedInvertedMask == fcInf) |
941 | Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE; |
942 | |
943 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Src0); |
944 | Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf); |
945 | CmpInf->takeName(V: &II); |
946 | return replaceInstUsesWith(I&: II, V: CmpInf); |
947 | } |
948 | |
949 | if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) && |
950 | (IsOrdered || IsUnordered) && !IsStrict) { |
951 | // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf |
952 | // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf |
953 | // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf |
954 | // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf |
955 | Constant *Inf = |
956 | ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf); |
957 | Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf) |
958 | : Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf); |
959 | |
960 | EqInf->takeName(V: &II); |
961 | return replaceInstUsesWith(I&: II, V: EqInf); |
962 | } |
963 | |
964 | if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) && |
965 | (IsOrdered || IsUnordered) && !IsStrict) { |
966 | // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf |
967 | // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf |
968 | // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf |
969 | // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf |
970 | Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(), |
971 | Negative: OrderedInvertedMask == fcNegInf); |
972 | Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf) |
973 | : Builder.CreateFCmpONE(LHS: Src0, RHS: Inf); |
974 | NeInf->takeName(V: &II); |
975 | return replaceInstUsesWith(I&: II, V: NeInf); |
976 | } |
977 | |
978 | if (Mask == fcNan && !IsStrict) { |
979 | // Equivalent of isnan. Replace with standard fcmp if we don't care about FP |
980 | // exceptions. |
981 | Value *IsNan = |
982 | Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType())); |
983 | IsNan->takeName(V: &II); |
984 | return replaceInstUsesWith(I&: II, V: IsNan); |
985 | } |
986 | |
987 | if (Mask == (~fcNan & fcAllFlags) && !IsStrict) { |
988 | // Equivalent of !isnan. Replace with standard fcmp. |
989 | Value *FCmp = |
990 | Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType())); |
991 | FCmp->takeName(V: &II); |
992 | return replaceInstUsesWith(I&: II, V: FCmp); |
993 | } |
994 | |
995 | FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE; |
996 | |
997 | // Try to replace with an fcmp with 0 |
998 | // |
999 | // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0 |
1000 | // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0 |
1001 | // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0 |
1002 | // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0 |
1003 | // |
1004 | // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0 |
1005 | // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0 |
1006 | // |
1007 | // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0 |
1008 | // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0 |
1009 | // |
1010 | if (!IsStrict && (IsOrdered || IsUnordered) && |
1011 | (PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(), |
1012 | Ty: Src0->getType())) != |
1013 | FCmpInst::BAD_FCMP_PREDICATE) { |
1014 | Constant *Zero = ConstantFP::getZero(Ty: Src0->getType()); |
1015 | // Equivalent of == 0. |
1016 | Value *FCmp = Builder.CreateFCmp( |
1017 | P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType, |
1018 | LHS: Src0, RHS: Zero); |
1019 | |
1020 | FCmp->takeName(V: &II); |
1021 | return replaceInstUsesWith(I&: II, V: FCmp); |
1022 | } |
1023 | |
1024 | KnownFPClass Known = computeKnownFPClass(Val: Src0, Interested: Mask, CtxI: &II); |
1025 | |
1026 | // Clear test bits we know must be false from the source value. |
1027 | // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other |
1028 | // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other |
1029 | if ((Mask & Known.KnownFPClasses) != Mask) { |
1030 | II.setArgOperand( |
1031 | i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses)); |
1032 | return &II; |
1033 | } |
1034 | |
1035 | // If none of the tests which can return false are possible, fold to true. |
1036 | // fp_class (nnan x), ~(qnan|snan) -> true |
1037 | // fp_class (ninf x), ~(ninf|pinf) -> true |
1038 | if (Mask == Known.KnownFPClasses) |
1039 | return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true)); |
1040 | |
1041 | return nullptr; |
1042 | } |
1043 | |
1044 | static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) { |
1045 | KnownBits Known = computeKnownBits(V: Op, /*Depth=*/0, Q: SQ); |
1046 | if (Known.isNonNegative()) |
1047 | return false; |
1048 | if (Known.isNegative()) |
1049 | return true; |
1050 | |
1051 | Value *X, *Y; |
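// For a sub nsw X, Y the result is negative exactly when X is signed-less-than
// Y, so check whether a dominating condition already decides that comparison.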
1052 | if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
1053 | return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL); |
1054 | |
1055 | return std::nullopt; |
1056 | } |
1057 | |
1058 | static std::optional<bool> getKnownSignOrZero(Value *Op, |
1059 | const SimplifyQuery &SQ) { |
1060 | if (std::optional<bool> Sign = getKnownSign(Op, SQ)) |
1061 | return Sign; |
1062 | |
1063 | Value *X, *Y; |
1064 | if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
1065 | return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL); |
1066 | |
1067 | return std::nullopt; |
1068 | } |
1069 | |
1070 | /// Return true if two values \p Op0 and \p Op1 are known to have the same sign. |
1071 | static bool signBitMustBeTheSame(Value *Op0, Value *Op1, |
1072 | const SimplifyQuery &SQ) { |
1073 | std::optional<bool> Known1 = getKnownSign(Op: Op1, SQ); |
1074 | if (!Known1) |
1075 | return false; |
1076 | std::optional<bool> Known0 = getKnownSign(Op: Op0, SQ); |
1077 | if (!Known0) |
1078 | return false; |
1079 | return *Known0 == *Known1; |
1080 | } |
1081 | |
1082 | /// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This |
1083 | /// can trigger other combines. |
1084 | static Instruction *moveAddAfterMinMax(IntrinsicInst *II, |
1085 | InstCombiner::BuilderTy &Builder) { |
1086 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
1087 | assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin || |
1088 | MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) && |
1089 | "Expected a min or max intrinsic" ); |
1090 | |
1091 | // TODO: Match vectors with undef elements, but undef may not propagate. |
1092 | Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1); |
1093 | Value *X; |
1094 | const APInt *C0, *C1; |
1095 | if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) || |
1096 | !match(V: Op1, P: m_APInt(Res&: C1))) |
1097 | return nullptr; |
1098 | |
1099 | // Check for necessary no-wrap and overflow constraints. |
1100 | bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin; |
1101 | auto *Add = cast<BinaryOperator>(Val: Op0); |
1102 | if ((IsSigned && !Add->hasNoSignedWrap()) || |
1103 | (!IsSigned && !Add->hasNoUnsignedWrap())) |
1104 | return nullptr; |
1105 | |
1106 | // If the constant difference overflows, then instsimplify should reduce the |
1107 | // min/max to the add or C1. |
1108 | bool Overflow; |
1109 | APInt CDiff = |
1110 | IsSigned ? C1->ssub_ov(RHS: *C0, Overflow) : C1->usub_ov(RHS: *C0, Overflow); |
1111 | assert(!Overflow && "Expected simplify of min/max" ); |
1112 | |
1113 | // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0 |
1114 | // Note: the "mismatched" no-overflow setting does not propagate. |
1115 | Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff); |
1116 | Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC); |
1117 | return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1)) |
1118 | : BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1)); |
1119 | } |
1120 | /// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. |
1121 | Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) { |
1122 | Type *Ty = MinMax1.getType(); |
1123 | |
1124 | // We are looking for a tree of: |
1125 | // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) |
1126 | // Where the min and max could be reversed |
1127 | Instruction *MinMax2; |
1128 | BinaryOperator *AddSub; |
1129 | const APInt *MinValue, *MaxValue; |
1130 | if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) { |
1131 | if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue)))) |
1132 | return nullptr; |
1133 | } else if (match(V: &MinMax1, |
1134 | P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) { |
1135 | if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue)))) |
1136 | return nullptr; |
1137 | } else |
1138 | return nullptr; |
1139 | |
1140 | // Check that the constants clamp a saturate, and that the new type would be |
1141 | // sensible to convert to. |
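// For an N-bit saturate the clamp bounds must be the INT_MAX and INT_MIN of
// the narrow type, i.e. MaxValue + 1 is a power of two and MinValue equals
// -(MaxValue + 1).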
1142 | if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) |
1143 | return nullptr; |
1144 | // In what bitwidth can this be treated as saturating arithmetic? |
1145 | unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; |
1146 | // FIXME: This isn't quite right for vectors, but using the scalar type is a |
1147 | // good first approximation for what should be done there. |
1148 | if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth)) |
1149 | return nullptr; |
1150 | |
1151 | // Also make sure that the inner min/max and the add/sub have one use. |
1152 | if (!MinMax2->hasOneUse() || !AddSub->hasOneUse()) |
1153 | return nullptr; |
1154 | |
1155 | // Create the new type (which can be a vector type) |
1156 | Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); |
1157 | |
1158 | Intrinsic::ID IntrinsicID; |
1159 | if (AddSub->getOpcode() == Instruction::Add) |
1160 | IntrinsicID = Intrinsic::sadd_sat; |
1161 | else if (AddSub->getOpcode() == Instruction::Sub) |
1162 | IntrinsicID = Intrinsic::ssub_sat; |
1163 | else |
1164 | return nullptr; |
1165 | |
1166 | // The two operands of the add/sub must be nsw-truncatable to the NewTy. This |
1167 | // is usually achieved via a sext from a smaller type. |
1168 | if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 0), Depth: 0, CxtI: AddSub) > |
1169 | NewBitWidth || |
1170 | ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 1), Depth: 0, CxtI: AddSub) > NewBitWidth) |
1171 | return nullptr; |
1172 | |
1173 | // Finally create and return the sat intrinsic, truncated to the new type |
1174 | Function *F = Intrinsic::getDeclaration(M: MinMax1.getModule(), id: IntrinsicID, Tys: NewTy); |
1175 | Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 0), DestTy: NewTy); |
1176 | Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 1), DestTy: NewTy); |
1177 | Value *Sat = Builder.CreateCall(Callee: F, Args: {AT, BT}); |
1178 | return CastInst::Create(Instruction::SExt, S: Sat, Ty); |
1179 | } |
1180 | |
1181 | |
1182 | /// If we have a clamp pattern like max (min X, 42), 41 -- where the output |
1183 | /// can only be one of two possible constant values -- turn that into a select |
1184 | /// of constants. |
1185 | static Instruction *foldClampRangeOfTwo(IntrinsicInst *II, |
1186 | InstCombiner::BuilderTy &Builder) { |
1187 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
1188 | Value *X; |
1189 | const APInt *C0, *C1; |
1190 | if (!match(V: I1, P: m_APInt(Res&: C1)) || !I0->hasOneUse()) |
1191 | return nullptr; |
1192 | |
1193 | CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; |
1194 | switch (II->getIntrinsicID()) { |
1195 | case Intrinsic::smax: |
1196 | if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1) |
1197 | Pred = ICmpInst::ICMP_SGT; |
1198 | break; |
1199 | case Intrinsic::smin: |
1200 | if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1) |
1201 | Pred = ICmpInst::ICMP_SLT; |
1202 | break; |
1203 | case Intrinsic::umax: |
1204 | if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1) |
1205 | Pred = ICmpInst::ICMP_UGT; |
1206 | break; |
1207 | case Intrinsic::umin: |
1208 | if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1) |
1209 | Pred = ICmpInst::ICMP_ULT; |
1210 | break; |
1211 | default: |
1212 | llvm_unreachable("Expected min/max intrinsic" ); |
1213 | } |
1214 | if (Pred == CmpInst::BAD_ICMP_PREDICATE) |
1215 | return nullptr; |
1216 | |
1217 | // max (min X, 42), 41 --> X > 41 ? 42 : 41 |
1218 | // min (max X, 42), 43 --> X < 43 ? 42 : 43 |
1219 | Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1); |
1220 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1); |
1221 | } |
1222 | |
1223 | /// If this min/max has a constant operand and an operand that is a matching |
1224 | /// min/max with a constant operand, constant-fold the 2 constant operands. |
1225 | static Value *reassociateMinMaxWithConstants(IntrinsicInst *II, |
1226 | IRBuilderBase &Builder, |
1227 | const SimplifyQuery &SQ) { |
1228 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
1229 | auto *LHS = dyn_cast<MinMaxIntrinsic>(Val: II->getArgOperand(i: 0)); |
1230 | if (!LHS) |
1231 | return nullptr; |
1232 | |
1233 | Constant *C0, *C1; |
1234 | if (!match(V: LHS->getArgOperand(i: 1), P: m_ImmConstant(C&: C0)) || |
1235 | !match(V: II->getArgOperand(i: 1), P: m_ImmConstant(C&: C1))) |
1236 | return nullptr; |
1237 | |
1238 | // max (max X, C0), C1 --> max X, (max C0, C1) |
1239 | // min (min X, C0), C1 --> min X, (min C0, C1) |
1240 | // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1) |
1241 | // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1) |
1242 | Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID(); |
1243 | if (InnerMinMaxID != MinMaxID && |
1244 | !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) || |
1245 | (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) && |
1246 | isKnownNonNegative(V: C0, SQ) && isKnownNonNegative(V: C1, SQ))) |
1247 | return nullptr; |
1248 | |
1249 | ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID); |
1250 | Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1); |
1251 | Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1); |
1252 | return Builder.CreateIntrinsic(ID: InnerMinMaxID, Types: II->getType(), |
1253 | Args: {LHS->getArgOperand(i: 0), NewC}); |
1254 | } |
1255 | |
1256 | /// If this min/max has a matching min/max operand with a constant, try to push |
1257 | /// the constant operand into this instruction. This can enable more folds. |
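/// For example: umax(umax(X, 42), Y) --> umax(umax(X, Y), 42), which exposes
/// the constant to the folds above.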
1258 | static Instruction * |
1259 | reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, |
1260 | InstCombiner::BuilderTy &Builder) { |
1261 | // Match and capture a min/max operand candidate. |
1262 | Value *X, *Y; |
1263 | Constant *C; |
1264 | Instruction *Inner; |
1265 | if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd( |
1266 | L: m_Instruction(I&: Inner), |
1267 | R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))), |
1268 | R: m_Value(V&: Y)))) |
1269 | return nullptr; |
1270 | |
1271 | // The inner op must match. Check for constants to avoid infinite loops. |
1272 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
1273 | auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner); |
1274 | if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID || |
1275 | match(V: X, P: m_ImmConstant()) || match(V: Y, P: m_ImmConstant())) |
1276 | return nullptr; |
1277 | |
1278 | // max (max X, C), Y --> max (max X, Y), C |
1279 | Function *MinMax = |
1280 | Intrinsic::getDeclaration(M: II->getModule(), id: MinMaxID, Tys: II->getType()); |
1281 | Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y); |
1282 | NewInner->takeName(V: Inner); |
1283 | return CallInst::Create(Func: MinMax, Args: {NewInner, C}); |
1284 | } |
1285 | |
1286 | /// Reduce a sequence of min/max intrinsics with a common operand. |
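/// For example: umin(umin(A, B), umin(B, C)) --> umin(umin(A, B), C) (or the
/// symmetric form, depending on which inner call has other uses).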
1287 | static Instruction *factorizeMinMaxTree(IntrinsicInst *II) { |
  // Match 3 of the same min/max ops. Example: umin(umin(A, B), umin(C, D)).
1289 | auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0)); |
1290 | auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 1)); |
1291 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
1292 | if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID || |
1293 | RHS->getIntrinsicID() != MinMaxID || |
1294 | (!LHS->hasOneUse() && !RHS->hasOneUse())) |
1295 | return nullptr; |
1296 | |
1297 | Value *A = LHS->getArgOperand(i: 0); |
1298 | Value *B = LHS->getArgOperand(i: 1); |
1299 | Value *C = RHS->getArgOperand(i: 0); |
1300 | Value *D = RHS->getArgOperand(i: 1); |
1301 | |
1302 | // Look for a common operand. |
1303 | Value *MinMaxOp = nullptr; |
1304 | Value *ThirdOp = nullptr; |
1305 | if (LHS->hasOneUse()) { |
1306 | // If the LHS is only used in this chain and the RHS is used outside of it, |
1307 | // reuse the RHS min/max because that will eliminate the LHS. |
1308 | if (D == A || C == A) { |
1309 | // min(min(a, b), min(c, a)) --> min(min(c, a), b) |
1310 | // min(min(a, b), min(a, d)) --> min(min(a, d), b) |
1311 | MinMaxOp = RHS; |
1312 | ThirdOp = B; |
1313 | } else if (D == B || C == B) { |
1314 | // min(min(a, b), min(c, b)) --> min(min(c, b), a) |
1315 | // min(min(a, b), min(b, d)) --> min(min(b, d), a) |
1316 | MinMaxOp = RHS; |
1317 | ThirdOp = A; |
1318 | } |
1319 | } else { |
    assert(RHS->hasOneUse() && "Expected one-use operand");
1321 | // Reuse the LHS. This will eliminate the RHS. |
1322 | if (D == A || D == B) { |
1323 | // min(min(a, b), min(c, a)) --> min(min(a, b), c) |
1324 | // min(min(a, b), min(c, b)) --> min(min(a, b), c) |
1325 | MinMaxOp = LHS; |
1326 | ThirdOp = C; |
1327 | } else if (C == A || C == B) { |
      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1330 | MinMaxOp = LHS; |
1331 | ThirdOp = D; |
1332 | } |
1333 | } |
1334 | |
1335 | if (!MinMaxOp || !ThirdOp) |
1336 | return nullptr; |
1337 | |
1338 | Module *Mod = II->getModule(); |
1339 | Function *MinMax = Intrinsic::getDeclaration(M: Mod, id: MinMaxID, Tys: II->getType()); |
1340 | return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp }); |
1341 | } |
1342 | |
1343 | /// If all arguments of the intrinsic are unary shuffles with the same mask, |
1344 | /// try to shuffle after the intrinsic. |
1345 | static Instruction * |
1346 | foldShuffledIntrinsicOperands(IntrinsicInst *II, |
1347 | InstCombiner::BuilderTy &Builder) { |
  // TODO: This should be extended to handle other intrinsics like ctpop,
  // bswap, etc. Use llvm::isTriviallyVectorizable() and related to determine
  // which intrinsics are safe to shuffle?
1351 | switch (II->getIntrinsicID()) { |
1352 | case Intrinsic::smax: |
1353 | case Intrinsic::smin: |
1354 | case Intrinsic::umax: |
1355 | case Intrinsic::umin: |
1356 | case Intrinsic::fma: |
1357 | case Intrinsic::fshl: |
1358 | case Intrinsic::fshr: |
1359 | break; |
1360 | default: |
1361 | return nullptr; |
1362 | } |
1363 | |
1364 | Value *X; |
1365 | ArrayRef<int> Mask; |
1366 | if (!match(V: II->getArgOperand(i: 0), |
1367 | P: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(Mask)))) |
1368 | return nullptr; |
1369 | |
1370 | // At least 1 operand must have 1 use because we are creating 2 instructions. |
1371 | if (none_of(Range: II->args(), P: [](Value *V) { return V->hasOneUse(); })) |
1372 | return nullptr; |
1373 | |
1374 | // See if all arguments are shuffled with the same mask. |
1375 | SmallVector<Value *, 4> NewArgs(II->arg_size()); |
1376 | NewArgs[0] = X; |
1377 | Type *SrcTy = X->getType(); |
1378 | for (unsigned i = 1, e = II->arg_size(); i != e; ++i) { |
1379 | if (!match(V: II->getArgOperand(i), |
1380 | P: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_SpecificMask(Mask))) || |
1381 | X->getType() != SrcTy) |
1382 | return nullptr; |
1383 | NewArgs[i] = X; |
1384 | } |
1385 | |
1386 | // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M |
1387 | Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr; |
1388 | Value *NewIntrinsic = |
1389 | Builder.CreateIntrinsic(ID: II->getIntrinsicID(), Types: SrcTy, Args: NewArgs, FMFSource: FPI); |
1390 | return new ShuffleVectorInst(NewIntrinsic, Mask); |
1391 | } |
1392 | |
/// Fold the following cases; this accepts both bswap and bitreverse intrinsics:
1394 | /// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y)) |
1395 | /// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse) |
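/// For example: bswap(xor(bswap(X), Y)) --> xor(X, bswap(Y)).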
1396 | template <Intrinsic::ID IntrID> |
1397 | static Instruction *foldBitOrderCrossLogicOp(Value *V, |
1398 | InstCombiner::BuilderTy &Builder) { |
1399 | static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse, |
1400 | "This helper only supports BSWAP and BITREVERSE intrinsics" ); |
1401 | |
1402 | Value *X, *Y; |
1403 | // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we |
1404 | // don't match ConstantExpr that aren't meaningful for this transform. |
1405 | if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) && |
1406 | isa<BinaryOperator>(Val: V)) { |
1407 | Value *OldReorderX, *OldReorderY; |
1408 | BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode(); |
1409 | |
1410 | // If both X and Y are bswap/bitreverse, the transform reduces the number |
1411 | // of instructions even if there's multiuse. |
    // If only one operand is bswap/bitreverse, we need to ensure that operand
    // has only one use.
1414 | if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) && |
1415 | match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) { |
1416 | return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY); |
1417 | } |
1418 | |
1419 | if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) { |
1420 | Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y); |
1421 | return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder); |
1422 | } |
1423 | |
1424 | if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) { |
1425 | Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X); |
1426 | return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY); |
1427 | } |
1428 | } |
1429 | return nullptr; |
1430 | } |
1431 | |
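/// If \p Arg is a lane permutation that the caller is allowed to ignore (a
/// vector.reverse, or a single-source shuffle that uses every lane exactly
/// once), return the unshuffled vector; otherwise return nullptr. For example,
/// a lane-order-insensitive reduction over shufflevector X, poison, <3,2,1,0>
/// can instead be computed over X directly.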
1432 | static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) { |
1433 | if (!CanReorderLanes) |
1434 | return nullptr; |
1435 | |
1436 | Value *V; |
1437 | if (match(V: Arg, P: m_VecReverse(Op0: m_Value(V)))) |
1438 | return V; |
1439 | |
1440 | ArrayRef<int> Mask; |
1441 | if (!isa<FixedVectorType>(Val: Arg->getType()) || |
1442 | !match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask(Mask))) || |
1443 | !cast<ShuffleVectorInst>(Val: Arg)->isSingleSource()) |
1444 | return nullptr; |
1445 | |
1446 | int Sz = Mask.size(); |
1447 | SmallBitVector UsedIndices(Sz); |
1448 | for (int Idx : Mask) { |
1449 | if (Idx == PoisonMaskElem || UsedIndices.test(Idx)) |
1450 | return nullptr; |
1451 | UsedIndices.set(Idx); |
1452 | } |
1453 | |
1454 | // Can remove shuffle iff just shuffled elements, no repeats, undefs, or |
1455 | // other changes. |
1456 | return UsedIndices.all() ? V : nullptr; |
1457 | } |
1458 | |
/// Fold an unsigned minimum of trailing or leading zero bit counts:
///   umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
///   umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin >> ConstOp))
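/// For example, with i32 operands:
///   umin(cttz(X, ZeroUndef), 8) --> cttz(X | 0x100, /*ZeroUndef=*/true)
/// because setting bit 8 caps the trailing-zero count at 8 and guarantees the
/// argument is non-zero.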
1463 | template <Intrinsic::ID IntrID> |
1464 | static Value * |
1465 | foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, |
1466 | const DataLayout &DL, |
1467 | InstCombiner::BuilderTy &Builder) { |
1468 | static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz, |
1469 | "This helper only supports cttz and ctlz intrinsics" ); |
1470 | |
1471 | Value *CtOp; |
1472 | Value *ZeroUndef; |
1473 | if (!match(I0, |
1474 | m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: CtOp), m_Value(V&: ZeroUndef))))) |
1475 | return nullptr; |
1476 | |
1477 | unsigned BitWidth = I1->getType()->getScalarSizeInBits(); |
1478 | auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); }; |
1479 | if (!match(I1, m_CheckedInt(LessBitWidth))) |
1480 | // We have a constant >= BitWidth (which can be handled by CVP) |
1481 | // or a non-splat vector with elements < and >= BitWidth |
1482 | return nullptr; |
1483 | |
1484 | Type *Ty = I1->getType(); |
1485 | Constant *NewConst = ConstantFoldBinaryOpOperands( |
1486 | Opcode: IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr, |
1487 | LHS: IntrID == Intrinsic::cttz |
1488 | ? ConstantInt::get(Ty, V: 1) |
1489 | : ConstantInt::get(Ty, V: APInt::getSignedMinValue(numBits: BitWidth)), |
1490 | RHS: cast<Constant>(Val: I1), DL); |
1491 | return Builder.CreateBinaryIntrinsic( |
1492 | ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp, RHS: NewConst), |
1493 | RHS: ConstantInt::getTrue(Ty: ZeroUndef->getType())); |
1494 | } |
1495 | |
1496 | /// CallInst simplification. This mostly only handles folding of intrinsic |
1497 | /// instructions. For normal calls, it allows visitCallBase to do the heavy |
1498 | /// lifting. |
1499 | Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { |
1500 | // Don't try to simplify calls without uses. It will not do anything useful, |
1501 | // but will result in the following folds being skipped. |
1502 | if (!CI.use_empty()) { |
1503 | SmallVector<Value *, 4> Args; |
1504 | Args.reserve(N: CI.arg_size()); |
1505 | for (Value *Op : CI.args()) |
1506 | Args.push_back(Elt: Op); |
1507 | if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args, |
1508 | Q: SQ.getWithInstruction(I: &CI))) |
1509 | return replaceInstUsesWith(I&: CI, V); |
1510 | } |
1511 | |
1512 | if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI)) |
1513 | return visitFree(FI&: CI, FreedOp); |
1514 | |
1515 | // If the caller function (i.e. us, the function that contains this CallInst) |
1516 | // is nounwind, mark the call as nounwind, even if the callee isn't. |
1517 | if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) { |
1518 | CI.setDoesNotThrow(); |
1519 | return &CI; |
1520 | } |
1521 | |
1522 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI); |
1523 | if (!II) return visitCallBase(Call&: CI); |
1524 | |
  // For atomic unordered mem intrinsics, if the length is not positive or is
  // not a multiple of the element size, then the behavior is undefined.
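  // For example, an element-wise atomic memcpy with element size 4 and a
  // constant length of 7 can never be well-defined, so we insert a
  // non-terminator unreachable and erase the call.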
1527 | if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(Val: II)) |
1528 | if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(Val: AMI->getLength())) |
1529 | if (NumBytes->isNegative() || |
1530 | (NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) { |
1531 | CreateNonTerminatorUnreachable(InsertAt: AMI); |
1532 | assert(AMI->getType()->isVoidTy() && |
1533 | "non void atomic unordered mem intrinsic" ); |
1534 | return eraseInstFromFunction(I&: *AMI); |
1535 | } |
1536 | |
1537 | // Intrinsics cannot occur in an invoke or a callbr, so handle them here |
1538 | // instead of in visitCallBase. |
1539 | if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) { |
1540 | bool Changed = false; |
1541 | |
1542 | // memmove/cpy/set of zero bytes is a noop. |
1543 | if (Constant *NumBytes = dyn_cast<Constant>(Val: MI->getLength())) { |
1544 | if (NumBytes->isNullValue()) |
1545 | return eraseInstFromFunction(I&: CI); |
1546 | } |
1547 | |
1548 | // No other transformations apply to volatile transfers. |
1549 | if (auto *M = dyn_cast<MemIntrinsic>(Val: MI)) |
1550 | if (M->isVolatile()) |
1551 | return nullptr; |
1552 | |
    // If we have a memmove and the source operand is a constant global, then
    // the source and dest pointers can't alias, so we can change this into a
    // call to memcpy.
1556 | if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) { |
1557 | if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource())) |
1558 | if (GVSrc->isConstant()) { |
1559 | Module *M = CI.getModule(); |
1560 | Intrinsic::ID MemCpyID = |
1561 | isa<AtomicMemMoveInst>(Val: MMI) |
1562 | ? Intrinsic::memcpy_element_unordered_atomic |
1563 | : Intrinsic::memcpy; |
1564 | Type *Tys[3] = { CI.getArgOperand(i: 0)->getType(), |
1565 | CI.getArgOperand(i: 1)->getType(), |
1566 | CI.getArgOperand(i: 2)->getType() }; |
1567 | CI.setCalledFunction(Intrinsic::getDeclaration(M, id: MemCpyID, Tys)); |
1568 | Changed = true; |
1569 | } |
1570 | } |
1571 | |
1572 | if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) { |
1573 | // memmove(x,x,size) -> noop. |
1574 | if (MTI->getSource() == MTI->getDest()) |
1575 | return eraseInstFromFunction(I&: CI); |
1576 | } |
1577 | |
1578 | // If we can determine a pointer alignment that is bigger than currently |
1579 | // set, update the alignment. |
1580 | if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) { |
1581 | if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI)) |
1582 | return I; |
1583 | } else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) { |
1584 | if (Instruction *I = SimplifyAnyMemSet(MI: MSI)) |
1585 | return I; |
1586 | } |
1587 | |
1588 | if (Changed) return II; |
1589 | } |
1590 | |
1591 | // For fixed width vector result intrinsics, use the generic demanded vector |
1592 | // support. |
1593 | if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) { |
1594 | auto VWidth = IIFVTy->getNumElements(); |
1595 | APInt PoisonElts(VWidth, 0); |
1596 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
1597 | if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) { |
1598 | if (V != II) |
1599 | return replaceInstUsesWith(I&: *II, V); |
1600 | return II; |
1601 | } |
1602 | } |
1603 | |
1604 | if (II->isCommutative()) { |
1605 | if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: 0), RHS: II->getOperand(i_nocapture: 1))) { |
1606 | replaceOperand(I&: *II, OpNum: 0, V: Pair->first); |
1607 | replaceOperand(I&: *II, OpNum: 1, V: Pair->second); |
1608 | return II; |
1609 | } |
1610 | |
1611 | if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI)) |
1612 | return NewCall; |
1613 | } |
1614 | |
  // An unused constrained FP intrinsic call may have a declared side effect,
  // which prevents it from being removed. In some cases, however, the side
  // effect is actually absent. To detect this case, call
  // simplifyConstrainedFPCall. If it returns a replacement, the call may be
  // removed.
1619 | if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) { |
1620 | if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI))) |
1621 | return eraseInstFromFunction(I&: CI); |
1622 | } |
1623 | |
1624 | Intrinsic::ID IID = II->getIntrinsicID(); |
1625 | switch (IID) { |
1626 | case Intrinsic::objectsize: { |
1627 | SmallVector<Instruction *> InsertedInstructions; |
1628 | if (Value *V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/false, |
1629 | InsertedInstructions: &InsertedInstructions)) { |
1630 | for (Instruction *Inserted : InsertedInstructions) |
1631 | Worklist.add(I: Inserted); |
1632 | return replaceInstUsesWith(I&: CI, V); |
1633 | } |
1634 | return nullptr; |
1635 | } |
1636 | case Intrinsic::abs: { |
1637 | Value *IIOperand = II->getArgOperand(i: 0); |
1638 | bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: 1))->isOneValue(); |
1639 | |
1640 | // abs(-x) -> abs(x) |
1641 | // TODO: Copy nsw if it was present on the neg? |
1642 | Value *X; |
1643 | if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X)))) |
1644 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
1645 | if (match(V: IIOperand, P: m_Select(C: m_Value(), L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) |
1646 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
1647 | if (match(V: IIOperand, P: m_Select(C: m_Value(), L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X)))) |
1648 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
1649 | |
1650 | Value *Y; |
1651 | // abs(a * abs(b)) -> abs(a * b) |
1652 | if (match(V: IIOperand, |
1653 | P: m_OneUse(SubPattern: m_c_Mul(L: m_Value(V&: X), |
1654 | R: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: Y)))))) { |
1655 | bool NSW = |
1656 | cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() && IntMinIsPoison; |
1657 | auto *XY = NSW ? Builder.CreateNSWMul(LHS: X, RHS: Y) : Builder.CreateMul(LHS: X, RHS: Y); |
1658 | return replaceOperand(I&: *II, OpNum: 0, V: XY); |
1659 | } |
1660 | |
1661 | if (std::optional<bool> Known = |
1662 | getKnownSignOrZero(Op: IIOperand, SQ: SQ.getWithInstruction(I: II))) { |
      // abs(x) -> x if x >= 0 (includes abs(x-y) --> x - y where x >= y)
      // abs(x) -> x if x > 0 (includes abs(x-y) --> x - y where x > y)
1665 | if (!*Known) |
1666 | return replaceInstUsesWith(I&: *II, V: IIOperand); |
1667 | |
1668 | // abs(x) -> -x if x < 0 |
      // abs(x) -> -x if x <= 0 (includes abs(x-y) --> y - x where x <= y)
1670 | if (IntMinIsPoison) |
1671 | return BinaryOperator::CreateNSWNeg(Op: IIOperand); |
1672 | return BinaryOperator::CreateNeg(Op: IIOperand); |
1673 | } |
1674 | |
1675 | // abs (sext X) --> zext (abs X*) |
1676 | // Clear the IsIntMin (nsw) bit on the abs to allow narrowing. |
1677 | if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) { |
1678 | Value *NarrowAbs = |
1679 | Builder.CreateBinaryIntrinsic(ID: Intrinsic::abs, LHS: X, RHS: Builder.getFalse()); |
1680 | return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType()); |
1681 | } |
1682 | |
1683 | // Match a complicated way to check if a number is odd/even: |
1684 | // abs (srem X, 2) --> and X, 1 |
1685 | const APInt *C; |
1686 | if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == 2) |
1687 | return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: 1)); |
1688 | |
1689 | break; |
1690 | } |
1691 | case Intrinsic::umin: { |
1692 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
1693 | // umin(x, 1) == zext(x != 0) |
1694 | if (match(V: I1, P: m_One())) { |
1695 | assert(II->getType()->getScalarSizeInBits() != 1 && |
1696 | "Expected simplify of umin with max constant" ); |
1697 | Value *Zero = Constant::getNullValue(Ty: I0->getType()); |
1698 | Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero); |
1699 | return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType()); |
1700 | } |
1701 | // umin(cttz(x), const) --> cttz(x | (1 << const)) |
1702 | if (Value *FoldedCttz = |
1703 | foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>( |
1704 | I0, I1, DL, Builder)) |
1705 | return replaceInstUsesWith(I&: *II, V: FoldedCttz); |
1706 | // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const)) |
1707 | if (Value *FoldedCtlz = |
1708 | foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>( |
1709 | I0, I1, DL, Builder)) |
1710 | return replaceInstUsesWith(I&: *II, V: FoldedCtlz); |
1711 | [[fallthrough]]; |
1712 | } |
1713 | case Intrinsic::umax: { |
1714 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
1715 | Value *X, *Y; |
1716 | if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) && |
1717 | (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { |
1718 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y); |
1719 | return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType()); |
1720 | } |
1721 | Constant *C; |
1722 | if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) && |
1723 | I0->hasOneUse()) { |
1724 | if (Constant *NarrowC = getLosslessUnsignedTrunc(C, TruncTy: X->getType())) { |
1725 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC); |
1726 | return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType()); |
1727 | } |
1728 | } |
1729 | // If both operands of unsigned min/max are sign-extended, it is still ok |
1730 | // to narrow the operation. |
1731 | [[fallthrough]]; |
1732 | } |
1733 | case Intrinsic::smax: |
1734 | case Intrinsic::smin: { |
1735 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
1736 | Value *X, *Y; |
1737 | if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) && |
1738 | (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { |
1739 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y); |
1740 | return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType()); |
1741 | } |
1742 | |
1743 | Constant *C; |
1744 | if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) && |
1745 | I0->hasOneUse()) { |
1746 | if (Constant *NarrowC = getLosslessSignedTrunc(C, TruncTy: X->getType())) { |
1747 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC); |
1748 | return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType()); |
1749 | } |
1750 | } |
1751 | |
1752 | // umin(i1 X, i1 Y) -> and i1 X, Y |
1753 | // smax(i1 X, i1 Y) -> and i1 X, Y |
1754 | if ((IID == Intrinsic::umin || IID == Intrinsic::smax) && |
1755 | II->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
1756 | return BinaryOperator::CreateAnd(V1: I0, V2: I1); |
1757 | } |
1758 | |
1759 | // umax(i1 X, i1 Y) -> or i1 X, Y |
1760 | // smin(i1 X, i1 Y) -> or i1 X, Y |
1761 | if ((IID == Intrinsic::umax || IID == Intrinsic::smin) && |
1762 | II->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
1763 | return BinaryOperator::CreateOr(V1: I0, V2: I1); |
1764 | } |
1765 | |
1766 | if (IID == Intrinsic::smax || IID == Intrinsic::smin) { |
1767 | // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y) |
1768 | // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y) |
1769 | // TODO: Canonicalize neg after min/max if I1 is constant. |
1770 | if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) && |
1771 | (I0->hasOneUse() || I1->hasOneUse())) { |
1772 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID); |
1773 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y); |
1774 | return BinaryOperator::CreateNSWNeg(Op: InvMaxMin); |
1775 | } |
1776 | } |
1777 | |
1778 | // (umax X, (xor X, Pow2)) |
1779 | // -> (or X, Pow2) |
1780 | // (umin X, (xor X, Pow2)) |
1781 | // -> (and X, ~Pow2) |
1782 | // (smax X, (xor X, Pos_Pow2)) |
1783 | // -> (or X, Pos_Pow2) |
1784 | // (smin X, (xor X, Pos_Pow2)) |
1785 | // -> (and X, ~Pos_Pow2) |
1786 | // (smax X, (xor X, Neg_Pow2)) |
1787 | // -> (and X, ~Neg_Pow2) |
1788 | // (smin X, (xor X, Neg_Pow2)) |
1789 | // -> (or X, Neg_Pow2) |
1790 | if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) || |
1791 | match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) && |
1792 | isKnownToBeAPowerOfTwo(V: X, /* OrZero */ true)) { |
1793 | bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax; |
1794 | bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin; |
1795 | |
1796 | if (IID == Intrinsic::smax || IID == Intrinsic::smin) { |
1797 | auto KnownSign = getKnownSign(Op: X, SQ: SQ.getWithInstruction(I: II)); |
1798 | if (KnownSign == std::nullopt) { |
1799 | UseOr = false; |
1800 | UseAndN = false; |
1801 | } else if (*KnownSign /* true is Signed. */) { |
1802 | UseOr ^= true; |
1803 | UseAndN ^= true; |
1804 | Type *Ty = I0->getType(); |
        // A negative power of 2 must be IntMin. It is possible to prove
        // "negative and a power of 2" without actually having the known bits,
        // so just construct the value by hand.
1808 | X = Constant::getIntegerValue( |
1809 | Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits())); |
1810 | } |
1811 | } |
1812 | if (UseOr) |
1813 | return BinaryOperator::CreateOr(V1: I0, V2: X); |
1814 | else if (UseAndN) |
1815 | return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X)); |
1816 | } |
1817 | |
1818 | // If we can eliminate ~A and Y is free to invert: |
1819 | // max ~A, Y --> ~(min A, ~Y) |
1820 | // |
1821 | // Examples: |
1822 | // max ~A, ~Y --> ~(min A, Y) |
1823 | // max ~A, C --> ~(min A, ~C) |
1824 | // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z)) |
1825 | auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { |
1826 | Value *A; |
1827 | if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) && |
1828 | !isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) { |
1829 | if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) { |
1830 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID); |
1831 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY); |
1832 | return BinaryOperator::CreateNot(Op: InvMaxMin); |
1833 | } |
1834 | } |
1835 | return nullptr; |
1836 | }; |
1837 | |
1838 | if (Instruction *I = moveNotAfterMinMax(I0, I1)) |
1839 | return I; |
1840 | if (Instruction *I = moveNotAfterMinMax(I1, I0)) |
1841 | return I; |
1842 | |
1843 | if (Instruction *I = moveAddAfterMinMax(II, Builder)) |
1844 | return I; |
1845 | |
1846 | // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C |
1847 | const APInt *RHSC; |
1848 | if (match(V: I0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_NegatedPower2(V&: RHSC)))) && |
1849 | match(V: I1, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: Y), R: m_SpecificInt(V: *RHSC))))) |
1850 | return BinaryOperator::CreateAnd(V1: Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y), |
1851 | V2: ConstantInt::get(Ty: II->getType(), V: *RHSC)); |
1852 | |
1853 | // smax(X, -X) --> abs(X) |
1854 | // smin(X, -X) --> -abs(X) |
1855 | // umax(X, -X) --> -abs(X) |
1856 | // umin(X, -X) --> abs(X) |
1857 | if (isKnownNegation(X: I0, Y: I1)) { |
1858 | // We can choose either operand as the input to abs(), but if we can |
1859 | // eliminate the only use of a value, that's better for subsequent |
1860 | // transforms/analysis. |
1861 | if (I0->hasOneUse() && !I1->hasOneUse()) |
1862 | std::swap(a&: I0, b&: I1); |
1863 | |
1864 | // This is some variant of abs(). See if we can propagate 'nsw' to the abs |
1865 | // operation and potentially its negation. |
1866 | bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, /* NeedNSW */ true); |
1867 | Value *Abs = Builder.CreateBinaryIntrinsic( |
1868 | ID: Intrinsic::abs, LHS: I0, |
1869 | RHS: ConstantInt::getBool(Context&: II->getContext(), V: IntMinIsPoison)); |
1870 | |
1871 | // We don't have a "nabs" intrinsic, so negate if needed based on the |
1872 | // max/min operation. |
1873 | if (IID == Intrinsic::smin || IID == Intrinsic::umax) |
      Abs = Builder.CreateNeg(V: Abs, Name: "nabs", HasNSW: IntMinIsPoison);
1875 | return replaceInstUsesWith(I&: CI, V: Abs); |
1876 | } |
1877 | |
1878 | if (Instruction *Sel = foldClampRangeOfTwo(II, Builder)) |
1879 | return Sel; |
1880 | |
1881 | if (Instruction *SAdd = matchSAddSubSat(MinMax1&: *II)) |
1882 | return SAdd; |
1883 | |
1884 | if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ)) |
1885 | return replaceInstUsesWith(I&: *II, V: NewMinMax); |
1886 | |
1887 | if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder)) |
1888 | return R; |
1889 | |
1890 | if (Instruction *NewMinMax = factorizeMinMaxTree(II)) |
1891 | return NewMinMax; |
1892 | |
1893 | // Try to fold minmax with constant RHS based on range information |
1894 | if (match(V: I1, P: m_APIntAllowPoison(Res&: RHSC))) { |
1895 | ICmpInst::Predicate Pred = |
1896 | ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID)); |
1897 | bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID); |
1898 | ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits( |
1899 | V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II)); |
1900 | if (!LHS_CR.isFullSet()) { |
1901 | if (LHS_CR.icmp(Pred, Other: *RHSC)) |
1902 | return replaceInstUsesWith(I&: *II, V: I0); |
1903 | if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC)) |
1904 | return replaceInstUsesWith(I&: *II, |
1905 | V: ConstantInt::get(Ty: II->getType(), V: *RHSC)); |
1906 | } |
1907 | } |
1908 | |
1909 | break; |
1910 | } |
1911 | case Intrinsic::bitreverse: { |
1912 | Value *IIOperand = II->getArgOperand(i: 0); |
1913 | // bitrev (zext i1 X to ?) --> X ? SignBitC : 0 |
1914 | Value *X; |
1915 | if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) && |
1916 | X->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
1917 | Type *Ty = II->getType(); |
1918 | APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits()); |
1919 | return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit), |
1920 | S2: ConstantInt::getNullValue(Ty)); |
1921 | } |
1922 | |
1923 | if (Instruction *crossLogicOpFold = |
1924 | foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(V: IIOperand, Builder)) |
1925 | return crossLogicOpFold; |
1926 | |
1927 | break; |
1928 | } |
1929 | case Intrinsic::bswap: { |
1930 | Value *IIOperand = II->getArgOperand(i: 0); |
1931 | |
1932 | // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as |
1933 | // inverse-shift-of-bswap: |
1934 | // bswap (shl X, Y) --> lshr (bswap X), Y |
1935 | // bswap (lshr X, Y) --> shl (bswap X), Y |
1936 | Value *X, *Y; |
1937 | if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
1938 | unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits(); |
1939 | if (MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: 3))) { |
1940 | Value *NewSwap = Builder.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: X); |
1941 | BinaryOperator::BinaryOps InverseShift = |
1942 | cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl |
1943 | ? Instruction::LShr |
1944 | : Instruction::Shl; |
1945 | return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y); |
1946 | } |
1947 | } |
1948 | |
1949 | KnownBits Known = computeKnownBits(V: IIOperand, Depth: 0, CxtI: II); |
1950 | uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: 8); |
1951 | uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: 8); |
1952 | unsigned BW = Known.getBitWidth(); |
1953 | |
1954 | // bswap(x) -> shift(x) if x has exactly one "active byte" |
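    // For example, for an i32 value known to be 0x0000??00 (LZ = 16, TZ = 8),
    // bswap(x) is equivalent to shl(x, 8).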
1955 | if (BW - LZ - TZ == 8) { |
      assert(LZ != TZ && "active byte cannot be in the middle");
1957 | if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x |
1958 | return BinaryOperator::CreateNUWShl( |
1959 | V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ)); |
1960 | // -> lshr(x) if the "active byte" is in the high part of x |
1961 | return BinaryOperator::CreateExactLShr( |
1962 | V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ)); |
1963 | } |
1964 | |
1965 | // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) |
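    // For example, bswap(trunc i16 (bswap i32 X)) selects the top two bytes of
    // X in their original order, i.e. trunc i16 (lshr X, 16).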
1966 | if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) { |
1967 | unsigned C = X->getType()->getScalarSizeInBits() - BW; |
1968 | Value *CV = ConstantInt::get(Ty: X->getType(), V: C); |
1969 | Value *V = Builder.CreateLShr(LHS: X, RHS: CV); |
1970 | return new TruncInst(V, IIOperand->getType()); |
1971 | } |
1972 | |
1973 | if (Instruction *crossLogicOpFold = |
1974 | foldBitOrderCrossLogicOp<Intrinsic::bswap>(V: IIOperand, Builder)) { |
1975 | return crossLogicOpFold; |
1976 | } |
1977 | |
1978 | // Try to fold into bitreverse if bswap is the root of the expression tree. |
1979 | if (Instruction *BitOp = matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ false, |
1980 | /*MatchBitReversals*/ true)) |
1981 | return BitOp; |
1982 | break; |
1983 | } |
1984 | case Intrinsic::masked_load: |
1985 | if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(II&: *II)) |
1986 | return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp); |
1987 | break; |
1988 | case Intrinsic::masked_store: |
1989 | return simplifyMaskedStore(II&: *II); |
1990 | case Intrinsic::masked_gather: |
1991 | return simplifyMaskedGather(II&: *II); |
1992 | case Intrinsic::masked_scatter: |
1993 | return simplifyMaskedScatter(II&: *II); |
1994 | case Intrinsic::launder_invariant_group: |
1995 | case Intrinsic::strip_invariant_group: |
1996 | if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: *II, IC&: *this)) |
1997 | return replaceInstUsesWith(I&: *II, V: SkippedBarrier); |
1998 | break; |
1999 | case Intrinsic::powi: |
2000 | if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: 1))) { |
2001 | // 0 and 1 are handled in instsimplify |
2002 | // powi(x, -1) -> 1/x |
2003 | if (Power->isMinusOne()) |
2004 | return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: 1.0), |
2005 | V2: II->getArgOperand(i: 0), FMFSource: II); |
2006 | // powi(x, 2) -> x*x |
2007 | if (Power->equalsInt(V: 2)) |
2008 | return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: 0), |
2009 | V2: II->getArgOperand(i: 0), FMFSource: II); |
2010 | |
2011 | if (!Power->getValue()[0]) { |
2012 | Value *X; |
2013 | // If power is even: |
2014 | // powi(-x, p) -> powi(x, p) |
2015 | // powi(fabs(x), p) -> powi(x, p) |
2016 | // powi(copysign(x, y), p) -> powi(x, p) |
2017 | if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: X))) || |
2018 | match(V: II->getArgOperand(i: 0), P: m_FAbs(Op0: m_Value(V&: X))) || |
2019 | match(V: II->getArgOperand(i: 0), |
2020 | P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(V&: X), Op1: m_Value()))) |
2021 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
2022 | } |
2023 | } |
2024 | break; |
2025 | |
2026 | case Intrinsic::cttz: |
2027 | case Intrinsic::ctlz: |
2028 | if (auto *I = foldCttzCtlz(II&: *II, IC&: *this)) |
2029 | return I; |
2030 | break; |
2031 | |
2032 | case Intrinsic::ctpop: |
2033 | if (auto *I = foldCtpop(II&: *II, IC&: *this)) |
2034 | return I; |
2035 | break; |
2036 | |
2037 | case Intrinsic::fshl: |
2038 | case Intrinsic::fshr: { |
2039 | Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1); |
2040 | Type *Ty = II->getType(); |
2041 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
2042 | Constant *ShAmtC; |
2043 | if (match(V: II->getArgOperand(i: 2), P: m_ImmConstant(C&: ShAmtC))) { |
2044 | // Canonicalize a shift amount constant operand to modulo the bit-width. |
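      // For example: fshl i32 X, Y, 40 --> fshl i32 X, Y, 8.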
2045 | Constant *WidthC = ConstantInt::get(Ty, V: BitWidth); |
2046 | Constant *ModuloC = |
2047 | ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL); |
2048 | if (!ModuloC) |
2049 | return nullptr; |
2050 | if (ModuloC != ShAmtC) |
2051 | return replaceOperand(I&: *II, OpNum: 2, V: ModuloC); |
2052 | |
2053 | assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC, |
2054 | ShAmtC, DL), |
2055 | m_One()) && |
2056 | "Shift amount expected to be modulo bitwidth" ); |
2057 | |
2058 | // Canonicalize funnel shift right by constant to funnel shift left. This |
2059 | // is not entirely arbitrary. For historical reasons, the backend may |
2060 | // recognize rotate left patterns but miss rotate right patterns. |
2061 | if (IID == Intrinsic::fshr) { |
2062 | // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero. |
2063 | if (!isKnownNonZero(V: ShAmtC, Q: SQ.getWithInstruction(I: II))) |
2064 | return nullptr; |
2065 | |
2066 | Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC); |
2067 | Module *Mod = II->getModule(); |
2068 | Function *Fshl = Intrinsic::getDeclaration(M: Mod, id: Intrinsic::fshl, Tys: Ty); |
2069 | return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC }); |
2070 | } |
2071 | assert(IID == Intrinsic::fshl && |
2072 | "All funnel shifts by simple constants should go left" ); |
2073 | |
2074 | // fshl(X, 0, C) --> shl X, C |
2075 | // fshl(X, undef, C) --> shl X, C |
2076 | if (match(V: Op1, P: m_ZeroInt()) || match(V: Op1, P: m_Undef())) |
2077 | return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC); |
2078 | |
2079 | // fshl(0, X, C) --> lshr X, (BW-C) |
2080 | // fshl(undef, X, C) --> lshr X, (BW-C) |
2081 | if (match(V: Op0, P: m_ZeroInt()) || match(V: Op0, P: m_Undef())) |
2082 | return BinaryOperator::CreateLShr(V1: Op1, |
2083 | V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC)); |
2084 | |
2085 | // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form) |
2086 | if (Op0 == Op1 && BitWidth == 16 && match(V: ShAmtC, P: m_SpecificInt(V: 8))) { |
2087 | Module *Mod = II->getModule(); |
2088 | Function *Bswap = Intrinsic::getDeclaration(M: Mod, id: Intrinsic::bswap, Tys: Ty); |
2089 | return CallInst::Create(Func: Bswap, Args: { Op0 }); |
2090 | } |
2091 | if (Instruction *BitOp = |
2092 | matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ true, |
2093 | /*MatchBitReversals*/ true)) |
2094 | return BitOp; |
2095 | } |
2096 | |
2097 | // Left or right might be masked. |
2098 | if (SimplifyDemandedInstructionBits(Inst&: *II)) |
2099 | return &CI; |
2100 | |
2101 | // The shift amount (operand 2) of a funnel shift is modulo the bitwidth, |
2102 | // so only the low bits of the shift amount are demanded if the bitwidth is |
2103 | // a power-of-2. |
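    // For example, for a 32-bit funnel shift only the low 5 bits of the shift
    // amount are demanded.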
2104 | if (!isPowerOf2_32(Value: BitWidth)) |
2105 | break; |
2106 | APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth)); |
2107 | KnownBits Op2Known(BitWidth); |
2108 | if (SimplifyDemandedBits(I: II, OpNo: 2, DemandedMask: Op2Demanded, Known&: Op2Known)) |
2109 | return &CI; |
2110 | break; |
2111 | } |
2112 | case Intrinsic::ptrmask: { |
2113 | unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType()); |
2114 | KnownBits Known(BitWidth); |
2115 | if (SimplifyDemandedInstructionBits(Inst&: *II, Known)) |
2116 | return II; |
2117 | |
2118 | Value *InnerPtr, *InnerMask; |
2119 | bool Changed = false; |
2120 | // Combine: |
2121 | // (ptrmask (ptrmask p, A), B) |
2122 | // -> (ptrmask p, (and A, B)) |
2123 | if (match(V: II->getArgOperand(i: 0), |
2124 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: InnerPtr), |
2125 | Op1: m_Value(V&: InnerMask))))) { |
2126 | assert(II->getArgOperand(1)->getType() == InnerMask->getType() && |
2127 | "Mask types must match" ); |
2128 | // TODO: If InnerMask == Op1, we could copy attributes from inner |
2129 | // callsite -> outer callsite. |
2130 | Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: 1), RHS: InnerMask); |
2131 | replaceOperand(I&: CI, OpNum: 0, V: InnerPtr); |
2132 | replaceOperand(I&: CI, OpNum: 1, V: NewMask); |
2133 | Changed = true; |
2134 | } |
2135 | |
2136 | // See if we can deduce non-null. |
2137 | if (!CI.hasRetAttr(Kind: Attribute::NonNull) && |
2138 | (Known.isNonZero() || |
2139 | isKnownNonZero(V: II, Q: getSimplifyQuery().getWithInstruction(I: II)))) { |
2140 | CI.addRetAttr(Kind: Attribute::NonNull); |
2141 | Changed = true; |
2142 | } |
2143 | |
2144 | unsigned NewAlignmentLog = |
2145 | std::min(a: Value::MaxAlignmentExponent, |
2146 | b: std::min(a: BitWidth - 1, b: Known.countMinTrailingZeros())); |
2147 | // Known bits will capture if we had alignment information associated with |
2148 | // the pointer argument. |
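    // For example, if the low 3 bits of the result are known to be zero (say,
    // because the mask is a constant -8), the returned pointer is at least
    // 8-byte aligned.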
2149 | if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) { |
2150 | CI.addRetAttr(Attr: Attribute::getWithAlignment( |
2151 | Context&: CI.getContext(), Alignment: Align(uint64_t(1) << NewAlignmentLog))); |
2152 | Changed = true; |
2153 | } |
2154 | if (Changed) |
2155 | return &CI; |
2156 | break; |
2157 | } |
2158 | case Intrinsic::uadd_with_overflow: |
2159 | case Intrinsic::sadd_with_overflow: { |
2160 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
2161 | return I; |
2162 | |
2163 | // Given 2 constant operands whose sum does not overflow: |
2164 | // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1 |
2165 | // saddo (X +nsw C0), C1 -> saddo X, C0 + C1 |
2166 | Value *X; |
2167 | const APInt *C0, *C1; |
2168 | Value *Arg0 = II->getArgOperand(i: 0); |
2169 | Value *Arg1 = II->getArgOperand(i: 1); |
2170 | bool IsSigned = IID == Intrinsic::sadd_with_overflow; |
2171 | bool HasNWAdd = IsSigned |
2172 | ? match(V: Arg0, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0))) |
2173 | : match(V: Arg0, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0))); |
2174 | if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) { |
2175 | bool Overflow; |
2176 | APInt NewC = |
2177 | IsSigned ? C1->sadd_ov(RHS: *C0, Overflow) : C1->uadd_ov(RHS: *C0, Overflow); |
2178 | if (!Overflow) |
2179 | return replaceInstUsesWith( |
2180 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
2181 | ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC))); |
2182 | } |
2183 | break; |
2184 | } |
2185 | |
2186 | case Intrinsic::umul_with_overflow: |
2187 | case Intrinsic::smul_with_overflow: |
2188 | case Intrinsic::usub_with_overflow: |
2189 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
2190 | return I; |
2191 | break; |
2192 | |
2193 | case Intrinsic::ssub_with_overflow: { |
2194 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
2195 | return I; |
2196 | |
2197 | Constant *C; |
2198 | Value *Arg0 = II->getArgOperand(i: 0); |
2199 | Value *Arg1 = II->getArgOperand(i: 1); |
2200 | // Given a constant C that is not the minimum signed value |
2201 | // for an integer of a given bit width: |
2202 | // |
2203 | // ssubo X, C -> saddo X, -C |
2204 | if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) { |
2205 | Value *NegVal = ConstantExpr::getNeg(C); |
2206 | // Build a saddo call that is equivalent to the discovered |
2207 | // ssubo call. |
2208 | return replaceInstUsesWith( |
2209 | I&: *II, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow, |
2210 | LHS: Arg0, RHS: NegVal)); |
2211 | } |
2212 | |
2213 | break; |
2214 | } |
2215 | |
2216 | case Intrinsic::uadd_sat: |
2217 | case Intrinsic::sadd_sat: |
2218 | case Intrinsic::usub_sat: |
2219 | case Intrinsic::ssub_sat: { |
2220 | SaturatingInst *SI = cast<SaturatingInst>(Val: II); |
2221 | Type *Ty = SI->getType(); |
2222 | Value *Arg0 = SI->getLHS(); |
2223 | Value *Arg1 = SI->getRHS(); |
2224 | |
2225 | // Make use of known overflow information. |
2226 | OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(), |
2227 | LHS: Arg0, RHS: Arg1, CxtI: SI); |
2228 | switch (OR) { |
2229 | case OverflowResult::MayOverflow: |
2230 | break; |
2231 | case OverflowResult::NeverOverflows: |
2232 | if (SI->isSigned()) |
2233 | return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1); |
2234 | else |
2235 | return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1); |
2236 | case OverflowResult::AlwaysOverflowsLow: { |
2237 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
2238 | APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned()); |
2239 | return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min)); |
2240 | } |
2241 | case OverflowResult::AlwaysOverflowsHigh: { |
2242 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
2243 | APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned()); |
2244 | return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max)); |
2245 | } |
2246 | } |
2247 | |
    // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
    // which then simplifies to:
    //   usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
    //   usub_sat((sub nuw C, A), C1) -> 0 otherwise
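    // For example: usub_sat((sub nuw 10, A), 3) --> usub_sat(7, A).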
2252 | Constant *C, *C1; |
2253 | Value *A; |
2254 | if (IID == Intrinsic::usub_sat && |
2255 | match(V: Arg0, P: m_NUWSub(L: m_ImmConstant(C), R: m_Value(V&: A))) && |
2256 | match(V: Arg1, P: m_ImmConstant(C&: C1))) { |
2257 | auto *NewC = Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: C, RHS: C1); |
2258 | auto *NewSub = |
2259 | Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: NewC, RHS: A); |
2260 | return replaceInstUsesWith(I&: *SI, V: NewSub); |
2261 | } |
2262 | |
2263 | // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN |
2264 | if (IID == Intrinsic::ssub_sat && match(V: Arg1, P: m_Constant(C)) && |
2265 | C->isNotMinSignedValue()) { |
2266 | Value *NegVal = ConstantExpr::getNeg(C); |
2267 | return replaceInstUsesWith( |
2268 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
2269 | ID: Intrinsic::sadd_sat, LHS: Arg0, RHS: NegVal)); |
2270 | } |
2271 | |
2272 | // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2)) |
2273 | // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2)) |
2274 | // if Val and Val2 have the same sign |
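    // For example: uadd_sat(uadd_sat(X, 20), 30) --> uadd_sat(X, 50).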
2275 | if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) { |
2276 | Value *X; |
2277 | const APInt *Val, *Val2; |
2278 | APInt NewVal; |
2279 | bool IsUnsigned = |
2280 | IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat; |
2281 | if (Other->getIntrinsicID() == IID && |
2282 | match(V: Arg1, P: m_APInt(Res&: Val)) && |
2283 | match(V: Other->getArgOperand(i: 0), P: m_Value(V&: X)) && |
2284 | match(V: Other->getArgOperand(i: 1), P: m_APInt(Res&: Val2))) { |
2285 | if (IsUnsigned) |
2286 | NewVal = Val->uadd_sat(RHS: *Val2); |
2287 | else if (Val->isNonNegative() == Val2->isNonNegative()) { |
2288 | bool Overflow; |
2289 | NewVal = Val->sadd_ov(RHS: *Val2, Overflow); |
2290 | if (Overflow) { |
2291 | // Both adds together may add more than SignedMaxValue |
2292 | // without saturating the final result. |
2293 | break; |
2294 | } |
2295 | } else { |
2296 | // Cannot fold saturated addition with different signs. |
2297 | break; |
2298 | } |
2299 | |
2300 | return replaceInstUsesWith( |
2301 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
2302 | ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal))); |
2303 | } |
2304 | } |
2305 | break; |
2306 | } |
2307 | |
2308 | case Intrinsic::minnum: |
2309 | case Intrinsic::maxnum: |
2310 | case Intrinsic::minimum: |
2311 | case Intrinsic::maximum: { |
2312 | Value *Arg0 = II->getArgOperand(i: 0); |
2313 | Value *Arg1 = II->getArgOperand(i: 1); |
2314 | Value *X, *Y; |
2315 | if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) && |
2316 | (Arg0->hasOneUse() || Arg1->hasOneUse())) { |
2317 | // If both operands are negated, invert the call and negate the result: |
2318 | // min(-X, -Y) --> -(max(X, Y)) |
2319 | // max(-X, -Y) --> -(min(X, Y)) |
2320 | Intrinsic::ID NewIID; |
2321 | switch (IID) { |
2322 | case Intrinsic::maxnum: |
2323 | NewIID = Intrinsic::minnum; |
2324 | break; |
2325 | case Intrinsic::minnum: |
2326 | NewIID = Intrinsic::maxnum; |
2327 | break; |
2328 | case Intrinsic::maximum: |
2329 | NewIID = Intrinsic::minimum; |
2330 | break; |
2331 | case Intrinsic::minimum: |
2332 | NewIID = Intrinsic::maximum; |
2333 | break; |
2334 | default: |
2335 | llvm_unreachable("unexpected intrinsic ID" ); |
2336 | } |
2337 | Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II); |
2338 | Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall); |
2339 | FNeg->copyIRFlags(V: II); |
2340 | return FNeg; |
2341 | } |
2342 | |
2343 | // m(m(X, C2), C1) -> m(X, C) |
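    // For example: maxnum(maxnum(X, 2.0), 4.0) --> maxnum(X, 4.0).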
2344 | const APFloat *C1, *C2; |
2345 | if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) { |
2346 | if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) && |
2347 | ((match(V: M->getArgOperand(i: 0), P: m_Value(V&: X)) && |
2348 | match(V: M->getArgOperand(i: 1), P: m_APFloat(Res&: C2))) || |
2349 | (match(V: M->getArgOperand(i: 1), P: m_Value(V&: X)) && |
2350 | match(V: M->getArgOperand(i: 0), P: m_APFloat(Res&: C2))))) { |
2351 | APFloat Res(0.0); |
2352 | switch (IID) { |
2353 | case Intrinsic::maxnum: |
2354 | Res = maxnum(A: *C1, B: *C2); |
2355 | break; |
2356 | case Intrinsic::minnum: |
2357 | Res = minnum(A: *C1, B: *C2); |
2358 | break; |
2359 | case Intrinsic::maximum: |
2360 | Res = maximum(A: *C1, B: *C2); |
2361 | break; |
2362 | case Intrinsic::minimum: |
2363 | Res = minimum(A: *C1, B: *C2); |
2364 | break; |
2365 | default: |
2366 | llvm_unreachable("unexpected intrinsic ID" ); |
2367 | } |
2368 | Value *V = Builder.CreateBinaryIntrinsic( |
2369 | ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res), FMFSource: II); |
2370 | // TODO: Conservatively intersecting FMF. If Res == C2, the transform |
2371 | // was a simplification (so Arg0 and its original flags could |
2372 | // propagate?) |
2373 | if (auto *CI = dyn_cast<CallInst>(Val: V)) |
2374 | CI->andIRFlags(V: M); |
2375 | return replaceInstUsesWith(I&: *II, V); |
2376 | } |
2377 | } |
2378 | |
2379 | // m((fpext X), (fpext Y)) -> fpext (m(X, Y)) |
2380 | if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) && |
2381 | match(V: Arg1, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: Y)))) && |
2382 | X->getType() == Y->getType()) { |
2383 | Value *NewCall = |
2384 | Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName()); |
2385 | return new FPExtInst(NewCall, II->getType()); |
2386 | } |
2387 | |
2388 | // max X, -X --> fabs X |
2389 | // min X, -X --> -(fabs X) |
    // TODO: Remove the one-use limitation? It is obviously better for max,
    //       which is why we don't check for one-use in that case. For min it
    //       would cost an extra instruction (fnabs), but that is still likely
    //       better for analysis and codegen.
2394 | auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) { |
2395 | if (match(V: Op0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Op1, P: m_Specific(V: X))) |
2396 | return Op0->hasOneUse() || |
2397 | (IID != Intrinsic::minimum && IID != Intrinsic::minnum); |
2398 | return false; |
2399 | }; |
2400 | |
2401 | if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) { |
2402 | Value *R = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II); |
2403 | if (IID == Intrinsic::minimum || IID == Intrinsic::minnum) |
2404 | R = Builder.CreateFNegFMF(V: R, FMFSource: II); |
2405 | return replaceInstUsesWith(I&: *II, V: R); |
2406 | } |
2407 | |
2408 | break; |
2409 | } |
2410 | case Intrinsic::matrix_multiply: { |
2411 | // Optimize negation in matrix multiplication. |
2412 | |
2413 | // -A * -B -> A * B |
2414 | Value *A, *B; |
2415 | if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: A))) && |
2416 | match(V: II->getArgOperand(i: 1), P: m_FNeg(X: m_Value(V&: B)))) { |
2417 | replaceOperand(I&: *II, OpNum: 0, V: A); |
2418 | replaceOperand(I&: *II, OpNum: 1, V: B); |
2419 | return II; |
2420 | } |
2421 | |
2422 | Value *Op0 = II->getOperand(i_nocapture: 0); |
2423 | Value *Op1 = II->getOperand(i_nocapture: 1); |
2424 | Value *OpNotNeg, *NegatedOp; |
2425 | unsigned NegatedOpArg, OtherOpArg; |
2426 | if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) { |
2427 | NegatedOp = Op0; |
2428 | NegatedOpArg = 0; |
2429 | OtherOpArg = 1; |
2430 | } else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) { |
2431 | NegatedOp = Op1; |
2432 | NegatedOpArg = 1; |
2433 | OtherOpArg = 0; |
2434 | } else |
2435 | // Multiplication doesn't have a negated operand. |
2436 | break; |
2437 | |
2438 | // Only optimize if the negated operand has only one use. |
2439 | if (!NegatedOp->hasOneUse()) |
2440 | break; |
2441 | |
2442 | Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg); |
2443 | VectorType *RetTy = cast<VectorType>(Val: II->getType()); |
2444 | VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType()); |
2445 | VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType()); |
2446 | ElementCount NegatedCount = NegatedOpTy->getElementCount(); |
2447 | ElementCount OtherCount = OtherOpTy->getElementCount(); |
2448 | ElementCount RetCount = RetTy->getElementCount(); |
2449 | // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa. |
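    // For example, if the negated operand has 16 elements, the other operand 4,
    // and the result 8, negating the other operand costs 4 fnegs instead of 16.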
2450 | if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) && |
2451 | ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) { |
2452 | Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp); |
2453 | replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg); |
2454 | replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp); |
2455 | return II; |
2456 | } |
2457 | // (-A) * B -> -(A * B), if it is cheaper to negate the result |
2458 | if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) { |
2459 | SmallVector<Value *, 5> NewArgs(II->args()); |
2460 | NewArgs[NegatedOpArg] = OpNotNeg; |
2461 | Instruction *NewMul = |
2462 | Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II); |
2463 | return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II)); |
2464 | } |
2465 | break; |
2466 | } |
2467 | case Intrinsic::fmuladd: { |
2468 | // Try to simplify the underlying FMul. |
2469 | if (Value *V = simplifyFMulInst(LHS: II->getArgOperand(i: 0), RHS: II->getArgOperand(i: 1), |
2470 | FMF: II->getFastMathFlags(), |
2471 | Q: SQ.getWithInstruction(I: II))) { |
2472 | auto *FAdd = BinaryOperator::CreateFAdd(V1: V, V2: II->getArgOperand(i: 2)); |
2473 | FAdd->copyFastMathFlags(I: II); |
2474 | return FAdd; |
2475 | } |
2476 | |
2477 | [[fallthrough]]; |
2478 | } |
2479 | case Intrinsic::fma: { |
2480 | // fma fneg(x), fneg(y), z -> fma x, y, z |
2481 | Value *Src0 = II->getArgOperand(i: 0); |
2482 | Value *Src1 = II->getArgOperand(i: 1); |
2483 | Value *X, *Y; |
2484 | if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) { |
2485 | replaceOperand(I&: *II, OpNum: 0, V: X); |
2486 | replaceOperand(I&: *II, OpNum: 1, V: Y); |
2487 | return II; |
2488 | } |
2489 | |
2490 | // fma fabs(x), fabs(x), z -> fma x, x, z |
2491 | if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) && |
2492 | match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) { |
2493 | replaceOperand(I&: *II, OpNum: 0, V: X); |
2494 | replaceOperand(I&: *II, OpNum: 1, V: X); |
2495 | return II; |
2496 | } |
2497 | |
2498 | // Try to simplify the underlying FMul. We can only apply simplifications |
2499 | // that do not require rounding. |
2500 | if (Value *V = simplifyFMAFMul(LHS: II->getArgOperand(i: 0), RHS: II->getArgOperand(i: 1), |
2501 | FMF: II->getFastMathFlags(), |
2502 | Q: SQ.getWithInstruction(I: II))) { |
2503 | auto *FAdd = BinaryOperator::CreateFAdd(V1: V, V2: II->getArgOperand(i: 2)); |
2504 | FAdd->copyFastMathFlags(I: II); |
2505 | return FAdd; |
2506 | } |
2507 | |
2508 | // fma x, y, 0 -> fmul x, y |
2509 | // This is always valid for -0.0, but requires nsz for +0.0 as |
2510 | // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own. |
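| // Illustrative IR (names and types assumed): |
| //   %r = call nsz float @llvm.fma.f32(float %x, float %y, float 0.0) |
| //   --> |
| //   %r = fmul nsz float %x, %y |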
2511 | if (match(V: II->getArgOperand(i: 2), P: m_NegZeroFP()) || |
2512 | (match(V: II->getArgOperand(i: 2), P: m_PosZeroFP()) && |
2513 | II->getFastMathFlags().noSignedZeros())) |
2514 | return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II); |
2515 | |
2516 | break; |
2517 | } |
2518 | case Intrinsic::copysign: { |
2519 | Value *Mag = II->getArgOperand(i: 0), *Sign = II->getArgOperand(i: 1); |
2520 | if (std::optional<bool> KnownSignBit = computeKnownFPSignBit( |
2521 | V: Sign, /*Depth=*/0, SQ: getSimplifyQuery().getWithInstruction(I: II))) { |
2522 | if (*KnownSignBit) { |
2523 | // If we know that the sign argument is negative, reduce to FNABS: |
2524 | // copysign Mag, -Sign --> fneg (fabs Mag) |
2525 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II); |
2526 | return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II)); |
2527 | } |
2528 | |
2529 | // If we know that the sign argument is positive, reduce to FABS: |
2530 | // copysign Mag, +Sign --> fabs Mag |
2531 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II); |
2532 | return replaceInstUsesWith(I&: *II, V: Fabs); |
2533 | } |
2534 | |
2535 | // Propagate sign argument through nested calls: |
2536 | // copysign Mag, (copysign ?, X) --> copysign Mag, X |
2537 | Value *X; |
2538 | if (match(V: Sign, P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(), Op1: m_Value(V&: X)))) |
2539 | return replaceOperand(I&: *II, OpNum: 1, V: X); |
2540 | |
2541 | // Clear sign-bit of constant magnitude: |
2542 | // copysign -MagC, X --> copysign MagC, X |
2543 | // TODO: Support constant folding for fabs |
2544 | const APFloat *MagC; |
2545 | if (match(V: Mag, P: m_APFloat(Res&: MagC)) && MagC->isNegative()) { |
2546 | APFloat PosMagC = *MagC; |
2547 | PosMagC.clearSign(); |
2548 | return replaceOperand(I&: *II, OpNum: 0, V: ConstantFP::get(Ty: Mag->getType(), V: PosMagC)); |
2549 | } |
2550 | |
2551 | // Peek through changes of magnitude's sign-bit. This call rewrites those: |
2552 | // copysign (fabs X), Sign --> copysign X, Sign |
2553 | // copysign (fneg X), Sign --> copysign X, Sign |
2554 | if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) || match(V: Mag, P: m_FNeg(X: m_Value(V&: X)))) |
2555 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
2556 | |
2557 | break; |
2558 | } |
2559 | case Intrinsic::fabs: { |
2560 | Value *Cond, *TVal, *FVal; |
2561 | Value *Arg = II->getArgOperand(i: 0); |
2562 | Value *X; |
2563 | // fabs (-X) --> fabs (X) |
2564 | if (match(V: Arg, P: m_FNeg(X: m_Value(V&: X)))) { |
2565 | CallInst *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II); |
2566 | return replaceInstUsesWith(I&: CI, V: Fabs); |
2567 | } |
2568 | |
2569 | if (match(V: Arg, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) { |
2570 | // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF |
2571 | if (isa<Constant>(Val: TVal) || isa<Constant>(Val: FVal)) { |
2572 | CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal}); |
2573 | CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal}); |
2574 | SelectInst *SI = SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF); |
2575 | FastMathFlags FMF1 = II->getFastMathFlags(); |
2576 | FastMathFlags FMF2 = cast<SelectInst>(Val: Arg)->getFastMathFlags(); |
2577 | FMF2.setNoSignedZeros(false); |
2578 | SI->setFastMathFlags(FMF1 | FMF2); |
2579 | return SI; |
2580 | } |
2581 | // fabs (select Cond, -FVal, FVal) --> fabs FVal |
2582 | if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal)))) |
2583 | return replaceOperand(I&: *II, OpNum: 0, V: FVal); |
2584 | // fabs (select Cond, TVal, -TVal) --> fabs TVal |
2585 | if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal)))) |
2586 | return replaceOperand(I&: *II, OpNum: 0, V: TVal); |
2587 | } |
2588 | |
2589 | Value *Magnitude, *Sign; |
2590 | if (match(V: II->getArgOperand(i: 0), |
2591 | P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) { |
2592 | // fabs (copysign x, y) -> (fabs x) |
2593 | CallInst *AbsSign = |
2594 | Builder.CreateCall(Callee: II->getCalledFunction(), Args: {Magnitude}); |
2595 | AbsSign->copyFastMathFlags(I: II); |
2596 | return replaceInstUsesWith(I&: *II, V: AbsSign); |
2597 | } |
2598 | |
2599 | [[fallthrough]]; |
2600 | } |
2601 | case Intrinsic::ceil: |
2602 | case Intrinsic::floor: |
2603 | case Intrinsic::round: |
2604 | case Intrinsic::roundeven: |
2605 | case Intrinsic::nearbyint: |
2606 | case Intrinsic::rint: |
2607 | case Intrinsic::trunc: { |
2608 | Value *ExtSrc; |
2609 | if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: ExtSrc))))) { |
2610 | // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x) |
2611 | Value *NarrowII = Builder.CreateUnaryIntrinsic(ID: IID, V: ExtSrc, FMFSource: II); |
2612 | return new FPExtInst(NarrowII, II->getType()); |
2613 | } |
2614 | break; |
2615 | } |
2616 | case Intrinsic::cos: |
2617 | case Intrinsic::amdgcn_cos: { |
2618 | Value *X, *Sign; |
2619 | Value *Src = II->getArgOperand(i: 0); |
2620 | if (match(V: Src, P: m_FNeg(X: m_Value(V&: X))) || match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) || |
2621 | match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value(V&: Sign)))) { |
2622 | // cos(-x) --> cos(x) |
2623 | // cos(fabs(x)) --> cos(x) |
2624 | // cos(copysign(x, y)) --> cos(x) |
2625 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
2626 | } |
2627 | break; |
2628 | } |
2629 | case Intrinsic::sin: |
2630 | case Intrinsic::amdgcn_sin: { |
2631 | Value *X; |
2632 | if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) { |
2633 | // sin(-x) --> -sin(x) |
2634 | Value *NewSin = Builder.CreateUnaryIntrinsic(ID: IID, V: X, FMFSource: II); |
2635 | return UnaryOperator::CreateFNegFMF(Op: NewSin, FMFSource: II); |
2636 | } |
2637 | break; |
2638 | } |
2639 | case Intrinsic::ldexp: { |
2640 | // ldexp(ldexp(x, a), b) -> ldexp(x, a + b) |
2641 | // |
2642 | // The danger is that the first ldexp alone would overflow to infinity or |
2643 | // underflow to zero while the combined exponent avoids it. We accept this |
2644 | // risk when reassoc is set on both calls. |
2645 | // |
2646 | // It's also safe to fold if we know both exponents are >= 0 or <= 0 since |
2647 | // it would just double down on the overflow/underflow which would occur |
2648 | // anyway. |
2649 | // |
2650 | // TODO: Could do better if we had range tracking for the input value |
2651 | // exponent. Also could broaden sign check to cover == 0 case. |
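| // Illustrative IR (reassoc assumed on both calls): |
| //   %i = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 %a) |
| //   %r = call reassoc float @llvm.ldexp.f32.i32(float %i, i32 %b) |
| //   --> |
| //   %e = add i32 %a, %b |
| //   %r = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 %e) |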
2652 | Value *Src = II->getArgOperand(i: 0); |
2653 | Value *Exp = II->getArgOperand(i: 1); |
2654 | Value *InnerSrc; |
2655 | Value *InnerExp; |
2656 | if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ldexp>( |
2657 | Op0: m_Value(V&: InnerSrc), Op1: m_Value(V&: InnerExp)))) && |
2658 | Exp->getType() == InnerExp->getType()) { |
2659 | FastMathFlags FMF = II->getFastMathFlags(); |
2660 | FastMathFlags InnerFlags = cast<FPMathOperator>(Val: Src)->getFastMathFlags(); |
2661 | |
2662 | if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) || |
2663 | signBitMustBeTheSame(Op0: Exp, Op1: InnerExp, SQ: SQ.getWithInstruction(I: II))) { |
2664 | // TODO: Add nsw/nuw probably safe if integer type exceeds exponent |
2665 | // width. |
2666 | Value *NewExp = Builder.CreateAdd(LHS: InnerExp, RHS: Exp); |
2667 | II->setArgOperand(i: 1, v: NewExp); |
2668 | II->setFastMathFlags(InnerFlags); // Or the inner flags. |
2669 | return replaceOperand(I&: *II, OpNum: 0, V: InnerSrc); |
2670 | } |
2671 | } |
2672 | |
2673 | // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0) |
2674 | // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0) |
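| // Rationale: a zext'd i1 exponent is 0 or 1, so the scale factor is |
| // 2^0 = 1.0 or 2^1 = 2.0; a sext'd i1 exponent is 0 or -1, giving |
| // 1.0 or 2^-1 = 0.5. |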
2675 | Value *ExtSrc; |
2676 | if (match(V: Exp, P: m_ZExt(Op: m_Value(V&: ExtSrc))) && |
2677 | ExtSrc->getType()->getScalarSizeInBits() == 1) { |
2678 | Value *Select = |
2679 | Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 2.0), |
2680 | False: ConstantFP::get(Ty: II->getType(), V: 1.0)); |
2681 | return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II); |
2682 | } |
2683 | if (match(V: Exp, P: m_SExt(Op: m_Value(V&: ExtSrc))) && |
2684 | ExtSrc->getType()->getScalarSizeInBits() == 1) { |
2685 | Value *Select = |
2686 | Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 0.5), |
2687 | False: ConstantFP::get(Ty: II->getType(), V: 1.0)); |
2688 | return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II); |
2689 | } |
2690 | |
2691 | // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x |
2692 | // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp) |
2693 | // |
2694 | // TODO: If we cared, should insert a canonicalize for x |
2695 | Value *SelectCond, *SelectLHS, *SelectRHS; |
2696 | if (match(V: II->getArgOperand(i: 1), |
2697 | P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: SelectCond), L: m_Value(V&: SelectLHS), |
2698 | R: m_Value(V&: SelectRHS))))) { |
2699 | Value *NewLdexp = nullptr; |
2700 | Value *Select = nullptr; |
2701 | if (match(V: SelectRHS, P: m_ZeroInt())) { |
2702 | NewLdexp = Builder.CreateLdexp(Src, Exp: SelectLHS); |
2703 | Select = Builder.CreateSelect(C: SelectCond, True: NewLdexp, False: Src); |
2704 | } else if (match(V: SelectLHS, P: m_ZeroInt())) { |
2705 | NewLdexp = Builder.CreateLdexp(Src, Exp: SelectRHS); |
2706 | Select = Builder.CreateSelect(C: SelectCond, True: Src, False: NewLdexp); |
2707 | } |
2708 | |
2709 | if (NewLdexp) { |
2710 | Select->takeName(V: II); |
2711 | cast<Instruction>(Val: NewLdexp)->copyFastMathFlags(I: II); |
2712 | return replaceInstUsesWith(I&: *II, V: Select); |
2713 | } |
2714 | } |
2715 | |
2716 | break; |
2717 | } |
2718 | case Intrinsic::ptrauth_auth: |
2719 | case Intrinsic::ptrauth_resign: { |
2720 | // (sign|resign) + (auth|resign) can be folded by omitting the middle |
2721 | // sign+auth component if the key and discriminator match. |
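| // For example (illustrative): |
| //   auth(sign(p, k, d), k, d)            --> p |
| //   resign(sign(p, k, d), k, d, k2, d2)  --> sign(p, k2, d2) |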
2722 | bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign; |
2723 | Value *Ptr = II->getArgOperand(i: 0); |
2724 | Value *Key = II->getArgOperand(i: 1); |
2725 | Value *Disc = II->getArgOperand(i: 2); |
2726 | |
2727 | // AuthKey will be the key we need to end up authenticating against in |
2728 | // whatever we replace this sequence with. |
2729 | Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr; |
2730 | if (const auto *CI = dyn_cast<CallBase>(Val: Ptr)) { |
2731 | BasePtr = CI->getArgOperand(i: 0); |
2732 | if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) { |
2733 | if (CI->getArgOperand(i: 1) != Key || CI->getArgOperand(i: 2) != Disc) |
2734 | break; |
2735 | } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) { |
2736 | if (CI->getArgOperand(i: 3) != Key || CI->getArgOperand(i: 4) != Disc) |
2737 | break; |
2738 | AuthKey = CI->getArgOperand(i: 1); |
2739 | AuthDisc = CI->getArgOperand(i: 2); |
2740 | } else |
2741 | break; |
2742 | } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Val: Ptr)) { |
2743 | // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for |
2744 | // our purposes, so check for that too. |
2745 | const auto *CPA = dyn_cast<ConstantPtrAuth>(Val: PtrToInt->getOperand(i_nocapture: 0)); |
2746 | if (!CPA || !CPA->isKnownCompatibleWith(Key, Discriminator: Disc, DL)) |
2747 | break; |
2748 | |
2749 | // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr) |
2750 | if (NeedSign && isa<ConstantInt>(Val: II->getArgOperand(i: 4))) { |
2751 | auto *SignKey = cast<ConstantInt>(Val: II->getArgOperand(i: 3)); |
2752 | auto *SignDisc = cast<ConstantInt>(Val: II->getArgOperand(i: 4)); |
2753 | auto *SignAddrDisc = ConstantPointerNull::get(T: Builder.getPtrTy()); |
2754 | auto *NewCPA = ConstantPtrAuth::get(Ptr: CPA->getPointer(), Key: SignKey, |
2755 | Disc: SignDisc, AddrDisc: SignAddrDisc); |
2756 | replaceInstUsesWith( |
2757 | I&: *II, V: ConstantExpr::getPointerCast(C: NewCPA, Ty: II->getType())); |
2758 | return eraseInstFromFunction(I&: *II); |
2759 | } |
2760 | |
2761 | // auth(ptrauth(p,k,d),k,d) -> p |
2762 | BasePtr = Builder.CreatePtrToInt(V: CPA->getPointer(), DestTy: II->getType()); |
2763 | } else |
2764 | break; |
2765 | |
2766 | unsigned NewIntrin; |
2767 | if (AuthKey && NeedSign) { |
2768 | // resign(0,1) + resign(1,2) = resign(0, 2) |
2769 | NewIntrin = Intrinsic::ptrauth_resign; |
2770 | } else if (AuthKey) { |
2771 | // resign(0,1) + auth(1) = auth(0) |
2772 | NewIntrin = Intrinsic::ptrauth_auth; |
2773 | } else if (NeedSign) { |
2774 | // sign(0) + resign(0, 1) = sign(1) |
2775 | NewIntrin = Intrinsic::ptrauth_sign; |
2776 | } else { |
2777 | // sign(0) + auth(0) = nop |
2778 | replaceInstUsesWith(I&: *II, V: BasePtr); |
2779 | return eraseInstFromFunction(I&: *II); |
2780 | } |
2781 | |
2782 | SmallVector<Value *, 4> CallArgs; |
2783 | CallArgs.push_back(Elt: BasePtr); |
2784 | if (AuthKey) { |
2785 | CallArgs.push_back(Elt: AuthKey); |
2786 | CallArgs.push_back(Elt: AuthDisc); |
2787 | } |
2788 | |
2789 | if (NeedSign) { |
2790 | CallArgs.push_back(Elt: II->getArgOperand(i: 3)); |
2791 | CallArgs.push_back(Elt: II->getArgOperand(i: 4)); |
2792 | } |
2793 | |
2794 | Function *NewFn = Intrinsic::getDeclaration(M: II->getModule(), id: NewIntrin); |
2795 | return CallInst::Create(Func: NewFn, Args: CallArgs); |
2796 | } |
2797 | case Intrinsic::arm_neon_vtbl1: |
2798 | case Intrinsic::aarch64_neon_tbl1: |
2799 | if (Value *V = simplifyNeonTbl1(II: *II, Builder)) |
2800 | return replaceInstUsesWith(I&: *II, V); |
2801 | break; |
2802 | |
2803 | case Intrinsic::arm_neon_vmulls: |
2804 | case Intrinsic::arm_neon_vmullu: |
2805 | case Intrinsic::aarch64_neon_smull: |
2806 | case Intrinsic::aarch64_neon_umull: { |
2807 | Value *Arg0 = II->getArgOperand(i: 0); |
2808 | Value *Arg1 = II->getArgOperand(i: 1); |
2809 | |
2810 | // Handle mul by zero first: |
2811 | if (isa<ConstantAggregateZero>(Val: Arg0) || isa<ConstantAggregateZero>(Val: Arg1)) { |
2812 | return replaceInstUsesWith(I&: CI, V: ConstantAggregateZero::get(Ty: II->getType())); |
2813 | } |
2814 | |
2815 | // Check for constant LHS & RHS - in this case we just simplify. |
2816 | bool Zext = (IID == Intrinsic::arm_neon_vmullu || |
2817 | IID == Intrinsic::aarch64_neon_umull); |
2818 | VectorType *NewVT = cast<VectorType>(Val: II->getType()); |
2819 | if (Constant *CV0 = dyn_cast<Constant>(Val: Arg0)) { |
2820 | if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) { |
2821 | Value *V0 = Builder.CreateIntCast(V: CV0, DestTy: NewVT, /*isSigned=*/!Zext); |
2822 | Value *V1 = Builder.CreateIntCast(V: CV1, DestTy: NewVT, /*isSigned=*/!Zext); |
2823 | return replaceInstUsesWith(I&: CI, V: Builder.CreateMul(LHS: V0, RHS: V1)); |
2824 | } |
2825 | |
2826 | // Couldn't simplify - canonicalize constant to the RHS. |
2827 | std::swap(a&: Arg0, b&: Arg1); |
2828 | } |
2829 | |
2830 | // Handle mul by one: |
2831 | if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) |
2832 | if (ConstantInt *Splat = |
2833 | dyn_cast_or_null<ConstantInt>(Val: CV1->getSplatValue())) |
2834 | if (Splat->isOne()) |
2835 | return CastInst::CreateIntegerCast(S: Arg0, Ty: II->getType(), |
2836 | /*isSigned=*/!Zext); |
2837 | |
2838 | break; |
2839 | } |
2840 | case Intrinsic::arm_neon_aesd: |
2841 | case Intrinsic::arm_neon_aese: |
2842 | case Intrinsic::aarch64_crypto_aesd: |
2843 | case Intrinsic::aarch64_crypto_aese: { |
2844 | Value *DataArg = II->getArgOperand(i: 0); |
2845 | Value *KeyArg = II->getArgOperand(i: 1); |
2846 | |
2847 | // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR |
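| // Rationale: AESE/AESD start with AddRoundKey, an XOR of data and key, so |
| // aese(xor(x, k), 0) computes the same result as aese(x, k). |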
2848 | Value *Data, *Key; |
2849 | if (match(V: KeyArg, P: m_ZeroInt()) && |
2850 | match(V: DataArg, P: m_Xor(L: m_Value(V&: Data), R: m_Value(V&: Key)))) { |
2851 | replaceOperand(I&: *II, OpNum: 0, V: Data); |
2852 | replaceOperand(I&: *II, OpNum: 1, V: Key); |
2853 | return II; |
2854 | } |
2855 | break; |
2856 | } |
2857 | case Intrinsic::hexagon_V6_vandvrt: |
2858 | case Intrinsic::hexagon_V6_vandvrt_128B: { |
2859 | // Simplify Q -> V -> Q conversion. |
2860 | if (auto Op0 = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) { |
2861 | Intrinsic::ID ID0 = Op0->getIntrinsicID(); |
2862 | if (ID0 != Intrinsic::hexagon_V6_vandqrt && |
2863 | ID0 != Intrinsic::hexagon_V6_vandqrt_128B) |
2864 | break; |
2865 | Value *Bytes = Op0->getArgOperand(i: 1), *Mask = II->getArgOperand(i: 1); |
2866 | uint64_t Bytes1 = computeKnownBits(V: Bytes, Depth: 0, CxtI: Op0).One.getZExtValue(); |
2867 | uint64_t Mask1 = computeKnownBits(V: Mask, Depth: 0, CxtI: II).One.getZExtValue(); |
2868 | // Check if every byte has common bits in Bytes and Mask. |
2869 | uint64_t C = Bytes1 & Mask1; |
2870 | if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000)) |
2871 | return replaceInstUsesWith(I&: *II, V: Op0->getArgOperand(i: 0)); |
2872 | } |
2873 | break; |
2874 | } |
2875 | case Intrinsic::stackrestore: { |
2876 | enum class ClassifyResult { |
2877 | None, |
2878 | Alloca, |
2879 | StackRestore, |
2880 | CallWithSideEffects, |
2881 | }; |
2882 | auto Classify = [](const Instruction *I) { |
2883 | if (isa<AllocaInst>(Val: I)) |
2884 | return ClassifyResult::Alloca; |
2885 | |
2886 | if (auto *CI = dyn_cast<CallInst>(Val: I)) { |
2887 | if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) { |
2888 | if (II->getIntrinsicID() == Intrinsic::stackrestore) |
2889 | return ClassifyResult::StackRestore; |
2890 | |
2891 | if (II->mayHaveSideEffects()) |
2892 | return ClassifyResult::CallWithSideEffects; |
2893 | } else { |
2894 | // Treat all non-intrinsic calls as having side effects |
2895 | return ClassifyResult::CallWithSideEffects; |
2896 | } |
2897 | } |
2898 | |
2899 | return ClassifyResult::None; |
2900 | }; |
2901 | |
2902 | // If the stacksave and the stackrestore are in the same BB, and there is |
2903 | // no intervening call, alloca, or stackrestore of a different stacksave, |
2904 | // remove the restore. This can happen when variable allocas are DCE'd. |
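| // Illustrative IR (assuming nothing interesting in between): |
| //   %sp = call ptr @llvm.stacksave() |
| //   ... no allocas, calls, or other restores ... |
| //   call void @llvm.stackrestore(ptr %sp)   ; removable |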
2905 | if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) { |
2906 | if (SS->getIntrinsicID() == Intrinsic::stacksave && |
2907 | SS->getParent() == II->getParent()) { |
2908 | BasicBlock::iterator BI(SS); |
2909 | bool CannotRemove = false; |
2910 | for (++BI; &*BI != II; ++BI) { |
2911 | switch (Classify(&*BI)) { |
2912 | case ClassifyResult::None: |
2913 | // So far so good, look at next instructions. |
2914 | break; |
2915 | |
2916 | case ClassifyResult::StackRestore: |
2917 | // If we found an intervening stackrestore for a different |
2918 | // stacksave, we can't remove the stackrestore. Otherwise, continue. |
2919 | if (cast<IntrinsicInst>(Val&: *BI).getArgOperand(i: 0) != SS) |
2920 | CannotRemove = true; |
2921 | break; |
2922 | |
2923 | case ClassifyResult::Alloca: |
2924 | case ClassifyResult::CallWithSideEffects: |
2925 | // If we found an alloca, a non-intrinsic call, or an intrinsic |
2926 | // call with side effects, we can't remove the stackrestore. |
2927 | CannotRemove = true; |
2928 | break; |
2929 | } |
2930 | if (CannotRemove) |
2931 | break; |
2932 | } |
2933 | |
2934 | if (!CannotRemove) |
2935 | return eraseInstFromFunction(I&: CI); |
2936 | } |
2937 | } |
2938 | |
2939 | // Scan down this block to see if there is another stack restore in the |
2940 | // same block without an intervening call/alloca. |
2941 | BasicBlock::iterator BI(II); |
2942 | Instruction *TI = II->getParent()->getTerminator(); |
2943 | bool CannotRemove = false; |
2944 | for (++BI; &*BI != TI; ++BI) { |
2945 | switch (Classify(&*BI)) { |
2946 | case ClassifyResult::None: |
2947 | // So far so good, look at next instructions. |
2948 | break; |
2949 | |
2950 | case ClassifyResult::StackRestore: |
2951 | // If there is a stackrestore below this one, remove this one. |
2952 | return eraseInstFromFunction(I&: CI); |
2953 | |
2954 | case ClassifyResult::Alloca: |
2955 | case ClassifyResult::CallWithSideEffects: |
2956 | // If we found an alloca, a non-intrinsic call, or an intrinsic call |
2957 | // with side effects (such as llvm.stacksave and llvm.read_register), |
2958 | // we can't remove the stack restore. |
2959 | CannotRemove = true; |
2960 | break; |
2961 | } |
2962 | if (CannotRemove) |
2963 | break; |
2964 | } |
2965 | |
2966 | // If the stack restore is in a return, resume, or unwind block and if there |
2967 | // are no allocas or calls between the restore and the return, nuke the |
2968 | // restore. |
2969 | if (!CannotRemove && (isa<ReturnInst>(Val: TI) || isa<ResumeInst>(Val: TI))) |
2970 | return eraseInstFromFunction(I&: CI); |
2971 | break; |
2972 | } |
2973 | case Intrinsic::lifetime_end: |
2974 | // The sanitizers need to poison memory to detect invalid accesses, which is |
2975 | // possible even for an empty lifetime range. |
2976 | if (II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeAddress) || |
2977 | II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemory) || |
2978 | II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeHWAddress)) |
2979 | break; |
2980 | |
2981 | if (removeTriviallyEmptyRange(EndI&: *II, IC&: *this, IsStart: [](const IntrinsicInst &I) { |
2982 | return I.getIntrinsicID() == Intrinsic::lifetime_start; |
2983 | })) |
2984 | return nullptr; |
2985 | break; |
2986 | case Intrinsic::assume: { |
2987 | Value *IIOperand = II->getArgOperand(i: 0); |
2988 | SmallVector<OperandBundleDef, 4> OpBundles; |
2989 | II->getOperandBundlesAsDefs(Defs&: OpBundles); |
2990 | |
2991 | /// This will remove the boolean Condition from the assume given as |
2992 | /// argument and remove the assume if it becomes useless. |
2993 | /// Always returns nullptr for use as a return value. |
2994 | auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * { |
2995 | assert(isa<AssumeInst>(Assume)); |
2996 | if (isAssumeWithEmptyBundle(Assume: *cast<AssumeInst>(Val: II))) |
2997 | return eraseInstFromFunction(I&: CI); |
2998 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: ConstantInt::getTrue(Context&: II->getContext())); |
2999 | return nullptr; |
3000 | }; |
3001 | // Remove an assume if it is followed by an identical assume. |
3002 | // TODO: Do we need this? Unless there are conflicting assumptions, the |
3003 | // computeKnownBits(IIOperand) below here eliminates redundant assumes. |
3004 | Instruction *Next = II->getNextNonDebugInstruction(); |
3005 | if (match(V: Next, P: m_Intrinsic<Intrinsic::assume>(Op0: m_Specific(V: IIOperand)))) |
3006 | return RemoveConditionFromAssume(Next); |
3007 | |
3008 | // Canonicalize assume(a && b) -> assume(a); assume(b); |
3009 | // Note: New assumption intrinsics created here are registered by |
3010 | // the InstCombineIRInserter object. |
3011 | FunctionType *AssumeIntrinsicTy = II->getFunctionType(); |
3012 | Value *AssumeIntrinsic = II->getCalledOperand(); |
3013 | Value *A, *B; |
3014 | if (match(V: IIOperand, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
3015 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: A, OpBundles, |
3016 | Name: II->getName()); |
3017 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: B, Name: II->getName()); |
3018 | return eraseInstFromFunction(I&: *II); |
3019 | } |
3020 | // assume(!(a || b)) -> assume(!a); assume(!b); |
3021 | if (match(V: IIOperand, P: m_Not(V: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) { |
3022 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, |
3023 | Args: Builder.CreateNot(V: A), OpBundles, Name: II->getName()); |
3024 | Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, |
3025 | Args: Builder.CreateNot(V: B), Name: II->getName()); |
3026 | return eraseInstFromFunction(I&: *II); |
3027 | } |
3028 | |
3029 | // assume( (load addr) != null ) -> add 'nonnull' metadata to load |
3030 | // (if assume is valid at the load) |
3031 | CmpInst::Predicate Pred; |
3032 | Instruction *LHS; |
3033 | if (match(V: IIOperand, P: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Zero())) && |
3034 | Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && |
3035 | LHS->getType()->isPointerTy() && |
3036 | isValidAssumeForContext(I: II, CxtI: LHS, DT: &DT)) { |
3037 | MDNode *MD = MDNode::get(Context&: II->getContext(), MDs: std::nullopt); |
3038 | LHS->setMetadata(KindID: LLVMContext::MD_nonnull, Node: MD); |
3039 | LHS->setMetadata(KindID: LLVMContext::MD_noundef, Node: MD); |
3040 | return RemoveConditionFromAssume(II); |
3041 | |
3042 | // TODO: apply nonnull return attributes to calls and invokes |
3043 | // TODO: apply range metadata for range check patterns? |
3044 | } |
3045 | |
3046 | // Separate storage assumptions apply to the underlying allocations, not any |
3047 | // particular pointer within them. When evaluating the hints for AA purposes, |
3048 | // we call getUnderlyingObject on them; by precomputing the answers here we |
3049 | // can avoid having to do so repeatedly there. |
3050 | for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) { |
3051 | OperandBundleUse OBU = II->getOperandBundleAt(Index: Idx); |
3052 | if (OBU.getTagName() == "separate_storage" ) { |
3053 | assert(OBU.Inputs.size() == 2); |
3054 | auto MaybeSimplifyHint = [&](const Use &U) { |
3055 | Value *Hint = U.get(); |
3056 | // Not having a limit is safe because InstCombine removes unreachable |
3057 | // code. |
3058 | Value *UnderlyingObject = getUnderlyingObject(V: Hint, /*MaxLookup*/ 0); |
3059 | if (Hint != UnderlyingObject) |
3060 | replaceUse(U&: const_cast<Use &>(U), NewValue: UnderlyingObject); |
3061 | }; |
3062 | MaybeSimplifyHint(OBU.Inputs[0]); |
3063 | MaybeSimplifyHint(OBU.Inputs[1]); |
3064 | } |
3065 | } |
3066 | |
3067 | // Convert nonnull assume like: |
3068 | // %A = icmp ne i32* %PTR, null |
3069 | // call void @llvm.assume(i1 %A) |
3070 | // into |
3071 | // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ] |
3072 | if (EnableKnowledgeRetention && |
3073 | match(V: IIOperand, P: m_Cmp(Pred, L: m_Value(V&: A), R: m_Zero())) && |
3074 | Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) { |
3075 | if (auto *Replacement = buildAssumeFromKnowledge( |
3076 | Knowledge: {RetainedKnowledge{.AttrKind: Attribute::NonNull, .ArgValue: 0, .WasOn: A}}, CtxI: Next, AC: &AC, DT: &DT)) { |
3077 | |
3078 | Replacement->insertBefore(InsertPos: Next); |
3079 | AC.registerAssumption(CI: Replacement); |
3080 | return RemoveConditionFromAssume(II); |
3081 | } |
3082 | } |
3083 | |
3084 | // Convert alignment assume like: |
3085 | // %B = ptrtoint i32* %A to i64 |
3086 | // %C = and i64 %B, Constant |
3087 | // %D = icmp eq i64 %C, 0 |
3088 | // call void @llvm.assume(i1 %D) |
3089 | // into |
3090 | // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)] |
3091 | uint64_t AlignMask; |
3092 | if (EnableKnowledgeRetention && |
3093 | match(V: IIOperand, |
3094 | P: m_Cmp(Pred, L: m_And(L: m_Value(V&: A), R: m_ConstantInt(V&: AlignMask)), |
3095 | R: m_Zero())) && |
3096 | Pred == CmpInst::ICMP_EQ) { |
3097 | if (isPowerOf2_64(Value: AlignMask + 1)) { |
3098 | uint64_t Offset = 0; |
3099 | match(V: A, P: m_Add(L: m_Value(V&: A), R: m_ConstantInt(V&: Offset))); |
3100 | if (match(V: A, P: m_PtrToInt(Op: m_Value(V&: A)))) { |
3101 | /// Note: this doesn't preserve the offset information but merges |
3102 | /// offset and alignment. |
3103 | /// TODO: we can generate a GEP instead of merging the alignment with |
3104 | /// the offset. |
3105 | RetainedKnowledge RK{.AttrKind: Attribute::Alignment, |
3106 | .ArgValue: (unsigned)MinAlign(A: Offset, B: AlignMask + 1), .WasOn: A}; |
3107 | if (auto *Replacement = |
3108 | buildAssumeFromKnowledge(Knowledge: RK, CtxI: Next, AC: &AC, DT: &DT)) { |
3109 | |
3110 | Replacement->insertAfter(InsertPos: II); |
3111 | AC.registerAssumption(CI: Replacement); |
3112 | } |
3113 | return RemoveConditionFromAssume(II); |
3114 | } |
3115 | } |
3116 | } |
3117 | |
3118 | /// Canonicalize Knowledge in operand bundles. |
3119 | if (EnableKnowledgeRetention && II->hasOperandBundles()) { |
3120 | for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) { |
3121 | auto &BOI = II->bundle_op_info_begin()[Idx]; |
3122 | RetainedKnowledge RK = |
3123 | llvm::getKnowledgeFromBundle(Assume&: cast<AssumeInst>(Val&: *II), BOI); |
3124 | if (BOI.End - BOI.Begin > 2) |
3125 | continue; // Prevent reducing knowledge in an align with offset since |
3126 | // extracting a RetainedKnowledge from it loses the offset |
3127 | // information. |
3128 | RetainedKnowledge CanonRK = |
3129 | llvm::simplifyRetainedKnowledge(Assume: cast<AssumeInst>(Val: II), RK, |
3130 | AC: &getAssumptionCache(), |
3131 | DT: &getDominatorTree()); |
3132 | if (CanonRK == RK) |
3133 | continue; |
3134 | if (!CanonRK) { |
3135 | if (BOI.End - BOI.Begin > 0) { |
3136 | Worklist.pushValue(V: II->op_begin()[BOI.Begin]); |
3137 | Value::dropDroppableUse(U&: II->op_begin()[BOI.Begin]); |
3138 | } |
3139 | continue; |
3140 | } |
3141 | assert(RK.AttrKind == CanonRK.AttrKind); |
3142 | if (BOI.End - BOI.Begin > 0) |
3143 | II->op_begin()[BOI.Begin].set(CanonRK.WasOn); |
3144 | if (BOI.End - BOI.Begin > 1) |
3145 | II->op_begin()[BOI.Begin + 1].set(ConstantInt::get( |
3146 | Ty: Type::getInt64Ty(C&: II->getContext()), V: CanonRK.ArgValue)); |
3147 | if (RK.WasOn) |
3148 | Worklist.pushValue(V: RK.WasOn); |
3149 | return II; |
3150 | } |
3151 | } |
3152 | |
3153 | // If there is a dominating assume with the same condition as this one, |
3154 | // then this one is redundant, and should be removed. |
3155 | KnownBits Known(1); |
3156 | computeKnownBits(V: IIOperand, Known, Depth: 0, CxtI: II); |
3157 | if (Known.isAllOnes() && isAssumeWithEmptyBundle(Assume: cast<AssumeInst>(Val&: *II))) |
3158 | return eraseInstFromFunction(I&: *II); |
3159 | |
3160 | // assume(false) is unreachable. |
3161 | if (match(V: IIOperand, P: m_CombineOr(L: m_Zero(), R: m_Undef()))) { |
3162 | CreateNonTerminatorUnreachable(InsertAt: II); |
3163 | return eraseInstFromFunction(I&: *II); |
3164 | } |
3165 | |
3166 | // Update the cache of affected values for this assumption (we might be |
3167 | // here because we just simplified the condition). |
3168 | AC.updateAffectedValues(CI: cast<AssumeInst>(Val: II)); |
3169 | break; |
3170 | } |
3171 | case Intrinsic::experimental_guard: { |
3172 | // Is this guard followed by another guard? We scan forward over a small |
3173 | // fixed window of instructions to handle common cases with conditions |
3174 | // computed between guards. |
3175 | Instruction *NextInst = II->getNextNonDebugInstruction(); |
3176 | for (unsigned i = 0; i < GuardWideningWindow; i++) { |
3177 | // Note: Using context-free form to avoid compile time blow up |
3178 | if (!isSafeToSpeculativelyExecute(I: NextInst)) |
3179 | break; |
3180 | NextInst = NextInst->getNextNonDebugInstruction(); |
3181 | } |
3182 | Value *NextCond = nullptr; |
3183 | if (match(V: NextInst, |
3184 | P: m_Intrinsic<Intrinsic::experimental_guard>(Op0: m_Value(V&: NextCond)))) { |
3185 | Value *CurrCond = II->getArgOperand(i: 0); |
3186 | |
3187 | // Remove a guard that is immediately preceded by an identical guard. |
3188 | // Otherwise canonicalize guard(a); guard(b) -> guard(a & b). |
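| // The instructions between the two guards compute NextCond; hoist them |
| // above the first guard so the merged condition is defined there. |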
3189 | if (CurrCond != NextCond) { |
3190 | Instruction *MoveI = II->getNextNonDebugInstruction(); |
3191 | while (MoveI != NextInst) { |
3192 | auto *Temp = MoveI; |
3193 | MoveI = MoveI->getNextNonDebugInstruction(); |
3194 | Temp->moveBefore(MovePos: II); |
3195 | } |
3196 | replaceOperand(I&: *II, OpNum: 0, V: Builder.CreateAnd(LHS: CurrCond, RHS: NextCond)); |
3197 | } |
3198 | eraseInstFromFunction(I&: *NextInst); |
3199 | return II; |
3200 | } |
3201 | break; |
3202 | } |
3203 | case Intrinsic::vector_insert: { |
3204 | Value *Vec = II->getArgOperand(i: 0); |
3205 | Value *SubVec = II->getArgOperand(i: 1); |
3206 | Value *Idx = II->getArgOperand(i: 2); |
3207 | auto *DstTy = dyn_cast<FixedVectorType>(Val: II->getType()); |
3208 | auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType()); |
3209 | auto *SubVecTy = dyn_cast<FixedVectorType>(Val: SubVec->getType()); |
3210 | |
3211 | // Only canonicalize if the destination vector, Vec, and SubVec are all |
3212 | // fixed vectors. |
3213 | if (DstTy && VecTy && SubVecTy) { |
3214 | unsigned DstNumElts = DstTy->getNumElements(); |
3215 | unsigned VecNumElts = VecTy->getNumElements(); |
3216 | unsigned SubVecNumElts = SubVecTy->getNumElements(); |
3217 | unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
3218 | |
3219 | // An insert that entirely overwrites Vec with SubVec is a nop. |
3220 | if (VecNumElts == SubVecNumElts) |
3221 | return replaceInstUsesWith(I&: CI, V: SubVec); |
3222 | |
3223 | // Widen SubVec into a vector of the same width as Vec, since |
3224 | // shufflevector requires the two input vectors to be the same width. |
3225 | // Elements beyond the bounds of SubVec within the widened vector are |
3226 | // undefined. |
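| // Worked example (hypothetical sizes): Vec = <4 x i32>, SubVec = <2 x i32>, |
| // Idx = 2: WidenMask = <0, 1, poison, poison> widens SubVec to <4 x i32>, |
| // and Mask = <0, 1, 4, 5> keeps elements 0-1 of Vec and places the widened |
| // SubVec's elements (second-operand indices 4-5) into positions 2-3. |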
3227 | SmallVector<int, 8> WidenMask; |
3228 | unsigned i; |
3229 | for (i = 0; i != SubVecNumElts; ++i) |
3230 | WidenMask.push_back(Elt: i); |
3231 | for (; i != VecNumElts; ++i) |
3232 | WidenMask.push_back(Elt: PoisonMaskElem); |
3233 | |
3234 | Value *WidenShuffle = Builder.CreateShuffleVector(V: SubVec, Mask: WidenMask); |
3235 | |
3236 | SmallVector<int, 8> Mask; |
3237 | for (unsigned i = 0; i != IdxN; ++i) |
3238 | Mask.push_back(Elt: i); |
3239 | for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) |
3240 | Mask.push_back(Elt: i); |
3241 | for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) |
3242 | Mask.push_back(Elt: i); |
3243 | |
3244 | Value *Shuffle = Builder.CreateShuffleVector(V1: Vec, V2: WidenShuffle, Mask); |
3245 | return replaceInstUsesWith(I&: CI, V: Shuffle); |
3246 | } |
3247 | break; |
3248 | } |
3249 | case Intrinsic::vector_extract: { |
3250 | Value *Vec = II->getArgOperand(i: 0); |
3251 | Value *Idx = II->getArgOperand(i: 1); |
3252 | |
3253 | Type *ReturnType = II->getType(); |
3254 | // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx), |
3255 | // ExtractIdx) |
3256 | unsigned ExtractIdx = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
3257 | Value *InsertTuple, *InsertIdx, *InsertValue; |
3258 | if (match(V: Vec, P: m_Intrinsic<Intrinsic::vector_insert>(Op0: m_Value(V&: InsertTuple), |
3259 | Op1: m_Value(V&: InsertValue), |
3260 | Op2: m_Value(V&: InsertIdx))) && |
3261 | InsertValue->getType() == ReturnType) { |
3262 | unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue(); |
3263 | // Case where we get the same index right after setting it. |
3264 | // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) --> |
3265 | // InsertValue |
3266 | if (ExtractIdx == Index) |
3267 | return replaceInstUsesWith(I&: CI, V: InsertValue); |
3268 | // If we are getting a different index than what was set in the |
3269 | // insert.vector intrinsic, we can just set the input tuple to the one up |
3270 | // in the chain: |
3271 | //   extract.vector(insert.vector(InsertTuple, InsertValue, InsertIndex), ExtractIndex) |
3272 | //   --> extract.vector(InsertTuple, ExtractIndex) |
3273 | else |
3274 | return replaceOperand(I&: CI, OpNum: 0, V: InsertTuple); |
3275 | } |
3276 | |
3277 | auto *DstTy = dyn_cast<VectorType>(Val: ReturnType); |
3278 | auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType()); |
3279 | |
3280 | if (DstTy && VecTy) { |
3281 | auto DstEltCnt = DstTy->getElementCount(); |
3282 | auto VecEltCnt = VecTy->getElementCount(); |
3283 | unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
3284 | |
3285 | // Extracting the entirety of Vec is a nop. |
3286 | if (DstEltCnt == VecEltCnt) { |
3287 | replaceInstUsesWith(I&: CI, V: Vec); |
3288 | return eraseInstFromFunction(I&: CI); |
3289 | } |
3290 | |
3291 | // Only canonicalize to shufflevector if the destination vector and |
3292 | // Vec are fixed vectors. |
3293 | if (VecEltCnt.isScalable() || DstEltCnt.isScalable()) |
3294 | break; |
3295 | |
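| // e.g. (hypothetical sizes): extracting <2 x i32> at index 2 from a |
| // <4 x i32> source yields a shufflevector with mask <2, 3>. |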
3296 | SmallVector<int, 8> Mask; |
3297 | for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i) |
3298 | Mask.push_back(Elt: IdxN + i); |
3299 | |
3300 | Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask); |
3301 | return replaceInstUsesWith(I&: CI, V: Shuffle); |
3302 | } |
3303 | break; |
3304 | } |
3305 | case Intrinsic::vector_reverse: { |
3306 | Value *BO0, *BO1, *X, *Y; |
3307 | Value *Vec = II->getArgOperand(i: 0); |
3308 | if (match(V: Vec, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: BO0), R: m_Value(V&: BO1))))) { |
3309 | auto *OldBinOp = cast<BinaryOperator>(Val: Vec); |
3310 | if (match(V: BO0, P: m_VecReverse(Op0: m_Value(V&: X)))) { |
3311 | // rev(binop rev(X), rev(Y)) --> binop X, Y |
3312 | if (match(V: BO1, P: m_VecReverse(Op0: m_Value(V&: Y)))) |
3313 | return replaceInstUsesWith(I&: CI, V: BinaryOperator::CreateWithCopiedFlags( |
3314 | Opc: OldBinOp->getOpcode(), V1: X, V2: Y, |
3315 | CopyO: OldBinOp, Name: OldBinOp->getName(), |
3316 | InsertBefore: II->getIterator())); |
3317 | // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat |
3318 | if (isSplatValue(V: BO1)) |
3319 | return replaceInstUsesWith(I&: CI, V: BinaryOperator::CreateWithCopiedFlags( |
3320 | Opc: OldBinOp->getOpcode(), V1: X, V2: BO1, |
3321 | CopyO: OldBinOp, Name: OldBinOp->getName(), |
3322 | InsertBefore: II->getIterator())); |
3323 | } |
3324 | // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y |
3325 | if (match(V: BO1, P: m_VecReverse(Op0: m_Value(V&: Y))) && isSplatValue(V: BO0)) |
3326 | return replaceInstUsesWith(I&: CI, |
3327 | V: BinaryOperator::CreateWithCopiedFlags( |
3328 | Opc: OldBinOp->getOpcode(), V1: BO0, V2: Y, CopyO: OldBinOp, |
3329 | Name: OldBinOp->getName(), InsertBefore: II->getIterator())); |
3330 | } |
3331 | // rev(unop rev(X)) --> unop X |
3332 | if (match(V: Vec, P: m_OneUse(SubPattern: m_UnOp(X: m_VecReverse(Op0: m_Value(V&: X)))))) { |
3333 | auto *OldUnOp = cast<UnaryOperator>(Val: Vec); |
3334 | auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags( |
3335 | Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(), |
3336 | InsertBefore: II->getIterator()); |
3337 | return replaceInstUsesWith(I&: CI, V: NewUnOp); |
3338 | } |
3339 | break; |
3340 | } |
3341 | case Intrinsic::vector_reduce_or: |
3342 | case Intrinsic::vector_reduce_and: { |
3343 | // Canonicalize logical or/and reductions: |
3344 | // Or reduction for i1 is represented as: |
3345 | // %val = bitcast <ReduxWidth x i1> to iReduxWidth |
3346 | // %res = cmp ne iReduxWidth %val, 0 |
3347 | // And reduction for i1 is represented as: |
3348 | // %val = bitcast <ReduxWidth x i1> to iReduxWidth |
3349 | // %res = cmp eq iReduxWidth %val, -1 (i.e. all bits set) |
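| // Illustrative instance (<4 x i1> assumed) for the 'or' form: |
| //   %val = bitcast <4 x i1> %v to i4 |
| //   %res = icmp ne i4 %val, 0 |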
3350 | Value *Arg = II->getArgOperand(i: 0); |
3351 | Value *Vect; |
3352 | |
3353 | if (Value *NewOp = |
3354 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3355 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3356 | return II; |
3357 | } |
3358 | |
3359 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3360 | if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType())) |
3361 | if (FTy->getElementType() == Builder.getInt1Ty()) { |
3362 | Value *Res = Builder.CreateBitCast( |
3363 | V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements())); |
3364 | if (IID == Intrinsic::vector_reduce_and) { |
3365 | Res = Builder.CreateICmpEQ( |
3366 | LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType())); |
3367 | } else { |
3368 | assert(IID == Intrinsic::vector_reduce_or && |
3369 | "Expected or reduction." ); |
3370 | Res = Builder.CreateIsNotNull(Arg: Res); |
3371 | } |
3372 | if (Arg != Vect) |
3373 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
3374 | DestTy: II->getType()); |
3375 | return replaceInstUsesWith(I&: CI, V: Res); |
3376 | } |
3377 | } |
3378 | [[fallthrough]]; |
3379 | } |
3380 | case Intrinsic::vector_reduce_add: { |
3381 | if (IID == Intrinsic::vector_reduce_add) { |
3382 | // Convert vector_reduce_add(ZExt(<n x i1>)) to |
3383 | // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)). |
3384 | // Convert vector_reduce_add(SExt(<n x i1>)) to |
3385 | // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)). |
3386 | // Convert vector_reduce_add(<n x i1>) to |
3387 | // Trunc(ctpop(bitcast <n x i1> to in)). |
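| // Illustrative IR (<8 x i1> input, i32 result assumed) for the ZExt case: |
| //   %b = bitcast <8 x i1> %v to i8 |
| //   %p = call i8 @llvm.ctpop.i8(i8 %b) |
| //   %r = zext i8 %p to i32    ; negated instead for the SExt case |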
3388 | Value *Arg = II->getArgOperand(i: 0); |
3389 | Value *Vect; |
3390 | |
3391 | if (Value *NewOp = |
3392 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3393 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3394 | return II; |
3395 | } |
3396 | |
3397 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3398 | if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType())) |
3399 | if (FTy->getElementType() == Builder.getInt1Ty()) { |
3400 | Value *V = Builder.CreateBitCast( |
3401 | V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements())); |
3402 | Value *Res = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V); |
3403 | if (Res->getType() != II->getType()) |
3404 | Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType()); |
3405 | if (Arg != Vect && |
3406 | cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt) |
3407 | Res = Builder.CreateNeg(V: Res); |
3408 | return replaceInstUsesWith(I&: CI, V: Res); |
3409 | } |
3410 | } |
3411 | } |
3412 | [[fallthrough]]; |
3413 | } |
3414 | case Intrinsic::vector_reduce_xor: { |
3415 | if (IID == Intrinsic::vector_reduce_xor) { |
3416 | // Exclusive disjunction reduction over the vector with |
3417 | // (potentially-extended) i1 element type is actually a |
3418 | // (potentially-extended) arithmetic `add` reduction over the original |
3419 | // non-extended value: |
3420 | // vector_reduce_xor(?ext(<n x i1>)) |
3421 | // --> |
3422 | // ?ext(vector_reduce_add(<n x i1>)) |
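| // Rationale: xor over i1 values is addition modulo 2, and an i1 add |
| // reduction wraps modulo 2, so the two reductions agree. |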
3423 | Value *Arg = II->getArgOperand(i: 0); |
3424 | Value *Vect; |
3425 | |
3426 | if (Value *NewOp = |
3427 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3428 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3429 | return II; |
3430 | } |
3431 | |
3432 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3433 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
3434 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
3435 | Value *Res = Builder.CreateAddReduce(Src: Vect); |
3436 | if (Arg != Vect) |
3437 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
3438 | DestTy: II->getType()); |
3439 | return replaceInstUsesWith(I&: CI, V: Res); |
3440 | } |
3441 | } |
3442 | } |
3443 | [[fallthrough]]; |
3444 | } |
3445 | case Intrinsic::vector_reduce_mul: { |
3446 | if (IID == Intrinsic::vector_reduce_mul) { |
3447 | // Multiplicative reduction over the vector with (potentially-extended) |
3448 | // i1 element type is actually a (potentially zero-extended) |
3449 | // logical `and` reduction over the original non-extended value: |
3450 | // vector_reduce_mul(?ext(<n x i1>)) |
3451 | // --> |
3452 | // zext(vector_reduce_and(<n x i1>)) |
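| // Rationale: a product of i1 (0/1) values is 1 iff every element is 1, |
| // which is exactly the 'and' reduction. |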
3453 | Value *Arg = II->getArgOperand(i: 0); |
3454 | Value *Vect; |
3455 | |
3456 | if (Value *NewOp = |
3457 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3458 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3459 | return II; |
3460 | } |
3461 | |
3462 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3463 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
3464 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
3465 | Value *Res = Builder.CreateAndReduce(Src: Vect); |
3466 | if (Res->getType() != II->getType()) |
3467 | Res = Builder.CreateZExt(V: Res, DestTy: II->getType()); |
3468 | return replaceInstUsesWith(I&: CI, V: Res); |
3469 | } |
3470 | } |
3471 | } |
3472 | [[fallthrough]]; |
3473 | } |
3474 | case Intrinsic::vector_reduce_umin: |
3475 | case Intrinsic::vector_reduce_umax: { |
3476 | if (IID == Intrinsic::vector_reduce_umin || |
3477 | IID == Intrinsic::vector_reduce_umax) { |
3478 | // UMin/UMax reduction over the vector with (potentially-extended) |
3479 | // i1 element type is actually a (potentially-extended) |
3480 | // logical `and`/`or` reduction over the original non-extended value: |
3481 | // vector_reduce_u{min,max}(?ext(<n x i1>)) |
3482 | // --> |
3483 | // ?ext(vector_reduce_{and,or}(<n x i1>)) |
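| // Rationale: for unsigned i1 values, umin is 'and' and umax is 'or'. |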
3484 | Value *Arg = II->getArgOperand(i: 0); |
3485 | Value *Vect; |
3486 | |
3487 | if (Value *NewOp = |
3488 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3489 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3490 | return II; |
3491 | } |
3492 | |
3493 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3494 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
3495 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
3496 | Value *Res = IID == Intrinsic::vector_reduce_umin |
3497 | ? Builder.CreateAndReduce(Src: Vect) |
3498 | : Builder.CreateOrReduce(Src: Vect); |
3499 | if (Arg != Vect) |
3500 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
3501 | DestTy: II->getType()); |
3502 | return replaceInstUsesWith(I&: CI, V: Res); |
3503 | } |
3504 | } |
3505 | } |
3506 | [[fallthrough]]; |
3507 | } |
3508 | case Intrinsic::vector_reduce_smin: |
3509 | case Intrinsic::vector_reduce_smax: { |
3510 | if (IID == Intrinsic::vector_reduce_smin || |
3511 | IID == Intrinsic::vector_reduce_smax) { |
3512 | // SMin/SMax reduction over the vector with (potentially-extended) |
3513 | // i1 element type is actually a (potentially-extended) |
3514 | // logical `and`/`or` reduction over the original non-extended value: |
3515 | // vector_reduce_s{min,max}(<n x i1>) |
3516 | // --> |
3517 | // vector_reduce_{or,and}(<n x i1>) |
3518 | // and |
3519 | // vector_reduce_s{min,max}(sext(<n x i1>)) |
3520 | // --> |
3521 | // sext(vector_reduce_{or,and}(<n x i1>)) |
3522 | // and |
3523 | // vector_reduce_s{min,max}(zext(<n x i1>)) |
3524 | // --> |
3525 | // zext(vector_reduce_{and,or}(<n x i1>)) |
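| // Rationale: as signed i1, true (-1) is less than false (0), so smin is |
| // 'or' and smax is 'and'; after a zext the values are 0/1, flipping this |
| // to smin = 'and' and smax = 'or', which the ExtOpc comparison below picks. |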
3526 | Value *Arg = II->getArgOperand(i: 0); |
3527 | Value *Vect; |
3528 | |
3529 | if (Value *NewOp = |
3530 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
3531 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
3532 | return II; |
3533 | } |
3534 | |
3535 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
3536 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
3537 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
3538 | Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd; |
3539 | if (Arg != Vect) |
3540 | ExtOpc = cast<CastInst>(Val: Arg)->getOpcode(); |
3541 | Value *Res = ((IID == Intrinsic::vector_reduce_smin) == |
3542 | (ExtOpc == Instruction::CastOps::ZExt)) |
3543 | ? Builder.CreateAndReduce(Src: Vect) |
3544 | : Builder.CreateOrReduce(Src: Vect); |
3545 | if (Arg != Vect) |
3546 | Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType()); |
3547 | return replaceInstUsesWith(I&: CI, V: Res); |
3548 | } |
3549 | } |
3550 | } |
3551 | [[fallthrough]]; |
3552 | } |
3553 | case Intrinsic::vector_reduce_fmax: |
3554 | case Intrinsic::vector_reduce_fmin: |
3555 | case Intrinsic::vector_reduce_fadd: |
3556 | case Intrinsic::vector_reduce_fmul: { |
3557 | bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd && |
3558 | IID != Intrinsic::vector_reduce_fmul) || |
3559 | II->hasAllowReassoc(); |
3560 | const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd || |
3561 | IID == Intrinsic::vector_reduce_fmul) |
3562 | ? 1 |
3563 | : 0; |
3564 | Value *Arg = II->getArgOperand(i: ArgIdx); |
3565 | if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) { |
3566 | replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: NewOp); |
3567 | return nullptr; |
3568 | } |
3569 | break; |
3570 | } |
3571 | case Intrinsic::is_fpclass: { |
3572 | if (Instruction *I = foldIntrinsicIsFPClass(II&: *II)) |
3573 | return I; |
3574 | break; |
3575 | } |
3576 | case Intrinsic::threadlocal_address: { |
3577 | Align MinAlign = getKnownAlignment(V: II->getArgOperand(i: 0), DL, CxtI: II, AC: &AC, DT: &DT); |
3578 | MaybeAlign Align = II->getRetAlign(); |
3579 | if (MinAlign > Align.valueOrOne()) { |
3580 | II->addRetAttr(Attr: Attribute::getWithAlignment(Context&: II->getContext(), Alignment: MinAlign)); |
3581 | return II; |
3582 | } |
3583 | break; |
3584 | } |
3585 | default: { |
3586 | // Handle target specific intrinsics |
3587 | std::optional<Instruction *> V = targetInstCombineIntrinsic(II&: *II); |
3588 | if (V) |
3589 | return *V; |
3590 | break; |
3591 | } |
3592 | } |
3593 | |
3594 | // Try to fold intrinsic into select operands. This is legal if: |
3595 | // * The intrinsic is speculatable. |
3596 | // * The select condition is not a vector, or the intrinsic does not |
3597 | // perform cross-lane operations. |
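| // e.g. (illustrative, both select arms fold to constants): |
| //   ctpop(select %c, i32 4, i32 0) --> select %c, i32 1, i32 0 |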
3598 | switch (IID) { |
3599 | case Intrinsic::ctlz: |
3600 | case Intrinsic::cttz: |
3601 | case Intrinsic::ctpop: |
3602 | case Intrinsic::umin: |
3603 | case Intrinsic::umax: |
3604 | case Intrinsic::smin: |
3605 | case Intrinsic::smax: |
3606 | case Intrinsic::usub_sat: |
3607 | case Intrinsic::uadd_sat: |
3608 | case Intrinsic::ssub_sat: |
3609 | case Intrinsic::sadd_sat: |
3610 | for (Value *Op : II->args()) |
3611 | if (auto *Sel = dyn_cast<SelectInst>(Val: Op)) |
3612 | if (Instruction *R = FoldOpIntoSelect(Op&: *II, SI: Sel)) |
3613 | return R; |
3614 | [[fallthrough]]; |
3615 | default: |
3616 | break; |
3617 | } |
3618 | |
3619 | if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder)) |
3620 | return Shuf; |
3621 | |
3622 | // Some intrinsics (like experimental_gc_statepoint) can be used in an invoke |
3623 | // context, so they are handled in visitCallBase, which we should trigger here. |
3624 | return visitCallBase(Call&: *II); |
3625 | } |
3626 | |
3627 | // Fence instruction simplification |
3628 | Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { |
3629 | auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNonDebugInstruction()); |
3630 | // This check is solely here to handle arbitrary target-dependent syncscopes. |
3631 | // TODO: Can remove if it does not matter in practice. |
3632 | if (NFI && FI.isIdenticalTo(I: NFI)) |
3633 | return eraseInstFromFunction(I&: FI); |
3634 | |
3635 | // Returns true if FI1 is identical to, or a stronger fence than, FI2. |
3636 | auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) { |
3637 | auto FI1SyncScope = FI1->getSyncScopeID(); |
3638 | // Consider same scope, where scope is global or single-thread. |
3639 | if (FI1SyncScope != FI2->getSyncScopeID() || |
3640 | (FI1SyncScope != SyncScope::System && |
3641 | FI1SyncScope != SyncScope::SingleThread)) |
3642 | return false; |
3643 | |
3644 | return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering()); |
3645 | }; |
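| // e.g. (illustrative): an adjacent 'fence seq_cst' makes a neighbouring |
| // 'fence acquire' in the same scope redundant, so the weaker fence is erased. |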
3646 | if (NFI && isIdenticalOrStrongerFence(NFI, &FI)) |
3647 | return eraseInstFromFunction(I&: FI); |
3648 | |
3649 | if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNonDebugInstruction())) |
3650 | if (isIdenticalOrStrongerFence(PFI, &FI)) |
3651 | return eraseInstFromFunction(I&: FI); |
3652 | return nullptr; |
3653 | } |
3654 | |
3655 | // InvokeInst simplification |
3656 | Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) { |
3657 | return visitCallBase(Call&: II); |
3658 | } |
3659 | |
3660 | // CallBrInst simplification |
3661 | Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { |
3662 | return visitCallBase(Call&: CBI); |
3663 | } |
3664 | |
3665 | Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { |
3666 | if (!CI->getCalledFunction()) return nullptr; |
3667 | |
3668 | // Skip optimizing notail and musttail calls so |
3669 | // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants. |
3670 | // LibCallSimplifier::optimizeCall should try to preserve tail calls, though. |
3671 | if (CI->isMustTailCall() || CI->isNoTailCall()) |
3672 | return nullptr; |
3673 | |
3674 | auto InstCombineRAUW = [this](Instruction *From, Value *With) { |
3675 | replaceInstUsesWith(I&: *From, V: With); |
3676 | }; |
3677 | auto InstCombineErase = [this](Instruction *I) { |
3678 | eraseInstFromFunction(I&: *I); |
3679 | }; |
3680 | LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW, |
3681 | InstCombineErase); |
3682 | if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) { |
3683 | ++NumSimplified; |
3684 | return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With); |
3685 | } |
3686 | |
3687 | return nullptr; |
3688 | } |
3689 | |
3690 | static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) { |
3691 | // Strip off at most one level of pointer casts, looking for an alloca. This |
3692 | // is good enough in practice and simpler than handling any number of casts. |
3693 | Value *Underlying = TrampMem->stripPointerCasts(); |
3694 | if (Underlying != TrampMem && |
3695 | (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) |
3696 | return nullptr; |
3697 | if (!isa<AllocaInst>(Val: Underlying)) |
3698 | return nullptr; |
3699 | |
3700 | IntrinsicInst *InitTrampoline = nullptr; |
3701 | for (User *U : TrampMem->users()) { |
3702 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U); |
3703 | if (!II) |
3704 | return nullptr; |
3705 | if (II->getIntrinsicID() == Intrinsic::init_trampoline) { |
3706 | if (InitTrampoline) |
3707 | // More than one init_trampoline writes to this value. Give up. |
3708 | return nullptr; |
3709 | InitTrampoline = II; |
3710 | continue; |
3711 | } |
3712 | if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) |
3713 | // Allow any number of calls to adjust.trampoline. |
3714 | continue; |
3715 | return nullptr; |
3716 | } |
3717 | |
3718 | // No call to init.trampoline found. |
3719 | if (!InitTrampoline) |
3720 | return nullptr; |
3721 | |
3722 | // Check that the alloca is being used in the expected way. |
3723 | if (InitTrampoline->getOperand(i_nocapture: 0) != TrampMem) |
3724 | return nullptr; |
3725 | |
3726 | return InitTrampoline; |
3727 | } |
3728 | |
3729 | static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, |
3730 | Value *TrampMem) { |
3731 | // Visit all the previous instructions in the basic block, and try to find an |
3732 | // init.trampoline which has a direct path to the adjust.trampoline. |
3733 | for (BasicBlock::iterator I = AdjustTramp->getIterator(), |
3734 | E = AdjustTramp->getParent()->begin(); |
3735 | I != E;) { |
3736 | Instruction *Inst = &*--I; |
3737 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I)) |
3738 | if (II->getIntrinsicID() == Intrinsic::init_trampoline && |
3739 | II->getOperand(i_nocapture: 0) == TrampMem) |
3740 | return II; |
3741 | if (Inst->mayWriteToMemory()) |
3742 | return nullptr; |
3743 | } |
3744 | return nullptr; |
3745 | } |
3746 | |
3747 | // Given a call to llvm.adjust.trampoline, find and return the corresponding |
3748 | // call to llvm.init.trampoline if the call to the trampoline can be optimized |
3749 | // to a direct call to a function. Otherwise return NULL. |
3750 | static IntrinsicInst *findInitTrampoline(Value *Callee) { |
3751 | Callee = Callee->stripPointerCasts(); |
3752 | IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee); |
3753 | if (!AdjustTramp || |
3754 | AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) |
3755 | return nullptr; |
3756 | |
3757 | Value *TrampMem = AdjustTramp->getOperand(i_nocapture: 0); |
3758 | |
3759 | if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem)) |
3760 | return IT; |
3761 | if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem)) |
3762 | return IT; |
3763 | return nullptr; |
3764 | } |
3765 | |
3766 | bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, |
3767 | const TargetLibraryInfo *TLI) { |
3768 | // Note: We only handle cases which can't be driven from generic attributes |
3769 | // here. So, for example, nonnull and noalias (which are common properties |
3770 | // of some allocation functions) are expected to be handled via annotation |
3771 | // of the respective allocator declaration with generic attributes. |
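// Illustrative example (the exact attributes depend on the allocator's
// declaration and arguments): for a call such as
//   %p = call ptr @malloc(i64 40)
// a known non-zero constant size lets us add dereferenceable_or_null(40) to
// the return value (or dereferenceable(40) if the return is already known
// nonnull), and a constant power-of-two alignment argument on an aligned
// allocator becomes an align return attribute.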
3772 | bool Changed = false; |
3773 | |
3774 | if (!Call.getType()->isPointerTy()) |
3775 | return Changed; |
3776 | |
3777 | std::optional<APInt> Size = getAllocSize(CB: &Call, TLI); |
3778 | if (Size && *Size != 0) { |
3779 | // TODO: We really should just emit deref_or_null here and then |
3780 | // let the generic inference code combine that with nonnull. |
3781 | if (Call.hasRetAttr(Kind: Attribute::NonNull)) { |
3782 | Changed = !Call.hasRetAttr(Kind: Attribute::Dereferenceable); |
3783 | Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes( |
3784 | Context&: Call.getContext(), Bytes: Size->getLimitedValue())); |
3785 | } else { |
3786 | Changed = !Call.hasRetAttr(Kind: Attribute::DereferenceableOrNull); |
3787 | Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes( |
3788 | Context&: Call.getContext(), Bytes: Size->getLimitedValue())); |
3789 | } |
3790 | } |
3791 | |
3792 | // Add alignment attribute if alignment is a power of two constant. |
3793 | Value *Alignment = getAllocAlignment(V: &Call, TLI); |
3794 | if (!Alignment) |
3795 | return Changed; |
3796 | |
3797 | ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment); |
3798 | if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) { |
3799 | uint64_t AlignmentVal = AlignOpC->getZExtValue(); |
3800 | if (llvm::isPowerOf2_64(Value: AlignmentVal)) { |
3801 | Align ExistingAlign = Call.getRetAlign().valueOrOne(); |
3802 | Align NewAlign = Align(AlignmentVal); |
3803 | if (NewAlign > ExistingAlign) { |
3804 | Call.addRetAttr( |
3805 | Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign)); |
3806 | Changed = true; |
3807 | } |
3808 | } |
3809 | } |
3810 | return Changed; |
3811 | } |
3812 | |
3813 | /// Improvements for call, callbr and invoke instructions. |
3814 | Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { |
3815 | bool Changed = annotateAnyAllocSite(Call, TLI: &TLI); |
3816 | |
3817 | // Mark any parameters that are known to be non-null with the nonnull |
3818 | // attribute. This is helpful for inlining calls to functions with null |
3819 | // checks on their arguments. |
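// Illustrative example (hypothetical callee): in
//   call void @use(ptr %p, ptr %q)
// if %q can be proven non-null at this call site, the second argument gets
// the nonnull attribute, i.e. 'call void @use(ptr %p, ptr nonnull %q)', which
// lets null checks inside @use fold away once the call is inlined.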
3820 | SmallVector<unsigned, 4> ArgNos; |
3821 | unsigned ArgNo = 0; |
3822 | |
3823 | for (Value *V : Call.args()) { |
3824 | if (V->getType()->isPointerTy() && |
3825 | !Call.paramHasAttr(ArgNo, Kind: Attribute::NonNull) && |
3826 | isKnownNonZero(V, Q: getSimplifyQuery().getWithInstruction(I: &Call))) |
3827 | ArgNos.push_back(Elt: ArgNo); |
3828 | ArgNo++; |
3829 | } |
3830 | |
3831 | assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly." ); |
3832 | |
3833 | if (!ArgNos.empty()) { |
3834 | AttributeList AS = Call.getAttributes(); |
3835 | LLVMContext &Ctx = Call.getContext(); |
3836 | AS = AS.addParamAttribute(C&: Ctx, ArgNos, |
3837 | A: Attribute::get(Context&: Ctx, Kind: Attribute::NonNull)); |
3838 | Call.setAttributes(AS); |
3839 | Changed = true; |
3840 | } |
3841 | |
3842 | // If the callee is a pointer to a function, attempt to move any casts to the |
3843 | // arguments of the call/callbr/invoke. |
3844 | Value *Callee = Call.getCalledOperand(); |
3845 | Function *CalleeF = dyn_cast<Function>(Val: Callee); |
3846 | if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) && |
3847 | transformConstExprCastCall(Call)) |
3848 | return nullptr; |
3849 | |
3850 | if (CalleeF) { |
3851 | // Remove the convergent attr on calls when the callee is not convergent. |
3852 | if (Call.isConvergent() && !CalleeF->isConvergent() && |
3853 | !CalleeF->isIntrinsic()) { |
3854 | LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call |
3855 | << "\n" ); |
3856 | Call.setNotConvergent(); |
3857 | return &Call; |
3858 | } |
3859 | |
3860 | // If the call and callee calling conventions don't match, and neither one
3861 | // of the calling conventions is compatible with the C calling convention,
3862 | // this call must be unreachable, as the call is undefined.
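// For example (illustrative), a ccc call to a function defined with fastcc
// takes this path: the call is undefined, so it is replaced by a
// non-terminator unreachable and its result (if any) by poison.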
3863 | if ((CalleeF->getCallingConv() != Call.getCallingConv() && |
3864 | !(CalleeF->getCallingConv() == llvm::CallingConv::C && |
3865 | TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) && |
3866 | !(Call.getCallingConv() == llvm::CallingConv::C && |
3867 | TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) && |
3868 | // Only do this for calls to a function with a body. A prototype may |
3869 | // not actually end up matching the implementation's calling conv for a |
3870 | // variety of reasons (e.g. it may be written in assembly). |
3871 | !CalleeF->isDeclaration()) { |
3872 | Instruction *OldCall = &Call; |
3873 | CreateNonTerminatorUnreachable(InsertAt: OldCall); |
3874 | // If OldCall does not return void then replaceInstUsesWith poison.
3875 | // This allows value handles and custom metadata to adjust themselves.
3876 | if (!OldCall->getType()->isVoidTy()) |
3877 | replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType())); |
3878 | if (isa<CallInst>(Val: OldCall)) |
3879 | return eraseInstFromFunction(I&: *OldCall); |
3880 | |
3881 | // We cannot remove an invoke or a callbr, because that would change the
3882 | // CFG; just change the callee to a null pointer instead.
3883 | cast<CallBase>(Val: OldCall)->setCalledFunction( |
3884 | FTy: CalleeF->getFunctionType(), |
3885 | Fn: Constant::getNullValue(Ty: CalleeF->getType())); |
3886 | return nullptr; |
3887 | } |
3888 | } |
3889 | |
3890 | // Calling a null function pointer is undefined if a null address isn't |
3891 | // dereferenceable. |
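// (NullPointerIsDefined is true e.g. for functions carrying the
// "null-pointer-is-valid" attribute, where address 0 may legitimately be
// dereferenced, so the fold below is skipped there.)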
3892 | if ((isa<ConstantPointerNull>(Val: Callee) && |
3893 | !NullPointerIsDefined(F: Call.getFunction())) || |
3894 | isa<UndefValue>(Val: Callee)) { |
3895 | // If Call does not return void then replaceInstUsesWith poison.
3896 | // This allows value handles and custom metadata to adjust themselves.
3897 | if (!Call.getType()->isVoidTy()) |
3898 | replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType())); |
3899 | |
3900 | if (Call.isTerminator()) { |
3901 | // Can't remove an invoke or callbr because we cannot change the CFG. |
3902 | return nullptr; |
3903 | } |
3904 | |
3905 | // This instruction is not reachable, just remove it. |
3906 | CreateNonTerminatorUnreachable(InsertAt: &Call); |
3907 | return eraseInstFromFunction(I&: Call); |
3908 | } |
3909 | |
3910 | if (IntrinsicInst *II = findInitTrampoline(Callee)) |
3911 | return transformCallThroughTrampoline(Call, Tramp&: *II); |
3912 | |
3913 | if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) { |
3914 | InlineAsm *IA = cast<InlineAsm>(Val: Callee); |
3915 | if (!IA->canThrow()) { |
3916 | // Normal inline asm calls cannot throw - mark them |
3917 | // 'nounwind'. |
3918 | Call.setDoesNotThrow(); |
3919 | Changed = true; |
3920 | } |
3921 | } |
3922 | |
3923 | // Try to optimize the call if possible; we require DataLayout for most of
3924 | // this. None of these calls is considered possibly dead, so go ahead and
3925 | // delete the instruction now.
3926 | if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) { |
3927 | Instruction *I = tryOptimizeCall(CI); |
3928 | // If we changed something, return the result. Otherwise fall through to
3929 | // the remaining checks below.
3930 | if (I) return eraseInstFromFunction(I&: *I); |
3931 | } |
3932 | |
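// If some argument carries the 'returned' attribute, the call is known to
// return that argument, so forward it to the call's users (the call itself
// stays, since it may have side effects). Skipped for musttail calls.
// Illustrative example (hypothetical callee): for
//   %r = call ptr @copy_like(ptr returned %dst, ptr %src, i64 %n)
// uses of %r can be rewritten to use %dst directly.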
3933 | if (!Call.use_empty() && !Call.isMustTailCall()) |
3934 | if (Value *ReturnedArg = Call.getReturnedArgOperand()) { |
3935 | Type *CallTy = Call.getType(); |
3936 | Type *RetArgTy = ReturnedArg->getType(); |
3937 | if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy)) |
3938 | return replaceInstUsesWith( |
3939 | I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy)); |
3940 | } |
3941 | |
3942 | // Drop unnecessary kcfi operand bundles from calls that were converted |
3943 | // into direct calls. |
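// (KCFI type checks are only emitted for indirect calls, so the bundle
// carries no information once the callee is a known function; the debug
// output below merely reports a type-id mismatch before the bundle is
// dropped.)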
3944 | auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi); |
3945 | if (Bundle && !Call.isIndirectCall()) { |
3946 | DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi" , { |
3947 | if (CalleeF) { |
3948 | ConstantInt *FunctionType = nullptr; |
3949 | ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]); |
3950 | |
3951 | if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type)) |
3952 | FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0)); |
3953 | |
3954 | if (FunctionType && |
3955 | FunctionType->getZExtValue() != ExpectedType->getZExtValue()) |
3956 | dbgs() << Call.getModule()->getName() |
3957 | << ": warning: kcfi: " << Call.getCaller()->getName() |
3958 | << ": call to " << CalleeF->getName() |
3959 | << " using a mismatching function pointer type\n" ; |
3960 | } |
3961 | }); |
3962 | |
3963 | return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi); |
3964 | } |
3965 | |
3966 | if (isRemovableAlloc(V: &Call, TLI: &TLI)) |
3967 | return visitAllocSite(FI&: Call); |
3968 | |
3969 | // Handle intrinsics which can be used in both call and invoke context. |
3970 | switch (Call.getIntrinsicID()) { |
3971 | case Intrinsic::experimental_gc_statepoint: { |
3972 | GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call); |
3973 | SmallPtrSet<Value *, 32> LiveGcValues; |
3974 | for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { |
3975 | GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); |
3976 | |
3977 | // Remove the relocation if unused. |
3978 | if (GCR.use_empty()) { |
3979 | eraseInstFromFunction(I&: GCR); |
3980 | continue; |
3981 | } |
3982 | |
3983 | Value *DerivedPtr = GCR.getDerivedPtr(); |
3984 | Value *BasePtr = GCR.getBasePtr(); |
3985 | |
3986 | // Undef is undef, even after relocation. |
3987 | if (isa<UndefValue>(Val: DerivedPtr) || isa<UndefValue>(Val: BasePtr)) { |
3988 | replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType())); |
3989 | eraseInstFromFunction(I&: GCR); |
3990 | continue; |
3991 | } |
3992 | |
3993 | if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) { |
3994 | // The relocation of null will be null for most any collector. |
3995 | // TODO: provide a hook for this in GCStrategy. There might be some |
3996 | // weird collector this property does not hold for. |
3997 | if (isa<ConstantPointerNull>(Val: DerivedPtr)) { |
3998 | // Use null-pointer of gc_relocate's type to replace it. |
3999 | replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT)); |
4000 | eraseInstFromFunction(I&: GCR); |
4001 | continue; |
4002 | } |
4003 | |
4004 | // isKnownNonNull -> nonnull attribute |
4005 | if (!GCR.hasRetAttr(Kind: Attribute::NonNull) && |
4006 | isKnownNonZero(V: DerivedPtr, |
4007 | Q: getSimplifyQuery().getWithInstruction(I: &Call))) { |
4008 | GCR.addRetAttr(Kind: Attribute::NonNull); |
4009 | // We discovered a new fact; re-check the users.
4010 | Worklist.pushUsersToWorkList(I&: GCR); |
4011 | } |
4012 | } |
4013 | |
4014 | // If we have two copies of the same pointer in the statepoint argument |
4015 | // list, canonicalize to one. This may let us common gc.relocates. |
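// Illustrative example (intrinsic name suffixes elided): if the gc-live
// bundle holds %p at both index 0 and index 1, then
//   %r = call ptr @llvm.experimental.gc.relocate(token %tok, i32 0, i32 1)
// is rewritten to use i32 0 for both the base and the derived index, so that
// identical relocates can later be deduplicated.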
4016 | if (GCR.getBasePtr() == GCR.getDerivedPtr() && |
4017 | GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { |
4018 | auto *OpIntTy = GCR.getOperand(i_nocapture: 2)->getType(); |
4019 | GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex())); |
4020 | } |
4021 | |
4022 | // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) |
4023 | // Canonicalize on the type from the uses to the defs |
4024 | |
4025 | // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...) |
4026 | LiveGcValues.insert(Ptr: BasePtr); |
4027 | LiveGcValues.insert(Ptr: DerivedPtr); |
4028 | } |
4029 | std::optional<OperandBundleUse> Bundle = |
4030 | GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live); |
4031 | unsigned NumOfGCLives = LiveGcValues.size(); |
4032 | if (!Bundle || NumOfGCLives == Bundle->Inputs.size()) |
4033 | break; |
4034 | // We can reduce the size of the gc-live bundle.
4035 | DenseMap<Value *, unsigned> Val2Idx; |
4036 | std::vector<Value *> NewLiveGc; |
4037 | for (Value *V : Bundle->Inputs) { |
4038 | if (Val2Idx.count(Val: V)) |
4039 | continue; |
4040 | if (LiveGcValues.count(Ptr: V)) { |
4041 | Val2Idx[V] = NewLiveGc.size(); |
4042 | NewLiveGc.push_back(x: V); |
4043 | } else |
4044 | Val2Idx[V] = NumOfGCLives; |
4045 | } |
4046 | // Update all gc.relocates |
4047 | for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { |
4048 | GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); |
4049 | Value *BasePtr = GCR.getBasePtr(); |
4050 | assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives && |
4051 | "Missed live gc for base pointer" ); |
4052 | auto *OpIntTy1 = GCR.getOperand(i_nocapture: 1)->getType(); |
4053 | GCR.setOperand(i_nocapture: 1, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx[BasePtr])); |
4054 | Value *DerivedPtr = GCR.getDerivedPtr(); |
4055 | assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives && |
4056 | "Missed live gc for derived pointer" ); |
4057 | auto *OpIntTy2 = GCR.getOperand(i_nocapture: 2)->getType(); |
4058 | GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx[DerivedPtr])); |
4059 | } |
4060 | // Create new statepoint instruction. |
4061 | OperandBundleDef NewBundle("gc-live" , NewLiveGc); |
4062 | return CallBase::Create(CB: &Call, Bundle: NewBundle); |
4063 | } |
4064 | default: { break; } |
4065 | } |
4066 | |
4067 | return Changed ? &Call : nullptr; |
4068 | } |
4069 | |
4070 | /// If the callee is a constexpr cast of a function, attempt to move the cast to |
4071 | /// the arguments of the call/invoke. |
4072 | /// CallBrInst is not supported. |
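/// Illustrative example: a K&R-style caller can leave us with
///   call void @f(i32 %x)
/// for a function that is actually defined as 'define i32 @f(i32 %x)'. Since
/// the result is unused, the call can be rewritten against @f's real
/// signature; mismatched argument or return types are only handled when a
/// bitcast or no-op pointer cast suffices.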
4073 | bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { |
4074 | auto *Callee = |
4075 | dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts()); |
4076 | if (!Callee) |
4077 | return false; |
4078 | |
4079 | assert(!isa<CallBrInst>(Call) && |
4080 | "CallBr's don't have a single point after a def to insert at" ); |
4081 | |
4082 | // If this is a call to a thunk function, don't remove the cast. Thunks are |
4083 | // used to transparently forward all incoming parameters and outgoing return |
4084 | // values, so it's important to leave the cast in place. |
4085 | if (Callee->hasFnAttribute(Kind: "thunk" )) |
4086 | return false; |
4087 | |
4088 | // If this is a call to a naked function, the assembly might be using an
4089 | // argument or otherwise relying on the frame layout; the function
4090 | // prototype would then mismatch.
4091 | if (Callee->hasFnAttribute(Kind: Attribute::Naked)) |
4092 | return false; |
4093 | |
4094 | // If this is a musttail call, the callee's prototype must match the caller's |
4095 | // prototype with the exception of pointee types. The code below doesn't |
4096 | // implement that, so we can't do this transform. |
4097 | // TODO: Do the transform if it only requires adding pointer casts. |
4098 | if (Call.isMustTailCall()) |
4099 | return false; |
4100 | |
4101 | Instruction *Caller = &Call; |
4102 | const AttributeList &CallerPAL = Call.getAttributes(); |
4103 | |
4104 | // Okay, this is a cast from a function to a different type. Unless doing so
4105 | // would cause a type conversion of one of our arguments, change this call to
4106 | // be a direct call with arguments cast to the appropriate types.
4107 | FunctionType *FT = Callee->getFunctionType(); |
4108 | Type *OldRetTy = Caller->getType(); |
4109 | Type *NewRetTy = FT->getReturnType(); |
4110 | |
4111 | // Check to see if we are changing the return type... |
4112 | if (OldRetTy != NewRetTy) { |
4113 | |
4114 | if (NewRetTy->isStructTy()) |
4115 | return false; // TODO: Handle multiple return values. |
4116 | |
4117 | if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) { |
4118 | if (Callee->isDeclaration()) |
4119 | return false; // Cannot transform this return value. |
4120 | |
4121 | if (!Caller->use_empty() && |
4122 | // void -> non-void is handled specially |
4123 | !NewRetTy->isVoidTy()) |
4124 | return false; // Cannot transform this return value. |
4125 | } |
4126 | |
4127 | if (!CallerPAL.isEmpty() && !Caller->use_empty()) { |
4128 | AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); |
4129 | if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy))) |
4130 | return false; // Attribute not compatible with transformed value. |
4131 | } |
4132 | |
4133 | // If the callbase is an invoke instruction, and the return value is |
4134 | // used by a PHI node in a successor, we cannot change the return type of |
4135 | // the call because there is no place to put the cast instruction (without |
4136 | // breaking the critical edge). Bail out in this case. |
4137 | if (!Caller->use_empty()) { |
4138 | BasicBlock *PhisNotSupportedBlock = nullptr; |
4139 | if (auto *II = dyn_cast<InvokeInst>(Val: Caller)) |
4140 | PhisNotSupportedBlock = II->getNormalDest(); |
4141 | if (PhisNotSupportedBlock) |
4142 | for (User *U : Caller->users()) |
4143 | if (PHINode *PN = dyn_cast<PHINode>(Val: U)) |
4144 | if (PN->getParent() == PhisNotSupportedBlock) |
4145 | return false; |
4146 | } |
4147 | } |
4148 | |
4149 | unsigned NumActualArgs = Call.arg_size(); |
4150 | unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs); |
4151 | |
4152 | // Prevent us turning: |
4153 | // declare void @takes_i32_inalloca(i32* inalloca) |
4154 | // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0) |
4155 | // |
4156 | // into: |
4157 | // call void @takes_i32_inalloca(i32* null) |
4158 | // |
4159 | // Similarly, avoid folding away bitcasts of byval calls. |
4160 | if (Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca) || |
4161 | Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::Preallocated)) |
4162 | return false; |
4163 | |
4164 | auto AI = Call.arg_begin(); |
4165 | for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { |
4166 | Type *ParamTy = FT->getParamType(i); |
4167 | Type *ActTy = (*AI)->getType(); |
4168 | |
4169 | if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL)) |
4170 | return false; // Cannot transform this parameter value. |
4171 | |
4172 | // Check if there are any incompatible attributes we cannot drop safely. |
4173 | if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i)) |
4174 | .overlaps(AM: AttributeFuncs::typeIncompatible( |
4175 | Ty: ParamTy, ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP))) |
4176 | return false; // Attribute not compatible with transformed value. |
4177 | |
4178 | if (Call.isInAllocaArgument(ArgNo: i) || |
4179 | CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::Preallocated)) |
4180 | return false; // Cannot transform to and from inalloca/preallocated. |
4181 | |
4182 | if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::SwiftError)) |
4183 | return false; |
4184 | |
4185 | if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::ByVal) != |
4186 | Callee->getAttributes().hasParamAttr(ArgNo: i, Kind: Attribute::ByVal)) |
4187 | return false; // Cannot transform to or from byval. |
4188 | } |
4189 | |
4190 | if (Callee->isDeclaration()) { |
4191 | // Do not delete arguments unless we have a function body. |
4192 | if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) |
4193 | return false; |
4194 | |
4195 | // If the callee is just a declaration, don't change the varargsness of the |
4196 | // call. We don't want to introduce a varargs call where one doesn't |
4197 | // already exist. |
4198 | if (FT->isVarArg() != Call.getFunctionType()->isVarArg()) |
4199 | return false; |
4200 | |
4201 | // If both the callee and the cast type are varargs, we still have to make |
4202 | // sure the number of fixed parameters are the same or we have the same |
4203 | // ABI issues as if we introduce a varargs call. |
4204 | if (FT->isVarArg() && Call.getFunctionType()->isVarArg() && |
4205 | FT->getNumParams() != Call.getFunctionType()->getNumParams()) |
4206 | return false; |
4207 | } |
4208 | |
4209 | if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && |
4210 | !CallerPAL.isEmpty()) { |
4211 | // In this case we have more arguments than the new function type, but we |
4212 | // won't be dropping them. Check that these extra arguments have attributes |
4213 | // that are compatible with being a vararg call argument. |
4214 | unsigned SRetIdx; |
4215 | if (CallerPAL.hasAttrSomewhere(Kind: Attribute::StructRet, Index: &SRetIdx) && |
4216 | SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams()) |
4217 | return false; |
4218 | } |
4219 | |
4220 | // Okay, we decided that this is a safe thing to do: go ahead and start |
4221 | // inserting cast instructions as necessary. |
4222 | SmallVector<Value *, 8> Args; |
4223 | SmallVector<AttributeSet, 8> ArgAttrs; |
4224 | Args.reserve(N: NumActualArgs); |
4225 | ArgAttrs.reserve(N: NumActualArgs); |
4226 | |
4227 | // Get any return attributes. |
4228 | AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); |
4229 | |
4230 | // If the return value is not being used, the type may not be compatible |
4231 | // with the existing attributes. Wipe out any problematic attributes. |
4232 | RAttrs.remove(AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy)); |
4233 | |
4234 | LLVMContext &Ctx = Call.getContext(); |
4235 | AI = Call.arg_begin(); |
4236 | for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { |
4237 | Type *ParamTy = FT->getParamType(i); |
4238 | |
4239 | Value *NewArg = *AI; |
4240 | if ((*AI)->getType() != ParamTy) |
4241 | NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy); |
4242 | Args.push_back(Elt: NewArg); |
4243 | |
4244 | // Add any parameter attributes except the ones incompatible with the new |
4245 | // type. Note that we made sure all incompatible ones are safe to drop. |
4246 | AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible( |
4247 | Ty: ParamTy, ASK: AttributeFuncs::ASK_SAFE_TO_DROP); |
4248 | ArgAttrs.push_back( |
4249 | Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs)); |
4250 | } |
4251 | |
4252 | // If the function takes more arguments than the call was taking, add them |
4253 | // now. |
4254 | for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) { |
4255 | Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i))); |
4256 | ArgAttrs.push_back(Elt: AttributeSet()); |
4257 | } |
4258 | |
4259 | // If the call supplies extra arguments, keep them only for vararg callees.
4260 | if (FT->getNumParams() < NumActualArgs) { |
4261 | // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722 |
4262 | if (FT->isVarArg()) { |
4263 | // Add all of the arguments in their promoted form to the arg list. |
4264 | for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { |
4265 | Type *PTy = getPromotedType(Ty: (*AI)->getType()); |
4266 | Value *NewArg = *AI; |
4267 | if (PTy != (*AI)->getType()) { |
4268 | // Must promote to pass through va_arg area! |
4269 | Instruction::CastOps opcode = |
4270 | CastInst::getCastOpcode(Val: *AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false); |
4271 | NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy); |
4272 | } |
4273 | Args.push_back(Elt: NewArg); |
4274 | |
4275 | // Add any parameter attributes. |
4276 | ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i)); |
4277 | } |
4278 | } |
4279 | } |
4280 | |
4281 | AttributeSet FnAttrs = CallerPAL.getFnAttrs(); |
4282 | |
4283 | if (NewRetTy->isVoidTy()) |
4284 | Caller->setName("" ); // Void type should not have a name. |
4285 | |
4286 | assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) && |
4287 | "missing argument attributes" ); |
4288 | AttributeList NewCallerPAL = AttributeList::get( |
4289 | C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs); |
4290 | |
4291 | SmallVector<OperandBundleDef, 1> OpBundles; |
4292 | Call.getOperandBundlesAsDefs(Defs&: OpBundles); |
4293 | |
4294 | CallBase *NewCall; |
4295 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) { |
4296 | NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(), |
4297 | UnwindDest: II->getUnwindDest(), Args, OpBundles); |
4298 | } else { |
4299 | NewCall = Builder.CreateCall(Callee, Args, OpBundles); |
4300 | cast<CallInst>(Val: NewCall)->setTailCallKind( |
4301 | cast<CallInst>(Val: Caller)->getTailCallKind()); |
4302 | } |
4303 | NewCall->takeName(V: Caller); |
4304 | NewCall->setCallingConv(Call.getCallingConv()); |
4305 | NewCall->setAttributes(NewCallerPAL); |
4306 | |
4307 | // Preserve prof metadata if any. |
4308 | NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof}); |
4309 | |
4310 | // Insert a cast of the return type as necessary. |
4311 | Instruction *NC = NewCall; |
4312 | Value *NV = NC; |
4313 | if (OldRetTy != NV->getType() && !Caller->use_empty()) { |
4314 | if (!NV->getType()->isVoidTy()) { |
4315 | NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy); |
4316 | NC->setDebugLoc(Caller->getDebugLoc()); |
4317 | |
4318 | auto OptInsertPt = NewCall->getInsertionPointAfterDef(); |
4319 | assert(OptInsertPt && "No place to insert cast" ); |
4320 | InsertNewInstBefore(New: NC, Old: *OptInsertPt); |
4321 | Worklist.pushUsersToWorkList(I&: *Caller); |
4322 | } else { |
4323 | NV = PoisonValue::get(T: Caller->getType()); |
4324 | } |
4325 | } |
4326 | |
4327 | if (!Caller->use_empty()) |
4328 | replaceInstUsesWith(I&: *Caller, V: NV); |
4329 | else if (Caller->hasValueHandle()) { |
4330 | if (OldRetTy == NV->getType()) |
4331 | ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV); |
4332 | else |
4333 | // We cannot call ValueIsRAUWd with a different type, and the |
4334 | // actual tracked value will disappear. |
4335 | ValueHandleBase::ValueIsDeleted(V: Caller); |
4336 | } |
4337 | |
4338 | eraseInstFromFunction(I&: *Caller); |
4339 | return true; |
4340 | } |
4341 | |
4342 | /// Turn a call to a function created by init_trampoline / adjust_trampoline |
4343 | /// intrinsic pair into a direct call to the underlying function. |
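/// Illustrative example: given
///   declare void @f(ptr nest, i32)
/// an adjusted-trampoline call 'call void %fp(i32 7)' becomes
///   call void @f(ptr %nval, i32 7)
/// where %nval is the third argument of the matching llvm.init.trampoline.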
4344 | Instruction * |
4345 | InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, |
4346 | IntrinsicInst &Tramp) { |
4347 | FunctionType *FTy = Call.getFunctionType(); |
4348 | AttributeList Attrs = Call.getAttributes(); |
4349 | |
4350 | // If the call already has the 'nest' attribute somewhere then give up - |
4351 | // otherwise 'nest' would occur twice after splicing in the chain. |
4352 | if (Attrs.hasAttrSomewhere(Kind: Attribute::Nest)) |
4353 | return nullptr; |
4354 | |
4355 | Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: 1)->stripPointerCasts()); |
4356 | FunctionType *NestFTy = NestF->getFunctionType(); |
4357 | |
4358 | AttributeList NestAttrs = NestF->getAttributes(); |
4359 | if (!NestAttrs.isEmpty()) { |
4360 | unsigned NestArgNo = 0; |
4361 | Type *NestTy = nullptr; |
4362 | AttributeSet NestAttr; |
4363 | |
4364 | // Look for a parameter marked with the 'nest' attribute. |
4365 | for (FunctionType::param_iterator I = NestFTy->param_begin(), |
4366 | E = NestFTy->param_end(); |
4367 | I != E; ++NestArgNo, ++I) { |
4368 | AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo); |
4369 | if (AS.hasAttribute(Kind: Attribute::Nest)) { |
4370 | // Record the parameter type and any other attributes. |
4371 | NestTy = *I; |
4372 | NestAttr = AS; |
4373 | break; |
4374 | } |
4375 | } |
4376 | |
4377 | if (NestTy) { |
4378 | std::vector<Value*> NewArgs; |
4379 | std::vector<AttributeSet> NewArgAttrs; |
4380 | NewArgs.reserve(n: Call.arg_size() + 1); |
4381 | NewArgAttrs.reserve(n: Call.arg_size()); |
4382 | |
4383 | // Insert the nest argument into the call argument list, which may |
4384 | // mean appending it. Likewise for attributes. |
4385 | |
4386 | { |
4387 | unsigned ArgNo = 0; |
4388 | auto I = Call.arg_begin(), E = Call.arg_end(); |
4389 | do { |
4390 | if (ArgNo == NestArgNo) { |
4391 | // Add the chain argument and attributes. |
4392 | Value *NestVal = Tramp.getArgOperand(i: 2); |
4393 | if (NestVal->getType() != NestTy) |
4394 | NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest" ); |
4395 | NewArgs.push_back(x: NestVal); |
4396 | NewArgAttrs.push_back(x: NestAttr); |
4397 | } |
4398 | |
4399 | if (I == E) |
4400 | break; |
4401 | |
4402 | // Add the original argument and attributes. |
4403 | NewArgs.push_back(x: *I); |
4404 | NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo)); |
4405 | |
4406 | ++ArgNo; |
4407 | ++I; |
4408 | } while (true); |
4409 | } |
4410 | |
4411 | // The trampoline may have been bitcast to a bogus type (FTy). |
4412 | // Handle this by synthesizing a new function type, equal to FTy |
4413 | // with the chain parameter inserted. |
4414 | |
4415 | std::vector<Type*> NewTypes; |
4416 | NewTypes.reserve(n: FTy->getNumParams()+1); |
4417 | |
4418 | // Insert the chain's type into the list of parameter types, which may |
4419 | // mean appending it. |
4420 | { |
4421 | unsigned ArgNo = 0; |
4422 | FunctionType::param_iterator I = FTy->param_begin(), |
4423 | E = FTy->param_end(); |
4424 | |
4425 | do { |
4426 | if (ArgNo == NestArgNo) |
4427 | // Add the chain's type. |
4428 | NewTypes.push_back(x: NestTy); |
4429 | |
4430 | if (I == E) |
4431 | break; |
4432 | |
4433 | // Add the original type. |
4434 | NewTypes.push_back(x: *I); |
4435 | |
4436 | ++ArgNo; |
4437 | ++I; |
4438 | } while (true); |
4439 | } |
4440 | |
4441 | // Replace the trampoline call with a direct call. Let the generic |
4442 | // code sort out any function type mismatches. |
4443 | FunctionType *NewFTy = |
4444 | FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg()); |
4445 | AttributeList NewPAL = |
4446 | AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(), |
4447 | RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs); |
4448 | |
4449 | SmallVector<OperandBundleDef, 1> OpBundles; |
4450 | Call.getOperandBundlesAsDefs(Defs&: OpBundles); |
4451 | |
4452 | Instruction *NewCaller; |
4453 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) { |
4454 | NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(), |
4455 | IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles); |
4456 | cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv()); |
4457 | cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL); |
4458 | } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) { |
4459 | NewCaller = |
4460 | CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(), |
4461 | IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles); |
4462 | cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv()); |
4463 | cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL); |
4464 | } else { |
4465 | NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles); |
4466 | cast<CallInst>(Val: NewCaller)->setTailCallKind( |
4467 | cast<CallInst>(Val&: Call).getTailCallKind()); |
4468 | cast<CallInst>(Val: NewCaller)->setCallingConv( |
4469 | cast<CallInst>(Val&: Call).getCallingConv()); |
4470 | cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL); |
4471 | } |
4472 | NewCaller->setDebugLoc(Call.getDebugLoc()); |
4473 | |
4474 | return NewCaller; |
4475 | } |
4476 | } |
4477 | |
4478 | // Replace the trampoline call with a direct call. Since there is no 'nest' |
4479 | // parameter, there is no need to adjust the argument list. Let the generic |
4480 | // code sort out any function type mismatches. |
4481 | Call.setCalledFunction(FTy, Fn: NestF); |
4482 | return &Call; |
4483 | } |
4484 | |