1 | //===- InstCombineCasts.cpp -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the visit functions for cast operations. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "InstCombineInternal.h" |
14 | #include "llvm/ADT/SetVector.h" |
15 | #include "llvm/Analysis/ConstantFolding.h" |
16 | #include "llvm/IR/DataLayout.h" |
17 | #include "llvm/IR/DebugInfo.h" |
18 | #include "llvm/IR/PatternMatch.h" |
19 | #include "llvm/Support/KnownBits.h" |
20 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
21 | #include <optional> |
22 | |
23 | using namespace llvm; |
24 | using namespace PatternMatch; |
25 | |
26 | #define DEBUG_TYPE "instcombine" |
27 | |
28 | /// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns |
29 | /// true for, actually insert the code to evaluate the expression. |
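/// For example (illustrative), when called with Ty = i8 on the operand of
///   %t = trunc i32 (add i32 (zext i8 %x to i32), 42) to i8
/// the add is rebuilt directly in i8: the zext leaf collapses back to %x, the
/// constant is truncated, and the result is 'add i8 %x, 42'.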
30 | Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty, |
31 | bool isSigned) { |
32 | if (Constant *C = dyn_cast<Constant>(Val: V)) |
33 | return ConstantFoldIntegerCast(C, DestTy: Ty, IsSigned: isSigned, DL); |
34 | |
35 | // Otherwise, it must be an instruction. |
36 | Instruction *I = cast<Instruction>(Val: V); |
37 | Instruction *Res = nullptr; |
38 | unsigned Opc = I->getOpcode(); |
39 | switch (Opc) { |
40 | case Instruction::Add: |
41 | case Instruction::Sub: |
42 | case Instruction::Mul: |
43 | case Instruction::And: |
44 | case Instruction::Or: |
45 | case Instruction::Xor: |
46 | case Instruction::AShr: |
47 | case Instruction::LShr: |
48 | case Instruction::Shl: |
49 | case Instruction::UDiv: |
50 | case Instruction::URem: { |
51 | Value *LHS = EvaluateInDifferentType(V: I->getOperand(i: 0), Ty, isSigned); |
52 | Value *RHS = EvaluateInDifferentType(V: I->getOperand(i: 1), Ty, isSigned); |
53 | Res = BinaryOperator::Create(Op: (Instruction::BinaryOps)Opc, S1: LHS, S2: RHS); |
54 | if (Opc == Instruction::LShr || Opc == Instruction::AShr) |
55 | Res->setIsExact(I->isExact()); |
56 | break; |
57 | } |
58 | case Instruction::Trunc: |
59 | case Instruction::ZExt: |
60 | case Instruction::SExt: |
61 | // If the source type of the cast is the type we're trying for then we can |
62 | // just return the source. There's no need to insert it because it is not |
63 | // new. |
64 | if (I->getOperand(i: 0)->getType() == Ty) |
65 | return I->getOperand(i: 0); |
66 | |
67 | // Otherwise, must be the same type of cast, so just reinsert a new one. |
68 | // This also handles the case of zext(trunc(x)) -> zext(x). |
69 | Res = CastInst::CreateIntegerCast(S: I->getOperand(i: 0), Ty, |
70 | isSigned: Opc == Instruction::SExt); |
71 | break; |
72 | case Instruction::Select: { |
73 | Value *True = EvaluateInDifferentType(V: I->getOperand(i: 1), Ty, isSigned); |
74 | Value *False = EvaluateInDifferentType(V: I->getOperand(i: 2), Ty, isSigned); |
75 | Res = SelectInst::Create(C: I->getOperand(i: 0), S1: True, S2: False); |
76 | break; |
77 | } |
78 | case Instruction::PHI: { |
79 | PHINode *OPN = cast<PHINode>(Val: I); |
80 | PHINode *NPN = PHINode::Create(Ty, NumReservedValues: OPN->getNumIncomingValues()); |
81 | for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { |
82 | Value *V = |
83 | EvaluateInDifferentType(V: OPN->getIncomingValue(i), Ty, isSigned); |
84 | NPN->addIncoming(V, BB: OPN->getIncomingBlock(i)); |
85 | } |
86 | Res = NPN; |
87 | break; |
88 | } |
89 | case Instruction::FPToUI: |
90 | case Instruction::FPToSI: |
91 | Res = CastInst::Create( |
92 | static_cast<Instruction::CastOps>(Opc), S: I->getOperand(i: 0), Ty); |
93 | break; |
94 | case Instruction::Call: |
95 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
96 | switch (II->getIntrinsicID()) { |
97 | default: |
98 | llvm_unreachable("Unsupported call!"); |
99 | case Intrinsic::vscale: { |
100 | Function *Fn = Intrinsic::getOrInsertDeclaration( |
101 | M: I->getModule(), id: Intrinsic::vscale, Tys: {Ty}); |
102 | Res = CallInst::Create(Ty: Fn->getFunctionType(), F: Fn); |
103 | break; |
104 | } |
105 | } |
106 | } |
107 | break; |
108 | case Instruction::ShuffleVector: { |
109 | auto *ScalarTy = cast<VectorType>(Val: Ty)->getElementType(); |
110 | auto *VTy = cast<VectorType>(Val: I->getOperand(i: 0)->getType()); |
111 | auto *FixedTy = VectorType::get(ElementType: ScalarTy, EC: VTy->getElementCount()); |
112 | Value *Op0 = EvaluateInDifferentType(V: I->getOperand(i: 0), Ty: FixedTy, isSigned); |
113 | Value *Op1 = EvaluateInDifferentType(V: I->getOperand(i: 1), Ty: FixedTy, isSigned); |
114 | Res = new ShuffleVectorInst(Op0, Op1, |
115 | cast<ShuffleVectorInst>(Val: I)->getShuffleMask()); |
116 | break; |
117 | } |
118 | default: |
119 | // TODO: Can handle more cases here. |
120 | llvm_unreachable("Unreachable!"); |
121 | } |
122 | |
123 | Res->takeName(V: I); |
124 | return InsertNewInstWith(New: Res, Old: I->getIterator()); |
125 | } |
126 | |
127 | Instruction::CastOps |
128 | InstCombinerImpl::isEliminableCastPair(const CastInst *CI1, |
129 | const CastInst *CI2) { |
130 | Type *SrcTy = CI1->getSrcTy(); |
131 | Type *MidTy = CI1->getDestTy(); |
132 | Type *DstTy = CI2->getDestTy(); |
133 | |
134 | Instruction::CastOps firstOp = CI1->getOpcode(); |
135 | Instruction::CastOps secondOp = CI2->getOpcode(); |
136 | Type *SrcIntPtrTy = |
137 | SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr; |
138 | Type *MidIntPtrTy = |
139 | MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr; |
140 | Type *DstIntPtrTy = |
141 | DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr; |
142 | unsigned Res = CastInst::isEliminableCastPair(firstOpcode: firstOp, secondOpcode: secondOp, SrcTy, MidTy, |
143 | DstTy, SrcIntPtrTy, MidIntPtrTy, |
144 | DstIntPtrTy); |
145 | |
146 | // We don't want to form an inttoptr or ptrtoint that converts to an integer |
147 | // type that differs from the pointer size. |
148 | if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) || |
149 | (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy)) |
150 | Res = 0; |
151 | |
152 | return Instruction::CastOps(Res); |
153 | } |
154 | |
155 | /// Implement the transforms common to all CastInst visitors. |
156 | Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { |
157 | Value *Src = CI.getOperand(i_nocapture: 0); |
158 | Type *Ty = CI.getType(); |
159 | |
160 | if (auto *SrcC = dyn_cast<Constant>(Val: Src)) |
161 | if (Constant *Res = ConstantFoldCastOperand(Opcode: CI.getOpcode(), C: SrcC, DestTy: Ty, DL)) |
162 | return replaceInstUsesWith(I&: CI, V: Res); |
163 | |
164 | // Try to eliminate a cast of a cast. |
165 | if (auto *CSrc = dyn_cast<CastInst>(Val: Src)) { // A->B->C cast |
166 | if (Instruction::CastOps NewOpc = isEliminableCastPair(CI1: CSrc, CI2: &CI)) { |
167 | // The first cast (CSrc) is eliminable so we need to fix up or replace |
168 | // the second cast (CI). CSrc will then have a good chance of being dead. |
169 | auto *Res = CastInst::Create(NewOpc, S: CSrc->getOperand(i_nocapture: 0), Ty); |
170 | // Point debug users of the dying cast to the new one. |
171 | if (CSrc->hasOneUse()) |
172 | replaceAllDbgUsesWith(From&: *CSrc, To&: *Res, DomPoint&: CI, DT); |
173 | return Res; |
174 | } |
175 | } |
176 | |
177 | if (auto *Sel = dyn_cast<SelectInst>(Val: Src)) { |
178 | // We are casting a select. Try to fold the cast into the select if the |
179 | // select does not have a compare instruction with matching operand types |
180 | // or the select is likely better done in a narrow type. |
181 | // Creating a select with operands that are different sizes than its |
182 | // condition may inhibit other folds and lead to worse codegen. |
183 | auto *Cmp = dyn_cast<CmpInst>(Val: Sel->getCondition()); |
184 | if (!Cmp || Cmp->getOperand(i_nocapture: 0)->getType() != Sel->getType() || |
185 | (CI.getOpcode() == Instruction::Trunc && |
186 | shouldChangeType(From: CI.getSrcTy(), To: CI.getType()))) { |
187 | |
188 | // If it's a bitcast involving vectors, make sure it has the same number |
189 | // of elements on both sides. |
190 | if (CI.getOpcode() != Instruction::BitCast || |
191 | match(V: &CI, P: m_ElementWiseBitCast(Op: m_Value()))) { |
192 | if (Instruction *NV = FoldOpIntoSelect(Op&: CI, SI: Sel)) { |
193 | replaceAllDbgUsesWith(From&: *Sel, To&: *NV, DomPoint&: CI, DT); |
194 | return NV; |
195 | } |
196 | } |
197 | } |
198 | } |
199 | |
200 | // If we are casting a PHI, then fold the cast into the PHI. |
201 | if (auto *PN = dyn_cast<PHINode>(Val: Src)) { |
202 | // Don't do this if it would create a PHI node with an illegal type from a |
203 | // legal type. |
204 | if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() || |
205 | shouldChangeType(From: CI.getSrcTy(), To: CI.getType())) |
206 | if (Instruction *NV = foldOpIntoPhi(I&: CI, PN)) |
207 | return NV; |
208 | } |
209 | |
210 | // Canonicalize a unary shuffle after the cast if neither operation changes |
211 | // the size or element size of the input vector. |
212 | // TODO: We could allow size-changing ops if that doesn't harm codegen. |
213 | // cast (shuffle X, Mask) --> shuffle (cast X), Mask |
214 | Value *X; |
215 | ArrayRef<int> Mask; |
216 | if (match(V: Src, P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(Mask))))) { |
217 | // TODO: Allow scalable vectors? |
218 | auto *SrcTy = dyn_cast<FixedVectorType>(Val: X->getType()); |
219 | auto *DestTy = dyn_cast<FixedVectorType>(Val: Ty); |
220 | if (SrcTy && DestTy && |
221 | SrcTy->getNumElements() == DestTy->getNumElements() && |
222 | SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) { |
223 | Value *CastX = Builder.CreateCast(Op: CI.getOpcode(), V: X, DestTy); |
224 | return new ShuffleVectorInst(CastX, Mask); |
225 | } |
226 | } |
227 | |
228 | return nullptr; |
229 | } |
230 | |
231 | /// Constants and extensions/truncates from the destination type are always |
232 | /// free to be evaluated in that type. This is a helper for canEvaluate*. |
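/// For example, when Ty is i8, an immediate constant is free (it is simply
/// folded to an i8 constant), and a value defined by 'zext i8 %x to i32' is
/// free because it collapses back to %x.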
233 | static bool canAlwaysEvaluateInType(Value *V, Type *Ty) { |
234 | if (isa<Constant>(Val: V)) |
235 | return match(V, P: m_ImmConstant()); |
236 | |
237 | Value *X; |
238 | if ((match(V, P: m_ZExtOrSExt(Op: m_Value(V&: X))) || match(V, P: m_Trunc(Op: m_Value(V&: X)))) && |
239 | X->getType() == Ty) |
240 | return true; |
241 | |
242 | return false; |
243 | } |
244 | |
245 | /// Filter out values that we cannot evaluate in the destination type for free. |
246 | /// This is a helper for canEvaluate*. |
247 | static bool canNotEvaluateInType(Value *V, Type *Ty) { |
248 | if (!isa<Instruction>(Val: V)) |
249 | return true; |
250 | // We don't extend or shrink something that has multiple uses -- doing so |
251 | // would require duplicating the instruction which isn't profitable. |
252 | if (!V->hasOneUse()) |
253 | return true; |
254 | |
255 | return false; |
256 | } |
257 | |
258 | /// Return true if we can evaluate the specified expression tree as type Ty |
259 | /// instead of its larger type, and arrive with the same value. |
260 | /// This is used by code that tries to eliminate truncates. |
261 | /// |
262 | /// Ty will always be a type smaller than V. We should return true if trunc(V) |
263 | /// can be computed by computing V in the smaller type. If V is an instruction, |
264 | /// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only |
265 | /// makes sense if x and y can be efficiently truncated. |
266 | /// |
267 | /// This function works on both vectors and scalars. |
268 | /// |
269 | static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, |
270 | Instruction *CxtI) { |
271 | if (canAlwaysEvaluateInType(V, Ty)) |
272 | return true; |
273 | if (canNotEvaluateInType(V, Ty)) |
274 | return false; |
275 | |
276 | auto *I = cast<Instruction>(Val: V); |
277 | Type *OrigTy = V->getType(); |
278 | switch (I->getOpcode()) { |
279 | case Instruction::Add: |
280 | case Instruction::Sub: |
281 | case Instruction::Mul: |
282 | case Instruction::And: |
283 | case Instruction::Or: |
284 | case Instruction::Xor: |
285 | // These operators can all arbitrarily be extended or truncated. |
286 | return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) && |
287 | canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI); |
288 | |
289 | case Instruction::UDiv: |
290 | case Instruction::URem: { |
291 | // UDiv and URem can be truncated if all the truncated bits are zero. |
292 | uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); |
293 | uint32_t BitWidth = Ty->getScalarSizeInBits(); |
294 | assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!"); |
295 | APInt Mask = APInt::getBitsSetFrom(numBits: OrigBitWidth, loBit: BitWidth); |
296 | // Do not preserve the original context instruction. Simplifying div/rem |
297 | // based on later context may introduce a trap. |
298 | if (IC.MaskedValueIsZero(V: I->getOperand(i: 0), Mask, CxtI: I) && |
299 | IC.MaskedValueIsZero(V: I->getOperand(i: 1), Mask, CxtI: I)) { |
300 | return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI: I) && |
301 | canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI: I); |
302 | } |
303 | break; |
304 | } |
305 | case Instruction::Shl: { |
306 | // If we are truncating the result of this SHL, and if it's a shift of an |
307 | // inrange amount, we can always perform a SHL in a smaller type. |
308 | uint32_t BitWidth = Ty->getScalarSizeInBits(); |
309 | KnownBits AmtKnownBits = |
310 | llvm::computeKnownBits(V: I->getOperand(i: 1), DL: IC.getDataLayout()); |
311 | if (AmtKnownBits.getMaxValue().ult(RHS: BitWidth)) |
312 | return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) && |
313 | canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI); |
314 | break; |
315 | } |
316 | case Instruction::LShr: { |
317 | // If this is a truncate of a logical shr, we can truncate it to a smaller |
318 | // lshr iff we know that the bits we would otherwise be shifting in are |
319 | // already zeros. |
320 | // TODO: It is enough to check that the bits we would be shifting in are |
321 | // zero - use AmtKnownBits.getMaxValue(). |
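// For example (illustrative), 'trunc (lshr i32 %x, 4) to i16' can be narrowed
// when bits 16..31 of %x are known zero (or when the bits the narrow shift
// would lose are not demanded anyway): the narrow lshr then shifts in the same
// zeros the wide lshr would have produced.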
322 | uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); |
323 | uint32_t BitWidth = Ty->getScalarSizeInBits(); |
324 | KnownBits AmtKnownBits = IC.computeKnownBits(V: I->getOperand(i: 1), CxtI); |
325 | APInt MaxShiftAmt = AmtKnownBits.getMaxValue(); |
326 | APInt ShiftedBits = APInt::getBitsSetFrom(numBits: OrigBitWidth, loBit: BitWidth); |
327 | if (MaxShiftAmt.ult(RHS: BitWidth)) { |
328 | // If the only user is a trunc, then we can narrow the shift as long as |
329 | // none of the newly exposed MSBs are demanded. |
330 | if (auto *Trunc = dyn_cast<TruncInst>(Val: V->user_back())) { |
331 | auto DemandedBits = Trunc->getType()->getScalarSizeInBits(); |
332 | if ((MaxShiftAmt + DemandedBits).ule(RHS: BitWidth)) |
333 | return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) && |
334 | canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI); |
335 | } |
336 | if (IC.MaskedValueIsZero(V: I->getOperand(i: 0), Mask: ShiftedBits, CxtI)) |
337 | return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) && |
338 | canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI); |
339 | } |
340 | break; |
341 | } |
342 | case Instruction::AShr: { |
343 | // If this is a truncate of an arithmetic shr, we can truncate it to a |
344 | // smaller ashr iff we know that the bits from the sign bit of the |
345 | // truncated type up to the sign bit of the original type are all copies of the sign. |
346 | // TODO: It is enough to check that the bits we would be shifting in are |
347 | // similar to sign bit of the truncate type. |
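// For example (illustrative), 'trunc (ashr i32 %x, 2) to i16' can be narrowed
// when %x has more than 16 sign bits: every bit the narrow ashr shifts in is
// then a copy of the same sign value the wide ashr would have shifted in.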
348 | uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); |
349 | uint32_t BitWidth = Ty->getScalarSizeInBits(); |
350 | KnownBits AmtKnownBits = |
351 | llvm::computeKnownBits(V: I->getOperand(i: 1), DL: IC.getDataLayout()); |
352 | unsigned ShiftedBits = OrigBitWidth - BitWidth; |
353 | if (AmtKnownBits.getMaxValue().ult(RHS: BitWidth) && |
354 | ShiftedBits < IC.ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI)) |
355 | return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) && |
356 | canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI); |
357 | break; |
358 | } |
359 | case Instruction::Trunc: |
360 | // trunc(trunc(x)) -> trunc(x) |
361 | return true; |
362 | case Instruction::ZExt: |
363 | case Instruction::SExt: |
364 | // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest |
365 | // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest |
366 | return true; |
367 | case Instruction::Select: { |
368 | SelectInst *SI = cast<SelectInst>(Val: I); |
369 | return canEvaluateTruncated(V: SI->getTrueValue(), Ty, IC, CxtI) && |
370 | canEvaluateTruncated(V: SI->getFalseValue(), Ty, IC, CxtI); |
371 | } |
372 | case Instruction::PHI: { |
373 | // We can change a phi if we can change all operands. Note that we never |
374 | // get into trouble with cyclic PHIs here because we only consider |
375 | // instructions with a single use. |
376 | PHINode *PN = cast<PHINode>(Val: I); |
377 | for (Value *IncValue : PN->incoming_values()) |
378 | if (!canEvaluateTruncated(V: IncValue, Ty, IC, CxtI)) |
379 | return false; |
380 | return true; |
381 | } |
382 | case Instruction::FPToUI: |
383 | case Instruction::FPToSI: { |
384 | // If the integer type can hold the max FP value, it is safe to cast |
385 | // directly to that type. Otherwise, we may create poison via overflow |
386 | // that did not exist in the original code. |
387 | Type *InputTy = I->getOperand(i: 0)->getType()->getScalarType(); |
388 | const fltSemantics &Semantics = InputTy->getFltSemantics(); |
389 | uint32_t MinBitWidth = |
390 | APFloatBase::semanticsIntSizeInBits(Semantics, |
391 | I->getOpcode() == Instruction::FPToSI); |
392 | return Ty->getScalarSizeInBits() >= MinBitWidth; |
393 | } |
394 | case Instruction::ShuffleVector: |
395 | return canEvaluateTruncated(V: I->getOperand(i: 0), Ty, IC, CxtI) && |
396 | canEvaluateTruncated(V: I->getOperand(i: 1), Ty, IC, CxtI); |
397 | default: |
398 | // TODO: Can handle more cases here. |
399 | break; |
400 | } |
401 | |
402 | return false; |
403 | } |
404 | |
405 | /// Given a vector that is bitcast to an integer, optionally logically |
406 | /// right-shifted, and truncated, convert it to an extractelement. |
407 | /// Example (big endian): |
408 | /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 |
409 | /// ---> |
410 | /// extractelement <4 x i32> %X, 1 |
411 | static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, |
412 | InstCombinerImpl &IC) { |
413 | Value *TruncOp = Trunc.getOperand(i_nocapture: 0); |
414 | Type *DestType = Trunc.getType(); |
415 | if (!TruncOp->hasOneUse() || !isa<IntegerType>(Val: DestType)) |
416 | return nullptr; |
417 | |
418 | Value *VecInput = nullptr; |
419 | ConstantInt *ShiftVal = nullptr; |
420 | if (!match(V: TruncOp, P: m_CombineOr(L: m_BitCast(Op: m_Value(V&: VecInput)), |
421 | R: m_LShr(L: m_BitCast(Op: m_Value(V&: VecInput)), |
422 | R: m_ConstantInt(CI&: ShiftVal)))) || |
423 | !isa<VectorType>(Val: VecInput->getType())) |
424 | return nullptr; |
425 | |
426 | VectorType *VecType = cast<VectorType>(Val: VecInput->getType()); |
427 | unsigned VecWidth = VecType->getPrimitiveSizeInBits(); |
428 | unsigned DestWidth = DestType->getPrimitiveSizeInBits(); |
429 | unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0; |
430 | |
431 | if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0)) |
432 | return nullptr; |
433 | |
434 | // If the element type of the vector doesn't match the result type, |
435 | // bitcast it to a vector type that we can extract from. |
436 | unsigned NumVecElts = VecWidth / DestWidth; |
437 | if (VecType->getElementType() != DestType) { |
438 | VecType = FixedVectorType::get(ElementType: DestType, NumElts: NumVecElts); |
439 | VecInput = IC.Builder.CreateBitCast(V: VecInput, DestTy: VecType, Name: "bc"); |
440 | } |
441 | |
442 | unsigned Elt = ShiftAmount / DestWidth; |
443 | if (IC.getDataLayout().isBigEndian()) |
444 | Elt = NumVecElts - 1 - Elt; |
445 | |
446 | return ExtractElementInst::Create(Vec: VecInput, Idx: IC.Builder.getInt32(C: Elt)); |
447 | } |
448 | |
449 | /// Whenever an element is extracted from a vector, optionally shifted down, and |
450 | /// then truncated, canonicalize by converting it to a bitcast followed by an |
451 | /// extractelement. |
452 | /// |
453 | /// Examples (little endian): |
454 | /// trunc (extractelement <4 x i64> %X, 0) to i32 |
455 | /// ---> |
456 | /// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0 |
457 | /// |
458 | /// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8 |
459 | /// ---> |
460 | /// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1 |
461 | static Instruction *foldVecExtTruncToExtElt(TruncInst &Trunc, |
462 | InstCombinerImpl &IC) { |
463 | Value *Src = Trunc.getOperand(i_nocapture: 0); |
464 | Type *SrcType = Src->getType(); |
465 | Type *DstType = Trunc.getType(); |
466 | |
467 | // Only attempt this if we have simple aliasing of the vector elements. |
468 | // A badly fit destination size would result in an invalid cast. |
469 | unsigned SrcBits = SrcType->getScalarSizeInBits(); |
470 | unsigned DstBits = DstType->getScalarSizeInBits(); |
471 | unsigned TruncRatio = SrcBits / DstBits; |
472 | if ((SrcBits % DstBits) != 0) |
473 | return nullptr; |
474 | |
475 | Value *VecOp; |
476 | ConstantInt *Cst; |
477 | const APInt *ShiftAmount = nullptr; |
478 | if (!match(V: Src, P: m_OneUse(SubPattern: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_ConstantInt(CI&: Cst)))) && |
479 | !match(V: Src, |
480 | P: m_OneUse(SubPattern: m_LShr(L: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_ConstantInt(CI&: Cst)), |
481 | R: m_APInt(Res&: ShiftAmount))))) |
482 | return nullptr; |
483 | |
484 | auto *VecOpTy = cast<VectorType>(Val: VecOp->getType()); |
485 | auto VecElts = VecOpTy->getElementCount(); |
486 | |
487 | uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio; |
488 | uint64_t VecOpIdx = Cst->getZExtValue(); |
489 | uint64_t NewIdx = IC.getDataLayout().isBigEndian() |
490 | ? (VecOpIdx + 1) * TruncRatio - 1 |
491 | : VecOpIdx * TruncRatio; |
492 | |
493 | // Adjust index by the whole number of truncated elements. |
494 | if (ShiftAmount) { |
495 | // Check shift amount is in range and shifts a whole number of truncated |
496 | // elements. |
497 | if (ShiftAmount->uge(RHS: SrcBits) || ShiftAmount->urem(RHS: DstBits) != 0) |
498 | return nullptr; |
499 | |
500 | uint64_t IdxOfs = ShiftAmount->udiv(RHS: DstBits).getZExtValue(); |
501 | NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs) |
502 | : (NewIdx + IdxOfs); |
503 | } |
504 | |
505 | assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() && |
506 | NewIdx <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits"); |
507 | |
508 | auto *BitCastTo = |
509 | VectorType::get(ElementType: DstType, NumElements: BitCastNumElts, Scalable: VecElts.isScalable()); |
510 | Value *BitCast = IC.Builder.CreateBitCast(V: VecOp, DestTy: BitCastTo); |
511 | return ExtractElementInst::Create(Vec: BitCast, Idx: IC.Builder.getInt32(C: NewIdx)); |
512 | } |
513 | |
514 | /// Funnel/Rotate left/right may occur in a wider type than necessary because of |
515 | /// type promotion rules. Try to narrow the inputs and convert to funnel shift. |
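/// Example (pseudo-IR; %x is assumed to have only its low 8 bits set, e.g. it
/// was zero-extended from i8):
///   trunc (or (shl %x, %amt), (lshr %x, (sub 8, %amt))) to i8
///   --->
///   call i8 @llvm.fshl.i8(i8 (trunc %x), i8 (trunc %x), i8 (trunc %amt))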
516 | Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) { |
517 | assert((isa<VectorType>(Trunc.getSrcTy()) || |
518 | shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) && |
519 | "Don't narrow to an illegal scalar type" ); |
520 | |
521 | // Bail out on strange types. It is possible to handle some of these patterns |
522 | // even with non-power-of-2 sizes, but it is not a likely scenario. |
523 | Type *DestTy = Trunc.getType(); |
524 | unsigned NarrowWidth = DestTy->getScalarSizeInBits(); |
525 | unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits(); |
526 | if (!isPowerOf2_32(Value: NarrowWidth)) |
527 | return nullptr; |
528 | |
529 | // First, find an or'd pair of opposite shifts: |
530 | // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)) |
531 | BinaryOperator *Or0, *Or1; |
532 | if (!match(V: Trunc.getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_Or(L: m_BinOp(I&: Or0), R: m_BinOp(I&: Or1))))) |
533 | return nullptr; |
534 | |
535 | Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; |
536 | if (!match(V: Or0, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal0), R: m_Value(V&: ShAmt0)))) || |
537 | !match(V: Or1, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal1), R: m_Value(V&: ShAmt1)))) || |
538 | Or0->getOpcode() == Or1->getOpcode()) |
539 | return nullptr; |
540 | |
541 | // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)). |
542 | if (Or0->getOpcode() == BinaryOperator::LShr) { |
543 | std::swap(a&: Or0, b&: Or1); |
544 | std::swap(a&: ShVal0, b&: ShVal1); |
545 | std::swap(a&: ShAmt0, b&: ShAmt1); |
546 | } |
547 | assert(Or0->getOpcode() == BinaryOperator::Shl && |
548 | Or1->getOpcode() == BinaryOperator::LShr && |
549 | "Illegal or(shift,shift) pair" ); |
550 | |
551 | // Match the shift amount operands for a funnel/rotate pattern. This always |
552 | // matches a subtraction on the R operand. |
553 | auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { |
554 | // The shift amounts may add up to the narrow bit width: |
555 | // (shl ShVal0, L) | (lshr ShVal1, Width - L) |
556 | // If this is a funnel shift (different operands are shifted), then the |
557 | // shift amount can not over-shift (create poison) in the narrow type. |
558 | unsigned MaxShiftAmountWidth = Log2_32(Value: NarrowWidth); |
559 | APInt HiBitMask = ~APInt::getLowBitsSet(numBits: WideWidth, loBitsSet: MaxShiftAmountWidth); |
560 | if (ShVal0 == ShVal1 || MaskedValueIsZero(V: L, Mask: HiBitMask)) |
561 | if (match(V: R, P: m_OneUse(SubPattern: m_Sub(L: m_SpecificInt(V: Width), R: m_Specific(V: L))))) |
562 | return L; |
563 | |
564 | // The following patterns currently only work for rotation patterns. |
565 | // TODO: Add more general funnel-shift compatible patterns. |
566 | if (ShVal0 != ShVal1) |
567 | return nullptr; |
568 | |
569 | // The shift amount may be masked with negation: |
570 | // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1))) |
571 | Value *X; |
572 | unsigned Mask = Width - 1; |
573 | if (match(V: L, P: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask))) && |
574 | match(V: R, P: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask)))) |
575 | return X; |
576 | |
577 | // Same as above, but the shift amount may be extended after masking: |
578 | if (match(V: L, P: m_ZExt(Op: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask)))) && |
579 | match(V: R, P: m_ZExt(Op: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask))))) |
580 | return X; |
581 | |
582 | return nullptr; |
583 | }; |
584 | |
585 | Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth); |
586 | bool IsFshl = true; // Sub on LSHR. |
587 | if (!ShAmt) { |
588 | ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth); |
589 | IsFshl = false; // Sub on SHL. |
590 | } |
591 | if (!ShAmt) |
592 | return nullptr; |
593 | |
594 | // The right-shifted value must have high zeros in the wide type (for example |
595 | // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are |
596 | // truncated, so those do not matter. |
597 | APInt HiBitMask = APInt::getHighBitsSet(numBits: WideWidth, hiBitsSet: WideWidth - NarrowWidth); |
598 | if (!MaskedValueIsZero(V: ShVal1, Mask: HiBitMask, CxtI: &Trunc)) |
599 | return nullptr; |
600 | |
601 | // Adjust the width of ShAmt for narrowed funnel shift operation: |
602 | // - Zero-extend if ShAmt is narrower than the destination type. |
603 | // - Truncate if ShAmt is wider, discarding non-significant high-order bits. |
604 | // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal), |
605 | // zext/trunc(ShAmt)). |
606 | Value *NarrowShAmt = Builder.CreateZExtOrTrunc(V: ShAmt, DestTy); |
607 | |
608 | Value *X, *Y; |
609 | X = Y = Builder.CreateTrunc(V: ShVal0, DestTy); |
610 | if (ShVal0 != ShVal1) |
611 | Y = Builder.CreateTrunc(V: ShVal1, DestTy); |
612 | Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; |
613 | Function *F = |
614 | Intrinsic::getOrInsertDeclaration(M: Trunc.getModule(), id: IID, Tys: DestTy); |
615 | return CallInst::Create(Func: F, Args: {X, Y, NarrowShAmt}); |
616 | } |
617 | |
618 | /// Try to narrow the width of math or bitwise logic instructions by pulling a |
619 | /// truncate ahead of binary operators. |
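/// Example (pseudo-IR), assuming i32 is a desirable (legal) type:
///   trunc (add i64 (zext i32 %x to i64), %y) to i32
///   --->
///   add i32 %x, (trunc i64 %y to i32)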
620 | Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) { |
621 | Type *SrcTy = Trunc.getSrcTy(); |
622 | Type *DestTy = Trunc.getType(); |
623 | unsigned SrcWidth = SrcTy->getScalarSizeInBits(); |
624 | unsigned DestWidth = DestTy->getScalarSizeInBits(); |
625 | |
626 | if (!isa<VectorType>(Val: SrcTy) && !shouldChangeType(From: SrcTy, To: DestTy)) |
627 | return nullptr; |
628 | |
629 | BinaryOperator *BinOp; |
630 | if (!match(V: Trunc.getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_BinOp(I&: BinOp)))) |
631 | return nullptr; |
632 | |
633 | Value *BinOp0 = BinOp->getOperand(i_nocapture: 0); |
634 | Value *BinOp1 = BinOp->getOperand(i_nocapture: 1); |
635 | switch (BinOp->getOpcode()) { |
636 | case Instruction::And: |
637 | case Instruction::Or: |
638 | case Instruction::Xor: |
639 | case Instruction::Add: |
640 | case Instruction::Sub: |
641 | case Instruction::Mul: { |
642 | Constant *C; |
643 | if (match(V: BinOp0, P: m_Constant(C))) { |
644 | // trunc (binop C, X) --> binop (trunc C', X) |
645 | Constant *NarrowC = ConstantExpr::getTrunc(C, Ty: DestTy); |
646 | Value *TruncX = Builder.CreateTrunc(V: BinOp1, DestTy); |
647 | return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: NarrowC, S2: TruncX); |
648 | } |
649 | if (match(V: BinOp1, P: m_Constant(C))) { |
650 | // trunc (binop X, C) --> binop (trunc X, C') |
651 | Constant *NarrowC = ConstantExpr::getTrunc(C, Ty: DestTy); |
652 | Value *TruncX = Builder.CreateTrunc(V: BinOp0, DestTy); |
653 | return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: TruncX, S2: NarrowC); |
654 | } |
655 | Value *X; |
656 | if (match(V: BinOp0, P: m_ZExtOrSExt(Op: m_Value(V&: X))) && X->getType() == DestTy) { |
657 | // trunc (binop (ext X), Y) --> binop X, (trunc Y) |
658 | Value *NarrowOp1 = Builder.CreateTrunc(V: BinOp1, DestTy); |
659 | return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: X, S2: NarrowOp1); |
660 | } |
661 | if (match(V: BinOp1, P: m_ZExtOrSExt(Op: m_Value(V&: X))) && X->getType() == DestTy) { |
662 | // trunc (binop Y, (ext X)) --> binop (trunc Y), X |
663 | Value *NarrowOp0 = Builder.CreateTrunc(V: BinOp0, DestTy); |
664 | return BinaryOperator::Create(Op: BinOp->getOpcode(), S1: NarrowOp0, S2: X); |
665 | } |
666 | break; |
667 | } |
668 | case Instruction::LShr: |
669 | case Instruction::AShr: { |
670 | // trunc (*shr (trunc A), C) --> trunc(*shr A, C) |
671 | Value *A; |
672 | Constant *C; |
673 | if (match(V: BinOp0, P: m_Trunc(Op: m_Value(V&: A))) && match(V: BinOp1, P: m_Constant(C))) { |
674 | unsigned MaxShiftAmt = SrcWidth - DestWidth; |
675 | // If the shift is small enough, all zero/sign bits created by the shift |
676 | // are removed by the trunc. |
677 | if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULE, |
678 | Threshold: APInt(SrcWidth, MaxShiftAmt)))) { |
679 | auto *OldShift = cast<Instruction>(Val: Trunc.getOperand(i_nocapture: 0)); |
680 | bool IsExact = OldShift->isExact(); |
681 | if (Constant *ShAmt = ConstantFoldIntegerCast(C, DestTy: A->getType(), |
682 | /*IsSigned*/ true, DL)) { |
683 | ShAmt = Constant::mergeUndefsWith(C: ShAmt, Other: C); |
684 | Value *Shift = |
685 | OldShift->getOpcode() == Instruction::AShr |
686 | ? Builder.CreateAShr(LHS: A, RHS: ShAmt, Name: OldShift->getName(), isExact: IsExact) |
687 | : Builder.CreateLShr(LHS: A, RHS: ShAmt, Name: OldShift->getName(), isExact: IsExact); |
688 | return CastInst::CreateTruncOrBitCast(S: Shift, Ty: DestTy); |
689 | } |
690 | } |
691 | } |
692 | break; |
693 | } |
694 | default: break; |
695 | } |
696 | |
697 | if (Instruction *NarrowOr = narrowFunnelShift(Trunc)) |
698 | return NarrowOr; |
699 | |
700 | return nullptr; |
701 | } |
702 | |
703 | /// Try to narrow the width of a splat shuffle. This could be generalized to any |
704 | /// shuffle with a constant operand, but we limit the transform to avoid |
705 | /// creating a shuffle type that targets may not be able to lower effectively. |
706 | static Instruction *shrinkSplatShuffle(TruncInst &Trunc, |
707 | InstCombiner::BuilderTy &Builder) { |
708 | auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Trunc.getOperand(i_nocapture: 0)); |
709 | if (Shuf && Shuf->hasOneUse() && match(V: Shuf->getOperand(i_nocapture: 1), P: m_Undef()) && |
710 | all_equal(Range: Shuf->getShuffleMask()) && |
711 | Shuf->getType() == Shuf->getOperand(i_nocapture: 0)->getType()) { |
712 | // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask |
713 | // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask |
714 | Value *NarrowOp = Builder.CreateTrunc(V: Shuf->getOperand(i_nocapture: 0), DestTy: Trunc.getType()); |
715 | return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask()); |
716 | } |
717 | |
718 | return nullptr; |
719 | } |
720 | |
721 | /// Try to narrow the width of an insert element. This could be generalized for |
722 | /// any vector constant, but we limit the transform to insertion into undef to |
723 | /// avoid potential backend problems from unsupported insertion widths. This |
724 | /// could also be extended to handle the case of inserting a scalar constant |
725 | /// into a vector variable. |
726 | static Instruction *shrinkInsertElt(CastInst &Trunc, |
727 | InstCombiner::BuilderTy &Builder) { |
728 | Instruction::CastOps Opcode = Trunc.getOpcode(); |
729 | assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) && |
730 | "Unexpected instruction for shrinking" ); |
731 | |
732 | auto *InsElt = dyn_cast<InsertElementInst>(Val: Trunc.getOperand(i_nocapture: 0)); |
733 | if (!InsElt || !InsElt->hasOneUse()) |
734 | return nullptr; |
735 | |
736 | Type *DestTy = Trunc.getType(); |
737 | Type *DestScalarTy = DestTy->getScalarType(); |
738 | Value *VecOp = InsElt->getOperand(i_nocapture: 0); |
739 | Value *ScalarOp = InsElt->getOperand(i_nocapture: 1); |
740 | Value *Index = InsElt->getOperand(i_nocapture: 2); |
741 | |
742 | if (match(V: VecOp, P: m_Undef())) { |
743 | // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index |
744 | // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index |
745 | UndefValue *NarrowUndef = UndefValue::get(T: DestTy); |
746 | Value *NarrowOp = Builder.CreateCast(Op: Opcode, V: ScalarOp, DestTy: DestScalarTy); |
747 | return InsertElementInst::Create(Vec: NarrowUndef, NewElt: NarrowOp, Idx: Index); |
748 | } |
749 | |
750 | return nullptr; |
751 | } |
752 | |
753 | Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) { |
754 | if (Instruction *Result = commonCastTransforms(CI&: Trunc)) |
755 | return Result; |
756 | |
757 | Value *Src = Trunc.getOperand(i_nocapture: 0); |
758 | Type *DestTy = Trunc.getType(), *SrcTy = Src->getType(); |
759 | unsigned DestWidth = DestTy->getScalarSizeInBits(); |
760 | unsigned SrcWidth = SrcTy->getScalarSizeInBits(); |
761 | |
762 | // Attempt to truncate the entire input expression tree to the destination |
763 | // type. Only do this if the dest type is a simple type, don't convert the |
764 | // expression tree to something weird like i93 unless the source is also |
765 | // strange. |
766 | if ((DestTy->isVectorTy() || shouldChangeType(From: SrcTy, To: DestTy)) && |
767 | canEvaluateTruncated(V: Src, Ty: DestTy, IC&: *this, CxtI: &Trunc)) { |
768 | |
769 | // If this cast is a truncate, evaluating in a different type always |
770 | // eliminates the cast, so it is always a win. |
771 | LLVM_DEBUG( |
772 | dbgs() << "ICE: EvaluateInDifferentType converting expression type" |
773 | " to avoid cast: " |
774 | << Trunc << '\n'); |
775 | Value *Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: false); |
776 | assert(Res->getType() == DestTy); |
777 | return replaceInstUsesWith(I&: Trunc, V: Res); |
778 | } |
779 | |
780 | // For integer types, check if we can shorten the entire input expression to |
781 | // DestWidth * 2, which won't allow removing the truncate, but reducing the |
782 | // width may enable further optimizations, e.g. allowing for larger |
783 | // vectorization factors. |
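// For example (illustrative), for 'trunc i64 %v to i8' where the expression
// cannot be evaluated directly in i8, we still try i16: the trunc remains,
// but it then truncates from a narrower intermediate value.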
784 | if (auto *DestITy = dyn_cast<IntegerType>(Val: DestTy)) { |
785 | if (DestWidth * 2 < SrcWidth) { |
786 | auto *NewDestTy = DestITy->getExtendedType(); |
787 | if (shouldChangeType(From: SrcTy, To: NewDestTy) && |
788 | canEvaluateTruncated(V: Src, Ty: NewDestTy, IC&: *this, CxtI: &Trunc)) { |
789 | LLVM_DEBUG( |
790 | dbgs() << "ICE: EvaluateInDifferentType converting expression type" |
791 | " to reduce the width of operand of" |
792 | << Trunc << '\n'); |
793 | Value *Res = EvaluateInDifferentType(V: Src, Ty: NewDestTy, isSigned: false); |
794 | return new TruncInst(Res, DestTy); |
795 | } |
796 | } |
797 | } |
798 | |
799 | // See if we can simplify any instructions used by the input whose sole |
800 | // purpose is to compute bits we don't care about. |
801 | if (SimplifyDemandedInstructionBits(Inst&: Trunc)) |
802 | return &Trunc; |
803 | |
804 | if (DestWidth == 1) { |
805 | Value *Zero = Constant::getNullValue(Ty: SrcTy); |
806 | |
807 | Value *X; |
808 | const APInt *C1; |
809 | Constant *C2; |
810 | if (match(V: Src, P: m_OneUse(SubPattern: m_Shr(L: m_Shl(L: m_Power2(V&: C1), R: m_Value(V&: X)), |
811 | R: m_ImmConstant(C&: C2))))) { |
812 | // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2 |
813 | Constant *Log2C1 = ConstantInt::get(Ty: SrcTy, V: C1->exactLogBase2()); |
814 | Constant *CmpC = ConstantExpr::getSub(C1: C2, C2: Log2C1); |
815 | return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC); |
816 | } |
817 | |
818 | if (match(V: Src, P: m_Shr(L: m_Value(V&: X), R: m_SpecificInt(V: SrcWidth - 1)))) { |
819 | // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0 |
820 | // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0 |
821 | return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero); |
822 | } |
823 | |
824 | Constant *C; |
825 | if (match(V: Src, P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_ImmConstant(C))))) { |
826 | // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0 |
827 | Constant *One = ConstantInt::get(Ty: SrcTy, V: APInt(SrcWidth, 1)); |
828 | Value *MaskC = Builder.CreateShl(LHS: One, RHS: C); |
829 | Value *And = Builder.CreateAnd(LHS: X, RHS: MaskC); |
830 | return new ICmpInst(ICmpInst::ICMP_NE, And, Zero); |
831 | } |
832 | if (match(V: Src, P: m_OneUse(SubPattern: m_c_Or(L: m_LShr(L: m_Value(V&: X), R: m_ImmConstant(C)), |
833 | R: m_Deferred(V: X))))) { |
834 | // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0 |
835 | Constant *One = ConstantInt::get(Ty: SrcTy, V: APInt(SrcWidth, 1)); |
836 | Value *MaskC = Builder.CreateShl(LHS: One, RHS: C); |
837 | Value *And = Builder.CreateAnd(LHS: X, RHS: Builder.CreateOr(LHS: MaskC, RHS: One)); |
838 | return new ICmpInst(ICmpInst::ICMP_NE, And, Zero); |
839 | } |
840 | |
841 | { |
842 | const APInt *C; |
843 | if (match(V: Src, P: m_Shl(L: m_APInt(Res&: C), R: m_Value(V&: X))) && (*C)[0] == 1) { |
844 | // trunc (C << X) to i1 --> X == 0, where C is odd |
845 | return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero); |
846 | } |
847 | } |
848 | |
849 | if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) { |
850 | Value *X, *Y; |
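// trunc nuw/nsw (xor X, Y) to i1 --> icmp ne X, Y
// With nuw (or nsw), the wide xor is known to be 0 or 1 (or 0 or -1), so the
// i1 result is set exactly when X and Y differ.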
851 | if (match(V: Src, P: m_Xor(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
852 | return new ICmpInst(ICmpInst::ICMP_NE, X, Y); |
853 | } |
854 | } |
855 | |
856 | Value *A, *B; |
857 | Constant *C; |
858 | if (match(V: Src, P: m_LShr(L: m_SExt(Op: m_Value(V&: A)), R: m_Constant(C)))) { |
859 | unsigned AWidth = A->getType()->getScalarSizeInBits(); |
860 | unsigned MaxShiftAmt = SrcWidth - std::max(a: DestWidth, b: AWidth); |
861 | auto *OldSh = cast<Instruction>(Val: Src); |
862 | bool IsExact = OldSh->isExact(); |
863 | |
864 | // If the shift is small enough, all zero bits created by the shift are |
865 | // removed by the trunc. |
866 | if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULE, |
867 | Threshold: APInt(SrcWidth, MaxShiftAmt)))) { |
868 | auto GetNewShAmt = [&](unsigned Width) { |
869 | Constant *MaxAmt = ConstantInt::get(Ty: SrcTy, V: Width - 1, IsSigned: false); |
870 | Constant *Cmp = |
871 | ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_ULT, LHS: C, RHS: MaxAmt, DL); |
872 | Constant *ShAmt = ConstantFoldSelectInstruction(Cond: Cmp, V1: C, V2: MaxAmt); |
873 | return ConstantFoldCastOperand(Opcode: Instruction::Trunc, C: ShAmt, DestTy: A->getType(), |
874 | DL); |
875 | }; |
876 | |
877 | // trunc (lshr (sext A), C) --> ashr A, C |
878 | if (A->getType() == DestTy) { |
879 | Constant *ShAmt = GetNewShAmt(DestWidth); |
880 | ShAmt = Constant::mergeUndefsWith(C: ShAmt, Other: C); |
881 | return IsExact ? BinaryOperator::CreateExactAShr(V1: A, V2: ShAmt) |
882 | : BinaryOperator::CreateAShr(V1: A, V2: ShAmt); |
883 | } |
884 | // The types are mismatched, so create a cast after shifting: |
885 | // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C) |
886 | if (Src->hasOneUse()) { |
887 | Constant *ShAmt = GetNewShAmt(AWidth); |
888 | Value *Shift = Builder.CreateAShr(LHS: A, RHS: ShAmt, Name: "", isExact: IsExact); |
889 | return CastInst::CreateIntegerCast(S: Shift, Ty: DestTy, isSigned: true); |
890 | } |
891 | } |
892 | // TODO: Mask high bits with 'and'. |
893 | } |
894 | |
895 | if (Instruction *I = narrowBinOp(Trunc)) |
896 | return I; |
897 | |
898 | if (Instruction *I = shrinkSplatShuffle(Trunc, Builder)) |
899 | return I; |
900 | |
901 | if (Instruction *I = shrinkInsertElt(Trunc, Builder)) |
902 | return I; |
903 | |
904 | if (Src->hasOneUse() && |
905 | (isa<VectorType>(Val: SrcTy) || shouldChangeType(From: SrcTy, To: DestTy))) { |
906 | // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the |
907 | // dest type is native and cst < dest size. |
908 | if (match(V: Src, P: m_Shl(L: m_Value(V&: A), R: m_Constant(C))) && |
909 | !match(V: A, P: m_Shr(L: m_Value(), R: m_Constant()))) { |
910 | // Skip when the shifted value is itself a shift by a constant: narrowing |
911 | // would undo a combine in FoldShiftByConstant and break the extend-in-register pattern. |
912 | APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth); |
913 | if (match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold))) { |
914 | Value *NewTrunc = Builder.CreateTrunc(V: A, DestTy, Name: A->getName() + ".tr"); |
915 | return BinaryOperator::Create(Op: Instruction::Shl, S1: NewTrunc, |
916 | S2: ConstantExpr::getTrunc(C, Ty: DestTy)); |
917 | } |
918 | } |
919 | } |
920 | |
921 | if (Instruction *I = foldVecTruncToExtElt(Trunc, IC&: *this)) |
922 | return I; |
923 | |
924 | if (Instruction *I = foldVecExtTruncToExtElt(Trunc, IC&: *this)) |
925 | return I; |
926 | |
927 | // trunc (ctlz_i32(zext(A), B)) --> add(ctlz_i16(A, B), C) |
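// For example (illustrative), with A of type i16 inside a 32-bit ctlz:
//   trunc (ctlz_i32(zext i16 %a to i32, B)) to i16
//   --->  add (ctlz_i16(%a, B)), 16
// because the zero-extended operand always has at least 16 leading zeros.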
928 | if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctlz>(Op0: m_ZExt(Op: m_Value(V&: A)), |
929 | Op1: m_Value(V&: B))))) { |
930 | unsigned AWidth = A->getType()->getScalarSizeInBits(); |
931 | if (AWidth == DestWidth && AWidth > Log2_32(Value: SrcWidth)) { |
932 | Value *WidthDiff = ConstantInt::get(Ty: A->getType(), V: SrcWidth - AWidth); |
933 | Value *NarrowCtlz = |
934 | Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: {Trunc.getType()}, Args: {A, B}); |
935 | return BinaryOperator::CreateAdd(V1: NarrowCtlz, V2: WidthDiff); |
936 | } |
937 | } |
938 | |
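// A vscale amount that is known (from the function's vscale_range attribute)
// to fit in DestWidth bits can simply be rematerialized at the narrow type.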
939 | if (match(V: Src, P: m_VScale())) { |
940 | if (Trunc.getFunction() && |
941 | Trunc.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) { |
942 | Attribute Attr = |
943 | Trunc.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange); |
944 | if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) |
945 | if (Log2_32(Value: *MaxVScale) < DestWidth) |
946 | return replaceInstUsesWith(I&: Trunc, V: Builder.CreateVScale(Ty: DestTy)); |
947 | } |
948 | } |
949 | |
950 | if (DestWidth == 1 && |
951 | (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) && |
952 | isKnownNonZero(V: Src, Q: SQ.getWithInstruction(I: &Trunc))) |
953 | return replaceInstUsesWith(I&: Trunc, V: ConstantInt::getTrue(Ty: DestTy)); |
954 | |
955 | bool Changed = false; |
956 | if (!Trunc.hasNoSignedWrap() && |
957 | ComputeMaxSignificantBits(Op: Src, CxtI: &Trunc) <= DestWidth) { |
958 | Trunc.setHasNoSignedWrap(true); |
959 | Changed = true; |
960 | } |
961 | if (!Trunc.hasNoUnsignedWrap() && |
962 | MaskedValueIsZero(V: Src, Mask: APInt::getBitsSetFrom(numBits: SrcWidth, loBit: DestWidth), |
963 | CxtI: &Trunc)) { |
964 | Trunc.setHasNoUnsignedWrap(true); |
965 | Changed = true; |
966 | } |
967 | |
968 | return Changed ? &Trunc : nullptr; |
969 | } |
970 | |
971 | Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, |
972 | ZExtInst &Zext) { |
973 | // If we are just checking for an icmp eq of a single bit and zext'ing it |
974 | // to an integer, then shift the bit to the appropriate place and then |
975 | // cast to integer to avoid the comparison. |
976 | |
977 | // FIXME: This set of transforms does not check for extra uses and/or creates |
978 | // an extra instruction (an optional final cast is not included |
979 | // in the transform comments). We may also want to favor icmp over |
980 | // shifts in cases of equal instructions because icmp has better |
981 | // analysis in general (invert the transform). |
982 | |
983 | const APInt *Op1CV; |
984 | if (match(V: Cmp->getOperand(i_nocapture: 1), P: m_APInt(Res&: Op1CV))) { |
985 | |
986 | // zext (x <s 0) to i32 --> x>>u31 true if signbit set. |
987 | if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) { |
988 | Value *In = Cmp->getOperand(i_nocapture: 0); |
989 | Value *Sh = ConstantInt::get(Ty: In->getType(), |
990 | V: In->getType()->getScalarSizeInBits() - 1); |
991 | In = Builder.CreateLShr(LHS: In, RHS: Sh, Name: In->getName() + ".lobit"); |
992 | if (In->getType() != Zext.getType()) |
993 | In = Builder.CreateIntCast(V: In, DestTy: Zext.getType(), isSigned: false /*ZExt*/); |
994 | |
995 | return replaceInstUsesWith(I&: Zext, V: In); |
996 | } |
997 | |
998 | // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. |
999 | // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. |
1000 | // zext (X != 0) to i32 --> X iff X has only the low bit set. |
1001 | // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. |
1002 | |
1003 | if (Op1CV->isZero() && Cmp->isEquality()) { |
1004 | // Exactly 1 possible 1? But not the high-bit because that is |
1005 | // canonicalized to this form. |
1006 | KnownBits Known = computeKnownBits(V: Cmp->getOperand(i_nocapture: 0), CxtI: &Zext); |
1007 | APInt KnownZeroMask(~Known.Zero); |
1008 | uint32_t ShAmt = KnownZeroMask.logBase2(); |
1009 | bool IsExpectShAmt = KnownZeroMask.isPowerOf2() && |
1010 | (Zext.getType()->getScalarSizeInBits() != ShAmt + 1); |
1011 | if (IsExpectShAmt && |
1012 | (Cmp->getOperand(i_nocapture: 0)->getType() == Zext.getType() || |
1013 | Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) { |
1014 | Value *In = Cmp->getOperand(i_nocapture: 0); |
1015 | if (ShAmt) { |
1016 | // Perform a logical shr by shiftamt. |
1017 | // Insert the shift to put the result in the low bit. |
1018 | In = Builder.CreateLShr(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), V: ShAmt), |
1019 | Name: In->getName() + ".lobit"); |
1020 | } |
1021 | |
1022 | // Toggle the low bit for "X == 0". |
1023 | if (Cmp->getPredicate() == ICmpInst::ICMP_EQ) |
1024 | In = Builder.CreateXor(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), V: 1)); |
1025 | |
1026 | if (Zext.getType() == In->getType()) |
1027 | return replaceInstUsesWith(I&: Zext, V: In); |
1028 | |
1029 | Value *IntCast = Builder.CreateIntCast(V: In, DestTy: Zext.getType(), isSigned: false); |
1030 | return replaceInstUsesWith(I&: Zext, V: IntCast); |
1031 | } |
1032 | } |
1033 | } |
1034 | |
1035 | if (Cmp->isEquality()) { |
1036 | // Test if a bit is clear/set using a shifted-one mask: |
1037 | // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1 |
1038 | // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1 |
1039 | Value *X, *ShAmt; |
1040 | if (Cmp->hasOneUse() && match(V: Cmp->getOperand(i_nocapture: 1), P: m_ZeroInt()) && |
1041 | match(V: Cmp->getOperand(i_nocapture: 0), |
1042 | P: m_OneUse(SubPattern: m_c_And(L: m_Shl(L: m_One(), R: m_Value(V&: ShAmt)), R: m_Value(V&: X))))) { |
1043 | auto *And = cast<BinaryOperator>(Val: Cmp->getOperand(i_nocapture: 0)); |
1044 | Value *Shift = And->getOperand(i_nocapture: X == And->getOperand(i_nocapture: 0) ? 1 : 0); |
1045 | if (Zext.getType() == And->getType() || |
1046 | Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) { |
1047 | if (Cmp->getPredicate() == ICmpInst::ICMP_EQ) |
1048 | X = Builder.CreateNot(V: X); |
1049 | Value *Lshr = Builder.CreateLShr(LHS: X, RHS: ShAmt); |
1050 | Value *And1 = |
1051 | Builder.CreateAnd(LHS: Lshr, RHS: ConstantInt::get(Ty: X->getType(), V: 1)); |
1052 | return replaceInstUsesWith( |
1053 | I&: Zext, V: Builder.CreateZExtOrTrunc(V: And1, DestTy: Zext.getType())); |
1054 | } |
1055 | } |
1056 | } |
1057 | |
1058 | return nullptr; |
1059 | } |
1060 | |
1061 | /// Determine if the specified value can be computed in the specified wider type |
1062 | /// and produce the same low bits. If not, return false. |
1063 | /// |
1064 | /// If this function returns true, it can also return a non-zero number of bits |
1065 | /// (in BitsToClear) which indicates that the value it computes is correct for |
1066 | /// the zero extend, but that the additional BitsToClear bits need to be zero'd |
1067 | /// out. For example, to promote something like: |
1068 | /// |
1069 | /// %B = trunc i64 %A to i32 |
1070 | /// %C = lshr i32 %B, 8 |
1071 | /// %E = zext i32 %C to i64 |
1072 | /// |
1073 | /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be |
1074 | /// set to 8 to indicate that the promoted value needs to have bits 24-31 |
1075 | /// cleared in addition to bits 32-63. Since an 'and' will be generated to |
1076 | /// clear the top bits anyway, doing this has no extra cost. |
1077 | /// |
1078 | /// This function works on both vectors and scalars. |
1079 | static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, |
1080 | InstCombinerImpl &IC, Instruction *CxtI) { |
1081 | BitsToClear = 0; |
1082 | if (canAlwaysEvaluateInType(V, Ty)) |
1083 | return true; |
1084 | if (canNotEvaluateInType(V, Ty)) |
1085 | return false; |
1086 | |
1087 | auto *I = cast<Instruction>(Val: V); |
1088 | unsigned Tmp; |
1089 | switch (I->getOpcode()) { |
1090 | case Instruction::ZExt: // zext(zext(x)) -> zext(x). |
1091 | case Instruction::SExt: // zext(sext(x)) -> sext(x). |
1092 | case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x) |
1093 | return true; |
1094 | case Instruction::And: |
1095 | case Instruction::Or: |
1096 | case Instruction::Xor: |
1097 | case Instruction::Add: |
1098 | case Instruction::Sub: |
1099 | case Instruction::Mul: |
1100 | if (!canEvaluateZExtd(V: I->getOperand(i: 0), Ty, BitsToClear, IC, CxtI) || |
1101 | !canEvaluateZExtd(V: I->getOperand(i: 1), Ty, BitsToClear&: Tmp, IC, CxtI)) |
1102 | return false; |
1103 | // These can all be promoted if neither operand has 'bits to clear'. |
1104 | if (BitsToClear == 0 && Tmp == 0) |
1105 | return true; |
1106 | |
1107 | // If the operation is an AND/OR/XOR and the bits to clear are zero in the |
1108 | // other side, BitsToClear is ok. |
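// For example (illustrative), when promoting 'or (lshr X, 4), 7' the lshr
// contributes BitsToClear = 4; since the top 4 bits of the constant 7 are
// zero, the final masking 'and' still clears exactly the bits that need it.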
1109 | if (Tmp == 0 && I->isBitwiseLogicOp()) { |
1110 | // We use MaskedValueIsZero here for generality, but the case we care |
1111 | // about the most is constant RHS. |
1112 | unsigned VSize = V->getType()->getScalarSizeInBits(); |
1113 | if (IC.MaskedValueIsZero(V: I->getOperand(i: 1), |
1114 | Mask: APInt::getHighBitsSet(numBits: VSize, hiBitsSet: BitsToClear), |
1115 | CxtI)) { |
1116 | // If this is an And instruction and all of the BitsToClear are |
1117 | // known to be zero we can reset BitsToClear. |
1118 | if (I->getOpcode() == Instruction::And) |
1119 | BitsToClear = 0; |
1120 | return true; |
1121 | } |
1122 | } |
1123 | |
1124 | // Otherwise, we don't know how to analyze this BitsToClear case yet. |
1125 | return false; |
1126 | |
1127 | case Instruction::Shl: { |
1128 | // We can promote shl(x, cst) if we can promote x. Since shl overwrites the |
1129 | // upper bits we can reduce BitsToClear by the shift amount. |
1130 | const APInt *Amt; |
1131 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Amt))) { |
1132 | if (!canEvaluateZExtd(V: I->getOperand(i: 0), Ty, BitsToClear, IC, CxtI)) |
1133 | return false; |
1134 | uint64_t ShiftAmt = Amt->getZExtValue(); |
1135 | BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0; |
1136 | return true; |
1137 | } |
1138 | return false; |
1139 | } |
1140 | case Instruction::LShr: { |
1141 | // We can promote lshr(x, cst) if we can promote x. The final 'and' of the |
1142 | // zext must then also clear the extra high bits that this shift pulls down. |
1143 | const APInt *Amt; |
1144 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Amt))) { |
1145 | if (!canEvaluateZExtd(V: I->getOperand(i: 0), Ty, BitsToClear, IC, CxtI)) |
1146 | return false; |
1147 | BitsToClear += Amt->getZExtValue(); |
1148 | if (BitsToClear > V->getType()->getScalarSizeInBits()) |
1149 | BitsToClear = V->getType()->getScalarSizeInBits(); |
1150 | return true; |
1151 | } |
1152 | // Cannot promote variable LSHR. |
1153 | return false; |
1154 | } |
1155 | case Instruction::Select: |
1156 | if (!canEvaluateZExtd(V: I->getOperand(i: 1), Ty, BitsToClear&: Tmp, IC, CxtI) || |
1157 | !canEvaluateZExtd(V: I->getOperand(i: 2), Ty, BitsToClear, IC, CxtI) || |
1158 | // TODO: If important, we could handle the case when the BitsToClear are |
1159 | // known zero in the disagreeing side. |
1160 | Tmp != BitsToClear) |
1161 | return false; |
1162 | return true; |
1163 | |
1164 | case Instruction::PHI: { |
1165 | // We can change a phi if we can change all operands. Note that we never |
1166 | // get into trouble with cyclic PHIs here because we only consider |
1167 | // instructions with a single use. |
1168 | PHINode *PN = cast<PHINode>(Val: I); |
1169 | if (!canEvaluateZExtd(V: PN->getIncomingValue(i: 0), Ty, BitsToClear, IC, CxtI)) |
1170 | return false; |
1171 | for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) |
1172 | if (!canEvaluateZExtd(V: PN->getIncomingValue(i), Ty, BitsToClear&: Tmp, IC, CxtI) || |
1173 | // TODO: If important, we could handle the case when the BitsToClear |
1174 | // are known zero in the disagreeing input. |
1175 | Tmp != BitsToClear) |
1176 | return false; |
1177 | return true; |
1178 | } |
1179 | case Instruction::Call: |
1180 | // llvm.vscale() can always be executed in a larger type, because the |
1181 | // value is automatically zero-extended. |
1182 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) |
1183 | if (II->getIntrinsicID() == Intrinsic::vscale) |
1184 | return true; |
1185 | return false; |
1186 | default: |
1187 | // TODO: Can handle more cases here. |
1188 | return false; |
1189 | } |
1190 | } |
1191 | |
1192 | Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) { |
1193 | // If this zero extend is only used by a truncate, let the truncate be |
1194 | // eliminated before we try to optimize this zext. |
1195 | if (Zext.hasOneUse() && isa<TruncInst>(Val: Zext.user_back()) && |
1196 | !isa<Constant>(Val: Zext.getOperand(i_nocapture: 0))) |
1197 | return nullptr; |
1198 | |
1199 | // If one of the common conversions will work, do it. |
1200 | if (Instruction *Result = commonCastTransforms(CI&: Zext)) |
1201 | return Result; |
1202 | |
1203 | Value *Src = Zext.getOperand(i_nocapture: 0); |
1204 | Type *SrcTy = Src->getType(), *DestTy = Zext.getType(); |
1205 | |
1206 | // zext nneg bool x -> 0 |
1207 | if (SrcTy->isIntOrIntVectorTy(BitWidth: 1) && Zext.hasNonNeg()) |
1208 | return replaceInstUsesWith(I&: Zext, V: Constant::getNullValue(Ty: Zext.getType())); |
1209 | |
1210 | // Try to extend the entire expression tree to the wide destination type. |
1211 | unsigned BitsToClear; |
1212 | if (shouldChangeType(From: SrcTy, To: DestTy) && |
1213 | canEvaluateZExtd(V: Src, Ty: DestTy, BitsToClear, IC&: *this, CxtI: &Zext)) { |
1214 | assert(BitsToClear <= SrcTy->getScalarSizeInBits() && |
1215 | "Can't clear more bits than in SrcTy" ); |
1216 | |
1217 | // Okay, we can transform this! Insert the new expression now. |
1218 | LLVM_DEBUG( |
1219 | dbgs() << "ICE: EvaluateInDifferentType converting expression type" |
1220 | " to avoid zero extend: " |
1221 | << Zext << '\n'); |
1222 | Value *Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: false); |
1223 | assert(Res->getType() == DestTy); |
1224 | |
1225 | // Preserve debug values referring to Src if the zext is its last use. |
1226 | if (auto *SrcOp = dyn_cast<Instruction>(Val: Src)) |
1227 | if (SrcOp->hasOneUse()) |
1228 | replaceAllDbgUsesWith(From&: *SrcOp, To&: *Res, DomPoint&: Zext, DT); |
1229 | |
1230 | uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear; |
1231 | uint32_t DestBitSize = DestTy->getScalarSizeInBits(); |
1232 | |
1233 | // If the high bits are already filled with zeros, just replace this |
1234 | // cast with the result. |
1235 | if (MaskedValueIsZero( |
1236 | V: Res, Mask: APInt::getHighBitsSet(numBits: DestBitSize, hiBitsSet: DestBitSize - SrcBitsKept), |
1237 | CxtI: &Zext)) |
1238 | return replaceInstUsesWith(I&: Zext, V: Res); |
1239 | |
1240 | // We need to emit an AND to clear the high bits. |
1241 | Constant *C = ConstantInt::get(Ty: Res->getType(), |
1242 | V: APInt::getLowBitsSet(numBits: DestBitSize, loBitsSet: SrcBitsKept)); |
1243 | return BinaryOperator::CreateAnd(V1: Res, V2: C); |
1244 | } |
1245 | |
1246 | // If this is a TRUNC followed by a ZEXT then we are dealing with integral |
1247 | // types and if the sizes are just right we can convert this into a logical |
1248 | // 'and' which will be much cheaper than the pair of casts. |
1249 | if (auto *CSrc = dyn_cast<TruncInst>(Val: Src)) { // A->B->C cast |
1250 | // TODO: Subsume this into EvaluateInDifferentType. |
1251 | |
1252 | // Get the sizes of the types involved. We know that the intermediate type |
1253 | // will be smaller than A or C, but don't know the relation between A and C. |
1254 | Value *A = CSrc->getOperand(i_nocapture: 0); |
1255 | unsigned SrcSize = A->getType()->getScalarSizeInBits(); |
1256 | unsigned MidSize = CSrc->getType()->getScalarSizeInBits(); |
1257 | unsigned DstSize = DestTy->getScalarSizeInBits(); |
1258 | // If we're actually extending zero bits, then if |
1259 | // SrcSize < DstSize: zext(a & mask) |
1260 | // SrcSize == DstSize: a & mask |
1261 | // SrcSize > DstSize: trunc(a) & mask |
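    // For example, with A = i32, Mid = i8 and Dest = i16 (SrcSize > DstSize):
    //   zext (trunc i32 %a to i8) to i16 --> and (trunc i32 %a to i16), 255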
1262 | if (SrcSize < DstSize) { |
1263 | APInt AndValue(APInt::getLowBitsSet(numBits: SrcSize, loBitsSet: MidSize)); |
1264 | Constant *AndConst = ConstantInt::get(Ty: A->getType(), V: AndValue); |
1265 | Value *And = Builder.CreateAnd(LHS: A, RHS: AndConst, Name: CSrc->getName() + ".mask" ); |
1266 | return new ZExtInst(And, DestTy); |
1267 | } |
1268 | |
1269 | if (SrcSize == DstSize) { |
1270 | APInt AndValue(APInt::getLowBitsSet(numBits: SrcSize, loBitsSet: MidSize)); |
1271 | return BinaryOperator::CreateAnd(V1: A, V2: ConstantInt::get(Ty: A->getType(), |
1272 | V: AndValue)); |
1273 | } |
1274 | if (SrcSize > DstSize) { |
1275 | Value *Trunc = Builder.CreateTrunc(V: A, DestTy); |
1276 | APInt AndValue(APInt::getLowBitsSet(numBits: DstSize, loBitsSet: MidSize)); |
1277 | return BinaryOperator::CreateAnd(V1: Trunc, |
1278 | V2: ConstantInt::get(Ty: Trunc->getType(), |
1279 | V: AndValue)); |
1280 | } |
1281 | } |
1282 | |
1283 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: Src)) |
1284 | return transformZExtICmp(Cmp, Zext); |
1285 | |
1286 | // zext(trunc(X) & C) -> (X & zext(C)). |
1287 | Constant *C; |
1288 | Value *X; |
1289 | if (match(V: Src, P: m_OneUse(SubPattern: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Constant(C)))) && |
1290 | X->getType() == DestTy) |
1291 | return BinaryOperator::CreateAnd(V1: X, V2: Builder.CreateZExt(V: C, DestTy)); |
1292 | |
1293 | // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)). |
1294 | Value *And; |
1295 | if (match(V: Src, P: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: And), R: m_Constant(C)))) && |
1296 | match(V: And, P: m_OneUse(SubPattern: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Specific(V: C)))) && |
1297 | X->getType() == DestTy) { |
1298 | Value *ZC = Builder.CreateZExt(V: C, DestTy); |
1299 | return BinaryOperator::CreateXor(V1: Builder.CreateAnd(LHS: X, RHS: ZC), V2: ZC); |
1300 | } |
1301 | |
1302 | // If we are truncating, masking, and then zexting back to the original type, |
  // that's just a mask. This is not handled by canEvaluateZExtd if the
1304 | // intermediate values have extra uses. This could be generalized further for |
1305 | // a non-constant mask operand. |
1306 | // zext (and (trunc X), C) --> and X, (zext C) |
1307 | if (match(V: Src, P: m_And(L: m_Trunc(Op: m_Value(V&: X)), R: m_Constant(C))) && |
1308 | X->getType() == DestTy) { |
1309 | Value *ZextC = Builder.CreateZExt(V: C, DestTy); |
1310 | return BinaryOperator::CreateAnd(V1: X, V2: ZextC); |
1311 | } |
1312 | |
1313 | if (match(V: Src, P: m_VScale())) { |
1314 | if (Zext.getFunction() && |
1315 | Zext.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) { |
1316 | Attribute Attr = |
1317 | Zext.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange); |
1318 | if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) { |
1319 | unsigned TypeWidth = Src->getType()->getScalarSizeInBits(); |
1320 | if (Log2_32(Value: *MaxVScale) < TypeWidth) |
1321 | return replaceInstUsesWith(I&: Zext, V: Builder.CreateVScale(Ty: DestTy)); |
1322 | } |
1323 | } |
1324 | } |
1325 | |
1326 | if (!Zext.hasNonNeg()) { |
1327 | // If this zero extend is only used by a shift, add nneg flag. |
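    // This is safe: a negative source would zero-extend to a shift amount of
    // at least 2^(SrcBits-1) >= DestBits, so the shift (the only user) would
    // already be poison for such an input.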
1328 | if (Zext.hasOneUse() && |
1329 | SrcTy->getScalarSizeInBits() > |
1330 | Log2_64_Ceil(Value: DestTy->getScalarSizeInBits()) && |
1331 | match(V: Zext.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &Zext)))) { |
1332 | Zext.setNonNeg(); |
1333 | return &Zext; |
1334 | } |
1335 | |
1336 | if (isKnownNonNegative(V: Src, SQ: SQ.getWithInstruction(I: &Zext))) { |
1337 | Zext.setNonNeg(); |
1338 | return &Zext; |
1339 | } |
1340 | } |
1341 | |
1342 | return nullptr; |
1343 | } |
1344 | |
1345 | /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp. |
1346 | Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp, |
1347 | SExtInst &Sext) { |
1348 | Value *Op0 = Cmp->getOperand(i_nocapture: 0), *Op1 = Cmp->getOperand(i_nocapture: 1); |
1349 | ICmpInst::Predicate Pred = Cmp->getPredicate(); |
1350 | |
1351 | // Don't bother if Op1 isn't of vector or integer type. |
1352 | if (!Op1->getType()->isIntOrIntVectorTy()) |
1353 | return nullptr; |
1354 | |
1355 | if (Pred == ICmpInst::ICMP_SLT && match(V: Op1, P: m_ZeroInt())) { |
1356 | // sext (x <s 0) --> ashr x, 31 (all ones if negative) |
1357 | Value *Sh = ConstantInt::get(Ty: Op0->getType(), |
1358 | V: Op0->getType()->getScalarSizeInBits() - 1); |
1359 | Value *In = Builder.CreateAShr(LHS: Op0, RHS: Sh, Name: Op0->getName() + ".lobit" ); |
1360 | if (In->getType() != Sext.getType()) |
1361 | In = Builder.CreateIntCast(V: In, DestTy: Sext.getType(), isSigned: true /*SExt*/); |
1362 | |
1363 | return replaceInstUsesWith(I&: Sext, V: In); |
1364 | } |
1365 | |
1366 | if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Val: Op1)) { |
1367 | // If we know that only one bit of the LHS of the icmp can be set and we |
1368 | // have an equality comparison with zero or a power of 2, we can transform |
1369 | // the icmp and sext into bitwise/integer operations. |
1370 | if (Cmp->hasOneUse() && |
1371 | Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ |
1372 | KnownBits Known = computeKnownBits(V: Op0, CxtI: &Sext); |
1373 | |
1374 | APInt KnownZeroMask(~Known.Zero); |
1375 | if (KnownZeroMask.isPowerOf2()) { |
1376 | Value *In = Cmp->getOperand(i_nocapture: 0); |
1377 | |
1378 | // If the icmp tests for a known zero bit we can constant fold it. |
1379 | if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) { |
1380 | Value *V = Pred == ICmpInst::ICMP_NE ? |
1381 | ConstantInt::getAllOnesValue(Ty: Sext.getType()) : |
1382 | ConstantInt::getNullValue(Ty: Sext.getType()); |
1383 | return replaceInstUsesWith(I&: Sext, V); |
1384 | } |
1385 | |
1386 | if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) { |
1387 | // sext ((x & 2^n) == 0) -> (x >> n) - 1 |
1388 | // sext ((x & 2^n) != 2^n) -> (x >> n) - 1 |
1389 | unsigned ShiftAmt = KnownZeroMask.countr_zero(); |
1390 | // Perform a right shift to place the desired bit in the LSB. |
1391 | if (ShiftAmt) |
1392 | In = Builder.CreateLShr(LHS: In, |
1393 | RHS: ConstantInt::get(Ty: In->getType(), V: ShiftAmt)); |
1394 | |
1395 | // At this point "In" is either 1 or 0. Subtract 1 to turn |
1396 | // {1, 0} -> {0, -1}. |
1397 | In = Builder.CreateAdd(LHS: In, |
1398 | RHS: ConstantInt::getAllOnesValue(Ty: In->getType()), |
1399 | Name: "sext" ); |
1400 | } else { |
1401 | // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 |
1402 | // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 |
1403 | unsigned ShiftAmt = KnownZeroMask.countl_zero(); |
1404 | // Perform a left shift to place the desired bit in the MSB. |
1405 | if (ShiftAmt) |
1406 | In = Builder.CreateShl(LHS: In, |
1407 | RHS: ConstantInt::get(Ty: In->getType(), V: ShiftAmt)); |
1408 | |
1409 | // Distribute the bit over the whole bit width. |
1410 | In = Builder.CreateAShr(LHS: In, RHS: ConstantInt::get(Ty: In->getType(), |
1411 | V: KnownZeroMask.getBitWidth() - 1), Name: "sext" ); |
1412 | } |
1413 | |
1414 | if (Sext.getType() == In->getType()) |
1415 | return replaceInstUsesWith(I&: Sext, V: In); |
1416 | return CastInst::CreateIntegerCast(S: In, Ty: Sext.getType(), isSigned: true/*SExt*/); |
1417 | } |
1418 | } |
1419 | } |
1420 | |
1421 | return nullptr; |
1422 | } |
1423 | |
1424 | /// Return true if we can take the specified value and return it as type Ty |
1425 | /// without inserting any new casts and without changing the value of the common |
1426 | /// low bits. This is used by code that tries to promote integer operations to |
/// a wider type, which will allow us to eliminate the extension.
1428 | /// |
1429 | /// This function works on both vectors and scalars. |
1430 | /// |
1431 | static bool canEvaluateSExtd(Value *V, Type *Ty) { |
1432 | assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() && |
1433 | "Can't sign extend type to a smaller type" ); |
1434 | if (canAlwaysEvaluateInType(V, Ty)) |
1435 | return true; |
1436 | if (canNotEvaluateInType(V, Ty)) |
1437 | return false; |
1438 | |
1439 | auto *I = cast<Instruction>(Val: V); |
1440 | switch (I->getOpcode()) { |
1441 | case Instruction::SExt: // sext(sext(x)) -> sext(x) |
1442 | case Instruction::ZExt: // sext(zext(x)) -> zext(x) |
1443 | case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x) |
1444 | return true; |
1445 | case Instruction::And: |
1446 | case Instruction::Or: |
1447 | case Instruction::Xor: |
1448 | case Instruction::Add: |
1449 | case Instruction::Sub: |
1450 | case Instruction::Mul: |
1451 | // These operators can all arbitrarily be extended if their inputs can. |
1452 | return canEvaluateSExtd(V: I->getOperand(i: 0), Ty) && |
1453 | canEvaluateSExtd(V: I->getOperand(i: 1), Ty); |
1454 | |
1455 | //case Instruction::Shl: TODO |
1456 | //case Instruction::LShr: TODO |
1457 | |
1458 | case Instruction::Select: |
1459 | return canEvaluateSExtd(V: I->getOperand(i: 1), Ty) && |
1460 | canEvaluateSExtd(V: I->getOperand(i: 2), Ty); |
1461 | |
1462 | case Instruction::PHI: { |
1463 | // We can change a phi if we can change all operands. Note that we never |
1464 | // get into trouble with cyclic PHIs here because we only consider |
1465 | // instructions with a single use. |
1466 | PHINode *PN = cast<PHINode>(Val: I); |
1467 | for (Value *IncValue : PN->incoming_values()) |
1468 | if (!canEvaluateSExtd(V: IncValue, Ty)) return false; |
1469 | return true; |
1470 | } |
1471 | default: |
1472 | // TODO: Can handle more cases here. |
1473 | break; |
1474 | } |
1475 | |
1476 | return false; |
1477 | } |
1478 | |
1479 | Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) { |
1480 | // If this sign extend is only used by a truncate, let the truncate be |
1481 | // eliminated before we try to optimize this sext. |
1482 | if (Sext.hasOneUse() && isa<TruncInst>(Val: Sext.user_back())) |
1483 | return nullptr; |
1484 | |
1485 | if (Instruction *I = commonCastTransforms(CI&: Sext)) |
1486 | return I; |
1487 | |
1488 | Value *Src = Sext.getOperand(i_nocapture: 0); |
1489 | Type *SrcTy = Src->getType(), *DestTy = Sext.getType(); |
1490 | unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); |
1491 | unsigned DestBitSize = DestTy->getScalarSizeInBits(); |
1492 | |
1493 | // If the value being extended is zero or positive, use a zext instead. |
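  // For example, sext i8 %x to i32 with %x known non-negative becomes
  // zext nneg i8 %x to i32.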
1494 | if (isKnownNonNegative(V: Src, SQ: SQ.getWithInstruction(I: &Sext))) { |
1495 | auto CI = CastInst::Create(Instruction::ZExt, S: Src, Ty: DestTy); |
1496 | CI->setNonNeg(true); |
1497 | return CI; |
1498 | } |
1499 | |
1500 | // Try to extend the entire expression tree to the wide destination type. |
1501 | if (shouldChangeType(From: SrcTy, To: DestTy) && canEvaluateSExtd(V: Src, Ty: DestTy)) { |
1502 | // Okay, we can transform this! Insert the new expression now. |
1503 | LLVM_DEBUG( |
1504 | dbgs() << "ICE: EvaluateInDifferentType converting expression type" |
1505 | " to avoid sign extend: " |
1506 | << Sext << '\n'); |
1507 | Value *Res = EvaluateInDifferentType(V: Src, Ty: DestTy, isSigned: true); |
1508 | assert(Res->getType() == DestTy); |
1509 | |
1510 | // If the high bits are already filled with sign bit, just replace this |
1511 | // cast with the result. |
1512 | if (ComputeNumSignBits(Op: Res, CxtI: &Sext) > DestBitSize - SrcBitSize) |
1513 | return replaceInstUsesWith(I&: Sext, V: Res); |
1514 | |
1515 | // We need to emit a shl + ashr to do the sign extend. |
1516 | Value *ShAmt = ConstantInt::get(Ty: DestTy, V: DestBitSize-SrcBitSize); |
1517 | return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: Res, RHS: ShAmt, Name: "sext" ), |
1518 | V2: ShAmt); |
1519 | } |
1520 | |
1521 | Value *X; |
1522 | if (match(V: Src, P: m_Trunc(Op: m_Value(V&: X)))) { |
1523 | // If the input has more sign bits than bits truncated, then convert |
1524 | // directly to final type. |
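    // For example, if %x:i64 has more than 48 sign bits, then
    //   sext (trunc i64 %x to i16) to i32 --> trunc i64 %x to i32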
1525 | unsigned XBitSize = X->getType()->getScalarSizeInBits(); |
1526 | if (ComputeNumSignBits(Op: X, CxtI: &Sext) > XBitSize - SrcBitSize) |
1527 | return CastInst::CreateIntegerCast(S: X, Ty: DestTy, /* isSigned */ true); |
1528 | |
1529 | // If input is a trunc from the destination type, then convert into shifts. |
1530 | if (Src->hasOneUse() && X->getType() == DestTy) { |
1531 | // sext (trunc X) --> ashr (shl X, C), C |
1532 | Constant *ShAmt = ConstantInt::get(Ty: DestTy, V: DestBitSize - SrcBitSize); |
1533 | return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: X, RHS: ShAmt), V2: ShAmt); |
1534 | } |
1535 | |
1536 | // If we are replacing shifted-in high zero bits with sign bits, convert |
1537 | // the logic shift to arithmetic shift and eliminate the cast to |
1538 | // intermediate type: |
1539 | // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C) |
1540 | Value *Y; |
1541 | if (Src->hasOneUse() && |
1542 | match(V: X, P: m_LShr(L: m_Value(V&: Y), |
1543 | R: m_SpecificIntAllowPoison(V: XBitSize - SrcBitSize)))) { |
1544 | Value *Ashr = Builder.CreateAShr(LHS: Y, RHS: XBitSize - SrcBitSize); |
1545 | return CastInst::CreateIntegerCast(S: Ashr, Ty: DestTy, /* isSigned */ true); |
1546 | } |
1547 | } |
1548 | |
1549 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: Src)) |
1550 | return transformSExtICmp(Cmp, Sext); |
1551 | |
  // If the input is a shl/ashr pair of the same constant, then this is a sign
1553 | // extension from a smaller value. If we could trust arbitrary bitwidth |
1554 | // integers, we could turn this into a truncate to the smaller bit and then |
1555 | // use a sext for the whole extension. Since we don't, look deeper and check |
1556 | // for a truncate. If the source and dest are the same type, eliminate the |
1557 | // trunc and extend and just do shifts. For example, turn: |
1558 | // %a = trunc i32 %i to i8 |
1559 | // %b = shl i8 %a, C |
1560 | // %c = ashr i8 %b, C |
1561 | // %d = sext i8 %c to i32 |
1562 | // into: |
1563 | // %a = shl i32 %i, 32-(8-C) |
1564 | // %d = ashr i32 %a, 32-(8-C) |
1565 | Value *A = nullptr; |
1566 | // TODO: Eventually this could be subsumed by EvaluateInDifferentType. |
1567 | Constant *BA = nullptr, *CA = nullptr; |
1568 | if (match(V: Src, P: m_AShr(L: m_Shl(L: m_Trunc(Op: m_Value(V&: A)), R: m_Constant(C&: BA)), |
1569 | R: m_ImmConstant(C&: CA))) && |
1570 | BA->isElementWiseEqual(Y: CA) && A->getType() == DestTy) { |
1571 | Constant *WideCurrShAmt = |
1572 | ConstantFoldCastOperand(Opcode: Instruction::SExt, C: CA, DestTy, DL); |
1573 | assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail" ); |
1574 | Constant *NumLowbitsLeft = ConstantExpr::getSub( |
1575 | C1: ConstantInt::get(Ty: DestTy, V: SrcTy->getScalarSizeInBits()), C2: WideCurrShAmt); |
1576 | Constant *NewShAmt = ConstantExpr::getSub( |
1577 | C1: ConstantInt::get(Ty: DestTy, V: DestTy->getScalarSizeInBits()), |
1578 | C2: NumLowbitsLeft); |
1579 | NewShAmt = |
1580 | Constant::mergeUndefsWith(C: Constant::mergeUndefsWith(C: NewShAmt, Other: BA), Other: CA); |
1581 | A = Builder.CreateShl(LHS: A, RHS: NewShAmt, Name: Sext.getName()); |
1582 | return BinaryOperator::CreateAShr(V1: A, V2: NewShAmt); |
1583 | } |
1584 | |
1585 | // Splatting a bit of constant-index across a value: |
1586 | // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1 |
1587 | // If the dest type is different, use a cast (adjust use check). |
1588 | if (match(V: Src, P: m_OneUse(SubPattern: m_AShr(L: m_Trunc(Op: m_Value(V&: X)), |
1589 | R: m_SpecificInt(V: SrcBitSize - 1))))) { |
1590 | Type *XTy = X->getType(); |
1591 | unsigned XBitSize = XTy->getScalarSizeInBits(); |
1592 | Constant *ShlAmtC = ConstantInt::get(Ty: XTy, V: XBitSize - SrcBitSize); |
1593 | Constant *AshrAmtC = ConstantInt::get(Ty: XTy, V: XBitSize - 1); |
1594 | if (XTy == DestTy) |
1595 | return BinaryOperator::CreateAShr(V1: Builder.CreateShl(LHS: X, RHS: ShlAmtC), |
1596 | V2: AshrAmtC); |
1597 | if (cast<BinaryOperator>(Val: Src)->getOperand(i_nocapture: 0)->hasOneUse()) { |
1598 | Value *Ashr = Builder.CreateAShr(LHS: Builder.CreateShl(LHS: X, RHS: ShlAmtC), RHS: AshrAmtC); |
1599 | return CastInst::CreateIntegerCast(S: Ashr, Ty: DestTy, /* isSigned */ true); |
1600 | } |
1601 | } |
1602 | |
1603 | if (match(V: Src, P: m_VScale())) { |
1604 | if (Sext.getFunction() && |
1605 | Sext.getFunction()->hasFnAttribute(Kind: Attribute::VScaleRange)) { |
1606 | Attribute Attr = |
1607 | Sext.getFunction()->getFnAttribute(Kind: Attribute::VScaleRange); |
1608 | if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) |
1609 | if (Log2_32(Value: *MaxVScale) < (SrcBitSize - 1)) |
1610 | return replaceInstUsesWith(I&: Sext, V: Builder.CreateVScale(Ty: DestTy)); |
1611 | } |
1612 | } |
1613 | |
1614 | return nullptr; |
1615 | } |
1616 | |
/// Return true if the specified floating-point constant fits in the specified
/// FP type without changing its value.
1619 | static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { |
1620 | bool losesInfo; |
1621 | APFloat F = CFP->getValueAPF(); |
1622 | (void)F.convert(ToSemantics: Sem, RM: APFloat::rmNearestTiesToEven, losesInfo: &losesInfo); |
1623 | return !losesInfo; |
1624 | } |
1625 | |
1626 | static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) { |
1627 | if (CFP->getType() == Type::getPPC_FP128Ty(C&: CFP->getContext())) |
1628 | return nullptr; // No constant folding of this. |
1629 | // See if the value can be truncated to bfloat and then reextended. |
1630 | if (PreferBFloat && fitsInFPType(CFP, Sem: APFloat::BFloat())) |
1631 | return Type::getBFloatTy(C&: CFP->getContext()); |
1632 | // See if the value can be truncated to half and then reextended. |
1633 | if (!PreferBFloat && fitsInFPType(CFP, Sem: APFloat::IEEEhalf())) |
1634 | return Type::getHalfTy(C&: CFP->getContext()); |
1635 | // See if the value can be truncated to float and then reextended. |
1636 | if (fitsInFPType(CFP, Sem: APFloat::IEEEsingle())) |
1637 | return Type::getFloatTy(C&: CFP->getContext()); |
1638 | if (CFP->getType()->isDoubleTy()) |
1639 | return nullptr; // Won't shrink. |
1640 | if (fitsInFPType(CFP, Sem: APFloat::IEEEdouble())) |
1641 | return Type::getDoubleTy(C&: CFP->getContext()); |
1642 | // Don't try to shrink to various long double types. |
1643 | return nullptr; |
1644 | } |
1645 | |
1646 | // Determine if this is a vector of ConstantFPs and if so, return the minimal |
1647 | // type we can safely truncate all elements to. |
1648 | static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) { |
1649 | auto *CV = dyn_cast<Constant>(Val: V); |
1650 | auto *CVVTy = dyn_cast<FixedVectorType>(Val: V->getType()); |
1651 | if (!CV || !CVVTy) |
1652 | return nullptr; |
1653 | |
1654 | Type *MinType = nullptr; |
1655 | |
1656 | unsigned NumElts = CVVTy->getNumElements(); |
1657 | |
1658 | // For fixed-width vectors we find the minimal type by looking |
1659 | // through the constant values of the vector. |
1660 | for (unsigned i = 0; i != NumElts; ++i) { |
1661 | if (isa<UndefValue>(Val: CV->getAggregateElement(Elt: i))) |
1662 | continue; |
1663 | |
1664 | auto *CFP = dyn_cast_or_null<ConstantFP>(Val: CV->getAggregateElement(Elt: i)); |
1665 | if (!CFP) |
1666 | return nullptr; |
1667 | |
1668 | Type *T = shrinkFPConstant(CFP, PreferBFloat); |
1669 | if (!T) |
1670 | return nullptr; |
1671 | |
1672 | // If we haven't found a type yet or this type has a larger mantissa than |
1673 | // our previous type, this is our new minimal type. |
1674 | if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth()) |
1675 | MinType = T; |
1676 | } |
1677 | |
1678 | // Make a vector type from the minimal type. |
1679 | return MinType ? FixedVectorType::get(ElementType: MinType, NumElts) : nullptr; |
1680 | } |
1681 | |
1682 | /// Find the minimum FP type we can safely truncate to. |
1683 | static Type *getMinimumFPType(Value *V, bool PreferBFloat) { |
1684 | if (auto *FPExt = dyn_cast<FPExtInst>(Val: V)) |
1685 | return FPExt->getOperand(i_nocapture: 0)->getType(); |
1686 | |
1687 | // If this value is a constant, return the constant in the smallest FP type |
1688 | // that can accurately represent it. This allows us to turn |
1689 | // (float)((double)X+2.0) into x+2.0f. |
1690 | if (auto *CFP = dyn_cast<ConstantFP>(Val: V)) |
1691 | if (Type *T = shrinkFPConstant(CFP, PreferBFloat)) |
1692 | return T; |
1693 | |
1694 | // Try to shrink scalable and fixed splat vectors. |
1695 | if (auto *FPC = dyn_cast<Constant>(Val: V)) |
1696 | if (isa<VectorType>(Val: V->getType())) |
1697 | if (auto *Splat = dyn_cast_or_null<ConstantFP>(Val: FPC->getSplatValue())) |
1698 | if (Type *T = shrinkFPConstant(CFP: Splat, PreferBFloat)) |
1699 | return T; |
1700 | |
1701 | // Try to shrink a vector of FP constants. This returns nullptr on scalable |
  // vectors.
1703 | if (Type *T = shrinkFPConstantVector(V, PreferBFloat)) |
1704 | return T; |
1705 | |
1706 | return V->getType(); |
1707 | } |
1708 | |
1709 | /// Return true if the cast from integer to FP can be proven to be exact for all |
1710 | /// possible inputs (the conversion does not lose any precision). |
1711 | static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) { |
1712 | CastInst::CastOps Opcode = I.getOpcode(); |
1713 | assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) && |
1714 | "Unexpected cast" ); |
1715 | Value *Src = I.getOperand(i_nocapture: 0); |
1716 | Type *SrcTy = Src->getType(); |
1717 | Type *FPTy = I.getType(); |
1718 | bool IsSigned = Opcode == Instruction::SIToFP; |
1719 | int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned; |
1720 | |
  // Easy case - if the source integer type has fewer bits than the FP
  // mantissa, then the cast must be exact.
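  // For example, uitofp i16 --> float is always exact: the source needs at
  // most 16 significant bits and float's mantissa provides 24.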
1723 | int DestNumSigBits = FPTy->getFPMantissaWidth(); |
1724 | if (SrcSize <= DestNumSigBits) |
1725 | return true; |
1726 | |
1727 | // Cast from FP to integer and back to FP is independent of the intermediate |
1728 | // integer width because of poison on overflow. |
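  // For example, sitofp (fptosi float %f to i64) to double is exact: any %f
  // that does not fit in i64 is poison, and an in-range %f carries at most 24
  // significant bits, which double's 53-bit mantissa can represent.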
1729 | Value *F; |
1730 | if (match(V: Src, P: m_FPToSI(Op: m_Value(V&: F))) || match(V: Src, P: m_FPToUI(Op: m_Value(V&: F)))) { |
1731 | // If this is uitofp (fptosi F), the source needs an extra bit to avoid |
1732 | // potential rounding of negative FP input values. |
1733 | int SrcNumSigBits = F->getType()->getFPMantissaWidth(); |
1734 | if (!IsSigned && match(V: Src, P: m_FPToSI(Op: m_Value()))) |
1735 | SrcNumSigBits++; |
1736 | |
1737 | // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal |
1738 | // significant bits than the destination (and make sure neither type is |
1739 | // weird -- ppc_fp128). |
1740 | if (SrcNumSigBits > 0 && DestNumSigBits > 0 && |
1741 | SrcNumSigBits <= DestNumSigBits) |
1742 | return true; |
1743 | } |
1744 | |
1745 | // TODO: |
  // Try harder to find if the source integer type has fewer significant bits.
1747 | // For example, compute number of sign bits. |
1748 | KnownBits SrcKnown = IC.computeKnownBits(V: Src, CxtI: &I); |
1749 | int SigBits = (int)SrcTy->getScalarSizeInBits() - |
1750 | SrcKnown.countMinLeadingZeros() - |
1751 | SrcKnown.countMinTrailingZeros(); |
1752 | if (SigBits <= DestNumSigBits) |
1753 | return true; |
1754 | |
1755 | return false; |
1756 | } |
1757 | |
1758 | Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) { |
1759 | if (Instruction *I = commonCastTransforms(CI&: FPT)) |
1760 | return I; |
1761 | |
1762 | // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to |
1763 | // simplify this expression to avoid one or more of the trunc/extend |
1764 | // operations if we can do so without changing the numerical results. |
1765 | // |
1766 | // The exact manner in which the widths of the operands interact to limit |
1767 | // what we can and cannot do safely varies from operation to operation, and |
1768 | // is explained below in the various case statements. |
1769 | Type *Ty = FPT.getType(); |
1770 | auto *BO = dyn_cast<BinaryOperator>(Val: FPT.getOperand(i_nocapture: 0)); |
1771 | if (BO && BO->hasOneUse()) { |
1772 | Type *LHSMinType = |
1773 | getMinimumFPType(V: BO->getOperand(i_nocapture: 0), /*PreferBFloat=*/Ty->isBFloatTy()); |
1774 | Type *RHSMinType = |
1775 | getMinimumFPType(V: BO->getOperand(i_nocapture: 1), /*PreferBFloat=*/Ty->isBFloatTy()); |
1776 | unsigned OpWidth = BO->getType()->getFPMantissaWidth(); |
1777 | unsigned LHSWidth = LHSMinType->getFPMantissaWidth(); |
1778 | unsigned RHSWidth = RHSMinType->getFPMantissaWidth(); |
1779 | unsigned SrcWidth = std::max(a: LHSWidth, b: RHSWidth); |
1780 | unsigned DstWidth = Ty->getFPMantissaWidth(); |
1781 | switch (BO->getOpcode()) { |
1782 | default: break; |
1783 | case Instruction::FAdd: |
1784 | case Instruction::FSub: |
1785 | // For addition and subtraction, the infinitely precise result can |
1786 | // essentially be arbitrarily wide; proving that double rounding |
1787 | // will not occur because the result of OpI is exact (as we will for |
1788 | // FMul, for example) is hopeless. However, we *can* nonetheless |
1789 | // frequently know that double rounding cannot occur (or that it is |
1790 | // innocuous) by taking advantage of the specific structure of |
1791 | // infinitely-precise results that admit double rounding. |
1792 | // |
      // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1794 | // to represent both sources, we can guarantee that the double |
1795 | // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis, |
1796 | // "A Rigorous Framework for Fully Supporting the IEEE Standard ..." |
1797 | // for proof of this fact). |
1798 | // |
1799 | // Note: Figueroa does not consider the case where DstFormat != |
1800 | // SrcFormat. It's possible (likely even!) that this analysis |
1801 | // could be tightened for those cases, but they are rare (the main |
1802 | // case of interest here is (float)((double)float + float)). |
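      // For example, in (float)((double)a + (double)b) with float a and b:
      // OpWidth = 53, SrcWidth = DstWidth = 24, and 53 >= 2*24+1, so the sum
      // can be computed directly in float.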
1803 | if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) { |
1804 | Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: Ty); |
1805 | Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: Ty); |
1806 | Instruction *RI = BinaryOperator::Create(Op: BO->getOpcode(), S1: LHS, S2: RHS); |
1807 | RI->copyFastMathFlags(I: BO); |
1808 | return RI; |
1809 | } |
1810 | break; |
1811 | case Instruction::FMul: |
1812 | // For multiplication, the infinitely precise result has at most |
1813 | // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient |
1814 | // that such a value can be exactly represented, then no double |
1815 | // rounding can possibly occur; we can safely perform the operation |
1816 | // in the destination format if it can represent both sources. |
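      // For example, (float)((double)a * (double)b) with float a and b:
      // 53 >= 24+24 and 24 >= 24, so the product can be computed in float.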
1817 | if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) { |
1818 | Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: Ty); |
1819 | Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: Ty); |
1820 | return BinaryOperator::CreateFMulFMF(V1: LHS, V2: RHS, FMFSource: BO); |
1821 | } |
1822 | break; |
1823 | case Instruction::FDiv: |
      // For division, we again use the bound from Figueroa's
1825 | // dissertation. I am entirely certain that this bound can be |
1826 | // tightened in the unbalanced operand case by an analysis based on |
1827 | // the diophantine rational approximation bound, but the well-known |
1828 | // condition used here is a good conservative first pass. |
1829 | // TODO: Tighten bound via rigorous analysis of the unbalanced case. |
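      // For example, (float)((double)a / (double)b) with float a and b:
      // 53 >= 2*24 and 24 >= 24, so the division can be done in float.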
1830 | if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) { |
1831 | Value *LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: Ty); |
1832 | Value *RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: Ty); |
1833 | return BinaryOperator::CreateFDivFMF(V1: LHS, V2: RHS, FMFSource: BO); |
1834 | } |
1835 | break; |
1836 | case Instruction::FRem: { |
      // Remainder is straightforward: it is always exact, so the type of OpI
      // doesn't enter into things at all. We simply evaluate
1839 | // in whichever source type is larger, then convert to the |
1840 | // destination type. |
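      // For example, in fptrunc (frem (fpext half %a to double),
      //                               (fpext float %b to double)) to half,
      // the frem is evaluated in float (the wider source type) and the exact
      // result is then truncated to half.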
1841 | if (SrcWidth == OpWidth) |
1842 | break; |
1843 | Value *LHS, *RHS; |
1844 | if (LHSWidth == SrcWidth) { |
1845 | LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: LHSMinType); |
1846 | RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: LHSMinType); |
1847 | } else { |
1848 | LHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 0), DestTy: RHSMinType); |
1849 | RHS = Builder.CreateFPTrunc(V: BO->getOperand(i_nocapture: 1), DestTy: RHSMinType); |
1850 | } |
1851 | |
1852 | Value *ExactResult = Builder.CreateFRemFMF(L: LHS, R: RHS, FMFSource: BO); |
1853 | return CastInst::CreateFPCast(S: ExactResult, Ty); |
1854 | } |
1855 | } |
1856 | } |
1857 | |
1858 | // (fptrunc (fneg x)) -> (fneg (fptrunc x)) |
1859 | Value *X; |
1860 | Instruction *Op = dyn_cast<Instruction>(Val: FPT.getOperand(i_nocapture: 0)); |
1861 | if (Op && Op->hasOneUse()) { |
1862 | FastMathFlags FMF = FPT.getFastMathFlags(); |
1863 | if (auto *FPMO = dyn_cast<FPMathOperator>(Val: Op)) |
1864 | FMF &= FPMO->getFastMathFlags(); |
1865 | |
1866 | if (match(V: Op, P: m_FNeg(X: m_Value(V&: X)))) { |
1867 | Value *InnerTrunc = Builder.CreateFPTruncFMF(V: X, DestTy: Ty, FMFSource: FMF); |
1868 | Value *Neg = Builder.CreateFNegFMF(V: InnerTrunc, FMFSource: FMF); |
1869 | return replaceInstUsesWith(I&: FPT, V: Neg); |
1870 | } |
1871 | |
1872 | // If we are truncating a select that has an extended operand, we can |
1873 | // narrow the other operand and do the select as a narrow op. |
1874 | Value *Cond, *X, *Y; |
1875 | if (match(V: Op, P: m_Select(C: m_Value(V&: Cond), L: m_FPExt(Op: m_Value(V&: X)), R: m_Value(V&: Y))) && |
1876 | X->getType() == Ty) { |
      // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1878 | Value *NarrowY = Builder.CreateFPTruncFMF(V: Y, DestTy: Ty, FMFSource: FMF); |
1879 | Value *Sel = |
1880 | Builder.CreateSelectFMF(C: Cond, True: X, False: NarrowY, FMFSource: FMF, Name: "narrow.sel" , MDFrom: Op); |
1881 | return replaceInstUsesWith(I&: FPT, V: Sel); |
1882 | } |
1883 | if (match(V: Op, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: Y), R: m_FPExt(Op: m_Value(V&: X)))) && |
1884 | X->getType() == Ty) { |
      // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1886 | Value *NarrowY = Builder.CreateFPTruncFMF(V: Y, DestTy: Ty, FMFSource: FMF); |
1887 | Value *Sel = |
1888 | Builder.CreateSelectFMF(C: Cond, True: NarrowY, False: X, FMFSource: FMF, Name: "narrow.sel" , MDFrom: Op); |
1889 | return replaceInstUsesWith(I&: FPT, V: Sel); |
1890 | } |
1891 | } |
1892 | |
1893 | if (auto *II = dyn_cast<IntrinsicInst>(Val: FPT.getOperand(i_nocapture: 0))) { |
1894 | switch (II->getIntrinsicID()) { |
1895 | default: break; |
1896 | case Intrinsic::ceil: |
1897 | case Intrinsic::fabs: |
1898 | case Intrinsic::floor: |
1899 | case Intrinsic::nearbyint: |
1900 | case Intrinsic::rint: |
1901 | case Intrinsic::round: |
1902 | case Intrinsic::roundeven: |
1903 | case Intrinsic::trunc: { |
1904 | Value *Src = II->getArgOperand(i: 0); |
1905 | if (!Src->hasOneUse()) |
1906 | break; |
1907 | |
1908 | // Except for fabs, this transformation requires the input of the unary FP |
1909 | // operation to be itself an fpext from the type to which we're |
1910 | // truncating. |
1911 | if (II->getIntrinsicID() != Intrinsic::fabs) { |
1912 | FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Val: Src); |
1913 | if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty) |
1914 | break; |
1915 | } |
1916 | |
1917 | // Do unary FP operation on smaller type. |
1918 | // (fptrunc (fabs x)) -> (fabs (fptrunc x)) |
1919 | Value *InnerTrunc = Builder.CreateFPTrunc(V: Src, DestTy: Ty); |
1920 | Function *Overload = Intrinsic::getOrInsertDeclaration( |
1921 | M: FPT.getModule(), id: II->getIntrinsicID(), Tys: Ty); |
1922 | SmallVector<OperandBundleDef, 1> OpBundles; |
1923 | II->getOperandBundlesAsDefs(Defs&: OpBundles); |
1924 | CallInst *NewCI = |
1925 | CallInst::Create(Func: Overload, Args: {InnerTrunc}, Bundles: OpBundles, NameStr: II->getName()); |
1926 | // A normal value may be converted to an infinity. It means that we cannot |
1927 | // propagate ninf from the intrinsic. So we propagate FMF from fptrunc. |
1928 | NewCI->copyFastMathFlags(I: &FPT); |
1929 | return NewCI; |
1930 | } |
1931 | } |
1932 | } |
1933 | |
1934 | if (Instruction *I = shrinkInsertElt(Trunc&: FPT, Builder)) |
1935 | return I; |
1936 | |
1937 | Value *Src = FPT.getOperand(i_nocapture: 0); |
1938 | if (isa<SIToFPInst>(Val: Src) || isa<UIToFPInst>(Val: Src)) { |
1939 | auto *FPCast = cast<CastInst>(Val: Src); |
1940 | if (isKnownExactCastIntToFP(I&: *FPCast, IC&: *this)) |
1941 | return CastInst::Create(FPCast->getOpcode(), S: FPCast->getOperand(i_nocapture: 0), Ty); |
1942 | } |
1943 | |
1944 | return nullptr; |
1945 | } |
1946 | |
1947 | Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) { |
1948 | // If the source operand is a cast from integer to FP and known exact, then |
1949 | // cast the integer operand directly to the destination type. |
1950 | Type *Ty = FPExt.getType(); |
1951 | Value *Src = FPExt.getOperand(i_nocapture: 0); |
1952 | if (isa<SIToFPInst>(Val: Src) || isa<UIToFPInst>(Val: Src)) { |
1953 | auto *FPCast = cast<CastInst>(Val: Src); |
1954 | if (isKnownExactCastIntToFP(I&: *FPCast, IC&: *this)) |
1955 | return CastInst::Create(FPCast->getOpcode(), S: FPCast->getOperand(i_nocapture: 0), Ty); |
1956 | } |
1957 | |
1958 | return commonCastTransforms(CI&: FPExt); |
1959 | } |
1960 | |
1961 | /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X) |
1962 | /// This is safe if the intermediate type has enough bits in its mantissa to |
1963 | /// accurately represent all values of X. For example, this won't work with |
1964 | /// i64 -> float -> i64. |
1965 | Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) { |
1966 | if (!isa<UIToFPInst>(Val: FI.getOperand(i_nocapture: 0)) && !isa<SIToFPInst>(Val: FI.getOperand(i_nocapture: 0))) |
1967 | return nullptr; |
1968 | |
1969 | auto *OpI = cast<CastInst>(Val: FI.getOperand(i_nocapture: 0)); |
1970 | Value *X = OpI->getOperand(i_nocapture: 0); |
1971 | Type *XType = X->getType(); |
1972 | Type *DestType = FI.getType(); |
1973 | bool IsOutputSigned = isa<FPToSIInst>(Val: FI); |
1974 | |
1975 | // Since we can assume the conversion won't overflow, our decision as to |
1976 | // whether the input will fit in the float should depend on the minimum |
1977 | // of the input range and output range. |
1978 | |
1979 | // This means this is also safe for a signed input and unsigned output, since |
1980 | // a negative input would lead to undefined behavior. |
1981 | if (!isKnownExactCastIntToFP(I&: *OpI, IC&: *this)) { |
1982 | // The first cast may not round exactly based on the source integer width |
1983 | // and FP width, but the overflow UB rules can still allow this to fold. |
1984 | // If the destination type is narrow, that means the intermediate FP value |
1985 | // must be large enough to hold the source value exactly. |
    // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
1987 | int OutputSize = (int)DestType->getScalarSizeInBits(); |
1988 | if (OutputSize > OpI->getType()->getFPMantissaWidth()) |
1989 | return nullptr; |
1990 | } |
1991 | |
1992 | if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) { |
1993 | bool IsInputSigned = isa<SIToFPInst>(Val: OpI); |
1994 | if (IsInputSigned && IsOutputSigned) |
1995 | return new SExtInst(X, DestType); |
1996 | return new ZExtInst(X, DestType); |
1997 | } |
1998 | if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits()) |
1999 | return new TruncInst(X, DestType); |
2000 | |
2001 | assert(XType == DestType && "Unexpected types for int to FP to int casts" ); |
2002 | return replaceInstUsesWith(I&: FI, V: X); |
2003 | } |
2004 | |
2005 | static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) { |
2006 | // fpto{u/s}i non-norm --> 0 |
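  // Zeros and subnormals truncate to 0; NaNs, infinities, and values whose
  // truncation does not fit the destination type are poison, so the only
  // well-defined result left is 0.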
2007 | FPClassTest Mask = |
2008 | FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal; |
2009 | KnownFPClass FPClass = computeKnownFPClass( |
2010 | V: FI.getOperand(i: 0), InterestedClasses: Mask, SQ: IC.getSimplifyQuery().getWithInstruction(I: &FI)); |
2011 | if (FPClass.isKnownNever(Mask)) |
2012 | return IC.replaceInstUsesWith(I&: FI, V: ConstantInt::getNullValue(Ty: FI.getType())); |
2013 | |
2014 | return nullptr; |
2015 | } |
2016 | |
2017 | Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) { |
2018 | if (Instruction *I = foldItoFPtoI(FI)) |
2019 | return I; |
2020 | |
2021 | if (Instruction *I = foldFPtoI(FI, IC&: *this)) |
2022 | return I; |
2023 | |
2024 | return commonCastTransforms(CI&: FI); |
2025 | } |
2026 | |
2027 | Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) { |
2028 | if (Instruction *I = foldItoFPtoI(FI)) |
2029 | return I; |
2030 | |
2031 | if (Instruction *I = foldFPtoI(FI, IC&: *this)) |
2032 | return I; |
2033 | |
2034 | return commonCastTransforms(CI&: FI); |
2035 | } |
2036 | |
2037 | Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) { |
2038 | if (Instruction *R = commonCastTransforms(CI)) |
2039 | return R; |
2040 | if (!CI.hasNonNeg() && isKnownNonNegative(V: CI.getOperand(i_nocapture: 0), SQ)) { |
2041 | CI.setNonNeg(); |
2042 | return &CI; |
2043 | } |
2044 | return nullptr; |
2045 | } |
2046 | |
2047 | Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) { |
2048 | if (Instruction *R = commonCastTransforms(CI)) |
2049 | return R; |
2050 | if (isKnownNonNegative(V: CI.getOperand(i_nocapture: 0), SQ)) { |
2051 | auto *UI = |
2052 | CastInst::Create(Instruction::UIToFP, S: CI.getOperand(i_nocapture: 0), Ty: CI.getType()); |
2053 | UI->setNonNeg(true); |
2054 | return UI; |
2055 | } |
2056 | return nullptr; |
2057 | } |
2058 | |
2059 | Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) { |
2060 | // If the source integer type is not the intptr_t type for this target, do a |
2061 | // trunc or zext to the intptr_t type, then inttoptr of it. This allows the |
2062 | // cast to be exposed to other transforms. |
2063 | unsigned AS = CI.getAddressSpace(); |
2064 | if (CI.getOperand(i_nocapture: 0)->getType()->getScalarSizeInBits() != |
2065 | DL.getPointerSizeInBits(AS)) { |
2066 | Type *Ty = CI.getOperand(i_nocapture: 0)->getType()->getWithNewType( |
2067 | EltTy: DL.getIntPtrType(C&: CI.getContext(), AddressSpace: AS)); |
2068 | Value *P = Builder.CreateZExtOrTrunc(V: CI.getOperand(i_nocapture: 0), DestTy: Ty); |
2069 | return new IntToPtrInst(P, CI.getType()); |
2070 | } |
2071 | |
2072 | if (Instruction *I = commonCastTransforms(CI)) |
2073 | return I; |
2074 | |
2075 | return nullptr; |
2076 | } |
2077 | |
2078 | Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) { |
2079 | // Look through chain of one-use GEPs. |
2080 | Type *PtrTy = Ptr->getType(); |
2081 | SmallVector<GEPOperator *> GEPs; |
2082 | while (true) { |
2083 | auto *GEP = dyn_cast<GEPOperator>(Val: Ptr); |
2084 | if (!GEP || !GEP->hasOneUse()) |
2085 | break; |
2086 | GEPs.push_back(Elt: GEP); |
2087 | Ptr = GEP->getPointerOperand(); |
2088 | } |
2089 | |
  // Don't handle the case where a GEP converts from pointer to vector.
2091 | if (GEPs.empty() || PtrTy != Ptr->getType()) |
2092 | return nullptr; |
2093 | |
2094 | // Check whether we know the integer value of the base pointer. |
2095 | Value *Res; |
2096 | Type *IdxTy = DL.getIndexType(PtrTy); |
2097 | if (match(V: Ptr, P: m_OneUse(SubPattern: m_IntToPtr(Op: m_Value(V&: Res)))) && |
2098 | Res->getType() == IntTy && IntTy == IdxTy) { |
2099 | // pass |
2100 | } else if (isa<ConstantPointerNull>(Val: Ptr)) { |
2101 | Res = Constant::getNullValue(Ty: IdxTy); |
2102 | } else { |
2103 | return nullptr; |
2104 | } |
2105 | |
2106 | // Perform the entire operation on integers instead. |
2107 | for (GEPOperator *GEP : reverse(C&: GEPs)) { |
2108 | Value *Offset = EmitGEPOffset(GEP); |
2109 | Res = Builder.CreateAdd(LHS: Res, RHS: Offset, Name: "" , HasNUW: GEP->hasNoUnsignedWrap()); |
2110 | } |
2111 | return Builder.CreateZExtOrTrunc(V: Res, DestTy: IntTy); |
2112 | } |
2113 | |
2114 | Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { |
2115 | // If the destination integer type is not the intptr_t type for this target, |
2116 | // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast |
2117 | // to be exposed to other transforms. |
2118 | Value *SrcOp = CI.getPointerOperand(); |
2119 | Type *SrcTy = SrcOp->getType(); |
2120 | Type *Ty = CI.getType(); |
2121 | unsigned AS = CI.getPointerAddressSpace(); |
2122 | unsigned TySize = Ty->getScalarSizeInBits(); |
2123 | unsigned PtrSize = DL.getPointerSizeInBits(AS); |
2124 | if (TySize != PtrSize) { |
2125 | Type *IntPtrTy = |
2126 | SrcTy->getWithNewType(EltTy: DL.getIntPtrType(C&: CI.getContext(), AddressSpace: AS)); |
2127 | Value *P = Builder.CreatePtrToInt(V: SrcOp, DestTy: IntPtrTy); |
2128 | return CastInst::CreateIntegerCast(S: P, Ty, /*isSigned=*/false); |
2129 | } |
2130 | |
2131 | // (ptrtoint (ptrmask P, M)) |
2132 | // -> (and (ptrtoint P), M) |
2133 | // This is generally beneficial as `and` is better supported than `ptrmask`. |
2134 | Value *Ptr, *Mask; |
2135 | if (match(V: SrcOp, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: Ptr), |
2136 | Op1: m_Value(V&: Mask)))) && |
2137 | Mask->getType() == Ty) |
2138 | return BinaryOperator::CreateAnd(V1: Builder.CreatePtrToInt(V: Ptr, DestTy: Ty), V2: Mask); |
2139 | |
2140 | if (Value *V = foldPtrToIntOfGEP(IntTy: Ty, Ptr: SrcOp)) |
2141 | return replaceInstUsesWith(I&: CI, V); |
2142 | |
2143 | Value *Vec, *Scalar, *Index; |
2144 | if (match(V: SrcOp, P: m_OneUse(SubPattern: m_InsertElt(Val: m_IntToPtr(Op: m_Value(V&: Vec)), |
2145 | Elt: m_Value(V&: Scalar), Idx: m_Value(V&: Index)))) && |
2146 | Vec->getType() == Ty) { |
2147 | assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type" ); |
2148 | // Convert the scalar to int followed by insert to eliminate one cast: |
    // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2150 | Value *NewCast = Builder.CreatePtrToInt(V: Scalar, DestTy: Ty->getScalarType()); |
2151 | return InsertElementInst::Create(Vec, NewElt: NewCast, Idx: Index); |
2152 | } |
2153 | |
2154 | return commonCastTransforms(CI); |
2155 | } |
2156 | |
2157 | /// This input value (which is known to have vector type) is being zero extended |
2158 | /// or truncated to the specified vector type. Since the zext/trunc is done |
/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
/// endianness will impact which end of the vector is extended or truncated.
2162 | /// |
2163 | /// A vector is always stored with index 0 at the lowest address, which |
2164 | /// corresponds to the most significant bits for a big endian stored integer and |
2165 | /// the least significant bits for little endian. A trunc/zext of an integer |
2166 | /// impacts the big end of the integer. Thus, we need to add/remove elements at |
2167 | /// the front of the vector for big endian targets, and the back of the vector |
2168 | /// for little endian targets. |
2169 | /// |
2170 | /// Try to replace it with a shuffle (and vector/vector bitcast) if possible. |
2171 | /// |
2172 | /// The source and destination vector types may have different element types. |
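///
/// For example, bitcasting <4 x i16> to i64, truncating to i32, and
/// bitcasting back to <2 x i16> keeps elements 0 and 1 on a little endian
/// target but elements 2 and 3 on a big endian target.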
2173 | static Instruction * |
2174 | optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, |
2175 | InstCombinerImpl &IC) { |
2176 | // We can only do this optimization if the output is a multiple of the input |
2177 | // element size, or the input is a multiple of the output element size. |
2178 | // Convert the input type to have the same element type as the output. |
2179 | VectorType *SrcTy = cast<VectorType>(Val: InVal->getType()); |
2180 | |
2181 | if (SrcTy->getElementType() != DestTy->getElementType()) { |
2182 | // The input types don't need to be identical, but for now they must be the |
2183 | // same size. There is no specific reason we couldn't handle things like |
2184 | // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten |
2185 | // there yet. |
2186 | if (SrcTy->getElementType()->getPrimitiveSizeInBits() != |
2187 | DestTy->getElementType()->getPrimitiveSizeInBits()) |
2188 | return nullptr; |
2189 | |
2190 | SrcTy = |
2191 | FixedVectorType::get(ElementType: DestTy->getElementType(), |
2192 | NumElts: cast<FixedVectorType>(Val: SrcTy)->getNumElements()); |
2193 | InVal = IC.Builder.CreateBitCast(V: InVal, DestTy: SrcTy); |
2194 | } |
2195 | |
2196 | bool IsBigEndian = IC.getDataLayout().isBigEndian(); |
2197 | unsigned SrcElts = cast<FixedVectorType>(Val: SrcTy)->getNumElements(); |
2198 | unsigned DestElts = cast<FixedVectorType>(Val: DestTy)->getNumElements(); |
2199 | |
2200 | assert(SrcElts != DestElts && "Element counts should be different." ); |
2201 | |
2202 | // Now that the element types match, get the shuffle mask and RHS of the |
2203 | // shuffle to use, which depends on whether we're increasing or decreasing the |
2204 | // size of the input. |
2205 | auto ShuffleMaskStorage = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: SrcElts)); |
2206 | ArrayRef<int> ShuffleMask; |
2207 | Value *V2; |
2208 | |
2209 | if (SrcElts > DestElts) { |
2210 | // If we're shrinking the number of elements (rewriting an integer |
2211 | // truncate), just shuffle in the elements corresponding to the least |
2212 | // significant bits from the input and use poison as the second shuffle |
2213 | // input. |
2214 | V2 = PoisonValue::get(T: SrcTy); |
2215 | // Make sure the shuffle mask selects the "least significant bits" by |
    // keeping elements from the back of the src vector for big endian, and
    // from the front for little endian.
2218 | ShuffleMask = ShuffleMaskStorage; |
2219 | if (IsBigEndian) |
2220 | ShuffleMask = ShuffleMask.take_back(N: DestElts); |
2221 | else |
2222 | ShuffleMask = ShuffleMask.take_front(N: DestElts); |
2223 | } else { |
2224 | // If we're increasing the number of elements (rewriting an integer zext), |
2225 | // shuffle in all of the elements from InVal. Fill the rest of the result |
2226 | // elements with zeros from a constant zero. |
2227 | V2 = Constant::getNullValue(Ty: SrcTy); |
2228 | // Use first elt from V2 when indicating zero in the shuffle mask. |
2229 | uint32_t NullElt = SrcElts; |
2230 | // Extend with null values in the "most significant bits" by adding elements |
2231 | // in front of the src vector for big endian, and at the back for little |
2232 | // endian. |
2233 | unsigned DeltaElts = DestElts - SrcElts; |
2234 | if (IsBigEndian) |
2235 | ShuffleMaskStorage.insert(I: ShuffleMaskStorage.begin(), NumToInsert: DeltaElts, Elt: NullElt); |
2236 | else |
2237 | ShuffleMaskStorage.append(NumInputs: DeltaElts, Elt: NullElt); |
2238 | ShuffleMask = ShuffleMaskStorage; |
2239 | } |
2240 | |
2241 | return new ShuffleVectorInst(InVal, V2, ShuffleMask); |
2242 | } |
2243 | |
2244 | static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) { |
2245 | return Value % Ty->getPrimitiveSizeInBits() == 0; |
2246 | } |
2247 | |
2248 | static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { |
2249 | return Value / Ty->getPrimitiveSizeInBits(); |
2250 | } |
2251 | |
2252 | /// V is a value which is inserted into a vector of VecEltTy. |
2253 | /// Look through the value to see if we can decompose it into |
2254 | /// insertions into the vector. See the example in the comment for |
2255 | /// OptimizeIntegerToVectorInsertions for the pattern this handles. |
2256 | /// The type of V is always a non-zero multiple of VecEltTy's size. |
2257 | /// Shift is the number of bits between the lsb of V and the lsb of |
2258 | /// the vector. |
2259 | /// |
2260 | /// This returns false if the pattern can't be matched or true if it can, |
2261 | /// filling in Elements with the elements found here. |
2262 | static bool collectInsertionElements(Value *V, unsigned Shift, |
2263 | SmallVectorImpl<Value *> &Elements, |
2264 | Type *VecEltTy, bool isBigEndian) { |
2265 | assert(isMultipleOfTypeSize(Shift, VecEltTy) && |
2266 | "Shift should be a multiple of the element type size" ); |
2267 | |
2268 | // Undef values never contribute useful bits to the result. |
2269 | if (isa<UndefValue>(Val: V)) return true; |
2270 | |
  // If we got down to a value of the right type, we win: try inserting it into
  // the right element.
2273 | if (V->getType() == VecEltTy) { |
2274 | // Inserting null doesn't actually insert any elements. |
2275 | if (Constant *C = dyn_cast<Constant>(Val: V)) |
2276 | if (C->isNullValue()) |
2277 | return true; |
2278 | |
2279 | unsigned ElementIndex = getTypeSizeIndex(Value: Shift, Ty: VecEltTy); |
2280 | if (isBigEndian) |
2281 | ElementIndex = Elements.size() - ElementIndex - 1; |
2282 | |
2283 | // Fail if multiple elements are inserted into this slot. |
2284 | if (Elements[ElementIndex]) |
2285 | return false; |
2286 | |
2287 | Elements[ElementIndex] = V; |
2288 | return true; |
2289 | } |
2290 | |
2291 | if (Constant *C = dyn_cast<Constant>(Val: V)) { |
2292 | // Figure out the # elements this provides, and bitcast it or slice it up |
2293 | // as required. |
2294 | unsigned NumElts = getTypeSizeIndex(Value: C->getType()->getPrimitiveSizeInBits(), |
2295 | Ty: VecEltTy); |
2296 | // If the constant is the size of a vector element, we just need to bitcast |
2297 | // it to the right type so it gets properly inserted. |
2298 | if (NumElts == 1) |
2299 | return collectInsertionElements(V: ConstantExpr::getBitCast(C, Ty: VecEltTy), |
2300 | Shift, Elements, VecEltTy, isBigEndian); |
2301 | |
2302 | // Okay, this is a constant that covers multiple elements. Slice it up into |
2303 | // pieces and insert each element-sized piece into the vector. |
2304 | if (!isa<IntegerType>(Val: C->getType())) |
2305 | C = ConstantExpr::getBitCast(C, Ty: IntegerType::get(C&: V->getContext(), |
2306 | NumBits: C->getType()->getPrimitiveSizeInBits())); |
2307 | unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); |
2308 | Type *ElementIntTy = IntegerType::get(C&: C->getContext(), NumBits: ElementSize); |
2309 | |
2310 | for (unsigned i = 0; i != NumElts; ++i) { |
2311 | unsigned ShiftI = i * ElementSize; |
2312 | Constant *Piece = ConstantFoldBinaryInstruction( |
2313 | Opcode: Instruction::LShr, V1: C, V2: ConstantInt::get(Ty: C->getType(), V: ShiftI)); |
2314 | if (!Piece) |
2315 | return false; |
2316 | |
2317 | Piece = ConstantExpr::getTrunc(C: Piece, Ty: ElementIntTy); |
2318 | if (!collectInsertionElements(V: Piece, Shift: ShiftI + Shift, Elements, VecEltTy, |
2319 | isBigEndian)) |
2320 | return false; |
2321 | } |
2322 | return true; |
2323 | } |
2324 | |
2325 | if (!V->hasOneUse()) return false; |
2326 | |
2327 | Instruction *I = dyn_cast<Instruction>(Val: V); |
2328 | if (!I) return false; |
2329 | switch (I->getOpcode()) { |
2330 | default: return false; // Unhandled case. |
2331 | case Instruction::BitCast: |
2332 | if (I->getOperand(i: 0)->getType()->isVectorTy()) |
2333 | return false; |
2334 | return collectInsertionElements(V: I->getOperand(i: 0), Shift, Elements, VecEltTy, |
2335 | isBigEndian); |
2336 | case Instruction::ZExt: |
2337 | if (!isMultipleOfTypeSize( |
2338 | Value: I->getOperand(i: 0)->getType()->getPrimitiveSizeInBits(), |
2339 | Ty: VecEltTy)) |
2340 | return false; |
2341 | return collectInsertionElements(V: I->getOperand(i: 0), Shift, Elements, VecEltTy, |
2342 | isBigEndian); |
2343 | case Instruction::Or: |
2344 | return collectInsertionElements(V: I->getOperand(i: 0), Shift, Elements, VecEltTy, |
2345 | isBigEndian) && |
2346 | collectInsertionElements(V: I->getOperand(i: 1), Shift, Elements, VecEltTy, |
2347 | isBigEndian); |
2348 | case Instruction::Shl: { |
2349 | // Must be shifting by a constant that is a multiple of the element size. |
2350 | ConstantInt *CI = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1)); |
2351 | if (!CI) return false; |
2352 | Shift += CI->getZExtValue(); |
2353 | if (!isMultipleOfTypeSize(Value: Shift, Ty: VecEltTy)) return false; |
2354 | return collectInsertionElements(V: I->getOperand(i: 0), Shift, Elements, VecEltTy, |
2355 | isBigEndian); |
2356 | } |
2357 | |
2358 | } |
2359 | } |
2360 | |
2361 | |
2362 | /// If the input is an 'or' instruction, we may be doing shifts and ors to |
2363 | /// assemble the elements of the vector manually. |
2364 | /// Try to rip the code out and replace it with insertelements. This is to |
2365 | /// optimize code like this: |
2366 | /// |
2367 | /// %tmp37 = bitcast float %inc to i32 |
2368 | /// %tmp38 = zext i32 %tmp37 to i64 |
2369 | /// %tmp31 = bitcast float %inc5 to i32 |
2370 | /// %tmp32 = zext i32 %tmp31 to i64 |
2371 | /// %tmp33 = shl i64 %tmp32, 32 |
2372 | /// %ins35 = or i64 %tmp33, %tmp38 |
2373 | /// %tmp43 = bitcast i64 %ins35 to <2 x float> |
2374 | /// |
2375 | /// Into two insertelements that do "buildvector{%inc, %inc5}". |
2376 | static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, |
2377 | InstCombinerImpl &IC) { |
2378 | auto *DestVecTy = cast<FixedVectorType>(Val: CI.getType()); |
2379 | Value *IntInput = CI.getOperand(i_nocapture: 0); |
2380 | |
  // If the int input is just an undef value, do not try to optimize to vector
  // insertions, as that would prevent undef propagation.
2383 | if (isa<UndefValue>(Val: IntInput)) |
2384 | return nullptr; |
2385 | |
2386 | SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); |
2387 | if (!collectInsertionElements(V: IntInput, Shift: 0, Elements, |
2388 | VecEltTy: DestVecTy->getElementType(), |
2389 | isBigEndian: IC.getDataLayout().isBigEndian())) |
2390 | return nullptr; |
2391 | |
2392 | // If we succeeded, we know that all of the elements are specified by Elements |
2393 | // or are zero if Elements has a null entry. Recast this as a set of |
2394 | // insertions. |
2395 | Value *Result = Constant::getNullValue(Ty: CI.getType()); |
2396 | for (unsigned i = 0, e = Elements.size(); i != e; ++i) { |
2397 | if (!Elements[i]) continue; // Unset element. |
2398 | |
2399 | Result = IC.Builder.CreateInsertElement(Vec: Result, NewElt: Elements[i], |
2400 | Idx: IC.Builder.getInt32(C: i)); |
2401 | } |
2402 | |
2403 | return Result; |
2404 | } |
2405 | |
2406 | /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the |
2407 | /// vector followed by extract element. The backend tends to handle bitcasts of |
2408 | /// vectors better than bitcasts of scalars because vector registers are |
2409 | /// usually not type-specific like scalar integer or scalar floating-point. |
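 | /// For example (an illustrative sketch; value names are made up): |
 | ///   %e = extractelement <4 x i32> %v, i32 %idx |
 | ///   %f = bitcast i32 %e to float |
 | /// becomes |
 | ///   %bc = bitcast <4 x i32> %v to <4 x float> |
 | ///   %f  = extractelement <4 x float> %bc, i32 %idx |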
2410 | static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, |
2411 | InstCombinerImpl &IC) { |
2412 | Value *VecOp, *Index; |
2413 | if (!match(V: BitCast.getOperand(i_nocapture: 0), |
2414 | P: m_OneUse(SubPattern: m_ExtractElt(Val: m_Value(V&: VecOp), Idx: m_Value(V&: Index))))) |
2415 | return nullptr; |
2416 | |
2417 | // The bitcast must be to a vectorizable type, otherwise we can't make a new |
2418 | // type to extract from. |
2419 | Type *DestType = BitCast.getType(); |
2420 | VectorType *VecType = cast<VectorType>(Val: VecOp->getType()); |
2421 | if (VectorType::isValidElementType(ElemTy: DestType)) { |
2422 | auto *NewVecType = VectorType::get(ElementType: DestType, Other: VecType); |
2423 | auto *NewBC = IC.Builder.CreateBitCast(V: VecOp, DestTy: NewVecType, Name: "bc"); |
2424 | return ExtractElementInst::Create(Vec: NewBC, Idx: Index); |
2425 | } |
2426 | |
2427 | // Only handle the case where DestType is a vector, to avoid the inverse transform in visitBitCast: |
2428 | // bitcast (extractelement <1 x elt> %V, 0) to dest --> bitcast <1 x elt> %V to dest |
2429 | auto *FixedVType = dyn_cast<FixedVectorType>(Val: VecType); |
2430 | if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1) |
2431 | return CastInst::Create(Instruction::BitCast, S: VecOp, Ty: DestType); |
2432 | |
2433 | return nullptr; |
2434 | } |
2435 | |
2436 | /// Change the type of a bitwise logic operation if we can eliminate a bitcast. |
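 | /// For example, one of the folds it performs (an illustrative sketch; types |
 | /// and value names are made up): |
 | ///   %bx = bitcast <4 x i32> %x to <2 x i64> |
 | ///   %a  = and <2 x i64> %bx, %y |
 | ///   %r  = bitcast <2 x i64> %a to <4 x i32> |
 | /// becomes |
 | ///   %by = bitcast <2 x i64> %y to <4 x i32> |
 | ///   %r  = and <4 x i32> %x, %by |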
2437 | static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, |
2438 | InstCombiner::BuilderTy &Builder) { |
2439 | Type *DestTy = BitCast.getType(); |
2440 | BinaryOperator *BO; |
2441 | |
2442 | if (!match(V: BitCast.getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_BinOp(I&: BO))) || |
2443 | !BO->isBitwiseLogicOp()) |
2444 | return nullptr; |
2445 | |
2446 | // FIXME: This transform is restricted to vector types to avoid backend |
2447 | // problems caused by creating potentially illegal operations. If a fix-up is |
2448 | // added to handle that situation, we can remove this check. |
2449 | if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy()) |
2450 | return nullptr; |
2451 | |
2452 | if (DestTy->isFPOrFPVectorTy()) { |
2453 | Value *X, *Y; |
2454 | // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y)) |
2455 | if (match(V: BO->getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && |
2456 | match(V: BO->getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: Y))))) { |
2457 | if (X->getType()->isFPOrFPVectorTy() && |
2458 | Y->getType()->isIntOrIntVectorTy()) { |
2459 | Value *CastedOp = |
2460 | Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 0), DestTy: Y->getType()); |
2461 | Value *NewBO = Builder.CreateBinOp(Opc: BO->getOpcode(), LHS: CastedOp, RHS: Y); |
2462 | return CastInst::CreateBitOrPointerCast(S: NewBO, Ty: DestTy); |
2463 | } |
2464 | if (X->getType()->isIntOrIntVectorTy() && |
2465 | Y->getType()->isFPOrFPVectorTy()) { |
2466 | Value *CastedOp = |
2467 | Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 1), DestTy: X->getType()); |
2468 | Value *NewBO = Builder.CreateBinOp(Opc: BO->getOpcode(), LHS: CastedOp, RHS: X); |
2469 | return CastInst::CreateBitOrPointerCast(S: NewBO, Ty: DestTy); |
2470 | } |
2471 | } |
2472 | return nullptr; |
2473 | } |
2474 | |
2475 | if (!DestTy->isIntOrIntVectorTy()) |
2476 | return nullptr; |
2477 | |
2478 | Value *X; |
2479 | if (match(V: BO->getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && |
2480 | X->getType() == DestTy && !isa<Constant>(Val: X)) { |
2481 | // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y)) |
2482 | Value *CastedOp1 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 1), DestTy); |
2483 | return BinaryOperator::Create(Op: BO->getOpcode(), S1: X, S2: CastedOp1); |
2484 | } |
2485 | |
2486 | if (match(V: BO->getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && |
2487 | X->getType() == DestTy && !isa<Constant>(Val: X)) { |
2488 | // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X) |
2489 | Value *CastedOp0 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 0), DestTy); |
2490 | return BinaryOperator::Create(Op: BO->getOpcode(), S1: CastedOp0, S2: X); |
2491 | } |
2492 | |
2493 | // Canonicalize vector bitcasts to come before vector bitwise logic with a |
2494 | // constant. This eases recognition of special constants for later ops. |
2495 | // Example: |
2496 | // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b |
2497 | Constant *C; |
2498 | if (match(V: BO->getOperand(i_nocapture: 1), P: m_Constant(C))) { |
2499 | // bitcast (logic X, C) --> logic (bitcast X, C') |
2500 | Value *CastedOp0 = Builder.CreateBitCast(V: BO->getOperand(i_nocapture: 0), DestTy); |
2501 | Value *CastedC = Builder.CreateBitCast(V: C, DestTy); |
2502 | return BinaryOperator::Create(Op: BO->getOpcode(), S1: CastedOp0, S2: CastedC); |
2503 | } |
2504 | |
2505 | return nullptr; |
2506 | } |
2507 | |
2508 | /// Change the type of a select if we can eliminate a bitcast. |
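 | /// For example (an illustrative sketch; types and value names are made up): |
 | ///   %bx  = bitcast <4 x i32> %x to <2 x i64> |
 | ///   %sel = select i1 %c, <2 x i64> %bx, <2 x i64> %y |
 | ///   %r   = bitcast <2 x i64> %sel to <4 x i32> |
 | /// becomes |
 | ///   %by  = bitcast <2 x i64> %y to <4 x i32> |
 | ///   %r   = select i1 %c, <4 x i32> %x, <4 x i32> %by |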
2509 | static Instruction *foldBitCastSelect(BitCastInst &BitCast, |
2510 | InstCombiner::BuilderTy &Builder) { |
2511 | Value *Cond, *TVal, *FVal; |
2512 | if (!match(V: BitCast.getOperand(i_nocapture: 0), |
2513 | P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal))))) |
2514 | return nullptr; |
2515 | |
2516 | // A vector select must maintain the same number of elements in its operands. |
2517 | Type *CondTy = Cond->getType(); |
2518 | Type *DestTy = BitCast.getType(); |
2519 | if (auto *CondVTy = dyn_cast<VectorType>(Val: CondTy)) |
2520 | if (!DestTy->isVectorTy() || |
2521 | CondVTy->getElementCount() != |
2522 | cast<VectorType>(Val: DestTy)->getElementCount()) |
2523 | return nullptr; |
2524 | |
2525 | // FIXME: This transform is restricted from changing the select between |
2526 | // scalars and vectors to avoid backend problems caused by creating |
2527 | // potentially illegal operations. If a fix-up is added to handle that |
2528 | // situation, we can remove this check. |
2529 | if (DestTy->isVectorTy() != TVal->getType()->isVectorTy()) |
2530 | return nullptr; |
2531 | |
2532 | auto *Sel = cast<Instruction>(Val: BitCast.getOperand(i_nocapture: 0)); |
2533 | Value *X; |
2534 | if (match(V: TVal, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && X->getType() == DestTy && |
2535 | !isa<Constant>(Val: X)) { |
2536 | // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y)) |
2537 | Value *CastedVal = Builder.CreateBitCast(V: FVal, DestTy); |
2538 | return SelectInst::Create(C: Cond, S1: X, S2: CastedVal, NameStr: "", InsertBefore: nullptr, MDFrom: Sel); |
2539 | } |
2540 | |
2541 | if (match(V: FVal, P: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X)))) && X->getType() == DestTy && |
2542 | !isa<Constant>(Val: X)) { |
2543 | // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X) |
2544 | Value *CastedVal = Builder.CreateBitCast(V: TVal, DestTy); |
2545 | return SelectInst::Create(C: Cond, S1: CastedVal, S2: X, NameStr: "", InsertBefore: nullptr, MDFrom: Sel); |
2546 | } |
2547 | |
2548 | return nullptr; |
2549 | } |
2550 | |
2551 | /// Check if all users of CI are StoreInsts. |
2552 | static bool hasStoreUsersOnly(CastInst &CI) { |
2553 | for (User *U : CI.users()) { |
2554 | if (!isa<StoreInst>(Val: U)) |
2555 | return false; |
2556 | } |
2557 | return true; |
2558 | } |
2559 | |
2560 | /// This function handles the following case: |
2561 | /// |
2562 | /// A -> B cast |
2563 | /// PHI |
2564 | /// B -> A cast |
2565 | /// |
2566 | /// All the related PHI nodes can be replaced by new PHI nodes with type A. |
2567 | /// The uses of \p CI can be changed to the new PHI node corresponding to \p PN. |
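 | /// For example (an illustrative sketch; value names and blocks are made up): |
 | ///   %b   = bitcast <2 x float> %a to i64              ; A -> B |
 | ///   %phi = phi i64 [ %b, %entry ], [ %phi2, %loop ] |
 | ///   %a2  = bitcast i64 %phi to <2 x float>            ; B -> A |
 | /// Here the PHI web can be rewritten to operate on <2 x float> directly, so |
 | /// both bitcasts become removable. |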
2568 | Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI, |
2569 | PHINode *PN) { |
2570 | // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp. |
2571 | if (hasStoreUsersOnly(CI)) |
2572 | return nullptr; |
2573 | |
2574 | Value *Src = CI.getOperand(i_nocapture: 0); |
2575 | Type *SrcTy = Src->getType(); // Type B |
2576 | Type *DestTy = CI.getType(); // Type A |
2577 | |
2578 | SmallVector<PHINode *, 4> PhiWorklist; |
2579 | SmallSetVector<PHINode *, 4> OldPhiNodes; |
2580 | |
2581 | // Find all of the A->B casts and PHI nodes. |
2582 | // We need to inspect all related PHI nodes, but PHIs can be cyclic, so |
2583 | // OldPhiNodes is used to track all known PHI nodes. Before adding a new |
2584 | // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first. |
2585 | PhiWorklist.push_back(Elt: PN); |
2586 | OldPhiNodes.insert(X: PN); |
2587 | while (!PhiWorklist.empty()) { |
2588 | auto *OldPN = PhiWorklist.pop_back_val(); |
2589 | for (Value *IncValue : OldPN->incoming_values()) { |
2590 | if (isa<Constant>(Val: IncValue)) |
2591 | continue; |
2592 | |
2593 | if (auto *LI = dyn_cast<LoadInst>(Val: IncValue)) { |
2594 | // If there is a sequence of one or more load instructions, where each loaded |
2595 | // value is used as the address of a later load, then a bitcast is necessary |
2596 | // to change the value type; don't optimize that case. For simplicity we |
2597 | // give up if the load address comes from another load. |
2598 | Value *Addr = LI->getOperand(i_nocapture: 0); |
2599 | if (Addr == &CI || isa<LoadInst>(Val: Addr)) |
2600 | return nullptr; |
2601 | // Don't transform "load <256 x i32>, <256 x i32>*" to |
2602 | // "load x86_amx, x86_amx*", because x86_amx* is invalid. |
2603 | // TODO: Remove this check when bitcast between vector and x86_amx |
2604 | // is replaced with a specific intrinsic. |
2605 | if (DestTy->isX86_AMXTy()) |
2606 | return nullptr; |
2607 | if (LI->hasOneUse() && LI->isSimple()) |
2608 | continue; |
2609 | // If a LoadInst has more than one use, changing the type of loaded |
2610 | // value may create another bitcast. |
2611 | return nullptr; |
2612 | } |
2613 | |
2614 | if (auto *PNode = dyn_cast<PHINode>(Val: IncValue)) { |
2615 | if (OldPhiNodes.insert(X: PNode)) |
2616 | PhiWorklist.push_back(Elt: PNode); |
2617 | continue; |
2618 | } |
2619 | |
2620 | auto *BCI = dyn_cast<BitCastInst>(Val: IncValue); |
2621 | // We can't handle other instructions. |
2622 | if (!BCI) |
2623 | return nullptr; |
2624 | |
2625 | // Verify it's an A->B cast. |
2626 | Type *TyA = BCI->getOperand(i_nocapture: 0)->getType(); |
2627 | Type *TyB = BCI->getType(); |
2628 | if (TyA != DestTy || TyB != SrcTy) |
2629 | return nullptr; |
2630 | } |
2631 | } |
2632 | |
2633 | // Check that each user of each old PHI node is something that we can |
2634 | // rewrite, so that all of the old PHI nodes can be cleaned up afterwards. |
2635 | for (auto *OldPN : OldPhiNodes) { |
2636 | for (User *V : OldPN->users()) { |
2637 | if (auto *SI = dyn_cast<StoreInst>(Val: V)) { |
2638 | if (!SI->isSimple() || SI->getOperand(i_nocapture: 0) != OldPN) |
2639 | return nullptr; |
2640 | } else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) { |
2641 | // Verify it's a B->A cast. |
2642 | Type *TyB = BCI->getOperand(i_nocapture: 0)->getType(); |
2643 | Type *TyA = BCI->getType(); |
2644 | if (TyA != DestTy || TyB != SrcTy) |
2645 | return nullptr; |
2646 | } else if (auto *PHI = dyn_cast<PHINode>(Val: V)) { |
2647 | // As long as the user is another old PHI node, then even if we don't |
2648 | // rewrite it, the PHI web we're considering won't have any users |
2649 | // outside itself, so it'll be dead. |
2650 | if (!OldPhiNodes.contains(key: PHI)) |
2651 | return nullptr; |
2652 | } else { |
2653 | return nullptr; |
2654 | } |
2655 | } |
2656 | } |
2657 | |
2658 | // For each old PHI node, create a corresponding new PHI node with type A. |
2659 | SmallDenseMap<PHINode *, PHINode *> NewPNodes; |
2660 | for (auto *OldPN : OldPhiNodes) { |
2661 | Builder.SetInsertPoint(OldPN); |
2662 | PHINode *NewPN = Builder.CreatePHI(Ty: DestTy, NumReservedValues: OldPN->getNumOperands()); |
2663 | NewPNodes[OldPN] = NewPN; |
2664 | } |
2665 | |
2666 | // Fill in the operands of new PHI nodes. |
2667 | for (auto *OldPN : OldPhiNodes) { |
2668 | PHINode *NewPN = NewPNodes[OldPN]; |
2669 | for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) { |
2670 | Value *V = OldPN->getOperand(i_nocapture: j); |
2671 | Value *NewV = nullptr; |
2672 | if (auto *C = dyn_cast<Constant>(Val: V)) { |
2673 | NewV = ConstantExpr::getBitCast(C, Ty: DestTy); |
2674 | } else if (auto *LI = dyn_cast<LoadInst>(Val: V)) { |
2675 | // Explicitly perform load combine to make sure no opposing transform |
2676 | // can remove the bitcast in the meantime and trigger an infinite loop. |
2677 | Builder.SetInsertPoint(LI); |
2678 | NewV = combineLoadToNewType(LI&: *LI, NewTy: DestTy); |
2679 | // Remove the old load and its use in the old phi, which itself becomes |
2680 | // dead once the whole transform finishes. |
2681 | replaceInstUsesWith(I&: *LI, V: PoisonValue::get(T: LI->getType())); |
2682 | eraseInstFromFunction(I&: *LI); |
2683 | } else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) { |
2684 | NewV = BCI->getOperand(i_nocapture: 0); |
2685 | } else if (auto *PrevPN = dyn_cast<PHINode>(Val: V)) { |
2686 | NewV = NewPNodes[PrevPN]; |
2687 | } |
2688 | assert(NewV); |
2689 | NewPN->addIncoming(V: NewV, BB: OldPN->getIncomingBlock(i: j)); |
2690 | } |
2691 | } |
2692 | |
2693 | // Traverse all accumulated PHI nodes and process their users, |
2694 | // which are Stores and BitCasts. Without this processing, |
2695 | // NewPHI nodes could be replicated and could lead to extra |
2696 | // moves generated after DeSSA. |
2697 | // If there is a store with type B, change it to type A. |
2698 | |
2700 | // Replace users of BitCast B->A with NewPHI. These will help |
2701 | // later to get rid of a closure formed by the old PHI nodes. |
2702 | Instruction *RetVal = nullptr; |
2703 | for (auto *OldPN : OldPhiNodes) { |
2704 | PHINode *NewPN = NewPNodes[OldPN]; |
2705 | for (User *V : make_early_inc_range(Range: OldPN->users())) { |
2706 | if (auto *SI = dyn_cast<StoreInst>(Val: V)) { |
2707 | assert(SI->isSimple() && SI->getOperand(0) == OldPN); |
2708 | Builder.SetInsertPoint(SI); |
2709 | auto *NewBC = |
2710 | cast<BitCastInst>(Val: Builder.CreateBitCast(V: NewPN, DestTy: SrcTy)); |
2711 | SI->setOperand(i_nocapture: 0, Val_nocapture: NewBC); |
2712 | Worklist.push(I: SI); |
2713 | assert(hasStoreUsersOnly(*NewBC)); |
2714 | } |
2715 | else if (auto *BCI = dyn_cast<BitCastInst>(Val: V)) { |
2716 | Type *TyB = BCI->getOperand(i_nocapture: 0)->getType(); |
2717 | Type *TyA = BCI->getType(); |
2718 | assert(TyA == DestTy && TyB == SrcTy); |
2719 | (void) TyA; |
2720 | (void) TyB; |
2721 | Instruction *I = replaceInstUsesWith(I&: *BCI, V: NewPN); |
2722 | if (BCI == &CI) |
2723 | RetVal = I; |
2724 | } else if (auto *PHI = dyn_cast<PHINode>(Val: V)) { |
2725 | assert(OldPhiNodes.contains(PHI)); |
2726 | (void) PHI; |
2727 | } else { |
2728 | llvm_unreachable("all uses should be handled"); |
2729 | } |
2730 | } |
2731 | } |
2732 | |
2733 | return RetVal; |
2734 | } |
2735 | |
2736 | /// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to |
2737 | /// copysign((bitcast Y to fp), X) |
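 | /// For example (an illustrative sketch; value names are made up and %y is |
 | /// assumed known non-negative): |
 | ///   %xi = bitcast float %x to i32 |
 | ///   %sb = and i32 %xi, -2147483648    ; isolate the sign bit of %x |
 | ///   %or = or i32 %sb, %y |
 | ///   %r  = bitcast i32 %or to float |
 | /// becomes |
 | ///   %yf = bitcast i32 %y to float |
 | ///   %r  = call float @llvm.copysign.f32(float %yf, float %x) |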
2738 | static Value *foldCopySignIdioms(BitCastInst &CI, |
2739 | InstCombiner::BuilderTy &Builder, |
2740 | const SimplifyQuery &SQ) { |
2741 | Value *X, *Y; |
2742 | Type *FTy = CI.getType(); |
2743 | if (!FTy->isFPOrFPVectorTy()) |
2744 | return nullptr; |
2745 | if (!match(V: &CI, P: m_ElementWiseBitCast(Op: m_c_Or( |
2746 | L: m_And(L: m_ElementWiseBitCast(Op: m_Value(V&: X)), R: m_SignMask()), |
2747 | R: m_Value(V&: Y))))) |
2748 | return nullptr; |
2749 | if (X->getType() != FTy) |
2750 | return nullptr; |
2751 | if (!isKnownNonNegative(V: Y, SQ)) |
2752 | return nullptr; |
2753 | |
2754 | return Builder.CreateCopySign(LHS: Builder.CreateBitCast(V: Y, DestTy: FTy), RHS: X); |
2755 | } |
2756 | |
2757 | Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { |
2758 | // If the operands are integer typed then apply the integer transforms, |
2759 | // otherwise just apply the common ones. |
2760 | Value *Src = CI.getOperand(i_nocapture: 0); |
2761 | Type *SrcTy = Src->getType(); |
2762 | Type *DestTy = CI.getType(); |
2763 | |
2764 | // Get rid of casts from one type to the same type. These are useless and can |
2765 | // be replaced by the operand. |
2766 | if (DestTy == Src->getType()) |
2767 | return replaceInstUsesWith(I&: CI, V: Src); |
2768 | |
2769 | if (isa<FixedVectorType>(Val: DestTy)) { |
2770 | if (isa<IntegerType>(Val: SrcTy)) { |
2771 | // If this is a cast from an integer to vector, check to see if the input |
2772 | // is a trunc or zext of a bitcast from vector. If so, we can replace all |
2773 | // the casts with a shuffle and (potentially) a bitcast. |
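 | // For example (an illustrative sketch on a little-endian target; value |
 | // names are made up): |
 | //   %i = bitcast <4 x i32> %v to i128 |
 | //   %t = trunc i128 %i to i64 |
 | //   %r = bitcast i64 %t to <2 x i32> |
 | // can be rewritten as a shufflevector that keeps the two low elements of %v. |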
2774 | if (isa<TruncInst>(Val: Src) || isa<ZExtInst>(Val: Src)) { |
2775 | CastInst *SrcCast = cast<CastInst>(Val: Src); |
2776 | if (BitCastInst *BCIn = dyn_cast<BitCastInst>(Val: SrcCast->getOperand(i_nocapture: 0))) |
2777 | if (isa<VectorType>(Val: BCIn->getOperand(i_nocapture: 0)->getType())) |
2778 | if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts( |
2779 | InVal: BCIn->getOperand(i_nocapture: 0), DestTy: cast<VectorType>(Val: DestTy), IC&: *this)) |
2780 | return I; |
2781 | } |
2782 | |
2783 | // If the input is an 'or' instruction, we may be doing shifts and ors to |
2784 | // assemble the elements of the vector manually. Try to rip the code out |
2785 | // and replace it with insertelements. |
2786 | if (Value *V = optimizeIntegerToVectorInsertions(CI, IC&: *this)) |
2787 | return replaceInstUsesWith(I&: CI, V); |
2788 | } |
2789 | } |
2790 | |
2791 | if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(Val: SrcTy)) { |
2792 | if (SrcVTy->getNumElements() == 1) { |
2793 | // If our destination is not a vector, then make this a straight |
2794 | // scalar-scalar cast. |
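 | // For example (an illustrative sketch; value names are made up): |
 | //   %r = bitcast <1 x i64> %v to double |
 | // becomes |
 | //   %e = extractelement <1 x i64> %v, i32 0 |
 | //   %r = bitcast i64 %e to double |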
2795 | if (!DestTy->isVectorTy()) { |
2796 | Value *Elem = |
2797 | Builder.CreateExtractElement(Vec: Src, |
2798 | Idx: Constant::getNullValue(Ty: Type::getInt32Ty(C&: CI.getContext()))); |
2799 | return CastInst::Create(Instruction::BitCast, S: Elem, Ty: DestTy); |
2800 | } |
2801 | |
2802 | // Otherwise, see if our source is an insert. If so, then use the scalar |
2803 | // component directly: |
2804 | // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m> |
2805 | if (auto *InsElt = dyn_cast<InsertElementInst>(Val: Src)) |
2806 | return new BitCastInst(InsElt->getOperand(i_nocapture: 1), DestTy); |
2807 | } |
2808 | |
2809 | // Convert an artificial vector insert into more analyzable bitwise logic. |
2810 | unsigned BitWidth = DestTy->getScalarSizeInBits(); |
2811 | Value *X, *Y; |
2812 | uint64_t IndexC; |
2813 | if (match(V: Src, P: m_OneUse(SubPattern: m_InsertElt(Val: m_OneUse(SubPattern: m_BitCast(Op: m_Value(V&: X))), |
2814 | Elt: m_Value(V&: Y), Idx: m_ConstantInt(V&: IndexC)))) && |
2815 | DestTy->isIntegerTy() && X->getType() == DestTy && |
2816 | Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) { |
2817 | // Adjust for big endian - the LSBs are at the high index. |
2818 | if (DL.isBigEndian()) |
2819 | IndexC = SrcVTy->getNumElements() - 1 - IndexC; |
2820 | |
2821 | // We only handle (endian-normalized) insert to index 0. Any other insert |
2822 | // would require a left-shift, so that is an extra instruction. |
2823 | if (IndexC == 0) { |
2824 | // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y) |
2825 | unsigned EltWidth = Y->getType()->getScalarSizeInBits(); |
2826 | APInt MaskC = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - EltWidth); |
2827 | Value *AndX = Builder.CreateAnd(LHS: X, RHS: MaskC); |
2828 | Value *ZextY = Builder.CreateZExt(V: Y, DestTy); |
2829 | return BinaryOperator::CreateOr(V1: AndX, V2: ZextY); |
2830 | } |
2831 | } |
2832 | } |
2833 | |
2834 | if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Src)) { |
2835 | // Okay, we have (bitcast (shuffle ..)). Check to see if this is |
2836 | // a bitcast to a vector with the same # elts. |
2837 | Value *ShufOp0 = Shuf->getOperand(i_nocapture: 0); |
2838 | Value *ShufOp1 = Shuf->getOperand(i_nocapture: 1); |
2839 | auto ShufElts = cast<VectorType>(Val: Shuf->getType())->getElementCount(); |
2840 | auto SrcVecElts = cast<VectorType>(Val: ShufOp0->getType())->getElementCount(); |
2841 | if (Shuf->hasOneUse() && DestTy->isVectorTy() && |
2842 | cast<VectorType>(Val: DestTy)->getElementCount() == ShufElts && |
2843 | ShufElts == SrcVecElts) { |
2844 | BitCastInst *Tmp; |
2845 | // If either of the operands is a cast from CI.getType(), then |
2846 | // evaluating the shuffle in the casted destination's type will allow |
2847 | // us to eliminate at least one cast. |
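 | // For example (an illustrative sketch; value names are made up): |
 | //   %bx = bitcast <4 x i32> %x to <4 x float> |
 | //   %s  = shufflevector <4 x float> %bx, <4 x float> %y, <4 x i32> <i32 0, i32 5, i32 1, i32 4> |
 | //   %r  = bitcast <4 x float> %s to <4 x i32> |
 | // becomes a shufflevector of %x and (bitcast <4 x float> %y to <4 x i32>) |
 | // with the same mask, removing at least one bitcast. |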
2848 | if (((Tmp = dyn_cast<BitCastInst>(Val: ShufOp0)) && |
2849 | Tmp->getOperand(i_nocapture: 0)->getType() == DestTy) || |
2850 | ((Tmp = dyn_cast<BitCastInst>(Val: ShufOp1)) && |
2851 | Tmp->getOperand(i_nocapture: 0)->getType() == DestTy)) { |
2852 | Value *LHS = Builder.CreateBitCast(V: ShufOp0, DestTy); |
2853 | Value *RHS = Builder.CreateBitCast(V: ShufOp1, DestTy); |
2854 | // Return a new shuffle vector. Use the same element IDs, as we |
2855 | // know the vector types match #elts. |
2856 | return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask()); |
2857 | } |
2858 | } |
2859 | |
2860 | // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized |
2861 | // as a byte/bit swap: |
2862 | // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X) |
2863 | // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X) |
2864 | if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 && |
2865 | Shuf->hasOneUse() && Shuf->isReverse()) { |
2866 | unsigned IntrinsicNum = 0; |
2867 | if (DL.isLegalInteger(Width: DestTy->getScalarSizeInBits()) && |
2868 | SrcTy->getScalarSizeInBits() == 8) { |
2869 | IntrinsicNum = Intrinsic::bswap; |
2870 | } else if (SrcTy->getScalarSizeInBits() == 1) { |
2871 | IntrinsicNum = Intrinsic::bitreverse; |
2872 | } |
2873 | if (IntrinsicNum != 0) { |
2874 | assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask"); |
2875 | assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op"); |
2876 | Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration( |
2877 | M: CI.getModule(), id: IntrinsicNum, Tys: DestTy); |
2878 | Value *ScalarX = Builder.CreateBitCast(V: ShufOp0, DestTy); |
2879 | return CallInst::Create(Func: BswapOrBitreverse, Args: {ScalarX}); |
2880 | } |
2881 | } |
2882 | } |
2883 | |
2884 | // Handle the A->B->A cast when there is an intervening PHI node. |
2885 | if (PHINode *PN = dyn_cast<PHINode>(Val: Src)) |
2886 | if (Instruction *I = optimizeBitCastFromPhi(CI, PN)) |
2887 | return I; |
2888 | |
2889 | if (Instruction *I = canonicalizeBitCastExtElt(BitCast&: CI, IC&: *this)) |
2890 | return I; |
2891 | |
2892 | if (Instruction *I = foldBitCastBitwiseLogic(BitCast&: CI, Builder)) |
2893 | return I; |
2894 | |
2895 | if (Instruction *I = foldBitCastSelect(BitCast&: CI, Builder)) |
2896 | return I; |
2897 | |
2898 | if (Value *V = foldCopySignIdioms(CI, Builder, SQ: SQ.getWithInstruction(I: &CI))) |
2899 | return replaceInstUsesWith(I&: CI, V); |
2900 | |
2901 | return commonCastTransforms(CI); |
2902 | } |
2903 | |
2904 | Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) { |
2905 | return commonCastTransforms(CI); |
2906 | } |
2907 | |