1 | //===- InstCombineAndOrXor.cpp --------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the visitAnd, visitOr, and visitXor functions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "InstCombineInternal.h" |
14 | #include "llvm/Analysis/CmpInstAnalysis.h" |
15 | #include "llvm/Analysis/InstructionSimplify.h" |
16 | #include "llvm/IR/ConstantRange.h" |
17 | #include "llvm/IR/Intrinsics.h" |
18 | #include "llvm/IR/PatternMatch.h" |
19 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
20 | #include "llvm/Transforms/Utils/Local.h" |
21 | |
22 | using namespace llvm; |
23 | using namespace PatternMatch; |
24 | |
25 | #define DEBUG_TYPE "instcombine" |
26 | |
27 | /// This is the complement of getICmpCode, which turns an opcode and two |
28 | /// operands into either a constant true or false, or a brand new ICmp |
29 | /// instruction. The sign is passed in to determine which kind of predicate to |
30 | /// use in the new icmp instruction. |
31 | static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS, |
32 | InstCombiner::BuilderTy &Builder) { |
33 | ICmpInst::Predicate NewPred; |
34 | if (Constant *TorF = getPredForICmpCode(Code, Sign, OpTy: LHS->getType(), Pred&: NewPred)) |
35 | return TorF; |
36 | return Builder.CreateICmp(P: NewPred, LHS, RHS); |
37 | } |
38 | |
39 | /// This is the complement of getFCmpCode, which turns an opcode and two |
40 | /// operands into either a FCmp instruction, or a true/false constant. |
41 | static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, |
42 | InstCombiner::BuilderTy &Builder) { |
43 | FCmpInst::Predicate NewPred; |
44 | if (Constant *TorF = getPredForFCmpCode(Code, OpTy: LHS->getType(), Pred&: NewPred)) |
45 | return TorF; |
46 | return Builder.CreateFCmp(P: NewPred, LHS, RHS); |
47 | } |
48 | |
49 | /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise |
50 | /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates |
51 | /// whether to treat V, Lo, and Hi as signed or not. |
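/// For example, with Lo = 5, Hi = 10 and Inside = true, this emits
/// ((V - 5) u< 5), which holds exactly when V is in the range [5, 10).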
52 | Value *InstCombinerImpl::insertRangeTest(Value *V, const APInt &Lo, |
53 | const APInt &Hi, bool isSigned, |
54 | bool Inside) { |
  assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) &&
         "Lo is not < Hi in range emission code!");
57 | |
58 | Type *Ty = V->getType(); |
59 | |
60 | // V >= Min && V < Hi --> V < Hi |
61 | // V < Min || V >= Hi --> V >= Hi |
62 | ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE; |
63 | if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) { |
64 | Pred = isSigned ? ICmpInst::getSignedPredicate(pred: Pred) : Pred; |
65 | return Builder.CreateICmp(P: Pred, LHS: V, RHS: ConstantInt::get(Ty, V: Hi)); |
66 | } |
67 | |
68 | // V >= Lo && V < Hi --> V - Lo u< Hi - Lo |
69 | // V < Lo || V >= Hi --> V - Lo u>= Hi - Lo |
70 | Value *VMinusLo = |
71 | Builder.CreateSub(LHS: V, RHS: ConstantInt::get(Ty, V: Lo), Name: V->getName() + ".off" ); |
72 | Constant *HiMinusLo = ConstantInt::get(Ty, V: Hi - Lo); |
73 | return Builder.CreateICmp(P: Pred, LHS: VMinusLo, RHS: HiMinusLo); |
74 | } |
75 | |
76 | /// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns |
77 | /// that can be simplified. |
78 | /// One of A and B is considered the mask. The other is the value. This is |
79 | /// described as the "AMask" or "BMask" part of the enum. If the enum contains |
80 | /// only "Mask", then both A and B can be considered masks. If A is the mask, |
81 | /// then it was proven that (A & C) == C. This is trivial if C == A or C == 0. |
82 | /// If both A and C are constants, this proof is also easy. |
83 | /// For the following explanations, we assume that A is the mask. |
84 | /// |
85 | /// "AllOnes" declares that the comparison is true only if (A & B) == A or all |
86 | /// bits of A are set in B. |
87 | /// Example: (icmp eq (A & 3), 3) -> AMask_AllOnes |
88 | /// |
89 | /// "AllZeros" declares that the comparison is true only if (A & B) == 0 or all |
90 | /// bits of A are cleared in B. |
/// Example: (icmp eq (A & 3), 0) -> Mask_AllZeros
92 | /// |
93 | /// "Mixed" declares that (A & B) == C and C might or might not contain any |
94 | /// number of one bits and zero bits. |
95 | /// Example: (icmp eq (A & 3), 1) -> AMask_Mixed |
96 | /// |
/// "Not" means that in the above descriptions "==" should be replaced by "!=".
98 | /// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes |
99 | /// |
100 | /// If the mask A contains a single bit, then the following is equivalent: |
101 | /// (icmp eq (A & B), A) equals (icmp ne (A & B), 0) |
102 | /// (icmp ne (A & B), A) equals (icmp eq (A & B), 0) |
103 | enum MaskedICmpType { |
104 | AMask_AllOnes = 1, |
105 | AMask_NotAllOnes = 2, |
106 | BMask_AllOnes = 4, |
107 | BMask_NotAllOnes = 8, |
108 | Mask_AllZeros = 16, |
109 | Mask_NotAllZeros = 32, |
110 | AMask_Mixed = 64, |
111 | AMask_NotMixed = 128, |
112 | BMask_Mixed = 256, |
113 | BMask_NotMixed = 512 |
114 | }; |
115 | |
116 | /// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C) |
117 | /// satisfies. |
118 | static unsigned getMaskedICmpType(Value *A, Value *B, Value *C, |
119 | ICmpInst::Predicate Pred) { |
120 | const APInt *ConstA = nullptr, *ConstB = nullptr, *ConstC = nullptr; |
121 | match(V: A, P: m_APInt(Res&: ConstA)); |
122 | match(V: B, P: m_APInt(Res&: ConstB)); |
123 | match(V: C, P: m_APInt(Res&: ConstC)); |
124 | bool IsEq = (Pred == ICmpInst::ICMP_EQ); |
125 | bool IsAPow2 = ConstA && ConstA->isPowerOf2(); |
126 | bool IsBPow2 = ConstB && ConstB->isPowerOf2(); |
127 | unsigned MaskVal = 0; |
128 | if (ConstC && ConstC->isZero()) { |
    // If C is zero, then both A and B qualify as masks.
130 | MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed) |
131 | : (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed)); |
132 | if (IsAPow2) |
133 | MaskVal |= (IsEq ? (AMask_NotAllOnes | AMask_NotMixed) |
134 | : (AMask_AllOnes | AMask_Mixed)); |
135 | if (IsBPow2) |
136 | MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed) |
137 | : (BMask_AllOnes | BMask_Mixed)); |
138 | return MaskVal; |
139 | } |
140 | |
141 | if (A == C) { |
142 | MaskVal |= (IsEq ? (AMask_AllOnes | AMask_Mixed) |
143 | : (AMask_NotAllOnes | AMask_NotMixed)); |
144 | if (IsAPow2) |
145 | MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed) |
146 | : (Mask_AllZeros | AMask_Mixed)); |
147 | } else if (ConstA && ConstC && ConstC->isSubsetOf(RHS: *ConstA)) { |
148 | MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed); |
149 | } |
150 | |
151 | if (B == C) { |
152 | MaskVal |= (IsEq ? (BMask_AllOnes | BMask_Mixed) |
153 | : (BMask_NotAllOnes | BMask_NotMixed)); |
154 | if (IsBPow2) |
155 | MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed) |
156 | : (Mask_AllZeros | BMask_Mixed)); |
157 | } else if (ConstB && ConstC && ConstC->isSubsetOf(RHS: *ConstB)) { |
158 | MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed); |
159 | } |
160 | |
161 | return MaskVal; |
162 | } |
163 | |
164 | /// Convert an analysis of a masked ICmp into its equivalent if all boolean |
165 | /// operations had the opposite sense. Since each "NotXXX" flag (recording !=) |
166 | /// is adjacent to the corresponding normal flag (recording ==), this just |
167 | /// involves swapping those bits over. |
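/// For example, conjugateICmpMask(AMask_AllOnes | Mask_NotAllZeros) returns
/// AMask_NotAllOnes | Mask_AllZeros.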
168 | static unsigned conjugateICmpMask(unsigned Mask) { |
169 | unsigned NewMask; |
170 | NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros | |
171 | AMask_Mixed | BMask_Mixed)) |
172 | << 1; |
173 | |
174 | NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros | |
175 | AMask_NotMixed | BMask_NotMixed)) |
176 | >> 1; |
177 | |
178 | return NewMask; |
179 | } |
180 | |
181 | // Adapts the external decomposeBitTestICmp for local use. |
182 | static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, |
183 | Value *&X, Value *&Y, Value *&Z) { |
184 | APInt Mask; |
185 | if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask)) |
186 | return false; |
187 | |
188 | Y = ConstantInt::get(Ty: X->getType(), V: Mask); |
189 | Z = ConstantInt::get(Ty: X->getType(), V: 0); |
190 | return true; |
191 | } |
192 | |
193 | /// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E). |
194 | /// Return the pattern classes (from MaskedICmpType) for the left hand side and |
195 | /// the right hand side as a pair. |
196 | /// LHS and RHS are the left hand side and the right hand side ICmps and PredL |
197 | /// and PredR are their predicates, respectively. |
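/// For example, for ((icmp eq (X & 3), 1) & (icmp eq (X & 12), 4)) this sets
/// A = X, B = 3, C = 1, D = 12 and E = 4.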
198 | static std::optional<std::pair<unsigned, unsigned>> getMaskedTypeForICmpPair( |
199 | Value *&A, Value *&B, Value *&C, Value *&D, Value *&E, ICmpInst *LHS, |
200 | ICmpInst *RHS, ICmpInst::Predicate &PredL, ICmpInst::Predicate &PredR) { |
201 | // Don't allow pointers. Splat vectors are fine. |
202 | if (!LHS->getOperand(i_nocapture: 0)->getType()->isIntOrIntVectorTy() || |
203 | !RHS->getOperand(i_nocapture: 0)->getType()->isIntOrIntVectorTy()) |
204 | return std::nullopt; |
205 | |
206 | // Here comes the tricky part: |
207 | // LHS might be of the form L11 & L12 == X, X == L21 & L22, |
208 | // and L11 & L12 == L21 & L22. The same goes for RHS. |
  // Now we must find the components L** and R** that are equal, so that we
  // can extract the parameters A, B, C, D, and E for the canonical form
  // above.
212 | Value *L1 = LHS->getOperand(i_nocapture: 0); |
213 | Value *L2 = LHS->getOperand(i_nocapture: 1); |
214 | Value *L11, *L12, *L21, *L22; |
215 | // Check whether the icmp can be decomposed into a bit test. |
216 | if (decomposeBitTestICmp(LHS: L1, RHS: L2, Pred&: PredL, X&: L11, Y&: L12, Z&: L2)) { |
217 | L21 = L22 = L1 = nullptr; |
218 | } else { |
219 | // Look for ANDs in the LHS icmp. |
220 | if (!match(V: L1, P: m_And(L: m_Value(V&: L11), R: m_Value(V&: L12)))) { |
221 | // Any icmp can be viewed as being trivially masked; if it allows us to |
222 | // remove one, it's worth it. |
223 | L11 = L1; |
224 | L12 = Constant::getAllOnesValue(Ty: L1->getType()); |
225 | } |
226 | |
227 | if (!match(V: L2, P: m_And(L: m_Value(V&: L21), R: m_Value(V&: L22)))) { |
228 | L21 = L2; |
229 | L22 = Constant::getAllOnesValue(Ty: L2->getType()); |
230 | } |
231 | } |
232 | |
  // Bail if LHS was an icmp that can't be decomposed into an equality.
234 | if (!ICmpInst::isEquality(P: PredL)) |
235 | return std::nullopt; |
236 | |
237 | Value *R1 = RHS->getOperand(i_nocapture: 0); |
238 | Value *R2 = RHS->getOperand(i_nocapture: 1); |
239 | Value *R11, *R12; |
240 | bool Ok = false; |
241 | if (decomposeBitTestICmp(LHS: R1, RHS: R2, Pred&: PredR, X&: R11, Y&: R12, Z&: R2)) { |
242 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { |
243 | A = R11; |
244 | D = R12; |
245 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { |
246 | A = R12; |
247 | D = R11; |
248 | } else { |
249 | return std::nullopt; |
250 | } |
251 | E = R2; |
252 | R1 = nullptr; |
253 | Ok = true; |
254 | } else { |
255 | if (!match(V: R1, P: m_And(L: m_Value(V&: R11), R: m_Value(V&: R12)))) { |
256 | // As before, model no mask as a trivial mask if it'll let us do an |
257 | // optimization. |
258 | R11 = R1; |
259 | R12 = Constant::getAllOnesValue(Ty: R1->getType()); |
260 | } |
261 | |
262 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { |
263 | A = R11; |
264 | D = R12; |
265 | E = R2; |
266 | Ok = true; |
267 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { |
268 | A = R12; |
269 | D = R11; |
270 | E = R2; |
271 | Ok = true; |
272 | } |
273 | } |
274 | |
  // Bail if RHS was an icmp that can't be decomposed into an equality.
276 | if (!ICmpInst::isEquality(P: PredR)) |
277 | return std::nullopt; |
278 | |
279 | // Look for ANDs on the right side of the RHS icmp. |
280 | if (!Ok) { |
281 | if (!match(V: R2, P: m_And(L: m_Value(V&: R11), R: m_Value(V&: R12)))) { |
282 | R11 = R2; |
283 | R12 = Constant::getAllOnesValue(Ty: R2->getType()); |
284 | } |
285 | |
286 | if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { |
287 | A = R11; |
288 | D = R12; |
289 | E = R1; |
290 | Ok = true; |
291 | } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { |
292 | A = R12; |
293 | D = R11; |
294 | E = R1; |
295 | Ok = true; |
296 | } else { |
297 | return std::nullopt; |
298 | } |
299 | |
300 | assert(Ok && "Failed to find AND on the right side of the RHS icmp." ); |
301 | } |
302 | |
303 | if (L11 == A) { |
304 | B = L12; |
305 | C = L2; |
306 | } else if (L12 == A) { |
307 | B = L11; |
308 | C = L2; |
309 | } else if (L21 == A) { |
310 | B = L22; |
311 | C = L1; |
312 | } else if (L22 == A) { |
313 | B = L21; |
314 | C = L1; |
315 | } |
316 | |
317 | unsigned LeftType = getMaskedICmpType(A, B, C, Pred: PredL); |
318 | unsigned RightType = getMaskedICmpType(A, B: D, C: E, Pred: PredR); |
319 | return std::optional<std::pair<unsigned, unsigned>>( |
320 | std::make_pair(x&: LeftType, y&: RightType)); |
321 | } |
322 | |
323 | /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single |
324 | /// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros |
325 | /// and the right hand side is of type BMask_Mixed. For example, |
326 | /// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8). |
327 | /// Also used for logical and/or, must be poison safe. |
328 | static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( |
329 | ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, |
330 | Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, |
331 | InstCombiner::BuilderTy &Builder) { |
332 | // We are given the canonical form: |
333 | // (icmp ne (A & B), 0) & (icmp eq (A & D), E). |
334 | // where D & E == E. |
335 | // |
336 | // If IsAnd is false, we get it in negated form: |
337 | // (icmp eq (A & B), 0) | (icmp ne (A & D), E) -> |
338 | // !((icmp ne (A & B), 0) & (icmp eq (A & D), E)). |
339 | // |
  // We currently handle only the case where B, C, D, and E are constants.
341 | // |
342 | const APInt *BCst, *CCst, *DCst, *OrigECst; |
343 | if (!match(V: B, P: m_APInt(Res&: BCst)) || !match(V: C, P: m_APInt(Res&: CCst)) || |
344 | !match(V: D, P: m_APInt(Res&: DCst)) || !match(V: E, P: m_APInt(Res&: OrigECst))) |
345 | return nullptr; |
346 | |
347 | ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; |
348 | |
349 | // Update E to the canonical form when D is a power of two and RHS is |
350 | // canonicalized as, |
351 | // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or |
352 | // (icmp ne (A & D), D) -> (icmp eq (A & D), 0). |
353 | APInt ECst = *OrigECst; |
354 | if (PredR != NewCC) |
355 | ECst ^= *DCst; |
356 | |
  // If B or D is zero, skip: in that case LHS or RHS can be trivially folded
  // by other folding rules and this pattern won't apply any more.
359 | if (*BCst == 0 || *DCst == 0) |
360 | return nullptr; |
361 | |
  // If B and D don't intersect, i.e. (B & D) == 0, there is no folding
  // because we can't deduce anything from it.
364 | // For example, |
365 | // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding. |
366 | if ((*BCst & *DCst) == 0) |
367 | return nullptr; |
368 | |
369 | // If the following two conditions are met: |
370 | // |
371 | // 1. mask B covers only a single bit that's not covered by mask D, that is, |
372 | // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of |
373 | // B and D has only one bit set) and, |
374 | // |
  // 2. RHS (and E) indicates that the rest of B's bits are zero (in other
  // words, the bits of B that are shared with D are zero in E), that is,
  // ((B & D) & E) == 0
377 | // |
378 | // then that single bit in B must be one and thus the whole expression can be |
379 | // folded to |
380 | // (A & (B | D)) == (B & (B ^ D)) | E. |
381 | // |
382 | // For example, |
383 | // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9) |
384 | // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8) |
385 | if ((((*BCst & *DCst) & ECst) == 0) && |
386 | (*BCst & (*BCst ^ *DCst)).isPowerOf2()) { |
387 | APInt BorD = *BCst | *DCst; |
388 | APInt BandBxorDorE = (*BCst & (*BCst ^ *DCst)) | ECst; |
389 | Value *NewMask = ConstantInt::get(Ty: A->getType(), V: BorD); |
390 | Value *NewMaskedValue = ConstantInt::get(Ty: A->getType(), V: BandBxorDorE); |
391 | Value *NewAnd = Builder.CreateAnd(LHS: A, RHS: NewMask); |
392 | return Builder.CreateICmp(P: NewCC, LHS: NewAnd, RHS: NewMaskedValue); |
393 | } |
394 | |
395 | auto IsSubSetOrEqual = [](const APInt *C1, const APInt *C2) { |
396 | return (*C1 & *C2) == *C1; |
397 | }; |
398 | auto IsSuperSetOrEqual = [](const APInt *C1, const APInt *C2) { |
399 | return (*C1 & *C2) == *C2; |
400 | }; |
401 | |
402 | // In the following, we consider only the cases where B is a superset of D, B |
403 | // is a subset of D, or B == D because otherwise there's at least one bit |
404 | // covered by B but not D, in which case we can't deduce much from it, so |
405 | // no folding (aside from the single must-be-one bit case right above.) |
406 | // For example, |
407 | // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding. |
408 | if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst)) |
409 | return nullptr; |
410 | |
411 | // At this point, either B is a superset of D, B is a subset of D or B == D. |
412 | |
  // If E is zero and B is a subset of (or equal to) D, LHS and RHS contradict
  // and the whole expression becomes false (or true if negated); otherwise,
  // no folding.
416 | // For example, |
417 | // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false. |
418 | // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding. |
419 | if (ECst.isZero()) { |
420 | if (IsSubSetOrEqual(BCst, DCst)) |
421 | return ConstantInt::get(Ty: LHS->getType(), V: !IsAnd); |
422 | return nullptr; |
423 | } |
424 | |
425 | // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B == |
426 | // D. If B is a superset of (or equal to) D, since E is not zero, LHS is |
427 | // subsumed by RHS (RHS implies LHS.) So the whole expression becomes |
428 | // RHS. For example, |
429 | // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). |
430 | // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). |
431 | if (IsSuperSetOrEqual(BCst, DCst)) |
432 | return RHS; |
  // Otherwise, B is a subset of D. If B and E have a common bit set,
  // i.e. (B & E) != 0, then LHS is subsumed by RHS. For example,
435 | // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). |
436 | assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code" ); |
437 | if ((*BCst & ECst) != 0) |
438 | return RHS; |
439 | // Otherwise, LHS and RHS contradict and the whole expression becomes false |
440 | // (or true if negated.) For example, |
441 | // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false. |
442 | // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false. |
443 | return ConstantInt::get(Ty: LHS->getType(), V: !IsAnd); |
444 | } |
445 | |
446 | /// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single |
447 | /// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side |
448 | /// aren't of the common mask pattern type. |
449 | /// Also used for logical and/or, must be poison safe. |
450 | static Value *foldLogOpOfMaskedICmpsAsymmetric( |
451 | ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C, |
452 | Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, |
453 | unsigned LHSMask, unsigned RHSMask, InstCombiner::BuilderTy &Builder) { |
454 | assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && |
455 | "Expected equality predicates for masked type of icmps." ); |
456 | // Handle Mask_NotAllZeros-BMask_Mixed cases. |
457 | // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or |
458 | // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E) |
459 | // which gets swapped to |
460 | // (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C). |
461 | if (!IsAnd) { |
462 | LHSMask = conjugateICmpMask(Mask: LHSMask); |
463 | RHSMask = conjugateICmpMask(Mask: RHSMask); |
464 | } |
465 | if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) { |
466 | if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( |
467 | LHS, RHS, IsAnd, A, B, C, D, E, |
468 | PredL, PredR, Builder)) { |
469 | return V; |
470 | } |
471 | } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) { |
472 | if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( |
473 | LHS: RHS, RHS: LHS, IsAnd, A, B: D, C: E, D: B, E: C, |
474 | PredL: PredR, PredR: PredL, Builder)) { |
475 | return V; |
476 | } |
477 | } |
478 | return nullptr; |
479 | } |
480 | |
481 | /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) |
482 | /// into a single (icmp(A & X) ==/!= Y). |
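/// For example, ((icmp eq (A & 12), 0) & (icmp eq (A & 3), 0)) is folded to
/// (icmp eq (A & 15), 0).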
483 | static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, |
484 | bool IsLogical, |
485 | InstCombiner::BuilderTy &Builder) { |
486 | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; |
487 | ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); |
488 | std::optional<std::pair<unsigned, unsigned>> MaskPair = |
489 | getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR); |
490 | if (!MaskPair) |
491 | return nullptr; |
492 | assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && |
493 | "Expected equality predicates for masked type of icmps." ); |
494 | unsigned LHSMask = MaskPair->first; |
495 | unsigned RHSMask = MaskPair->second; |
496 | unsigned Mask = LHSMask & RHSMask; |
497 | if (Mask == 0) { |
498 | // Even if the two sides don't share a common pattern, check if folding can |
499 | // still happen. |
500 | if (Value *V = foldLogOpOfMaskedICmpsAsymmetric( |
501 | LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask, |
502 | Builder)) |
503 | return V; |
504 | return nullptr; |
505 | } |
506 | |
507 | // In full generality: |
508 | // (icmp (A & B) Op C) | (icmp (A & D) Op E) |
509 | // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ] |
510 | // |
511 | // If the latter can be converted into (icmp (A & X) Op Y) then the former is |
512 | // equivalent to (icmp (A & X) !Op Y). |
513 | // |
514 | // Therefore, we can pretend for the rest of this function that we're dealing |
515 | // with the conjunction, provided we flip the sense of any comparisons (both |
516 | // input and output). |
517 | |
518 | // In most cases we're going to produce an EQ for the "&&" case. |
519 | ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; |
520 | if (!IsAnd) { |
521 | // Convert the masking analysis into its equivalent with negated |
522 | // comparisons. |
523 | Mask = conjugateICmpMask(Mask); |
524 | } |
525 | |
526 | if (Mask & Mask_AllZeros) { |
527 | // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) |
528 | // -> (icmp eq (A & (B|D)), 0) |
529 | if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(V: D)) |
530 | return nullptr; // TODO: Use freeze? |
531 | Value *NewOr = Builder.CreateOr(LHS: B, RHS: D); |
532 | Value *NewAnd = Builder.CreateAnd(LHS: A, RHS: NewOr); |
533 | // We can't use C as zero because we might actually handle |
534 | // (icmp ne (A & B), B) & (icmp ne (A & D), D) |
    // with B and D having a single bit set.
536 | Value *Zero = Constant::getNullValue(Ty: A->getType()); |
537 | return Builder.CreateICmp(P: NewCC, LHS: NewAnd, RHS: Zero); |
538 | } |
539 | if (Mask & BMask_AllOnes) { |
540 | // (icmp eq (A & B), B) & (icmp eq (A & D), D) |
541 | // -> (icmp eq (A & (B|D)), (B|D)) |
542 | if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(V: D)) |
543 | return nullptr; // TODO: Use freeze? |
544 | Value *NewOr = Builder.CreateOr(LHS: B, RHS: D); |
545 | Value *NewAnd = Builder.CreateAnd(LHS: A, RHS: NewOr); |
546 | return Builder.CreateICmp(P: NewCC, LHS: NewAnd, RHS: NewOr); |
547 | } |
548 | if (Mask & AMask_AllOnes) { |
549 | // (icmp eq (A & B), A) & (icmp eq (A & D), A) |
550 | // -> (icmp eq (A & (B&D)), A) |
551 | if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(V: D)) |
552 | return nullptr; // TODO: Use freeze? |
553 | Value *NewAnd1 = Builder.CreateAnd(LHS: B, RHS: D); |
554 | Value *NewAnd2 = Builder.CreateAnd(LHS: A, RHS: NewAnd1); |
555 | return Builder.CreateICmp(P: NewCC, LHS: NewAnd2, RHS: A); |
556 | } |
557 | |
558 | // Remaining cases assume at least that B and D are constant, and depend on |
559 | // their actual values. This isn't strictly necessary, just a "handle the |
560 | // easy cases for now" decision. |
561 | const APInt *ConstB, *ConstD; |
562 | if (!match(V: B, P: m_APInt(Res&: ConstB)) || !match(V: D, P: m_APInt(Res&: ConstD))) |
563 | return nullptr; |
564 | |
565 | if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) { |
566 | // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and |
567 | // (icmp ne (A & B), B) & (icmp ne (A & D), D) |
568 | // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0) |
569 | // Only valid if one of the masks is a superset of the other (check "B&D" is |
570 | // the same as either B or D). |
571 | APInt NewMask = *ConstB & *ConstD; |
572 | if (NewMask == *ConstB) |
573 | return LHS; |
574 | else if (NewMask == *ConstD) |
575 | return RHS; |
576 | } |
577 | |
578 | if (Mask & AMask_NotAllOnes) { |
    // (icmp ne (A & B), A) & (icmp ne (A & D), A)
    //     -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
581 | // Only valid if one of the masks is a superset of the other (check "B|D" is |
582 | // the same as either B or D). |
583 | APInt NewMask = *ConstB | *ConstD; |
584 | if (NewMask == *ConstB) |
585 | return LHS; |
586 | else if (NewMask == *ConstD) |
587 | return RHS; |
588 | } |
589 | |
590 | if (Mask & (BMask_Mixed | BMask_NotMixed)) { |
591 | // Mixed: |
592 | // (icmp eq (A & B), C) & (icmp eq (A & D), E) |
593 | // We already know that B & C == C && D & E == E. |
594 | // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of |
595 | // C and E, which are shared by both the mask B and the mask D, don't |
596 | // contradict, then we can transform to |
597 | // -> (icmp eq (A & (B|D)), (C|E)) |
598 | // Currently, we only handle the case of B, C, D, and E being constant. |
599 | // We can't simply use C and E because we might actually handle |
600 | // (icmp ne (A & B), B) & (icmp eq (A & D), D) |
    // with B and D having a single bit set.
602 | |
603 | // NotMixed: |
604 | // (icmp ne (A & B), C) & (icmp ne (A & D), E) |
605 | // -> (icmp ne (A & (B & D)), (C & E)) |
606 | // Check the intersection (B & D) for inequality. |
    // Assume that (B & D) == B || (B & D) == D, i.e. B/D is a subset of D/B
608 | // and (B & D) & (C ^ E) == 0, bits of C and E, which are shared by both the |
609 | // B and the D, don't contradict. |
    // Note that we can assume (~B & C) == 0 && (~D & E) == 0; a previous
    // transform would already have deleted these icmps if that did not hold.
612 | |
613 | const APInt *OldConstC, *OldConstE; |
614 | if (!match(V: C, P: m_APInt(Res&: OldConstC)) || !match(V: E, P: m_APInt(Res&: OldConstE))) |
615 | return nullptr; |
616 | |
617 | auto FoldBMixed = [&](ICmpInst::Predicate CC, bool IsNot) -> Value * { |
618 | CC = IsNot ? CmpInst::getInversePredicate(pred: CC) : CC; |
619 | const APInt ConstC = PredL != CC ? *ConstB ^ *OldConstC : *OldConstC; |
620 | const APInt ConstE = PredR != CC ? *ConstD ^ *OldConstE : *OldConstE; |
621 | |
622 | if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue()) |
623 | return IsNot ? nullptr : ConstantInt::get(Ty: LHS->getType(), V: !IsAnd); |
624 | |
625 | if (IsNot && !ConstB->isSubsetOf(RHS: *ConstD) && !ConstD->isSubsetOf(RHS: *ConstB)) |
626 | return nullptr; |
627 | |
628 | APInt BD, CE; |
629 | if (IsNot) { |
630 | BD = *ConstB & *ConstD; |
631 | CE = ConstC & ConstE; |
632 | } else { |
633 | BD = *ConstB | *ConstD; |
634 | CE = ConstC | ConstE; |
635 | } |
636 | Value *NewAnd = Builder.CreateAnd(LHS: A, RHS: BD); |
637 | Value *CEVal = ConstantInt::get(Ty: A->getType(), V: CE); |
638 | return Builder.CreateICmp(P: CC, LHS: CEVal, RHS: NewAnd); |
639 | }; |
640 | |
641 | if (Mask & BMask_Mixed) |
642 | return FoldBMixed(NewCC, false); |
643 | if (Mask & BMask_NotMixed) // can be else also |
644 | return FoldBMixed(NewCC, true); |
645 | } |
646 | return nullptr; |
647 | } |
648 | |
/// Try to fold a signed range check with lower bound 0 to an unsigned icmp.
650 | /// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n |
651 | /// If \p Inverted is true then the check is for the inverted range, e.g. |
652 | /// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n |
653 | Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, |
654 | bool Inverted) { |
655 | // Check the lower range comparison, e.g. x >= 0 |
656 | // InstCombine already ensured that if there is a constant it's on the RHS. |
657 | ConstantInt *RangeStart = dyn_cast<ConstantInt>(Val: Cmp0->getOperand(i_nocapture: 1)); |
658 | if (!RangeStart) |
659 | return nullptr; |
660 | |
661 | ICmpInst::Predicate Pred0 = (Inverted ? Cmp0->getInversePredicate() : |
662 | Cmp0->getPredicate()); |
663 | |
664 | // Accept x > -1 or x >= 0 (after potentially inverting the predicate). |
665 | if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) || |
666 | (Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero()))) |
667 | return nullptr; |
668 | |
669 | ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() : |
670 | Cmp1->getPredicate()); |
671 | |
672 | Value *Input = Cmp0->getOperand(i_nocapture: 0); |
673 | Value *RangeEnd; |
674 | if (Cmp1->getOperand(i_nocapture: 0) == Input) { |
675 | // For the upper range compare we have: icmp x, n |
676 | RangeEnd = Cmp1->getOperand(i_nocapture: 1); |
677 | } else if (Cmp1->getOperand(i_nocapture: 1) == Input) { |
678 | // For the upper range compare we have: icmp n, x |
679 | RangeEnd = Cmp1->getOperand(i_nocapture: 0); |
680 | Pred1 = ICmpInst::getSwappedPredicate(pred: Pred1); |
681 | } else { |
682 | return nullptr; |
683 | } |
684 | |
685 | // Check the upper range comparison, e.g. x < n |
686 | ICmpInst::Predicate NewPred; |
687 | switch (Pred1) { |
688 | case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break; |
689 | case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break; |
690 | default: return nullptr; |
691 | } |
692 | |
693 | // This simplification is only valid if the upper range is not negative. |
694 | KnownBits Known = computeKnownBits(V: RangeEnd, /*Depth=*/0, CxtI: Cmp1); |
695 | if (!Known.isNonNegative()) |
696 | return nullptr; |
697 | |
698 | if (Inverted) |
699 | NewPred = ICmpInst::getInversePredicate(pred: NewPred); |
700 | |
701 | return Builder.CreateICmp(P: NewPred, LHS: Input, RHS: RangeEnd); |
702 | } |
703 | |
704 | // (or (icmp eq X, 0), (icmp eq X, Pow2OrZero)) |
705 | // -> (icmp eq (and X, Pow2OrZero), X) |
706 | // (and (icmp ne X, 0), (icmp ne X, Pow2OrZero)) |
707 | // -> (icmp ne (and X, Pow2OrZero), X) |
708 | static Value * |
709 | foldAndOrOfICmpsWithPow2AndWithZero(InstCombiner::BuilderTy &Builder, |
710 | ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, |
711 | const SimplifyQuery &Q) { |
712 | CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; |
  // Make sure we have the right compares for our op.
714 | if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred) |
715 | return nullptr; |
716 | |
717 | // Make it so we can match LHS against the (icmp eq/ne X, 0) just for |
718 | // simplicity. |
719 | if (match(V: RHS->getOperand(i_nocapture: 1), P: m_Zero())) |
720 | std::swap(a&: LHS, b&: RHS); |
721 | |
722 | Value *Pow2, *Op; |
723 | // Match the desired pattern: |
724 | // LHS: (icmp eq/ne X, 0) |
725 | // RHS: (icmp eq/ne X, Pow2OrZero) |
726 | // Skip if Pow2OrZero is 1. Either way it gets folded to (icmp ugt X, 1) but |
727 | // this form ends up slightly less canonical. |
728 | // We could potentially be more sophisticated than requiring LHS/RHS |
729 | // be one-use. We don't create additional instructions if only one |
730 | // of them is one-use. So cases where one is one-use and the other |
731 | // is two-use might be profitable. |
732 | if (!match(V: LHS, P: m_OneUse(SubPattern: m_ICmp(Pred, L: m_Value(V&: Op), R: m_Zero()))) || |
733 | !match(V: RHS, P: m_OneUse(SubPattern: m_c_ICmp(Pred, L: m_Specific(V: Op), R: m_Value(V&: Pow2)))) || |
734 | match(V: Pow2, P: m_One()) || |
735 | !isKnownToBeAPowerOfTwo(V: Pow2, DL: Q.DL, /*OrZero=*/true, /*Depth=*/0, AC: Q.AC, |
736 | CxtI: Q.CxtI, DT: Q.DT)) |
737 | return nullptr; |
738 | |
739 | Value *And = Builder.CreateAnd(LHS: Op, RHS: Pow2); |
740 | return Builder.CreateICmp(P: Pred, LHS: And, RHS: Op); |
741 | } |
742 | |
743 | // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) |
744 | // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) |
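// For example, (((A & 4) == 0) | ((A & 8) == 0)) becomes ((A & 12) != 12),
// i.e. the 'or' is true unless both tested bits are set in A.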
745 | Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, |
746 | ICmpInst *RHS, |
747 | Instruction *CxtI, |
748 | bool IsAnd, |
749 | bool IsLogical) { |
750 | CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; |
751 | if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred) |
752 | return nullptr; |
753 | |
754 | if (!match(V: LHS->getOperand(i_nocapture: 1), P: m_Zero()) || |
755 | !match(V: RHS->getOperand(i_nocapture: 1), P: m_Zero())) |
756 | return nullptr; |
757 | |
758 | Value *L1, *L2, *R1, *R2; |
759 | if (match(V: LHS->getOperand(i_nocapture: 0), P: m_And(L: m_Value(V&: L1), R: m_Value(V&: L2))) && |
760 | match(V: RHS->getOperand(i_nocapture: 0), P: m_And(L: m_Value(V&: R1), R: m_Value(V&: R2)))) { |
761 | if (L1 == R2 || L2 == R2) |
762 | std::swap(a&: R1, b&: R2); |
763 | if (L2 == R1) |
764 | std::swap(a&: L1, b&: L2); |
765 | |
766 | if (L1 == R1 && |
767 | isKnownToBeAPowerOfTwo(V: L2, OrZero: false, Depth: 0, CxtI) && |
768 | isKnownToBeAPowerOfTwo(V: R2, OrZero: false, Depth: 0, CxtI)) { |
769 | // If this is a logical and/or, then we must prevent propagation of a |
770 | // poison value from the RHS by inserting freeze. |
771 | if (IsLogical) |
772 | R2 = Builder.CreateFreeze(V: R2); |
773 | Value *Mask = Builder.CreateOr(LHS: L2, RHS: R2); |
774 | Value *Masked = Builder.CreateAnd(LHS: L1, RHS: Mask); |
775 | auto NewPred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE; |
776 | return Builder.CreateICmp(P: NewPred, LHS: Masked, RHS: Mask); |
777 | } |
778 | } |
779 | |
780 | return nullptr; |
781 | } |
782 | |
783 | /// General pattern: |
784 | /// X & Y |
785 | /// |
786 | /// Where Y is checking that all the high bits (covered by a mask 4294967168) |
787 | /// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0 |
788 | /// Pattern can be one of: |
789 | /// %t = add i32 %arg, 128 |
790 | /// %r = icmp ult i32 %t, 256 |
791 | /// Or |
792 | /// %t0 = shl i32 %arg, 24 |
793 | /// %t1 = ashr i32 %t0, 24 |
794 | /// %r = icmp eq i32 %t1, %arg |
795 | /// Or |
796 | /// %t0 = trunc i32 %arg to i8 |
797 | /// %t1 = sext i8 %t0 to i32 |
798 | /// %r = icmp eq i32 %t1, %arg |
799 | /// This pattern is a signed truncation check. |
800 | /// |
801 | /// And X is checking that some bit in that same mask is zero. |
802 | /// I.e. can be one of: |
803 | /// %r = icmp sgt i32 %arg, -1 |
804 | /// Or |
805 | /// %t = and i32 %arg, 2147483648 |
806 | /// %r = icmp eq i32 %t, 0 |
807 | /// |
808 | /// Since we are checking that all the bits in that mask are the same, |
809 | /// and a particular bit is zero, what we are really checking is that all the |
810 | /// masked bits are zero. |
811 | /// So this should be transformed to: |
812 | /// %r = icmp ult i32 %arg, 128 |
813 | static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1, |
814 | Instruction &CxtI, |
815 | InstCombiner::BuilderTy &Builder) { |
816 | assert(CxtI.getOpcode() == Instruction::And); |
817 | |
818 | // Match icmp ult (add %arg, C01), C1 (C1 == C01 << 1; powers of two) |
819 | auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X, |
820 | APInt &SignBitMask) -> bool { |
821 | CmpInst::Predicate Pred; |
822 | const APInt *I01, *I1; // powers of two; I1 == I01 << 1 |
823 | if (!(match(V: ICmp, |
824 | P: m_ICmp(Pred, L: m_Add(L: m_Value(V&: X), R: m_Power2(V&: I01)), R: m_Power2(V&: I1))) && |
825 | Pred == ICmpInst::ICMP_ULT && I1->ugt(RHS: *I01) && I01->shl(shiftAmt: 1) == *I1)) |
826 | return false; |
827 | // Which bit is the new sign bit as per the 'signed truncation' pattern? |
828 | SignBitMask = *I01; |
829 | return true; |
830 | }; |
831 | |
832 | // One icmp needs to be 'signed truncation check'. |
833 | // We need to match this first, else we will mismatch commutative cases. |
834 | Value *X1; |
835 | APInt HighestBit; |
836 | ICmpInst *OtherICmp; |
837 | if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit)) |
838 | OtherICmp = ICmp0; |
839 | else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit)) |
840 | OtherICmp = ICmp1; |
841 | else |
842 | return nullptr; |
843 | |
844 | assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)" ); |
845 | |
846 | // Try to match/decompose into: icmp eq (X & Mask), 0 |
847 | auto tryToDecompose = [](ICmpInst *ICmp, Value *&X, |
848 | APInt &UnsetBitsMask) -> bool { |
849 | CmpInst::Predicate Pred = ICmp->getPredicate(); |
850 | // Can it be decomposed into icmp eq (X & Mask), 0 ? |
851 | if (llvm::decomposeBitTestICmp(LHS: ICmp->getOperand(i_nocapture: 0), RHS: ICmp->getOperand(i_nocapture: 1), |
852 | Pred, X, Mask&: UnsetBitsMask, |
853 | /*LookThroughTrunc=*/false) && |
854 | Pred == ICmpInst::ICMP_EQ) |
855 | return true; |
856 | // Is it icmp eq (X & Mask), 0 already? |
857 | const APInt *Mask; |
858 | if (match(V: ICmp, P: m_ICmp(Pred, L: m_And(L: m_Value(V&: X), R: m_APInt(Res&: Mask)), R: m_Zero())) && |
859 | Pred == ICmpInst::ICMP_EQ) { |
860 | UnsetBitsMask = *Mask; |
861 | return true; |
862 | } |
863 | return false; |
864 | }; |
865 | |
866 | // And the other icmp needs to be decomposable into a bit test. |
867 | Value *X0; |
868 | APInt UnsetBitsMask; |
869 | if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask)) |
870 | return nullptr; |
871 | |
872 | assert(!UnsetBitsMask.isZero() && "empty mask makes no sense." ); |
873 | |
874 | // Are they working on the same value? |
875 | Value *X; |
876 | if (X1 == X0) { |
877 | // Ok as is. |
878 | X = X1; |
879 | } else if (match(V: X0, P: m_Trunc(Op: m_Specific(V: X1)))) { |
880 | UnsetBitsMask = UnsetBitsMask.zext(width: X1->getType()->getScalarSizeInBits()); |
881 | X = X1; |
882 | } else |
883 | return nullptr; |
884 | |
885 | // So which bits should be uniform as per the 'signed truncation check'? |
886 | // (all the bits starting with (i.e. including) HighestBit) |
887 | APInt SignBitsMask = ~(HighestBit - 1U); |
888 | |
889 | // UnsetBitsMask must have some common bits with SignBitsMask, |
890 | if (!UnsetBitsMask.intersects(RHS: SignBitsMask)) |
891 | return nullptr; |
892 | |
893 | // Does UnsetBitsMask contain any bits outside of SignBitsMask? |
894 | if (!UnsetBitsMask.isSubsetOf(RHS: SignBitsMask)) { |
895 | APInt OtherHighestBit = (~UnsetBitsMask) + 1U; |
896 | if (!OtherHighestBit.isPowerOf2()) |
897 | return nullptr; |
898 | HighestBit = APIntOps::umin(A: HighestBit, B: OtherHighestBit); |
899 | } |
900 | // Else, if it does not, then all is ok as-is. |
901 | |
902 | // %r = icmp ult %X, SignBit |
903 | return Builder.CreateICmpULT(LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: HighestBit), |
904 | Name: CxtI.getName() + ".simplified" ); |
905 | } |
906 | |
907 | /// Fold (icmp eq ctpop(X) 1) | (icmp eq X 0) into (icmp ult ctpop(X) 2) and |
908 | /// fold (icmp ne ctpop(X) 1) & (icmp ne X 0) into (icmp ugt ctpop(X) 1). |
909 | /// Also used for logical and/or, must be poison safe. |
910 | static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, |
911 | InstCombiner::BuilderTy &Builder) { |
912 | CmpInst::Predicate Pred0, Pred1; |
913 | Value *X; |
914 | if (!match(V: Cmp0, P: m_ICmp(Pred&: Pred0, L: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: X)), |
915 | R: m_SpecificInt(V: 1))) || |
916 | !match(V: Cmp1, P: m_ICmp(Pred&: Pred1, L: m_Specific(V: X), R: m_ZeroInt()))) |
917 | return nullptr; |
918 | |
919 | Value *CtPop = Cmp0->getOperand(i_nocapture: 0); |
920 | if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_NE) |
921 | return Builder.CreateICmpUGT(LHS: CtPop, RHS: ConstantInt::get(Ty: CtPop->getType(), V: 1)); |
922 | if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ) |
923 | return Builder.CreateICmpULT(LHS: CtPop, RHS: ConstantInt::get(Ty: CtPop->getType(), V: 2)); |
924 | |
925 | return nullptr; |
926 | } |
927 | |
928 | /// Reduce a pair of compares that check if a value has exactly 1 bit set. |
929 | /// Also used for logical and/or, must be poison safe. |
930 | static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd, |
931 | InstCombiner::BuilderTy &Builder) { |
932 | // Handle 'and' / 'or' commutation: make the equality check the first operand. |
933 | if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE) |
934 | std::swap(a&: Cmp0, b&: Cmp1); |
935 | else if (!JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_EQ) |
936 | std::swap(a&: Cmp0, b&: Cmp1); |
937 | |
938 | // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1 |
939 | CmpInst::Predicate Pred0, Pred1; |
940 | Value *X; |
941 | if (JoinedByAnd && match(V: Cmp0, P: m_ICmp(Pred&: Pred0, L: m_Value(V&: X), R: m_ZeroInt())) && |
942 | match(V: Cmp1, P: m_ICmp(Pred&: Pred1, L: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Specific(V: X)), |
943 | R: m_SpecificInt(V: 2))) && |
944 | Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) { |
945 | Value *CtPop = Cmp1->getOperand(i_nocapture: 0); |
946 | return Builder.CreateICmpEQ(LHS: CtPop, RHS: ConstantInt::get(Ty: CtPop->getType(), V: 1)); |
947 | } |
948 | // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1 |
949 | if (!JoinedByAnd && match(V: Cmp0, P: m_ICmp(Pred&: Pred0, L: m_Value(V&: X), R: m_ZeroInt())) && |
950 | match(V: Cmp1, P: m_ICmp(Pred&: Pred1, L: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Specific(V: X)), |
951 | R: m_SpecificInt(V: 1))) && |
952 | Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) { |
953 | Value *CtPop = Cmp1->getOperand(i_nocapture: 0); |
954 | return Builder.CreateICmpNE(LHS: CtPop, RHS: ConstantInt::get(Ty: CtPop->getType(), V: 1)); |
955 | } |
956 | return nullptr; |
957 | } |
958 | |
959 | /// Try to fold (icmp(A & B) == 0) & (icmp(A & D) != E) into (icmp A u< D) iff |
960 | /// B is a contiguous set of ones starting from the most significant bit |
961 | /// (negative power of 2), D and E are equal, and D is a contiguous set of ones |
962 | /// starting at the most significant zero bit in B. Parameter B supports masking |
963 | /// using undef/poison in either scalar or vector values. |
964 | static Value *foldNegativePower2AndShiftedMask( |
965 | Value *A, Value *B, Value *D, Value *E, ICmpInst::Predicate PredL, |
966 | ICmpInst::Predicate PredR, InstCombiner::BuilderTy &Builder) { |
967 | assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && |
968 | "Expected equality predicates for masked type of icmps." ); |
969 | if (PredL != ICmpInst::ICMP_EQ || PredR != ICmpInst::ICMP_NE) |
970 | return nullptr; |
971 | |
972 | if (!match(V: B, P: m_NegatedPower2()) || !match(V: D, P: m_ShiftedMask()) || |
973 | !match(V: E, P: m_ShiftedMask())) |
974 | return nullptr; |
975 | |
976 | // Test scalar arguments for conversion. B has been validated earlier to be a |
977 | // negative power of two and thus is guaranteed to have one or more contiguous |
978 | // ones starting from the MSB followed by zero or more contiguous zeros. D has |
979 | // been validated earlier to be a shifted set of one or more contiguous ones. |
980 | // In order to match, B leading ones and D leading zeros should be equal. The |
981 | // predicate that B be a negative power of 2 prevents the condition of there |
982 | // ever being zero leading ones. Thus 0 == 0 cannot occur. The predicate that |
983 | // D always be a shifted mask prevents the condition of D equaling 0. This |
984 | // prevents matching the condition where B contains the maximum number of |
985 | // leading one bits (-1) and D contains the maximum number of leading zero |
986 | // bits (0). |
987 | auto isReducible = [](const Value *B, const Value *D, const Value *E) { |
988 | const APInt *BCst, *DCst, *ECst; |
989 | return match(V: B, P: m_APIntAllowPoison(Res&: BCst)) && match(V: D, P: m_APInt(Res&: DCst)) && |
990 | match(V: E, P: m_APInt(Res&: ECst)) && *DCst == *ECst && |
991 | (isa<PoisonValue>(Val: B) || |
992 | (BCst->countLeadingOnes() == DCst->countLeadingZeros())); |
993 | }; |
994 | |
995 | // Test vector type arguments for conversion. |
996 | if (const auto *BVTy = dyn_cast<VectorType>(Val: B->getType())) { |
997 | const auto *BFVTy = dyn_cast<FixedVectorType>(Val: BVTy); |
998 | const auto *BConst = dyn_cast<Constant>(Val: B); |
999 | const auto *DConst = dyn_cast<Constant>(Val: D); |
1000 | const auto *EConst = dyn_cast<Constant>(Val: E); |
1001 | |
1002 | if (!BFVTy || !BConst || !DConst || !EConst) |
1003 | return nullptr; |
1004 | |
1005 | for (unsigned I = 0; I != BFVTy->getNumElements(); ++I) { |
1006 | const auto *BElt = BConst->getAggregateElement(Elt: I); |
1007 | const auto *DElt = DConst->getAggregateElement(Elt: I); |
1008 | const auto *EElt = EConst->getAggregateElement(Elt: I); |
1009 | |
1010 | if (!BElt || !DElt || !EElt) |
1011 | return nullptr; |
1012 | if (!isReducible(BElt, DElt, EElt)) |
1013 | return nullptr; |
1014 | } |
1015 | } else { |
1016 | // Test scalar type arguments for conversion. |
1017 | if (!isReducible(B, D, E)) |
1018 | return nullptr; |
1019 | } |
1020 | return Builder.CreateICmp(P: ICmpInst::ICMP_ULT, LHS: A, RHS: D); |
1021 | } |
1022 | |
1023 | /// Try to fold ((icmp X u< P) & (icmp(X & M) != M)) or ((icmp X s> -1) & |
1024 | /// (icmp(X & M) != M)) into (icmp X u< M). Where P is a power of 2, M < P, and |
/// M is a contiguous shifted mask whose highest set bit is the bit immediately
/// below the single set bit of P. SGT is supported because, when P is the
/// largest representable power of 2, an earlier optimization converts the
/// expression into (icmp X s> -1).
1028 | /// Parameter P supports masking using undef/poison in either scalar or vector |
1029 | /// values. |
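/// For example, ((icmp ult X, 16) & (icmp ne (X & 12), 12)) becomes
/// (icmp ult X, 12).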
1030 | static Value *foldPowerOf2AndShiftedMask(ICmpInst *Cmp0, ICmpInst *Cmp1, |
1031 | bool JoinedByAnd, |
1032 | InstCombiner::BuilderTy &Builder) { |
1033 | if (!JoinedByAnd) |
1034 | return nullptr; |
1035 | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; |
1036 | ICmpInst::Predicate CmpPred0 = Cmp0->getPredicate(), |
1037 | CmpPred1 = Cmp1->getPredicate(); |
1038 | // Assuming P is a 2^n, getMaskedTypeForICmpPair will normalize (icmp X u< |
1039 | // 2^n) into (icmp (X & ~(2^n-1)) == 0) and (icmp X s> -1) into (icmp (X & |
1040 | // SignMask) == 0). |
1041 | std::optional<std::pair<unsigned, unsigned>> MaskPair = |
1042 | getMaskedTypeForICmpPair(A, B, C, D, E, LHS: Cmp0, RHS: Cmp1, PredL&: CmpPred0, PredR&: CmpPred1); |
1043 | if (!MaskPair) |
1044 | return nullptr; |
1045 | |
1046 | const auto compareBMask = BMask_NotMixed | BMask_NotAllOnes; |
1047 | unsigned CmpMask0 = MaskPair->first; |
1048 | unsigned CmpMask1 = MaskPair->second; |
1049 | if ((CmpMask0 & Mask_AllZeros) && (CmpMask1 == compareBMask)) { |
1050 | if (Value *V = foldNegativePower2AndShiftedMask(A, B, D, E, PredL: CmpPred0, |
1051 | PredR: CmpPred1, Builder)) |
1052 | return V; |
1053 | } else if ((CmpMask0 == compareBMask) && (CmpMask1 & Mask_AllZeros)) { |
1054 | if (Value *V = foldNegativePower2AndShiftedMask(A, B: D, D: B, E: C, PredL: CmpPred1, |
1055 | PredR: CmpPred0, Builder)) |
1056 | return V; |
1057 | } |
1058 | return nullptr; |
1059 | } |
1060 | |
1061 | /// Commuted variants are assumed to be handled by calling this function again |
1062 | /// with the parameters swapped. |
1063 | static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp, |
1064 | ICmpInst *UnsignedICmp, bool IsAnd, |
1065 | const SimplifyQuery &Q, |
1066 | InstCombiner::BuilderTy &Builder) { |
1067 | Value *ZeroCmpOp; |
1068 | ICmpInst::Predicate EqPred; |
1069 | if (!match(V: ZeroICmp, P: m_ICmp(Pred&: EqPred, L: m_Value(V&: ZeroCmpOp), R: m_Zero())) || |
1070 | !ICmpInst::isEquality(P: EqPred)) |
1071 | return nullptr; |
1072 | |
1073 | ICmpInst::Predicate UnsignedPred; |
1074 | |
1075 | Value *A, *B; |
1076 | if (match(V: UnsignedICmp, |
1077 | P: m_c_ICmp(Pred&: UnsignedPred, L: m_Specific(V: ZeroCmpOp), R: m_Value(V&: A))) && |
1078 | match(V: ZeroCmpOp, P: m_c_Add(L: m_Specific(V: A), R: m_Value(V&: B))) && |
1079 | (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) { |
1080 | auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) { |
1081 | if (!isKnownNonZero(V: NonZero, Q)) |
1082 | std::swap(a&: NonZero, b&: Other); |
1083 | return isKnownNonZero(V: NonZero, Q); |
1084 | }; |
1085 | |
1086 | // Given ZeroCmpOp = (A + B) |
    //   ZeroCmpOp <  A && ZeroCmpOp != 0 --> (0-X) <  Y
    //   ZeroCmpOp >= A || ZeroCmpOp == 0 --> (0-X) >= Y
    // where X is whichever of A/B is known to be non-zero and Y is the
    // remaining value.
1091 | if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE && |
1092 | IsAnd && GetKnownNonZeroAndOther(B, A)) |
1093 | return Builder.CreateICmpULT(LHS: Builder.CreateNeg(V: B), RHS: A); |
1094 | if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ && |
1095 | !IsAnd && GetKnownNonZeroAndOther(B, A)) |
1096 | return Builder.CreateICmpUGE(LHS: Builder.CreateNeg(V: B), RHS: A); |
1097 | } |
1098 | |
1099 | return nullptr; |
1100 | } |
1101 | |
1102 | struct IntPart { |
1103 | Value *From; |
1104 | unsigned StartBit; |
1105 | unsigned NumBits; |
1106 | }; |
1107 | |
1108 | /// Match an extraction of bits from an integer. |
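/// For example, (trunc (lshr i32 %x, 8) to i8) is the 8-bit part of %x that
/// starts at bit 8: {From = %x, StartBit = 8, NumBits = 8}.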
1109 | static std::optional<IntPart> matchIntPart(Value *V) { |
1110 | Value *X; |
1111 | if (!match(V, P: m_OneUse(SubPattern: m_Trunc(Op: m_Value(V&: X))))) |
1112 | return std::nullopt; |
1113 | |
1114 | unsigned NumOriginalBits = X->getType()->getScalarSizeInBits(); |
  unsigned NumExtractedBits = V->getType()->getScalarSizeInBits();
1116 | Value *Y; |
1117 | const APInt *Shift; |
1118 | // For a trunc(lshr Y, Shift) pattern, make sure we're only extracting bits |
1119 | // from Y, not any shifted-in zeroes. |
1120 | if (match(V: X, P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: Y), R: m_APInt(Res&: Shift)))) && |
1121 | Shift->ule(RHS: NumOriginalBits - NumExtractedBits)) |
1122 | return {{.From: Y, .StartBit: (unsigned)Shift->getZExtValue(), .NumBits: NumExtractedBits}}; |
1123 | return {{.From: X, .StartBit: 0, .NumBits: NumExtractedBits}}; |
1124 | } |
1125 | |
1126 | /// Materialize an extraction of bits from an integer in IR. |
static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) {
1128 | Value *V = P.From; |
1129 | if (P.StartBit) |
1130 | V = Builder.CreateLShr(LHS: V, RHS: P.StartBit); |
1131 | Type *TruncTy = V->getType()->getWithNewBitWidth(NewBitWidth: P.NumBits); |
1132 | if (TruncTy != V->getType()) |
1133 | V = Builder.CreateTrunc(V, DestTy: TruncTy); |
1134 | return V; |
1135 | } |
1136 | |
1137 | /// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01 |
1138 | /// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01 |
1139 | /// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer. |
1140 | Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, |
1141 | bool IsAnd) { |
1142 | if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse()) |
1143 | return nullptr; |
1144 | |
1145 | CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE; |
1146 | auto GetMatchPart = [&](ICmpInst *Cmp, |
1147 | unsigned OpNo) -> std::optional<IntPart> { |
1148 | if (Pred == Cmp->getPredicate()) |
1149 | return matchIntPart(V: Cmp->getOperand(i_nocapture: OpNo)); |
1150 | |
1151 | const APInt *C; |
1152 | // (icmp eq (lshr x, C), (lshr y, C)) gets optimized to: |
1153 | // (icmp ult (xor x, y), 1 << C) so also look for that. |
1154 | if (Pred == CmpInst::ICMP_EQ && Cmp->getPredicate() == CmpInst::ICMP_ULT) { |
1155 | if (!match(V: Cmp->getOperand(i_nocapture: 1), P: m_Power2(V&: C)) || |
1156 | !match(V: Cmp->getOperand(i_nocapture: 0), P: m_Xor(L: m_Value(), R: m_Value()))) |
1157 | return std::nullopt; |
1158 | } |
1159 | |
1160 | // (icmp ne (lshr x, C), (lshr y, C)) gets optimized to: |
1161 | // (icmp ugt (xor x, y), (1 << C) - 1) so also look for that. |
1162 | else if (Pred == CmpInst::ICMP_NE && |
1163 | Cmp->getPredicate() == CmpInst::ICMP_UGT) { |
1164 | if (!match(V: Cmp->getOperand(i_nocapture: 1), P: m_LowBitMask(V&: C)) || |
1165 | !match(V: Cmp->getOperand(i_nocapture: 0), P: m_Xor(L: m_Value(), R: m_Value()))) |
1166 | return std::nullopt; |
1167 | } else { |
1168 | return std::nullopt; |
1169 | } |
1170 | |
1171 | unsigned From = Pred == CmpInst::ICMP_NE ? C->popcount() : C->countr_zero(); |
1172 | Instruction *I = cast<Instruction>(Val: Cmp->getOperand(i_nocapture: 0)); |
1173 | return {{.From: I->getOperand(i: OpNo), .StartBit: From, .NumBits: C->getBitWidth() - From}}; |
1174 | }; |
1175 | |
1176 | std::optional<IntPart> L0 = GetMatchPart(Cmp0, 0); |
1177 | std::optional<IntPart> R0 = GetMatchPart(Cmp0, 1); |
1178 | std::optional<IntPart> L1 = GetMatchPart(Cmp1, 0); |
1179 | std::optional<IntPart> R1 = GetMatchPart(Cmp1, 1); |
1180 | if (!L0 || !R0 || !L1 || !R1) |
1181 | return nullptr; |
1182 | |
1183 | // Make sure the LHS/RHS compare a part of the same value, possibly after |
1184 | // an operand swap. |
1185 | if (L0->From != L1->From || R0->From != R1->From) { |
1186 | if (L0->From != R1->From || R0->From != L1->From) |
1187 | return nullptr; |
1188 | std::swap(lhs&: L1, rhs&: R1); |
1189 | } |
1190 | |
1191 | // Make sure the extracted parts are adjacent, canonicalizing to L0/R0 being |
1192 | // the low part and L1/R1 being the high part. |
1193 | if (L0->StartBit + L0->NumBits != L1->StartBit || |
1194 | R0->StartBit + R0->NumBits != R1->StartBit) { |
1195 | if (L1->StartBit + L1->NumBits != L0->StartBit || |
1196 | R1->StartBit + R1->NumBits != R0->StartBit) |
1197 | return nullptr; |
1198 | std::swap(lhs&: L0, rhs&: L1); |
1199 | std::swap(lhs&: R0, rhs&: R1); |
1200 | } |
1201 | |
1202 | // We can simplify to a comparison of these larger parts of the integers. |
1203 | IntPart L = {.From: L0->From, .StartBit: L0->StartBit, .NumBits: L0->NumBits + L1->NumBits}; |
1204 | IntPart R = {.From: R0->From, .StartBit: R0->StartBit, .NumBits: R0->NumBits + R1->NumBits}; |
1205 | Value *LValue = extractIntPart(P: L, Builder); |
1206 | Value *RValue = extractIntPart(P: R, Builder); |
1207 | return Builder.CreateICmp(P: Pred, LHS: LValue, RHS: RValue); |
1208 | } |
1209 | |
1210 | /// Reduce logic-of-compares with equality to a constant by substituting a |
1211 | /// common operand with the constant. Callers are expected to call this with |
1212 | /// Cmp0/Cmp1 switched to handle logic op commutativity. |
1213 | static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, |
1214 | bool IsAnd, bool IsLogical, |
1215 | InstCombiner::BuilderTy &Builder, |
1216 | const SimplifyQuery &Q) { |
1217 | // Match an equality compare with a non-poison constant as Cmp0. |
1218 | // Also, give up if the compare can be constant-folded to avoid looping. |
1219 | ICmpInst::Predicate Pred0; |
1220 | Value *X; |
1221 | Constant *C; |
1222 | if (!match(V: Cmp0, P: m_ICmp(Pred&: Pred0, L: m_Value(V&: X), R: m_Constant(C))) || |
1223 | !isGuaranteedNotToBeUndefOrPoison(V: C) || isa<Constant>(Val: X)) |
1224 | return nullptr; |
1225 | if ((IsAnd && Pred0 != ICmpInst::ICMP_EQ) || |
1226 | (!IsAnd && Pred0 != ICmpInst::ICMP_NE)) |
1227 | return nullptr; |
1228 | |
1229 | // The other compare must include a common operand (X). Canonicalize the |
1230 | // common operand as operand 1 (Pred1 is swapped if the common operand was |
1231 | // operand 0). |
1232 | Value *Y; |
1233 | ICmpInst::Predicate Pred1; |
1234 | if (!match(V: Cmp1, P: m_c_ICmp(Pred&: Pred1, L: m_Value(V&: Y), R: m_Specific(V: X)))) |
1235 | return nullptr; |
1236 | |
1237 | // Replace variable with constant value equivalence to remove a variable use: |
1238 | // (X == C) && (Y Pred1 X) --> (X == C) && (Y Pred1 C) |
1239 | // (X != C) || (Y Pred1 X) --> (X != C) || (Y Pred1 C) |
1240 | // Can think of the 'or' substitution with the 'and' bool equivalent: |
1241 | // A || B --> A || (!A && B) |
1242 | Value *SubstituteCmp = simplifyICmpInst(Predicate: Pred1, LHS: Y, RHS: C, Q); |
1243 | if (!SubstituteCmp) { |
1244 | // If we need to create a new instruction, require that the old compare can |
1245 | // be removed. |
1246 | if (!Cmp1->hasOneUse()) |
1247 | return nullptr; |
1248 | SubstituteCmp = Builder.CreateICmp(P: Pred1, LHS: Y, RHS: C); |
1249 | } |
1250 | if (IsLogical) |
1251 | return IsAnd ? Builder.CreateLogicalAnd(Cond1: Cmp0, Cond2: SubstituteCmp) |
1252 | : Builder.CreateLogicalOr(Cond1: Cmp0, Cond2: SubstituteCmp); |
1253 | return Builder.CreateBinOp(Opc: IsAnd ? Instruction::And : Instruction::Or, LHS: Cmp0, |
1254 | RHS: SubstituteCmp); |
1255 | } |
1256 | |
1257 | /// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2) |
1258 | /// or (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2) |
1259 | /// into a single comparison using range-based reasoning. |
1260 | /// NOTE: This is also used for logical and/or, must be poison-safe! |
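// A sketch of the kind of fold this enables (illustrative only, assuming i8
// operands; the exact output is produced by the range machinery below):
//   (icmp sgt i8 %x, 5) & (icmp slt i8 %x, 10)   ; %x in [6, 10) signed
// becomes a single unsigned range check:
//   %off = add i8 %x, -6
//   %r   = icmp ult i8 %off, 4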
1261 | Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1, |
1262 | ICmpInst *ICmp2, |
1263 | bool IsAnd) { |
1264 | ICmpInst::Predicate Pred1, Pred2; |
1265 | Value *V1, *V2; |
1266 | const APInt *C1, *C2; |
1267 | if (!match(V: ICmp1, P: m_ICmp(Pred&: Pred1, L: m_Value(V&: V1), R: m_APInt(Res&: C1))) || |
1268 | !match(V: ICmp2, P: m_ICmp(Pred&: Pred2, L: m_Value(V&: V2), R: m_APInt(Res&: C2)))) |
1269 | return nullptr; |
1270 | |
1271 | // Look through add of a constant offset on V1, V2, or both operands. This |
1272 | // allows us to interpret the V + C' < C'' range idiom into a proper range. |
1273 | const APInt *Offset1 = nullptr, *Offset2 = nullptr; |
1274 | if (V1 != V2) { |
1275 | Value *X; |
1276 | if (match(V: V1, P: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: Offset1)))) |
1277 | V1 = X; |
1278 | if (match(V: V2, P: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: Offset2)))) |
1279 | V2 = X; |
1280 | } |
1281 | |
1282 | if (V1 != V2) |
1283 | return nullptr; |
1284 | |
1285 | ConstantRange CR1 = ConstantRange::makeExactICmpRegion( |
1286 | Pred: IsAnd ? ICmpInst::getInversePredicate(pred: Pred1) : Pred1, Other: *C1); |
1287 | if (Offset1) |
1288 | CR1 = CR1.subtract(CI: *Offset1); |
1289 | |
1290 | ConstantRange CR2 = ConstantRange::makeExactICmpRegion( |
1291 | Pred: IsAnd ? ICmpInst::getInversePredicate(pred: Pred2) : Pred2, Other: *C2); |
1292 | if (Offset2) |
1293 | CR2 = CR2.subtract(CI: *Offset2); |
1294 | |
1295 | Type *Ty = V1->getType(); |
1296 | Value *NewV = V1; |
1297 | std::optional<ConstantRange> CR = CR1.exactUnionWith(CR: CR2); |
1298 | if (!CR) { |
1299 | if (!(ICmp1->hasOneUse() && ICmp2->hasOneUse()) || CR1.isWrappedSet() || |
1300 | CR2.isWrappedSet()) |
1301 | return nullptr; |
1302 | |
1303 | // Check whether we have equal-size ranges that only differ by one bit. |
1304 | // In that case we can apply a mask to map one range onto the other. |
1305 | APInt LowerDiff = CR1.getLower() ^ CR2.getLower(); |
1306 | APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1); |
1307 | APInt CR1Size = CR1.getUpper() - CR1.getLower(); |
1308 | if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff || |
1309 | CR1Size != CR2.getUpper() - CR2.getLower()) |
1310 | return nullptr; |
1311 | |
1312 | CR = CR1.getLower().ult(RHS: CR2.getLower()) ? CR1 : CR2; |
1313 | NewV = Builder.CreateAnd(LHS: NewV, RHS: ConstantInt::get(Ty, V: ~LowerDiff)); |
1314 | } |
1315 | |
1316 | if (IsAnd) |
1317 | CR = CR->inverse(); |
1318 | |
1319 | CmpInst::Predicate NewPred; |
1320 | APInt NewC, Offset; |
1321 | CR->getEquivalentICmp(Pred&: NewPred, RHS&: NewC, Offset); |
1322 | |
1323 | if (Offset != 0) |
1324 | NewV = Builder.CreateAdd(LHS: NewV, RHS: ConstantInt::get(Ty, V: Offset)); |
1325 | return Builder.CreateICmp(P: NewPred, LHS: NewV, RHS: ConstantInt::get(Ty, V: NewC)); |
1326 | } |
1327 | |
1328 | /// Ignore all operations which only change the sign of a value, returning the |
1329 | /// underlying magnitude value. |
1330 | static Value *stripSignOnlyFPOps(Value *Val) { |
1331 | match(V: Val, P: m_FNeg(X: m_Value(V&: Val))); |
1332 | match(V: Val, P: m_FAbs(Op0: m_Value(V&: Val))); |
1333 | match(V: Val, P: m_CopySign(Op0: m_Value(V&: Val), Op1: m_Value())); |
1334 | return Val; |
1335 | } |
1336 | |
1337 | /// Matches the canonical form of a not-NaN check: fcmp ord x, 0
1338 | static bool matchIsNotNaN(FCmpInst::Predicate P, Value *LHS, Value *RHS) { |
1339 | return P == FCmpInst::FCMP_ORD && match(V: RHS, P: m_AnyZeroFP()); |
1340 | } |
1341 | |
1342 | /// Matches fcmp u__ x, +/-inf |
1343 | static bool matchUnorderedInfCompare(FCmpInst::Predicate P, Value *LHS, |
1344 | Value *RHS) { |
1345 | return FCmpInst::isUnordered(predicate: P) && match(V: RHS, P: m_Inf()); |
1346 | } |
1347 | |
1348 | /// and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf |
1349 | /// |
1350 | /// Clang emits this pattern for doing an isfinite check in __builtin_isnormal. |
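// Illustrative IR instance of the transform above (assuming float operands):
//   %ord = fcmp ord float %x, 0.0
//   %ult = fcmp ult float %x, 0x7FF0000000000000   ; u< +inf
//   %and = and i1 %ord, %ult
// folds to:
//   %r = fcmp olt float %x, 0x7FF0000000000000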
1351 | static Value *matchIsFiniteTest(InstCombiner::BuilderTy &Builder, FCmpInst *LHS, |
1352 | FCmpInst *RHS) { |
1353 | Value *LHS0 = LHS->getOperand(i_nocapture: 0), *LHS1 = LHS->getOperand(i_nocapture: 1); |
1354 | Value *RHS0 = RHS->getOperand(i_nocapture: 0), *RHS1 = RHS->getOperand(i_nocapture: 1); |
1355 | FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); |
1356 | |
1357 | if (!matchIsNotNaN(P: PredL, LHS: LHS0, RHS: LHS1) || |
1358 | !matchUnorderedInfCompare(P: PredR, LHS: RHS0, RHS: RHS1)) |
1359 | return nullptr; |
1360 | |
1361 | IRBuilder<>::FastMathFlagGuard FMFG(Builder); |
1362 | FastMathFlags FMF = LHS->getFastMathFlags(); |
1363 | FMF &= RHS->getFastMathFlags(); |
1364 | Builder.setFastMathFlags(FMF); |
1365 | |
1366 | return Builder.CreateFCmp(P: FCmpInst::getOrderedPredicate(Pred: PredR), LHS: RHS0, RHS: RHS1); |
1367 | } |
1368 | |
1369 | Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, |
1370 | bool IsAnd, bool IsLogicalSelect) { |
1371 | Value *LHS0 = LHS->getOperand(i_nocapture: 0), *LHS1 = LHS->getOperand(i_nocapture: 1); |
1372 | Value *RHS0 = RHS->getOperand(i_nocapture: 0), *RHS1 = RHS->getOperand(i_nocapture: 1); |
1373 | FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); |
1374 | |
1375 | if (LHS0 == RHS1 && RHS0 == LHS1) { |
1376 | // Swap RHS operands to match LHS. |
1377 | PredR = FCmpInst::getSwappedPredicate(pred: PredR); |
1378 | std::swap(a&: RHS0, b&: RHS1); |
1379 | } |
1380 | |
1381 | // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). |
1382 | // Suppose the relation between x and y is R, where R is one of |
1383 | // U(1000), L(0100), G(0010) or E(0001), and CC0 and CC1 are the bitmasks for |
1384 | // testing the desired relations. |
1385 | // |
1386 | // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this: |
1387 | // bool(R & CC0) && bool(R & CC1) |
1388 | // = bool((R & CC0) & (R & CC1)) |
1389 | // = bool(R & (CC0 & CC1)) <= by re-association, commutation, and idempotency |
1390 | // |
1391 | // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this: |
1392 | // bool(R & CC0) || bool(R & CC1) |
1393 | // = bool((R & CC0) | (R & CC1)) |
1394 | // = bool(R & (CC0 | CC1)) <= by the reverse of distribution (factoring)
1395 | if (LHS0 == RHS0 && LHS1 == RHS1) { |
1396 | unsigned FCmpCodeL = getFCmpCode(CC: PredL); |
1397 | unsigned FCmpCodeR = getFCmpCode(CC: PredR); |
1398 | unsigned NewPred = IsAnd ? FCmpCodeL & FCmpCodeR : FCmpCodeL | FCmpCodeR; |
1399 | |
1400 | // Intersect the fast math flags. |
1401 | // TODO: We can union the fast math flags unless this is a logical select. |
1402 | IRBuilder<>::FastMathFlagGuard FMFG(Builder); |
1403 | FastMathFlags FMF = LHS->getFastMathFlags(); |
1404 | FMF &= RHS->getFastMathFlags(); |
1405 | Builder.setFastMathFlags(FMF); |
1406 | |
1407 | return getFCmpValue(Code: NewPred, LHS: LHS0, RHS: LHS1, Builder); |
1408 | } |
1409 | |
1410 | // This transform is not valid for a logical select. |
1411 | if (!IsLogicalSelect && |
1412 | ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) || |
1413 | (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && |
1414 | !IsAnd))) { |
1415 | if (LHS0->getType() != RHS0->getType()) |
1416 | return nullptr; |
1417 | |
1418 | // FCmp canonicalization ensures that (fcmp ord/uno X, X) and
1419 | // (fcmp ord/uno X, C) will be transformed to (fcmp ord/uno X, +0.0).
1420 | if (match(V: LHS1, P: m_PosZeroFP()) && match(V: RHS1, P: m_PosZeroFP())) |
1421 | // Ignore the constants because they are obviously not NANs: |
1422 | // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y) |
1423 | // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y) |
1424 | return Builder.CreateFCmp(P: PredL, LHS: LHS0, RHS: RHS0); |
1425 | } |
1426 | |
1427 | if (IsAnd && stripSignOnlyFPOps(Val: LHS0) == stripSignOnlyFPOps(Val: RHS0)) { |
1428 | // and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf |
1429 | // and (fcmp ord x, 0), (fcmp u* fabs(x), inf) -> fcmp o* x, inf |
1430 | if (Value *Left = matchIsFiniteTest(Builder, LHS, RHS)) |
1431 | return Left; |
1432 | if (Value *Right = matchIsFiniteTest(Builder, LHS: RHS, RHS: LHS)) |
1433 | return Right; |
1434 | } |
1435 | |
1436 | // Turn at least two fcmps with constants into llvm.is.fpclass. |
1437 | // |
1438 | // If we can represent a combined value test with one class call, we can |
1439 | // potentially eliminate 4-6 instructions. If we can represent a test with a |
1440 | // single fcmp with fneg and fabs, that's likely a better canonical form. |
1441 | if (LHS->hasOneUse() && RHS->hasOneUse()) { |
1442 | auto [ClassValRHS, ClassMaskRHS] = |
1443 | fcmpToClassTest(Pred: PredR, F: *RHS->getFunction(), LHS: RHS0, RHS: RHS1); |
1444 | if (ClassValRHS) { |
1445 | auto [ClassValLHS, ClassMaskLHS] = |
1446 | fcmpToClassTest(Pred: PredL, F: *LHS->getFunction(), LHS: LHS0, RHS: LHS1); |
1447 | if (ClassValLHS == ClassValRHS) { |
1448 | unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS) |
1449 | : (ClassMaskLHS | ClassMaskRHS); |
1450 | return Builder.CreateIntrinsic( |
1451 | ID: Intrinsic::is_fpclass, Types: {ClassValLHS->getType()}, |
1452 | Args: {ClassValLHS, Builder.getInt32(C: CombinedMask)}); |
1453 | } |
1454 | } |
1455 | } |
1456 | |
1457 | // Canonicalize the range check idiom: |
1458 | // and (fcmp olt/ole/ult/ule x, C), (fcmp ogt/oge/ugt/uge x, -C) |
1459 | // --> fabs(x) olt/ole/ult/ule C |
1460 | // or (fcmp ogt/oge/ugt/uge x, C), (fcmp olt/ole/ult/ule x, -C) |
1461 | // --> fabs(x) ogt/oge/ugt/uge C |
1462 | // TODO: Generalize to handle a negated variable operand? |
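// A hedged, concrete instance of the idiom above (constants are arbitrary):
//   and (fcmp olt float %x, 4.0), (fcmp ogt float %x, -4.0)
//   --> %fabs = call float @llvm.fabs.f32(float %x)
//       fcmp olt float %fabs, 4.0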
1463 | const APFloat *LHSC, *RHSC; |
1464 | if (LHS0 == RHS0 && LHS->hasOneUse() && RHS->hasOneUse() && |
1465 | FCmpInst::getSwappedPredicate(pred: PredL) == PredR && |
1466 | match(V: LHS1, P: m_APFloatAllowPoison(Res&: LHSC)) && |
1467 | match(V: RHS1, P: m_APFloatAllowPoison(Res&: RHSC)) && |
1468 | LHSC->bitwiseIsEqual(RHS: neg(X: *RHSC))) { |
1469 | auto IsLessThanOrLessEqual = [](FCmpInst::Predicate Pred) { |
1470 | switch (Pred) { |
1471 | case FCmpInst::FCMP_OLT: |
1472 | case FCmpInst::FCMP_OLE: |
1473 | case FCmpInst::FCMP_ULT: |
1474 | case FCmpInst::FCMP_ULE: |
1475 | return true; |
1476 | default: |
1477 | return false; |
1478 | } |
1479 | }; |
1480 | if (IsLessThanOrLessEqual(IsAnd ? PredR : PredL)) { |
1481 | std::swap(a&: LHSC, b&: RHSC); |
1482 | std::swap(a&: PredL, b&: PredR); |
1483 | } |
1484 | if (IsLessThanOrLessEqual(IsAnd ? PredL : PredR)) { |
1485 | BuilderTy::FastMathFlagGuard Guard(Builder); |
1486 | Builder.setFastMathFlags(LHS->getFastMathFlags() | |
1487 | RHS->getFastMathFlags()); |
1488 | |
1489 | Value *FAbs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: LHS0); |
1490 | return Builder.CreateFCmp(P: PredL, LHS: FAbs, |
1491 | RHS: ConstantFP::get(Ty: LHS0->getType(), V: *LHSC)); |
1492 | } |
1493 | } |
1494 | |
1495 | return nullptr; |
1496 | } |
1497 | |
1498 | /// Match an fcmp against a special value that performs a test possible by |
1499 | /// llvm.is.fpclass. |
1500 | static bool matchIsFPClassLikeFCmp(Value *Op, Value *&ClassVal, |
1501 | uint64_t &ClassMask) { |
1502 | auto *FCmp = dyn_cast<FCmpInst>(Val: Op); |
1503 | if (!FCmp || !FCmp->hasOneUse()) |
1504 | return false; |
1505 | |
1506 | std::tie(args&: ClassVal, args&: ClassMask) = |
1507 | fcmpToClassTest(Pred: FCmp->getPredicate(), F: *FCmp->getParent()->getParent(), |
1508 | LHS: FCmp->getOperand(i_nocapture: 0), RHS: FCmp->getOperand(i_nocapture: 1)); |
1509 | return ClassVal != nullptr; |
1510 | } |
1511 | |
1512 | /// or (is_fpclass x, mask0), (is_fpclass x, mask1) |
1513 | /// -> is_fpclass x, (mask0 | mask1) |
1514 | /// and (is_fpclass x, mask0), (is_fpclass x, mask1) |
1515 | /// -> is_fpclass x, (mask0 & mask1) |
1516 | /// xor (is_fpclass x, mask0), (is_fpclass x, mask1) |
1517 | /// -> is_fpclass x, (mask0 ^ mask1) |
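// Illustrative sketch (the mask values are arbitrary, chosen only to show the
// bitwise combination of the class masks):
//   %a = call i1 @llvm.is.fpclass.f32(float %x, i32 3)
//   %b = call i1 @llvm.is.fpclass.f32(float %x, i32 516)
//   %r = or i1 %a, %b
//   --> call i1 @llvm.is.fpclass.f32(float %x, i32 519)   ; 3 | 516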
1518 | Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO, |
1519 | Value *Op0, Value *Op1) { |
1520 | Value *ClassVal0 = nullptr; |
1521 | Value *ClassVal1 = nullptr; |
1522 | uint64_t ClassMask0, ClassMask1; |
1523 | |
1524 | // Restrict to folding one fcmp into one is.fpclass for now; don't introduce a
1525 | // new is.fpclass call.
1526 | // |
1527 | // TODO: Support forming is.fpclass out of 2 separate fcmps when codegen is |
1528 | // better. |
1529 | |
1530 | bool IsLHSClass = |
1531 | match(V: Op0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::is_fpclass>( |
1532 | Op0: m_Value(V&: ClassVal0), Op1: m_ConstantInt(V&: ClassMask0)))); |
1533 | bool IsRHSClass = |
1534 | match(V: Op1, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::is_fpclass>( |
1535 | Op0: m_Value(V&: ClassVal1), Op1: m_ConstantInt(V&: ClassMask1)))); |
1536 | if ((((IsLHSClass || matchIsFPClassLikeFCmp(Op: Op0, ClassVal&: ClassVal0, ClassMask&: ClassMask0)) && |
1537 | (IsRHSClass || matchIsFPClassLikeFCmp(Op: Op1, ClassVal&: ClassVal1, ClassMask&: ClassMask1)))) && |
1538 | ClassVal0 == ClassVal1) { |
1539 | unsigned NewClassMask; |
1540 | switch (BO.getOpcode()) { |
1541 | case Instruction::And: |
1542 | NewClassMask = ClassMask0 & ClassMask1; |
1543 | break; |
1544 | case Instruction::Or: |
1545 | NewClassMask = ClassMask0 | ClassMask1; |
1546 | break; |
1547 | case Instruction::Xor: |
1548 | NewClassMask = ClassMask0 ^ ClassMask1; |
1549 | break; |
1550 | default: |
1551 | llvm_unreachable("not a binary logic operator" ); |
1552 | } |
1553 | |
1554 | if (IsLHSClass) { |
1555 | auto *II = cast<IntrinsicInst>(Val: Op0); |
1556 | II->setArgOperand( |
1557 | i: 1, v: ConstantInt::get(Ty: II->getArgOperand(i: 1)->getType(), V: NewClassMask)); |
1558 | return replaceInstUsesWith(I&: BO, V: II); |
1559 | } |
1560 | |
1561 | if (IsRHSClass) { |
1562 | auto *II = cast<IntrinsicInst>(Val: Op1); |
1563 | II->setArgOperand( |
1564 | i: 1, v: ConstantInt::get(Ty: II->getArgOperand(i: 1)->getType(), V: NewClassMask)); |
1565 | return replaceInstUsesWith(I&: BO, V: II); |
1566 | } |
1567 | |
1568 | CallInst *NewClass = |
1569 | Builder.CreateIntrinsic(ID: Intrinsic::is_fpclass, Types: {ClassVal0->getType()}, |
1570 | Args: {ClassVal0, Builder.getInt32(C: NewClassMask)}); |
1571 | return replaceInstUsesWith(I&: BO, V: NewClass); |
1572 | } |
1573 | |
1574 | return nullptr; |
1575 | } |
1576 | |
1577 | /// Look for the pattern that conditionally negates a value via math operations: |
1578 | /// cond.splat = sext i1 cond |
1579 | /// sub = add cond.splat, x |
1580 | /// xor = xor sub, cond.splat |
1581 | /// and rewrite it to do the same, but via logical operations: |
1582 | /// value.neg = sub 0, value |
1583 | /// res = select i1 cond, value.neg, value
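// Why the math form negates (a short sketch, not from the original source):
// when cond is true, cond.splat is all-ones (-1), so (x + -1) ^ -1 == ~(x - 1)
// == -x; when cond is false, cond.splat is 0, so (x + 0) ^ 0 == x.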
1584 | Instruction *InstCombinerImpl::canonicalizeConditionalNegationViaMathToSelect( |
1585 | BinaryOperator &I) { |
1586 | assert(I.getOpcode() == BinaryOperator::Xor && "Only for xor!" ); |
1587 | Value *Cond, *X; |
1588 | // As per complexity ordering, `xor` is not commutative here. |
1589 | if (!match(V: &I, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Value()), R: m_Value())) || |
1590 | !match(V: I.getOperand(i_nocapture: 1), P: m_SExt(Op: m_Value(V&: Cond))) || |
1591 | !Cond->getType()->isIntOrIntVectorTy(BitWidth: 1) || |
1592 | !match(V: I.getOperand(i_nocapture: 0), P: m_c_Add(L: m_SExt(Op: m_Specific(V: Cond)), R: m_Value(V&: X)))) |
1593 | return nullptr; |
1594 | return SelectInst::Create(C: Cond, S1: Builder.CreateNeg(V: X, Name: X->getName() + ".neg" ), |
1595 | S2: X); |
1596 | } |
1597 | |
1598 | /// This is a limited reassociation for a special case (see above) where we are
1599 | /// checking if two values are either both NAN (unordered) or not-NAN (ordered). |
1600 | /// This could be handled more generally in '-reassociate', but it seems like
1601 | /// an unlikely pattern for a large number of logic ops and fcmps. |
1602 | static Instruction *reassociateFCmps(BinaryOperator &BO, |
1603 | InstCombiner::BuilderTy &Builder) { |
1604 | Instruction::BinaryOps Opcode = BO.getOpcode(); |
1605 | assert((Opcode == Instruction::And || Opcode == Instruction::Or) && |
1606 | "Expecting and/or op for fcmp transform" ); |
1607 | |
1608 | // There are 4 commuted variants of the pattern. Canonicalize operands of this |
1609 | // logic op so an fcmp is operand 0 and a matching logic op is operand 1. |
1610 | Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1), *X; |
1611 | FCmpInst::Predicate Pred; |
1612 | if (match(V: Op1, P: m_FCmp(Pred, L: m_Value(), R: m_AnyZeroFP()))) |
1613 | std::swap(a&: Op0, b&: Op1); |
1614 | |
1615 | // Match inner binop and the predicate for combining 2 NAN checks into 1. |
1616 | Value *BO10, *BO11; |
1617 | FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD |
1618 | : FCmpInst::FCMP_UNO; |
1619 | if (!match(V: Op0, P: m_FCmp(Pred, L: m_Value(V&: X), R: m_AnyZeroFP())) || Pred != NanPred || |
1620 | !match(V: Op1, P: m_BinOp(Opcode, L: m_Value(V&: BO10), R: m_Value(V&: BO11)))) |
1621 | return nullptr; |
1622 | |
1623 | // The inner logic op must have a matching fcmp operand. |
1624 | Value *Y; |
1625 | if (!match(V: BO10, P: m_FCmp(Pred, L: m_Value(V&: Y), R: m_AnyZeroFP())) || |
1626 | Pred != NanPred || X->getType() != Y->getType()) |
1627 | std::swap(a&: BO10, b&: BO11); |
1628 | |
1629 | if (!match(V: BO10, P: m_FCmp(Pred, L: m_Value(V&: Y), R: m_AnyZeroFP())) || |
1630 | Pred != NanPred || X->getType() != Y->getType()) |
1631 | return nullptr; |
1632 | |
1633 | // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z |
1634 | // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z |
1635 | Value *NewFCmp = Builder.CreateFCmp(P: Pred, LHS: X, RHS: Y); |
1636 | if (auto *NewFCmpInst = dyn_cast<FCmpInst>(Val: NewFCmp)) { |
1637 | // Intersect FMF from the 2 source fcmps. |
1638 | NewFCmpInst->copyIRFlags(V: Op0); |
1639 | NewFCmpInst->andIRFlags(V: BO10); |
1640 | } |
1641 | return BinaryOperator::Create(Op: Opcode, S1: NewFCmp, S2: BO11); |
1642 | } |
1643 | |
1644 | /// Match variations of De Morgan's Laws: |
1645 | /// (~A & ~B) == (~(A | B)) |
1646 | /// (~A | ~B) == (~(A & B)) |
1647 | static Instruction *matchDeMorgansLaws(BinaryOperator &I, |
1648 | InstCombiner &IC) { |
1649 | const Instruction::BinaryOps Opcode = I.getOpcode(); |
1650 | assert((Opcode == Instruction::And || Opcode == Instruction::Or) && |
1651 | "Trying to match De Morgan's Laws with something other than and/or" ); |
1652 | |
1653 | // Flip the logic operation. |
1654 | const Instruction::BinaryOps FlippedOpcode = |
1655 | (Opcode == Instruction::And) ? Instruction::Or : Instruction::And; |
1656 | |
1657 | Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1); |
1658 | Value *A, *B; |
1659 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) && |
1660 | match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: B)))) && |
1661 | !IC.isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse()) && |
1662 | !IC.isFreeToInvert(V: B, WillInvertAllUses: B->hasOneUse())) { |
1663 | Value *AndOr = |
1664 | IC.Builder.CreateBinOp(Opc: FlippedOpcode, LHS: A, RHS: B, Name: I.getName() + ".demorgan" ); |
1665 | return BinaryOperator::CreateNot(Op: AndOr); |
1666 | } |
1667 | |
1668 | // The 'not' ops may require reassociation. |
1669 | // (A & ~B) & ~C --> A & ~(B | C) |
1670 | // (~B & A) & ~C --> A & ~(B | C) |
1671 | // (A | ~B) | ~C --> A | ~(B & C) |
1672 | // (~B | A) | ~C --> A | ~(B & C) |
1673 | Value *C; |
1674 | if (match(V: Op0, P: m_OneUse(SubPattern: m_c_BinOp(Opcode, L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B))))) && |
1675 | match(V: Op1, P: m_Not(V: m_Value(V&: C)))) { |
1676 | Value *FlippedBO = IC.Builder.CreateBinOp(Opc: FlippedOpcode, LHS: B, RHS: C); |
1677 | return BinaryOperator::Create(Op: Opcode, S1: A, S2: IC.Builder.CreateNot(V: FlippedBO)); |
1678 | } |
1679 | |
1680 | return nullptr; |
1681 | } |
1682 | |
1683 | bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) { |
1684 | Value *CastSrc = CI->getOperand(i_nocapture: 0); |
1685 | |
1686 | // Noop casts and casts of constants should be eliminated trivially. |
1687 | if (CI->getSrcTy() == CI->getDestTy() || isa<Constant>(Val: CastSrc)) |
1688 | return false; |
1689 | |
1690 | // If this cast is paired with another cast that can be eliminated, we prefer |
1691 | // to have it eliminated. |
1692 | if (const auto *PrecedingCI = dyn_cast<CastInst>(Val: CastSrc)) |
1693 | if (isEliminableCastPair(CI1: PrecedingCI, CI2: CI)) |
1694 | return false; |
1695 | |
1696 | return true; |
1697 | } |
1698 | |
1699 | /// Fold {and,or,xor} (cast X), C. |
1700 | static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, |
1701 | InstCombinerImpl &IC) { |
1702 | Constant *C = dyn_cast<Constant>(Val: Logic.getOperand(i_nocapture: 1)); |
1703 | if (!C) |
1704 | return nullptr; |
1705 | |
1706 | auto LogicOpc = Logic.getOpcode(); |
1707 | Type *DestTy = Logic.getType(); |
1708 | Type *SrcTy = Cast->getSrcTy(); |
1709 | |
1710 | // Move the logic operation ahead of a zext or sext if the constant is |
1711 | // unchanged in the smaller source type. Performing the logic in a smaller |
1712 | // type may provide more information to later folds, and the smaller logic |
1713 | // instruction may be cheaper (particularly in the case of vectors). |
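// A minimal sketch of the zext case (assuming an i8 -> i32 zext; the constant
// 15 survives truncation to i8 unchanged):
//   and (zext i8 %x to i32), 15 --> zext (and i8 %x, 15) to i32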
1714 | Value *X; |
1715 | if (match(V: Cast, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) { |
1716 | if (Constant *TruncC = IC.getLosslessUnsignedTrunc(C, TruncTy: SrcTy)) { |
1717 | // LogicOpc (zext X), C --> zext (LogicOpc X, C) |
1718 | Value *NewOp = IC.Builder.CreateBinOp(Opc: LogicOpc, LHS: X, RHS: TruncC); |
1719 | return new ZExtInst(NewOp, DestTy); |
1720 | } |
1721 | } |
1722 | |
1723 | if (match(V: Cast, P: m_OneUse(SubPattern: m_SExtLike(Op: m_Value(V&: X))))) { |
1724 | if (Constant *TruncC = IC.getLosslessSignedTrunc(C, TruncTy: SrcTy)) { |
1725 | // LogicOpc (sext X), C --> sext (LogicOpc X, C) |
1726 | Value *NewOp = IC.Builder.CreateBinOp(Opc: LogicOpc, LHS: X, RHS: TruncC); |
1727 | return new SExtInst(NewOp, DestTy); |
1728 | } |
1729 | } |
1730 | |
1731 | return nullptr; |
1732 | } |
1733 | |
1734 | /// Fold {and,or,xor} (cast X), Y. |
1735 | Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) { |
1736 | auto LogicOpc = I.getOpcode(); |
1737 | assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding" ); |
1738 | |
1739 | Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1); |
1740 | |
1741 | // fold bitwise(A >> BW - 1, zext(icmp)) (BW is the scalar bit width of the
1742 | // type of A)
1743 | // -> bitwise(zext(A < 0), zext(icmp)) |
1744 | // -> zext(bitwise(A < 0, icmp)) |
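// Hedged illustration (assuming i32 %a and a one-use icmp %c):
//   %sign = lshr i32 %a, 31
//   %zc   = zext i1 %c to i32
//   %r    = or i32 %sign, %zc
//   --> %slt = icmp slt i32 %a, 0
//       %or  = or i1 %slt, %c
//       %r   = zext i1 %or to i32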
1745 | auto FoldBitwiseICmpZeroWithICmp = [&](Value *Op0, |
1746 | Value *Op1) -> Instruction * { |
1747 | ICmpInst::Predicate Pred; |
1748 | Value *A; |
1749 | bool IsMatched = |
1750 | match(V: Op0, |
1751 | P: m_OneUse(SubPattern: m_LShr( |
1752 | L: m_Value(V&: A), |
1753 | R: m_SpecificInt(V: Op0->getType()->getScalarSizeInBits() - 1)))) && |
1754 | match(V: Op1, P: m_OneUse(SubPattern: m_ZExt(Op: m_ICmp(Pred, L: m_Value(), R: m_Value())))); |
1755 | |
1756 | if (!IsMatched) |
1757 | return nullptr; |
1758 | |
1759 | auto *ICmpL = |
1760 | Builder.CreateICmpSLT(LHS: A, RHS: Constant::getNullValue(Ty: A->getType())); |
1761 | auto *ICmpR = cast<ZExtInst>(Val: Op1)->getOperand(i_nocapture: 0); |
1762 | auto *BitwiseOp = Builder.CreateBinOp(Opc: LogicOpc, LHS: ICmpL, RHS: ICmpR); |
1763 | |
1764 | return new ZExtInst(BitwiseOp, Op0->getType()); |
1765 | }; |
1766 | |
1767 | if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op0, Op1)) |
1768 | return Ret; |
1769 | |
1770 | if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op1, Op0)) |
1771 | return Ret; |
1772 | |
1773 | CastInst *Cast0 = dyn_cast<CastInst>(Val: Op0); |
1774 | if (!Cast0) |
1775 | return nullptr; |
1776 | |
1777 | // This must be a cast from an integer or integer vector source type to allow |
1778 | // transformation of the logic operation to the source type. |
1779 | Type *DestTy = I.getType(); |
1780 | Type *SrcTy = Cast0->getSrcTy(); |
1781 | if (!SrcTy->isIntOrIntVectorTy()) |
1782 | return nullptr; |
1783 | |
1784 | if (Instruction *Ret = foldLogicCastConstant(Logic&: I, Cast: Cast0, IC&: *this)) |
1785 | return Ret; |
1786 | |
1787 | CastInst *Cast1 = dyn_cast<CastInst>(Val: Op1); |
1788 | if (!Cast1) |
1789 | return nullptr; |
1790 | |
1791 | // Both operands of the logic operation are casts. The casts must be the |
1792 | // same kind for reduction. |
1793 | Instruction::CastOps CastOpcode = Cast0->getOpcode(); |
1794 | if (CastOpcode != Cast1->getOpcode()) |
1795 | return nullptr; |
1796 | |
1797 | // If the source types do not match, but the casts are matching extends, we |
1798 | // can still narrow the logic op. |
1799 | if (SrcTy != Cast1->getSrcTy()) { |
1800 | Value *X, *Y; |
1801 | if (match(V: Cast0, P: m_OneUse(SubPattern: m_ZExtOrSExt(Op: m_Value(V&: X)))) && |
1802 | match(V: Cast1, P: m_OneUse(SubPattern: m_ZExtOrSExt(Op: m_Value(V&: Y))))) { |
1803 | // Cast the narrower source to the wider source type. |
1804 | unsigned XNumBits = X->getType()->getScalarSizeInBits(); |
1805 | unsigned YNumBits = Y->getType()->getScalarSizeInBits(); |
1806 | if (XNumBits < YNumBits) |
1807 | X = Builder.CreateCast(Op: CastOpcode, V: X, DestTy: Y->getType()); |
1808 | else |
1809 | Y = Builder.CreateCast(Op: CastOpcode, V: Y, DestTy: X->getType()); |
1810 | // Do the logic op in the intermediate width, then widen more. |
1811 | Value *NarrowLogic = Builder.CreateBinOp(Opc: LogicOpc, LHS: X, RHS: Y); |
1812 | return CastInst::Create(CastOpcode, S: NarrowLogic, Ty: DestTy); |
1813 | } |
1814 | |
1815 | // Give up for other cast opcodes. |
1816 | return nullptr; |
1817 | } |
1818 | |
1819 | Value *Cast0Src = Cast0->getOperand(i_nocapture: 0); |
1820 | Value *Cast1Src = Cast1->getOperand(i_nocapture: 0); |
1821 | |
1822 | // fold logic(cast(A), cast(B)) -> cast(logic(A, B)) |
1823 | if ((Cast0->hasOneUse() || Cast1->hasOneUse()) && |
1824 | shouldOptimizeCast(CI: Cast0) && shouldOptimizeCast(CI: Cast1)) { |
1825 | Value *NewOp = Builder.CreateBinOp(Opc: LogicOpc, LHS: Cast0Src, RHS: Cast1Src, |
1826 | Name: I.getName()); |
1827 | return CastInst::Create(CastOpcode, S: NewOp, Ty: DestTy); |
1828 | } |
1829 | |
1830 | return nullptr; |
1831 | } |
1832 | |
1833 | static Instruction *foldAndToXor(BinaryOperator &I, |
1834 | InstCombiner::BuilderTy &Builder) { |
1835 | assert(I.getOpcode() == Instruction::And); |
1836 | Value *Op0 = I.getOperand(i_nocapture: 0); |
1837 | Value *Op1 = I.getOperand(i_nocapture: 1); |
1838 | Value *A, *B; |
1839 | |
1840 | // Operand complexity canonicalization guarantees that the 'or' is Op0. |
1841 | // (A | B) & ~(A & B) --> A ^ B |
1842 | // (A | B) & ~(B & A) --> A ^ B |
1843 | if (match(V: &I, P: m_BinOp(L: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)), |
1844 | R: m_Not(V: m_c_And(L: m_Deferred(V: A), R: m_Deferred(V: B)))))) |
1845 | return BinaryOperator::CreateXor(V1: A, V2: B); |
1846 | |
1847 | // (A | ~B) & (~A | B) --> ~(A ^ B) |
1848 | // (A | ~B) & (B | ~A) --> ~(A ^ B) |
1849 | // (~B | A) & (~A | B) --> ~(A ^ B) |
1850 | // (~B | A) & (B | ~A) --> ~(A ^ B) |
1851 | if (Op0->hasOneUse() || Op1->hasOneUse()) |
1852 | if (match(V: &I, P: m_BinOp(L: m_c_Or(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B))), |
1853 | R: m_c_Or(L: m_Not(V: m_Deferred(V: A)), R: m_Deferred(V: B))))) |
1854 | return BinaryOperator::CreateNot(Op: Builder.CreateXor(LHS: A, RHS: B)); |
1855 | |
1856 | return nullptr; |
1857 | } |
1858 | |
1859 | static Instruction *foldOrToXor(BinaryOperator &I, |
1860 | InstCombiner::BuilderTy &Builder) { |
1861 | assert(I.getOpcode() == Instruction::Or); |
1862 | Value *Op0 = I.getOperand(i_nocapture: 0); |
1863 | Value *Op1 = I.getOperand(i_nocapture: 1); |
1864 | Value *A, *B; |
1865 | |
1866 | // Operand complexity canonicalization guarantees that the 'and' is Op0. |
1867 | // (A & B) | ~(A | B) --> ~(A ^ B) |
1868 | // (A & B) | ~(B | A) --> ~(A ^ B) |
1869 | if (Op0->hasOneUse() || Op1->hasOneUse()) |
1870 | if (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
1871 | match(V: Op1, P: m_Not(V: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B))))) |
1872 | return BinaryOperator::CreateNot(Op: Builder.CreateXor(LHS: A, RHS: B)); |
1873 | |
1874 | // Operand complexity canonicalization guarantees that the 'xor' is Op0. |
1875 | // (A ^ B) | ~(A | B) --> ~(A & B) |
1876 | // (A ^ B) | ~(B | A) --> ~(A & B) |
1877 | if (Op0->hasOneUse() || Op1->hasOneUse()) |
1878 | if (match(V: Op0, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))) && |
1879 | match(V: Op1, P: m_Not(V: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B))))) |
1880 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: A, RHS: B)); |
1881 | |
1882 | // (A & ~B) | (~A & B) --> A ^ B |
1883 | // (A & ~B) | (B & ~A) --> A ^ B |
1884 | // (~B & A) | (~A & B) --> A ^ B |
1885 | // (~B & A) | (B & ~A) --> A ^ B |
1886 | if (match(V: Op0, P: m_c_And(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B)))) && |
1887 | match(V: Op1, P: m_c_And(L: m_Not(V: m_Specific(V: A)), R: m_Specific(V: B)))) |
1888 | return BinaryOperator::CreateXor(V1: A, V2: B); |
1889 | |
1890 | return nullptr; |
1891 | } |
1892 | |
1893 | /// Return true if a constant shift amount is always less than the specified |
1894 | /// bit-width. If not, the shift could create poison in the narrower type. |
1895 | static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) { |
1896 | APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth); |
1897 | return match(V: C, P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold)); |
1898 | } |
1899 | |
1900 | /// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and |
1901 | /// a common zext operand: and (binop (zext X), C), (zext X). |
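// A hedged sketch of the intended rewrite (assuming an i8 -> i32 zext and that
// narrowing is profitable for the target type):
//   %z = zext i8 %x to i32
//   %a = add i32 %z, 12
//   %r = and i32 %a, %z
//   --> %na = add i8 %x, 12
//       %nr = and i8 %na, %x
//       %r  = zext i8 %nr to i32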
1902 | Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) { |
1903 | // This transform could also apply to {or, and, xor}, but there are better |
1904 | // folds for those cases, so we don't expect those patterns here. AShr is not |
1905 | // handled because it should always be transformed to LShr in this sequence. |
1906 | // The subtract transform is different because it has a constant on the left. |
1907 | // Add/mul commute the constant to RHS; sub with constant RHS becomes add. |
1908 | Value *Op0 = And.getOperand(i_nocapture: 0), *Op1 = And.getOperand(i_nocapture: 1); |
1909 | Constant *C; |
1910 | if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Specific(V: Op1), R: m_Constant(C)))) && |
1911 | !match(V: Op0, P: m_OneUse(SubPattern: m_Mul(L: m_Specific(V: Op1), R: m_Constant(C)))) && |
1912 | !match(V: Op0, P: m_OneUse(SubPattern: m_LShr(L: m_Specific(V: Op1), R: m_Constant(C)))) && |
1913 | !match(V: Op0, P: m_OneUse(SubPattern: m_Shl(L: m_Specific(V: Op1), R: m_Constant(C)))) && |
1914 | !match(V: Op0, P: m_OneUse(SubPattern: m_Sub(L: m_Constant(C), R: m_Specific(V: Op1))))) |
1915 | return nullptr; |
1916 | |
1917 | Value *X; |
1918 | if (!match(V: Op1, P: m_ZExt(Op: m_Value(V&: X))) || Op1->hasNUsesOrMore(N: 3)) |
1919 | return nullptr; |
1920 | |
1921 | Type *Ty = And.getType(); |
1922 | if (!isa<VectorType>(Val: Ty) && !shouldChangeType(From: Ty, To: X->getType())) |
1923 | return nullptr; |
1924 | |
1925 | // If we're narrowing a shift, the shift amount must be safe (less than the |
1926 | // width) in the narrower type. If the shift amount is greater, instsimplify |
1927 | // usually handles that case, but we can't guarantee/assert it. |
1928 | Instruction::BinaryOps Opc = cast<BinaryOperator>(Val: Op0)->getOpcode(); |
1929 | if (Opc == Instruction::LShr || Opc == Instruction::Shl) |
1930 | if (!canNarrowShiftAmt(C, BitWidth: X->getType()->getScalarSizeInBits())) |
1931 | return nullptr; |
1932 | |
1933 | // and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X) |
1934 | // and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X) |
1935 | Value *NewC = ConstantExpr::getTrunc(C, Ty: X->getType()); |
1936 | Value *NewBO = Opc == Instruction::Sub ? Builder.CreateBinOp(Opc, LHS: NewC, RHS: X) |
1937 | : Builder.CreateBinOp(Opc, LHS: X, RHS: NewC); |
1938 | return new ZExtInst(Builder.CreateAnd(LHS: NewBO, RHS: X), Ty); |
1939 | } |
1940 | |
1941 | /// Try folding relatively complex patterns for both And and Or operations |
1942 | /// with all And and Or swapped. |
1943 | static Instruction *foldComplexAndOrPatterns(BinaryOperator &I, |
1944 | InstCombiner::BuilderTy &Builder) { |
1945 | const Instruction::BinaryOps Opcode = I.getOpcode(); |
1946 | assert(Opcode == Instruction::And || Opcode == Instruction::Or); |
1947 | |
1948 | // Flip the logic operation. |
1949 | const Instruction::BinaryOps FlippedOpcode = |
1950 | (Opcode == Instruction::And) ? Instruction::Or : Instruction::And; |
1951 | |
1952 | Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1); |
1953 | Value *A, *B, *C, *X, *Y, *Dummy; |
1954 | |
1955 | // Match following expressions: |
1956 | // (~(A | B) & C) |
1957 | // (~(A & B) | C) |
1958 | // Captures X = ~(A | B) or ~(A & B) |
1959 | const auto matchNotOrAnd = |
1960 | [Opcode, FlippedOpcode](Value *Op, auto m_A, auto m_B, auto m_C, |
1961 | Value *&X, bool CountUses = false) -> bool { |
1962 | if (CountUses && !Op->hasOneUse()) |
1963 | return false; |
1964 | |
1965 | if (match(Op, m_c_BinOp(FlippedOpcode, |
1966 | m_CombineAnd(m_Value(V&: X), |
1967 | m_Not(m_c_BinOp(Opcode, m_A, m_B))), |
1968 | m_C))) |
1969 | return !CountUses || X->hasOneUse(); |
1970 | |
1971 | return false; |
1972 | }; |
1973 | |
1974 | // (~(A | B) & C) | ... --> ... |
1975 | // (~(A & B) | C) & ... --> ... |
1976 | // TODO: One use checks are conservative. We just need to check that a total |
1977 | // number of multiple used values does not exceed reduction |
1978 | // in operations. |
1979 | if (matchNotOrAnd(Op0, m_Value(V&: A), m_Value(V&: B), m_Value(V&: C), X)) { |
1980 | // (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A |
1981 | // (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A) |
1982 | if (matchNotOrAnd(Op1, m_Specific(V: A), m_Specific(V: C), m_Specific(V: B), Dummy, |
1983 | true)) { |
1984 | Value *Xor = Builder.CreateXor(LHS: B, RHS: C); |
1985 | return (Opcode == Instruction::Or) |
1986 | ? BinaryOperator::CreateAnd(V1: Xor, V2: Builder.CreateNot(V: A)) |
1987 | : BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Xor, RHS: A)); |
1988 | } |
1989 | |
1990 | // (~(A | B) & C) | (~(B | C) & A) --> (A ^ C) & ~B |
1991 | // (~(A & B) | C) & (~(B & C) | A) --> ~((A ^ C) & B) |
1992 | if (matchNotOrAnd(Op1, m_Specific(V: B), m_Specific(V: C), m_Specific(V: A), Dummy, |
1993 | true)) { |
1994 | Value *Xor = Builder.CreateXor(LHS: A, RHS: C); |
1995 | return (Opcode == Instruction::Or) |
1996 | ? BinaryOperator::CreateAnd(V1: Xor, V2: Builder.CreateNot(V: B)) |
1997 | : BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Xor, RHS: B)); |
1998 | } |
1999 | |
2000 | // (~(A | B) & C) | ~(A | C) --> ~((B & C) | A) |
2001 | // (~(A & B) | C) & ~(A & C) --> ~((B | C) & A) |
2002 | if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_OneUse( |
2003 | SubPattern: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: C))))))) |
2004 | return BinaryOperator::CreateNot(Op: Builder.CreateBinOp( |
2005 | Opc: Opcode, LHS: Builder.CreateBinOp(Opc: FlippedOpcode, LHS: B, RHS: C), RHS: A)); |
2006 | |
2007 | // (~(A | B) & C) | ~(B | C) --> ~((A & C) | B) |
2008 | // (~(A & B) | C) & ~(B & C) --> ~((A | C) & B) |
2009 | if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_OneUse( |
2010 | SubPattern: m_c_BinOp(Opcode, L: m_Specific(V: B), R: m_Specific(V: C))))))) |
2011 | return BinaryOperator::CreateNot(Op: Builder.CreateBinOp( |
2012 | Opc: Opcode, LHS: Builder.CreateBinOp(Opc: FlippedOpcode, LHS: A, RHS: C), RHS: B)); |
2013 | |
2014 | // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B))) |
2015 | // Note that the pattern with swapped and/or is not handled because the
2016 | // result would be more undefined (more poison-prone) than the source:
2017 | // (~(A & B) | C) & ~(C & (A ^ B)) --> (A ^ B ^ C) | ~(A | C) is invalid. |
2018 | if (Opcode == Instruction::Or && Op0->hasOneUse() && |
2019 | match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_CombineAnd( |
2020 | L: m_Value(V&: Y), |
2021 | R: m_c_BinOp(Opcode, L: m_Specific(V: C), |
2022 | R: m_c_Xor(L: m_Specific(V: A), R: m_Specific(V: B)))))))) { |
2023 | // X = ~(A | B) |
2024 | // Y = (C | (A ^ B))
2025 | Value *Or = cast<BinaryOperator>(Val: X)->getOperand(i_nocapture: 0); |
2026 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Or, RHS: Y)); |
2027 | } |
2028 | } |
2029 | |
2030 | // (~A & B & C) | ... --> ... |
2031 | // (~A | B | C) & ... --> ...
2032 | // TODO: One use checks are conservative. We just need to check that a total |
2033 | // number of multiple used values does not exceed reduction |
2034 | // in operations. |
2035 | if (match(V: Op0, |
2036 | P: m_OneUse(SubPattern: m_c_BinOp(Opcode: FlippedOpcode, |
2037 | L: m_BinOp(Opcode: FlippedOpcode, L: m_Value(V&: B), R: m_Value(V&: C)), |
2038 | R: m_CombineAnd(L: m_Value(V&: X), R: m_Not(V: m_Value(V&: A)))))) || |
2039 | match(V: Op0, P: m_OneUse(SubPattern: m_c_BinOp( |
2040 | Opcode: FlippedOpcode, |
2041 | L: m_c_BinOp(Opcode: FlippedOpcode, L: m_Value(V&: C), |
2042 | R: m_CombineAnd(L: m_Value(V&: X), R: m_Not(V: m_Value(V&: A)))), |
2043 | R: m_Value(V&: B))))) { |
2044 | // X = ~A |
2045 | // (~A & B & C) | ~(A | B | C) --> ~(A | (B ^ C)) |
2046 | // (~A | B | C) & ~(A & B & C) --> (~A | (B ^ C)) |
2047 | if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_c_BinOp( |
2048 | Opcode, L: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: B)), |
2049 | R: m_Specific(V: C))))) || |
2050 | match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_c_BinOp( |
2051 | Opcode, L: m_c_BinOp(Opcode, L: m_Specific(V: B), R: m_Specific(V: C)), |
2052 | R: m_Specific(V: A))))) || |
2053 | match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_c_BinOp( |
2054 | Opcode, L: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: C)), |
2055 | R: m_Specific(V: B)))))) { |
2056 | Value *Xor = Builder.CreateXor(LHS: B, RHS: C); |
2057 | return (Opcode == Instruction::Or) |
2058 | ? BinaryOperator::CreateNot(Op: Builder.CreateOr(LHS: Xor, RHS: A)) |
2059 | : BinaryOperator::CreateOr(V1: Xor, V2: X); |
2060 | } |
2061 | |
2062 | // (~A & B & C) | ~(A | B) --> (C | ~B) & ~A |
2063 | // (~A | B | C) & ~(A & B) --> (C & ~B) | ~A |
2064 | if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_OneUse( |
2065 | SubPattern: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: B))))))) |
2066 | return BinaryOperator::Create( |
2067 | Op: FlippedOpcode, S1: Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: Builder.CreateNot(V: B)), |
2068 | S2: X); |
2069 | |
2070 | // (~A & B & C) | ~(A | C) --> (B | ~C) & ~A |
2071 | // (~A | B | C) & ~(A & C) --> (B & ~C) | ~A |
2072 | if (match(V: Op1, P: m_OneUse(SubPattern: m_Not(V: m_OneUse( |
2073 | SubPattern: m_c_BinOp(Opcode, L: m_Specific(V: A), R: m_Specific(V: C))))))) |
2074 | return BinaryOperator::Create( |
2075 | Op: FlippedOpcode, S1: Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: Builder.CreateNot(V: C)), |
2076 | S2: X); |
2077 | } |
2078 | |
2079 | return nullptr; |
2080 | } |
2081 | |
2082 | /// Try to reassociate a pair of binops so that values with one use only are |
2083 | /// part of the same instruction. This may enable folds that are limited with |
2084 | /// multi-use restrictions and makes it more likely to match other patterns that |
2085 | /// are looking for a common operand. |
2086 | static Instruction *reassociateForUses(BinaryOperator &BO, |
2087 | InstCombinerImpl::BuilderTy &Builder) { |
2088 | Instruction::BinaryOps Opcode = BO.getOpcode(); |
2089 | Value *X, *Y, *Z; |
2090 | if (match(V: &BO, |
2091 | P: m_c_BinOp(Opcode, L: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: X), R: m_Value(V&: Y))), |
2092 | R: m_OneUse(SubPattern: m_Value(V&: Z))))) { |
2093 | if (!isa<Constant>(Val: X) && !isa<Constant>(Val: Y) && !isa<Constant>(Val: Z)) { |
2094 | // (X op Y) op Z --> (Y op Z) op X |
2095 | if (!X->hasOneUse()) { |
2096 | Value *YZ = Builder.CreateBinOp(Opc: Opcode, LHS: Y, RHS: Z); |
2097 | return BinaryOperator::Create(Op: Opcode, S1: YZ, S2: X); |
2098 | } |
2099 | // (X op Y) op Z --> (X op Z) op Y |
2100 | if (!Y->hasOneUse()) { |
2101 | Value *XZ = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Z); |
2102 | return BinaryOperator::Create(Op: Opcode, S1: XZ, S2: Y); |
2103 | } |
2104 | } |
2105 | } |
2106 | |
2107 | return nullptr; |
2108 | } |
2109 | |
2110 | // Match |
2111 | // (X + C2) | C |
2112 | // (X + C2) ^ C |
2113 | // (X + C2) & C |
2114 | // and convert to do the bitwise logic first: |
2115 | // (X | C) + C2 |
2116 | // (X ^ C) + C2 |
2117 | // (X & C) + C2 |
2118 | // iff the bits affected by the logic op are lower than the lowest bit affected by the math op
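// A hedged, concrete instance (assuming i32 operands):
//   (%x + 16) | 3 --> (%x | 3) + 16
// This holds because the 'or' only touches bits below the lowest bit the add
// can change (16 has four trailing zero bits, and 3 fits entirely below them).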
2119 | static Instruction *canonicalizeLogicFirst(BinaryOperator &I, |
2120 | InstCombiner::BuilderTy &Builder) { |
2121 | Type *Ty = I.getType(); |
2122 | Instruction::BinaryOps OpC = I.getOpcode(); |
2123 | Value *Op0 = I.getOperand(i_nocapture: 0); |
2124 | Value *Op1 = I.getOperand(i_nocapture: 1); |
2125 | Value *X; |
2126 | const APInt *C, *C2; |
2127 | |
2128 | if (!(match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C2)))) && |
2129 | match(V: Op1, P: m_APInt(Res&: C)))) |
2130 | return nullptr; |
2131 | |
2132 | unsigned Width = Ty->getScalarSizeInBits(); |
2133 | unsigned LastOneMath = Width - C2->countr_zero(); |
2134 | |
2135 | switch (OpC) { |
2136 | case Instruction::And: |
2137 | if (C->countl_one() < LastOneMath) |
2138 | return nullptr; |
2139 | break; |
2140 | case Instruction::Xor: |
2141 | case Instruction::Or: |
2142 | if (C->countl_zero() < LastOneMath) |
2143 | return nullptr; |
2144 | break; |
2145 | default: |
2146 | llvm_unreachable("Unexpected BinaryOp!" ); |
2147 | } |
2148 | |
2149 | Value *NewBinOp = Builder.CreateBinOp(Opc: OpC, LHS: X, RHS: ConstantInt::get(Ty, V: *C)); |
2150 | return BinaryOperator::CreateWithCopiedFlags(Opc: Instruction::Add, V1: NewBinOp, |
2151 | V2: ConstantInt::get(Ty, V: *C2), CopyO: Op0); |
2152 | } |
2153 | |
2154 | // binop(shift(ShiftedC1, ShAmt), shift(ShiftedC2, add(ShAmt, AddC))) -> |
2155 | // shift(binop(ShiftedC1, shift(ShiftedC2, AddC)), ShAmt) |
2156 | // where both shifts are the same and AddC is a valid shift amount. |
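// Hedged example (assuming i32 operands and shl as the common shift):
//   or (shl i32 1, %sh), (shl i32 1, (add i32 %sh, 2))
//   --> shl i32 5, %sh        ; 1 | (1 << 2) == 5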
2157 | Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) { |
2158 | assert((I.isBitwiseLogicOp() || I.getOpcode() == Instruction::Add) && |
2159 | "Unexpected opcode" ); |
2160 | |
2161 | Value *ShAmt; |
2162 | Constant *ShiftedC1, *ShiftedC2, *AddC; |
2163 | Type *Ty = I.getType(); |
2164 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
2165 | if (!match(V: &I, P: m_c_BinOp(L: m_Shift(L: m_ImmConstant(C&: ShiftedC1), R: m_Value(V&: ShAmt)), |
2166 | R: m_Shift(L: m_ImmConstant(C&: ShiftedC2), |
2167 | R: m_AddLike(L: m_Deferred(V: ShAmt), |
2168 | R: m_ImmConstant(C&: AddC)))))) |
2169 | return nullptr; |
2170 | |
2171 | // Make sure the add constant is a valid shift amount. |
2172 | if (!match(V: AddC, |
2173 | P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold: APInt(BitWidth, BitWidth)))) |
2174 | return nullptr; |
2175 | |
2176 | // Avoid constant expressions. |
2177 | auto *Op0Inst = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 0)); |
2178 | auto *Op1Inst = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1)); |
2179 | if (!Op0Inst || !Op1Inst) |
2180 | return nullptr; |
2181 | |
2182 | // Both shifts must be the same. |
2183 | Instruction::BinaryOps ShiftOp = |
2184 | static_cast<Instruction::BinaryOps>(Op0Inst->getOpcode()); |
2185 | if (ShiftOp != Op1Inst->getOpcode()) |
2186 | return nullptr; |
2187 | |
2188 | // For adds, only left shifts are supported. |
2189 | if (I.getOpcode() == Instruction::Add && ShiftOp != Instruction::Shl) |
2190 | return nullptr; |
2191 | |
2192 | Value *NewC = Builder.CreateBinOp( |
2193 | Opc: I.getOpcode(), LHS: ShiftedC1, RHS: Builder.CreateBinOp(Opc: ShiftOp, LHS: ShiftedC2, RHS: AddC)); |
2194 | return BinaryOperator::Create(Op: ShiftOp, S1: NewC, S2: ShAmt); |
2195 | } |
2196 | |
2197 | // Fold and/or/xor with two equal intrinsic IDs: |
2198 | // bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt)) |
2199 | // -> fshl(bitwise(A, C), bitwise(B, D), ShAmt) |
2200 | // bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt)) |
2201 | // -> fshr(bitwise(A, C), bitwise(B, D), ShAmt) |
2202 | // bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B)) |
2203 | // bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C))) |
2204 | // bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B)) |
2205 | // bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C))) |
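// A hedged instance of the constant-RHS bswap case (assuming i32):
//   xor (bswap i32 %a), 4278190080      ; 0xFF000000
//   --> bswap (xor i32 %a, 255)         ; bswap(0xFF000000) == 0xFF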
2206 | static Instruction * |
2207 | foldBitwiseLogicWithIntrinsics(BinaryOperator &I, |
2208 | InstCombiner::BuilderTy &Builder) { |
2209 | assert(I.isBitwiseLogicOp() && "Should be and/or/xor" );
2210 | if (!I.getOperand(i_nocapture: 0)->hasOneUse()) |
2211 | return nullptr; |
2212 | IntrinsicInst *X = dyn_cast<IntrinsicInst>(Val: I.getOperand(i_nocapture: 0)); |
2213 | if (!X) |
2214 | return nullptr; |
2215 | |
2216 | IntrinsicInst *Y = dyn_cast<IntrinsicInst>(Val: I.getOperand(i_nocapture: 1)); |
2217 | if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID())) |
2218 | return nullptr; |
2219 | |
2220 | Intrinsic::ID IID = X->getIntrinsicID(); |
2221 | const APInt *RHSC; |
2222 | // Try to match constant RHS. |
2223 | if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) || |
2224 | !match(V: I.getOperand(i_nocapture: 1), P: m_APInt(Res&: RHSC)))) |
2225 | return nullptr; |
2226 | |
2227 | switch (IID) { |
2228 | case Intrinsic::fshl: |
2229 | case Intrinsic::fshr: { |
2230 | if (X->getOperand(i_nocapture: 2) != Y->getOperand(i_nocapture: 2)) |
2231 | return nullptr; |
2232 | Value *NewOp0 = |
2233 | Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X->getOperand(i_nocapture: 0), RHS: Y->getOperand(i_nocapture: 0)); |
2234 | Value *NewOp1 = |
2235 | Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X->getOperand(i_nocapture: 1), RHS: Y->getOperand(i_nocapture: 1)); |
2236 | Function *F = Intrinsic::getDeclaration(M: I.getModule(), id: IID, Tys: I.getType()); |
2237 | return CallInst::Create(Func: F, Args: {NewOp0, NewOp1, X->getOperand(i_nocapture: 2)}); |
2238 | } |
2239 | case Intrinsic::bswap: |
2240 | case Intrinsic::bitreverse: { |
2241 | Value *NewOp0 = Builder.CreateBinOp( |
2242 | Opc: I.getOpcode(), LHS: X->getOperand(i_nocapture: 0), |
2243 | RHS: Y ? Y->getOperand(i_nocapture: 0) |
2244 | : ConstantInt::get(Ty: I.getType(), V: IID == Intrinsic::bswap |
2245 | ? RHSC->byteSwap() |
2246 | : RHSC->reverseBits())); |
2247 | Function *F = Intrinsic::getDeclaration(M: I.getModule(), id: IID, Tys: I.getType()); |
2248 | return CallInst::Create(Func: F, Args: {NewOp0}); |
2249 | } |
2250 | default: |
2251 | return nullptr; |
2252 | } |
2253 | } |
2254 | |
2255 | // Try to simplify V by replacing occurrences of Op with RepOp, but only look |
2256 | // through bitwise operations. In particular, for X | Y we try to replace Y with |
2257 | // 0 inside X and for X & Y we try to replace Y with -1 inside X. |
2258 | // Return the simplified result of X if successful, and nullptr otherwise. |
2259 | // If SimplifyOnly is true, no new instructions will be created. |
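// A sketch of the intent (illustrative only): given %r = or (and %a, (xor %y, -1)), %y,
// replacing %y with 0 inside the first operand simplifies that operand to %a,
// so %r can be simplified to or %a, %y.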
2260 | static Value *simplifyAndOrWithOpReplaced(Value *V, Value *Op, Value *RepOp, |
2261 | bool SimplifyOnly, |
2262 | InstCombinerImpl &IC, |
2263 | unsigned Depth = 0) { |
2264 | if (Op == RepOp) |
2265 | return nullptr; |
2266 | |
2267 | if (V == Op) |
2268 | return RepOp; |
2269 | |
2270 | auto *I = dyn_cast<BinaryOperator>(Val: V); |
2271 | if (!I || !I->isBitwiseLogicOp() || Depth >= 3) |
2272 | return nullptr; |
2273 | |
2274 | if (!I->hasOneUse()) |
2275 | SimplifyOnly = true; |
2276 | |
2277 | Value *NewOp0 = simplifyAndOrWithOpReplaced(V: I->getOperand(i_nocapture: 0), Op, RepOp, |
2278 | SimplifyOnly, IC, Depth: Depth + 1); |
2279 | Value *NewOp1 = simplifyAndOrWithOpReplaced(V: I->getOperand(i_nocapture: 1), Op, RepOp, |
2280 | SimplifyOnly, IC, Depth: Depth + 1); |
2281 | if (!NewOp0 && !NewOp1) |
2282 | return nullptr; |
2283 | |
2284 | if (!NewOp0) |
2285 | NewOp0 = I->getOperand(i_nocapture: 0); |
2286 | if (!NewOp1) |
2287 | NewOp1 = I->getOperand(i_nocapture: 1); |
2288 | |
2289 | if (Value *Res = simplifyBinOp(Opcode: I->getOpcode(), LHS: NewOp0, RHS: NewOp1, |
2290 | Q: IC.getSimplifyQuery().getWithInstruction(I))) |
2291 | return Res; |
2292 | |
2293 | if (SimplifyOnly) |
2294 | return nullptr; |
2295 | return IC.Builder.CreateBinOp(Opc: I->getOpcode(), LHS: NewOp0, RHS: NewOp1); |
2296 | } |
2297 | |
2298 | // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches |
2299 | // here. We should standardize that construct where it is needed or choose some |
2300 | // other way to ensure that commutated variants of patterns are not missed. |
2301 | Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { |
2302 | Type *Ty = I.getType(); |
2303 | |
2304 | if (Value *V = simplifyAndInst(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1), |
2305 | Q: SQ.getWithInstruction(I: &I))) |
2306 | return replaceInstUsesWith(I, V); |
2307 | |
2308 | if (SimplifyAssociativeOrCommutative(I)) |
2309 | return &I; |
2310 | |
2311 | if (Instruction *X = foldVectorBinop(Inst&: I)) |
2312 | return X; |
2313 | |
2314 | if (Instruction *Phi = foldBinopWithPhiOperands(BO&: I)) |
2315 | return Phi; |
2316 | |
2317 | // See if we can simplify any instructions used by the instruction whose sole |
2318 | // purpose is to compute bits we don't care about. |
2319 | if (SimplifyDemandedInstructionBits(Inst&: I)) |
2320 | return &I; |
2321 | |
2322 | // Do this before using distributive laws to catch simple and/or/not patterns. |
2323 | if (Instruction *Xor = foldAndToXor(I, Builder)) |
2324 | return Xor; |
2325 | |
2326 | if (Instruction *X = foldComplexAndOrPatterns(I, Builder)) |
2327 | return X; |
2328 | |
2329 | // (A|B)&(A|C) -> A|(B&C) etc |
2330 | if (Value *V = foldUsingDistributiveLaws(I)) |
2331 | return replaceInstUsesWith(I, V); |
2332 | |
2333 | if (Instruction *R = foldBinOpShiftWithShift(I)) |
2334 | return R; |
2335 | |
2336 | Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1); |
2337 | |
2338 | Value *X, *Y; |
2339 | const APInt *C; |
2340 | if ((match(V: Op0, P: m_OneUse(SubPattern: m_LogicalShift(L: m_One(), R: m_Value(V&: X)))) || |
2341 | (match(V: Op0, P: m_OneUse(SubPattern: m_Shl(L: m_APInt(Res&: C), R: m_Value(V&: X)))) && (*C)[0])) && |
2342 | match(V: Op1, P: m_One())) { |
2343 | // (1 >> X) & 1 --> zext(X == 0) |
2344 | // (C << X) & 1 --> zext(X == 0), when C is odd |
2345 | Value *IsZero = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty, V: 0)); |
2346 | return new ZExtInst(IsZero, Ty); |
2347 | } |
2348 | |
2349 | // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y |
2350 | Value *Neg; |
2351 | if (match(V: &I, |
2352 | P: m_c_And(L: m_CombineAnd(L: m_Value(V&: Neg), |
2353 | R: m_OneUse(SubPattern: m_Neg(V: m_And(L: m_Value(), R: m_One())))), |
2354 | R: m_Value(V&: Y)))) { |
2355 | Value *Cmp = Builder.CreateIsNull(Arg: Neg); |
2356 | return SelectInst::Create(C: Cmp, S1: ConstantInt::getNullValue(Ty), S2: Y); |
2357 | } |
2358 | |
2359 | // Canonicalize: |
2360 | // (X +/- Y) & Y --> ~X & Y when Y is a power of 2. |
2361 | if (match(V: &I, P: m_c_And(L: m_Value(V&: Y), R: m_OneUse(SubPattern: m_CombineOr( |
2362 | L: m_c_Add(L: m_Value(V&: X), R: m_Deferred(V: Y)), |
2363 | R: m_Sub(L: m_Value(V&: X), R: m_Deferred(V: Y)))))) && |
2364 | isKnownToBeAPowerOfTwo(V: Y, /*OrZero*/ true, /*Depth*/ 0, CxtI: &I)) |
2365 | return BinaryOperator::CreateAnd(V1: Builder.CreateNot(V: X), V2: Y); |
2366 | |
2367 | if (match(V: Op1, P: m_APInt(Res&: C))) { |
2368 | const APInt *XorC; |
2369 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: X), R: m_APInt(Res&: XorC))))) { |
2370 | // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) |
2371 | Constant *NewC = ConstantInt::get(Ty, V: *C & *XorC); |
2372 | Value *And = Builder.CreateAnd(LHS: X, RHS: Op1); |
2373 | And->takeName(V: Op0); |
2374 | return BinaryOperator::CreateXor(V1: And, V2: NewC); |
2375 | } |
2376 | |
2377 | const APInt *OrC; |
2378 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: X), R: m_APInt(Res&: OrC))))) { |
2379 | // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2) |
2380 | // NOTE: This reduces the number of bits set in the & mask, which |
2381 | // can expose opportunities for store narrowing for scalars. |
2382 | // NOTE: SimplifyDemandedBits should have already removed bits from C1 |
2383 | // that aren't set in C2, meaning we could replace (C1&C2) with C1
2384 | // above, but this feels safer.
2385 | APInt Together = *C & *OrC; |
2386 | Value *And = Builder.CreateAnd(LHS: X, RHS: ConstantInt::get(Ty, V: Together ^ *C)); |
2387 | And->takeName(V: Op0); |
2388 | return BinaryOperator::CreateOr(V1: And, V2: ConstantInt::get(Ty, V: Together)); |
2389 | } |
2390 | |
2391 | unsigned Width = Ty->getScalarSizeInBits(); |
2392 | const APInt *ShiftC; |
2393 | if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_AShr(L: m_Value(V&: X), R: m_APInt(Res&: ShiftC))))) && |
2394 | ShiftC->ult(RHS: Width)) { |
2395 | if (*C == APInt::getLowBitsSet(numBits: Width, loBitsSet: Width - ShiftC->getZExtValue())) { |
2396 | // We are clearing high bits that were potentially set by sext+ashr: |
2397 | // and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC |
2398 | Value *Sext = Builder.CreateSExt(V: X, DestTy: Ty); |
2399 | Constant *ShAmtC = ConstantInt::get(Ty, V: ShiftC->zext(width: Width)); |
2400 | return BinaryOperator::CreateLShr(V1: Sext, V2: ShAmtC); |
2401 | } |
2402 | } |
2403 | |
2404 | // If this 'and' clears the sign-bits added by ashr, replace with lshr: |
2405 | // and (ashr X, ShiftC), C --> lshr X, ShiftC |
2406 | if (match(V: Op0, P: m_AShr(L: m_Value(V&: X), R: m_APInt(Res&: ShiftC))) && ShiftC->ult(RHS: Width) && |
2407 | C->isMask(numBits: Width - ShiftC->getZExtValue())) |
2408 | return BinaryOperator::CreateLShr(V1: X, V2: ConstantInt::get(Ty, V: *ShiftC)); |
2409 | |
2410 | const APInt *AddC; |
2411 | if (match(V: Op0, P: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: AddC)))) { |
2412 | // If we are masking the result of the add down to exactly one bit and |
2413 | // the constant we are adding has no bits set below that bit, then the |
2414 | // add is flipping a single bit. Example: |
2415 | // (X + 4) & 4 --> (X & 4) ^ 4 |
2416 | if (Op0->hasOneUse() && C->isPowerOf2() && (*AddC & (*C - 1)) == 0) { |
2417 | assert((*C & *AddC) != 0 && "Expected common bit" ); |
2418 | Value *NewAnd = Builder.CreateAnd(LHS: X, RHS: Op1); |
2419 | return BinaryOperator::CreateXor(V1: NewAnd, V2: Op1); |
2420 | } |
2421 | } |
2422 | |
2423 | // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the |
2424 | // bitwidth of X and OP behaves well when given trunc(C1) and X. |
2425 | auto isNarrowableBinOpcode = [](BinaryOperator *B) { |
2426 | switch (B->getOpcode()) { |
2427 | case Instruction::Xor: |
2428 | case Instruction::Or: |
2429 | case Instruction::Mul: |
2430 | case Instruction::Add: |
2431 | case Instruction::Sub: |
2432 | return true; |
2433 | default: |
2434 | return false; |
2435 | } |
2436 | }; |
2437 | BinaryOperator *BO; |
2438 | if (match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(I&: BO))) && isNarrowableBinOpcode(BO)) { |
2439 | Instruction::BinaryOps BOpcode = BO->getOpcode(); |
2440 | Value *X; |
2441 | const APInt *C1; |
2442 | // TODO: The one-use restrictions could be relaxed a little if the AND |
2443 | // is going to be removed. |
2444 | // Try to narrow the 'and' and a binop with constant operand: |
2445 | // and (bo (zext X), C1), C --> zext (and (bo X, TruncC1), TruncC) |
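 | // For example (illustrative, i8 -> i32, bo == add): |
 | //   and (add (zext i8 %x to i32), 300), 15 |
 | //     --> zext (and (add i8 %x, 44), 15) to i32   ; trunc(300) to i8 == 44 |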
2446 | if (match(V: BO, P: m_c_BinOp(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))), R: m_APInt(Res&: C1))) && |
2447 | C->isIntN(N: X->getType()->getScalarSizeInBits())) { |
2448 | unsigned XWidth = X->getType()->getScalarSizeInBits(); |
2449 | Constant *TruncC1 = ConstantInt::get(Ty: X->getType(), V: C1->trunc(width: XWidth)); |
2450 | Value *BinOp = isa<ZExtInst>(Val: BO->getOperand(i_nocapture: 0)) |
2451 | ? Builder.CreateBinOp(Opc: BOpcode, LHS: X, RHS: TruncC1) |
2452 | : Builder.CreateBinOp(Opc: BOpcode, LHS: TruncC1, RHS: X); |
2453 | Constant *TruncC = ConstantInt::get(Ty: X->getType(), V: C->trunc(width: XWidth)); |
2454 | Value *And = Builder.CreateAnd(LHS: BinOp, RHS: TruncC); |
2455 | return new ZExtInst(And, Ty); |
2456 | } |
2457 | |
2458 | // Similar to above: if the mask matches the zext input width, then the |
2459 | // 'and' can be eliminated, so we can truncate the other variable op: |
2460 | // and (bo (zext X), Y), C --> zext (bo X, (trunc Y)) |
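 | // For example (illustrative, i8 -> i32, bo == add, C == 255): |
 | //   and (add (zext i8 %x to i32), %y), 255 |
 | //     --> zext (add i8 %x, (trunc i32 %y to i8)) to i32 |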
2461 | if (isa<Instruction>(Val: BO->getOperand(i_nocapture: 0)) && |
2462 | match(V: BO->getOperand(i_nocapture: 0), P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && |
2463 | C->isMask(numBits: X->getType()->getScalarSizeInBits())) { |
2464 | Y = BO->getOperand(i_nocapture: 1); |
2465 | Value *TrY = Builder.CreateTrunc(V: Y, DestTy: X->getType(), Name: Y->getName() + ".tr" ); |
2466 | Value *NewBO = |
2467 | Builder.CreateBinOp(Opc: BOpcode, LHS: X, RHS: TrY, Name: BO->getName() + ".narrow" ); |
2468 | return new ZExtInst(NewBO, Ty); |
2469 | } |
2470 | // and (bo Y, (zext X)), C --> zext (bo (trunc Y), X) |
2471 | if (isa<Instruction>(Val: BO->getOperand(i_nocapture: 1)) && |
2472 | match(V: BO->getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && |
2473 | C->isMask(numBits: X->getType()->getScalarSizeInBits())) { |
2474 | Y = BO->getOperand(i_nocapture: 0); |
2475 | Value *TrY = Builder.CreateTrunc(V: Y, DestTy: X->getType(), Name: Y->getName() + ".tr" ); |
2476 | Value *NewBO = |
2477 | Builder.CreateBinOp(Opc: BOpcode, LHS: TrY, RHS: X, Name: BO->getName() + ".narrow" ); |
2478 | return new ZExtInst(NewBO, Ty); |
2479 | } |
2480 | } |
2481 | |
2482 | // This is intentionally placed after the narrowing transforms for |
2483 | // efficiency (transform directly to the narrow logic op if possible). |
2484 | // If the mask is only needed on one incoming arm, push the 'and' op up. |
2485 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: X), R: m_Value(V&: Y)))) || |
2486 | match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
2487 | APInt NotAndMask(~(*C)); |
2488 | BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Val: Op0)->getOpcode(); |
2489 | if (MaskedValueIsZero(V: X, Mask: NotAndMask, Depth: 0, CxtI: &I)) { |
2490 | // Not masking anything out for the LHS, move mask to RHS. |
2491 | // and ({x}or X, Y), C --> {x}or X, (and Y, C) |
2492 | Value *NewRHS = Builder.CreateAnd(LHS: Y, RHS: Op1, Name: Y->getName() + ".masked" ); |
2493 | return BinaryOperator::Create(Op: BinOp, S1: X, S2: NewRHS); |
2494 | } |
2495 | if (!isa<Constant>(Val: Y) && MaskedValueIsZero(V: Y, Mask: NotAndMask, Depth: 0, CxtI: &I)) { |
2496 | // Not masking anything out for the RHS, move mask to LHS. |
2497 | // and ({x}or X, Y), C --> {x}or (and X, C), Y |
2498 | Value *NewLHS = Builder.CreateAnd(LHS: X, RHS: Op1, Name: X->getName() + ".masked" ); |
2499 | return BinaryOperator::Create(Op: BinOp, S1: NewLHS, S2: Y); |
2500 | } |
2501 | } |
2502 | |
2503 | // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2 |
2504 | // constant, test if the shift amount equals the offset bit index: |
2505 | // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0 |
2506 | // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0 |
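 | // For example (illustrative): (2 << X) & 8 --> X == 2 ? 8 : 0, and |
 | // (8 >> X) & 2 --> X == 2 ? 2 : 0. |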
2507 | if (C->isPowerOf2() && |
2508 | match(V: Op0, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Power2(V&: ShiftC), R: m_Value(V&: X))))) { |
2509 | int Log2ShiftC = ShiftC->exactLogBase2(); |
2510 | int Log2C = C->exactLogBase2(); |
2511 | bool IsShiftLeft = |
2512 | cast<BinaryOperator>(Val: Op0)->getOpcode() == Instruction::Shl; |
2513 | int BitNum = IsShiftLeft ? Log2C - Log2ShiftC : Log2ShiftC - Log2C; |
2514 | assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask" ); |
2515 | Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty, V: BitNum)); |
2516 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty, V: *C), |
2517 | S2: ConstantInt::getNullValue(Ty)); |
2518 | } |
2519 | |
2520 | Constant *C1, *C2; |
2521 | const APInt *C3 = C; |
2522 | Value *X; |
2523 | if (C3->isPowerOf2()) { |
2524 | Constant *Log2C3 = ConstantInt::get(Ty, V: C3->countr_zero()); |
2525 | if (match(V: Op0, P: m_OneUse(SubPattern: m_LShr(L: m_Shl(L: m_ImmConstant(C&: C1), R: m_Value(V&: X)), |
2526 | R: m_ImmConstant(C&: C2)))) && |
2527 | match(V: C1, P: m_Power2())) { |
2528 | Constant *Log2C1 = ConstantExpr::getExactLogBase2(C: C1); |
2529 | Constant *LshrC = ConstantExpr::getAdd(C1: C2, C2: Log2C3); |
2530 | KnownBits KnownLShrc = computeKnownBits(V: LshrC, Depth: 0, CxtI: nullptr); |
2531 | if (KnownLShrc.getMaxValue().ult(RHS: Width)) { |
2532 | // iff C1 and C3 are powers of 2 and C2 + cttz(C3) < BitWidth: |
2533 | // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0 |
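 | // For example (illustrative): ((4 << X) >> 1) & 8 --> X == 2 ? 8 : 0, |
 | // since cttz(8) + 1 - cttz(4) == 2. |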
2534 | Constant *CmpC = ConstantExpr::getSub(C1: LshrC, C2: Log2C1); |
2535 | Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: CmpC); |
2536 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty, V: *C3), |
2537 | S2: ConstantInt::getNullValue(Ty)); |
2538 | } |
2539 | } |
2540 | |
2541 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Shl(L: m_LShr(L: m_ImmConstant(C&: C1), R: m_Value(V&: X)), |
2542 | R: m_ImmConstant(C&: C2)))) && |
2543 | match(V: C1, P: m_Power2())) { |
2544 | Constant *Log2C1 = ConstantExpr::getExactLogBase2(C: C1); |
2545 | Constant *Cmp = |
2546 | ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_ULT, LHS: Log2C3, RHS: C2, DL); |
2547 | if (Cmp && Cmp->isZeroValue()) { |
2548 | // iff C1 and C3 are powers of 2 and Log2(C3) >= C2: |
2549 | // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0 |
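 | // For example (illustrative): ((8 >> X) << 2) & 4 --> X == 3 ? 4 : 0, |
 | // since cttz(8) + 2 - cttz(4) == 3. |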
2550 | Constant *ShlC = ConstantExpr::getAdd(C1: C2, C2: Log2C1); |
2551 | Constant *CmpC = ConstantExpr::getSub(C1: ShlC, C2: Log2C3); |
2552 | Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: CmpC); |
2553 | return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty, V: *C3), |
2554 | S2: ConstantInt::getNullValue(Ty)); |
2555 | } |
2556 | } |
2557 | } |
2558 | } |
2559 | |
2560 | // If we are clearing the sign bit of a floating-point value, convert this to |
2561 | // fabs, then cast back to integer. |
2562 | // |
2563 | // This is a generous interpretation for noimplicitfloat; this is not a true |
2564 | // floating-point operation. |
2565 | // |
2566 | // Assumes any IEEE-represented type has the sign bit in the high bit. |
2567 | // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt |
2568 | Value *CastOp; |
2569 | if (match(V: Op0, P: m_ElementWiseBitCast(Op: m_Value(V&: CastOp))) && |
2570 | match(V: Op1, P: m_MaxSignedValue()) && |
2571 | !Builder.GetInsertBlock()->getParent()->hasFnAttribute( |
2572 | Kind: Attribute::NoImplicitFloat)) { |
2573 | Type *EltTy = CastOp->getType()->getScalarType(); |
2574 | if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { |
2575 | Value *FAbs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: CastOp); |
2576 | return new BitCastInst(FAbs, I.getType()); |
2577 | } |
2578 | } |
2579 | |
2580 | // and(shl(zext(X), Y), SignMask) -> and(sext(X), SignMask) |
2581 | // where Y equals the result width minus the width of X (a valid shift amount). |
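 | // For example (illustrative, i8 zext'd to i32, so Y must equal 24): |
 | //   and (shl (zext i8 %x to i32), 24), 0x80000000 |
 | //     --> and (sext i8 %x to i32), 0x80000000 |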
2582 | if (match(V: &I, P: m_And(L: m_OneUse(SubPattern: m_Shl(L: m_ZExt(Op: m_Value(V&: X)), R: m_Value(V&: Y))), |
2583 | R: m_SignMask())) && |
2584 | match(V: Y, P: m_SpecificInt_ICMP( |
2585 | Predicate: ICmpInst::Predicate::ICMP_EQ, |
2586 | Threshold: APInt(Ty->getScalarSizeInBits(), |
2587 | Ty->getScalarSizeInBits() - |
2588 | X->getType()->getScalarSizeInBits())))) { |
2589 | auto *SExt = Builder.CreateSExt(V: X, DestTy: Ty, Name: X->getName() + ".signext" ); |
2590 | return BinaryOperator::CreateAnd(V1: SExt, V2: Op1); |
2591 | } |
2592 | |
2593 | if (Instruction *Z = narrowMaskedBinOp(And&: I)) |
2594 | return Z; |
2595 | |
2596 | if (I.getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
2597 | if (auto *SI0 = dyn_cast<SelectInst>(Val: Op0)) { |
2598 | if (auto *R = |
2599 | foldAndOrOfSelectUsingImpliedCond(Op: Op1, SI&: *SI0, /* IsAnd */ true)) |
2600 | return R; |
2601 | } |
2602 | if (auto *SI1 = dyn_cast<SelectInst>(Val: Op1)) { |
2603 | if (auto *R = |
2604 | foldAndOrOfSelectUsingImpliedCond(Op: Op0, SI&: *SI1, /* IsAnd */ true)) |
2605 | return R; |
2606 | } |
2607 | } |
2608 | |
2609 | if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) |
2610 | return FoldedLogic; |
2611 | |
2612 | if (Instruction *DeMorgan = matchDeMorgansLaws(I, IC&: *this)) |
2613 | return DeMorgan; |
2614 | |
2615 | { |
2616 | Value *A, *B, *C; |
2617 | // A & ~(A ^ B) --> A & B |
2618 | if (match(V: Op1, P: m_Not(V: m_c_Xor(L: m_Specific(V: Op0), R: m_Value(V&: B))))) |
2619 | return BinaryOperator::CreateAnd(V1: Op0, V2: B); |
2620 | // ~(A ^ B) & A --> A & B |
2621 | if (match(V: Op0, P: m_Not(V: m_c_Xor(L: m_Specific(V: Op1), R: m_Value(V&: B))))) |
2622 | return BinaryOperator::CreateAnd(V1: Op1, V2: B); |
2623 | |
2624 | // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C |
2625 | if (match(V: Op0, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))) && |
2626 | match(V: Op1, P: m_Xor(L: m_Xor(L: m_Specific(V: B), R: m_Value(V&: C)), R: m_Specific(V: A)))) { |
2627 | Value *NotC = Op1->hasOneUse() |
2628 | ? Builder.CreateNot(V: C) |
2629 | : getFreelyInverted(V: C, WillInvertAllUses: C->hasOneUse(), Builder: &Builder); |
2630 | if (NotC != nullptr) |
2631 | return BinaryOperator::CreateAnd(V1: Op0, V2: NotC); |
2632 | } |
2633 | |
2634 | // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C |
2635 | if (match(V: Op0, P: m_Xor(L: m_Xor(L: m_Value(V&: A), R: m_Value(V&: C)), R: m_Value(V&: B))) && |
2636 | match(V: Op1, P: m_Xor(L: m_Specific(V: B), R: m_Specific(V: A)))) { |
2637 | Value *NotC = Op0->hasOneUse() |
2638 | ? Builder.CreateNot(V: C) |
2639 | : getFreelyInverted(V: C, WillInvertAllUses: C->hasOneUse(), Builder: &Builder); |
2640 | if (NotC != nullptr) |
2641 | return BinaryOperator::CreateAnd(V1: Op1, V2: NotC); |
2642 | } |
2643 | |
2644 | // (A | B) & (~A ^ B) -> A & B |
2645 | // (A | B) & (B ^ ~A) -> A & B |
2646 | // (B | A) & (~A ^ B) -> A & B |
2647 | // (B | A) & (B ^ ~A) -> A & B |
2648 | if (match(V: Op1, P: m_c_Xor(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B))) && |
2649 | match(V: Op0, P: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))) |
2650 | return BinaryOperator::CreateAnd(V1: A, V2: B); |
2651 | |
2652 | // (~A ^ B) & (A | B) -> A & B |
2653 | // (~A ^ B) & (B | A) -> A & B |
2654 | // (B ^ ~A) & (A | B) -> A & B |
2655 | // (B ^ ~A) & (B | A) -> A & B |
2656 | if (match(V: Op0, P: m_c_Xor(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B))) && |
2657 | match(V: Op1, P: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))) |
2658 | return BinaryOperator::CreateAnd(V1: A, V2: B); |
2659 | |
2660 | // (~A | B) & (A ^ B) -> ~A & B |
2661 | // (~A | B) & (B ^ A) -> ~A & B |
2662 | // (B | ~A) & (A ^ B) -> ~A & B |
2663 | // (B | ~A) & (B ^ A) -> ~A & B |
2664 | if (match(V: Op0, P: m_c_Or(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B))) && |
2665 | match(V: Op1, P: m_c_Xor(L: m_Specific(V: A), R: m_Specific(V: B)))) |
2666 | return BinaryOperator::CreateAnd(V1: Builder.CreateNot(V: A), V2: B); |
2667 | |
2668 | // (A ^ B) & (~A | B) -> ~A & B |
2669 | // (B ^ A) & (~A | B) -> ~A & B |
2670 | // (A ^ B) & (B | ~A) -> ~A & B |
2671 | // (B ^ A) & (B | ~A) -> ~A & B |
2672 | if (match(V: Op1, P: m_c_Or(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B))) && |
2673 | match(V: Op0, P: m_c_Xor(L: m_Specific(V: A), R: m_Specific(V: B)))) |
2674 | return BinaryOperator::CreateAnd(V1: Builder.CreateNot(V: A), V2: B); |
2675 | } |
2676 | |
2677 | { |
2678 | ICmpInst *LHS = dyn_cast<ICmpInst>(Val: Op0); |
2679 | ICmpInst *RHS = dyn_cast<ICmpInst>(Val: Op1); |
2680 | if (LHS && RHS) |
2681 | if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ true)) |
2682 | return replaceInstUsesWith(I, V: Res); |
2683 | |
2684 | // TODO: Make this recursive; it's a little tricky because an arbitrary |
2685 | // number of 'and' instructions might have to be created. |
2686 | if (LHS && match(V: Op1, P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
2687 | bool IsLogical = isa<SelectInst>(Val: Op1); |
2688 | // LHS & (X && Y) --> (LHS && X) && Y |
2689 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: X)) |
2690 | if (Value *Res = |
2691 | foldAndOrOfICmps(LHS, RHS: Cmp, I, /* IsAnd */ true, IsLogical)) |
2692 | return replaceInstUsesWith(I, V: IsLogical |
2693 | ? Builder.CreateLogicalAnd(Cond1: Res, Cond2: Y) |
2694 | : Builder.CreateAnd(LHS: Res, RHS: Y)); |
2695 | // LHS & (X && Y) --> X && (LHS & Y) |
2696 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: Y)) |
2697 | if (Value *Res = foldAndOrOfICmps(LHS, RHS: Cmp, I, /* IsAnd */ true, |
2698 | /* IsLogical */ false)) |
2699 | return replaceInstUsesWith(I, V: IsLogical |
2700 | ? Builder.CreateLogicalAnd(Cond1: X, Cond2: Res) |
2701 | : Builder.CreateAnd(LHS: X, RHS: Res)); |
2702 | } |
2703 | if (RHS && match(V: Op0, P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
2704 | bool IsLogical = isa<SelectInst>(Val: Op0); |
2705 | // (X && Y) & RHS --> (X && RHS) && Y |
2706 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: X)) |
2707 | if (Value *Res = |
2708 | foldAndOrOfICmps(LHS: Cmp, RHS, I, /* IsAnd */ true, IsLogical)) |
2709 | return replaceInstUsesWith(I, V: IsLogical |
2710 | ? Builder.CreateLogicalAnd(Cond1: Res, Cond2: Y) |
2711 | : Builder.CreateAnd(LHS: Res, RHS: Y)); |
2712 | // (X && Y) & RHS --> X && (Y & RHS) |
2713 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: Y)) |
2714 | if (Value *Res = foldAndOrOfICmps(LHS: Cmp, RHS, I, /* IsAnd */ true, |
2715 | /* IsLogical */ false)) |
2716 | return replaceInstUsesWith(I, V: IsLogical |
2717 | ? Builder.CreateLogicalAnd(Cond1: X, Cond2: Res) |
2718 | : Builder.CreateAnd(LHS: X, RHS: Res)); |
2719 | } |
2720 | } |
2721 | |
2722 | if (FCmpInst *LHS = dyn_cast<FCmpInst>(Val: I.getOperand(i_nocapture: 0))) |
2723 | if (FCmpInst *RHS = dyn_cast<FCmpInst>(Val: I.getOperand(i_nocapture: 1))) |
2724 | if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true)) |
2725 | return replaceInstUsesWith(I, V: Res); |
2726 | |
2727 | if (Instruction *FoldedFCmps = reassociateFCmps(BO&: I, Builder)) |
2728 | return FoldedFCmps; |
2729 | |
2730 | if (Instruction *CastedAnd = foldCastedBitwiseLogic(I)) |
2731 | return CastedAnd; |
2732 | |
2733 | if (Instruction *Sel = foldBinopOfSextBoolToSelect(I)) |
2734 | return Sel; |
2735 | |
2736 | // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>. |
2737 | // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold |
2738 | // with binop identity constant. But creating a select with non-constant |
2739 | // arm may not be reversible due to poison semantics. Is that a good |
2740 | // canonicalization? |
2741 | Value *A, *B; |
2742 | if (match(V: &I, P: m_c_And(L: m_SExt(Op: m_Value(V&: A)), R: m_Value(V&: B))) && |
2743 | A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
2744 | return SelectInst::Create(C: A, S1: B, S2: Constant::getNullValue(Ty)); |
2745 | |
2746 | // Similarly, a 'not' of the bool translates to a swap of the select arms: |
2747 | // ~sext(A) & B / B & ~sext(A) --> A ? 0 : B |
2748 | if (match(V: &I, P: m_c_And(L: m_Not(V: m_SExt(Op: m_Value(V&: A))), R: m_Value(V&: B))) && |
2749 | A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
2750 | return SelectInst::Create(C: A, S1: Constant::getNullValue(Ty), S2: B); |
2751 | |
2752 | // and(zext(A), B) -> A ? (B & 1) : 0 |
2753 | if (match(V: &I, P: m_c_And(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: A))), R: m_Value(V&: B))) && |
2754 | A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
2755 | return SelectInst::Create(C: A, S1: Builder.CreateAnd(LHS: B, RHS: ConstantInt::get(Ty, V: 1)), |
2756 | S2: Constant::getNullValue(Ty)); |
2757 | |
2758 | // (-1 + A) & B --> A ? 0 : B where A is 0/1. |
2759 | if (match(V: &I, P: m_c_And(L: m_OneUse(SubPattern: m_Add(L: m_ZExtOrSelf(Op: m_Value(V&: A)), R: m_AllOnes())), |
2760 | R: m_Value(V&: B)))) { |
2761 | if (A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
2762 | return SelectInst::Create(C: A, S1: Constant::getNullValue(Ty), S2: B); |
2763 | if (computeKnownBits(V: A, /* Depth */ 0, CxtI: &I).countMaxActiveBits() <= 1) { |
2764 | return SelectInst::Create( |
2765 | C: Builder.CreateICmpEQ(LHS: A, RHS: Constant::getNullValue(Ty: A->getType())), S1: B, |
2766 | S2: Constant::getNullValue(Ty)); |
2767 | } |
2768 | } |
2769 | |
2770 | // (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0 -- with optional sext |
2771 | if (match(V: &I, P: m_c_And(L: m_OneUse(SubPattern: m_SExtOrSelf( |
2772 | Op: m_AShr(L: m_Value(V&: X), R: m_APIntAllowPoison(Res&: C)))), |
2773 | R: m_Value(V&: Y))) && |
2774 | *C == X->getType()->getScalarSizeInBits() - 1) { |
2775 | Value *IsNeg = Builder.CreateIsNeg(Arg: X, Name: "isneg" ); |
2776 | return SelectInst::Create(C: IsNeg, S1: Y, S2: ConstantInt::getNullValue(Ty)); |
2777 | } |
2778 | // If there's a 'not' of the shifted value, swap the select operands: |
2779 | // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y -- with optional sext |
2780 | if (match(V: &I, P: m_c_And(L: m_OneUse(SubPattern: m_SExtOrSelf( |
2781 | Op: m_Not(V: m_AShr(L: m_Value(V&: X), R: m_APIntAllowPoison(Res&: C))))), |
2782 | R: m_Value(V&: Y))) && |
2783 | *C == X->getType()->getScalarSizeInBits() - 1) { |
2784 | Value *IsNeg = Builder.CreateIsNeg(Arg: X, Name: "isneg" ); |
2785 | return SelectInst::Create(C: IsNeg, S1: ConstantInt::getNullValue(Ty), S2: Y); |
2786 | } |
2787 | |
2788 | // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions |
2789 | if (sinkNotIntoOtherHandOfLogicalOp(I)) |
2790 | return &I; |
2791 | |
2792 | // An 'and' recurrence w/loop-invariant step is equivalent to (and start, step) |
2793 | PHINode *PN = nullptr; |
2794 | Value *Start = nullptr, *Step = nullptr; |
2795 | if (matchSimpleRecurrence(I: &I, P&: PN, Start, Step) && DT.dominates(Def: Step, User: PN)) |
2796 | return replaceInstUsesWith(I, V: Builder.CreateAnd(LHS: Start, RHS: Step)); |
2797 | |
2798 | if (Instruction *R = reassociateForUses(BO&: I, Builder)) |
2799 | return R; |
2800 | |
2801 | if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder)) |
2802 | return Canonicalized; |
2803 | |
2804 | if (Instruction *Folded = foldLogicOfIsFPClass(BO&: I, Op0, Op1)) |
2805 | return Folded; |
2806 | |
2807 | if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) |
2808 | return Res; |
2809 | |
2810 | if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) |
2811 | return Res; |
2812 | |
2813 | if (Value *V = |
2814 | simplifyAndOrWithOpReplaced(V: Op0, Op: Op1, RepOp: Constant::getAllOnesValue(Ty), |
2815 | /*SimplifyOnly*/ false, IC&: *this)) |
2816 | return BinaryOperator::CreateAnd(V1: V, V2: Op1); |
2817 | if (Value *V = |
2818 | simplifyAndOrWithOpReplaced(V: Op1, Op: Op0, RepOp: Constant::getAllOnesValue(Ty), |
2819 | /*SimplifyOnly*/ false, IC&: *this)) |
2820 | return BinaryOperator::CreateAnd(V1: Op0, V2: V); |
2821 | |
2822 | return nullptr; |
2823 | } |
2824 | |
2825 | Instruction *InstCombinerImpl::matchBSwapOrBitReverse(Instruction &I, |
2826 | bool MatchBSwaps, |
2827 | bool MatchBitReversals) { |
2828 | SmallVector<Instruction *, 4> Insts; |
2829 | if (!recognizeBSwapOrBitReverseIdiom(I: &I, MatchBSwaps, MatchBitReversals, |
2830 | InsertedInsts&: Insts)) |
2831 | return nullptr; |
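 | // The last instruction produced by the idiom recognizer is the bswap or |
 | // bitreverse call itself. Detach it so the caller can insert it in place of |
 | // the original instruction, and queue the remaining helper instructions for |
 | // further combining. |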
2832 | Instruction *LastInst = Insts.pop_back_val(); |
2833 | LastInst->removeFromParent(); |
2834 | |
2835 | for (auto *Inst : Insts) |
2836 | Worklist.push(I: Inst); |
2837 | return LastInst; |
2838 | } |
2839 | |
2840 | std::optional<std::pair<Intrinsic::ID, SmallVector<Value *, 3>>> |
2841 | InstCombinerImpl::convertOrOfShiftsToFunnelShift(Instruction &Or) { |
2842 | // TODO: Can we reduce the code duplication between this and the related |
2843 | // rotate matching code under visitSelect and visitTrunc? |
2844 | assert(Or.getOpcode() == BinaryOperator::Or && "Expecting or instruction" ); |
2845 | |
2846 | unsigned Width = Or.getType()->getScalarSizeInBits(); |
2847 | |
2848 | Instruction *Or0, *Or1; |
2849 | if (!match(V: Or.getOperand(i: 0), P: m_Instruction(I&: Or0)) || |
2850 | !match(V: Or.getOperand(i: 1), P: m_Instruction(I&: Or1))) |
2851 | return std::nullopt; |
2852 | |
2853 | bool IsFshl = true; // Sub on LSHR. |
2854 | SmallVector<Value *, 3> FShiftArgs; |
2855 | |
2856 | // First, find an or'd pair of opposite shifts: |
2857 | // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1) |
2858 | if (isa<BinaryOperator>(Val: Or0) && isa<BinaryOperator>(Val: Or1)) { |
2859 | Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; |
2860 | if (!match(V: Or0, |
2861 | P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal0), R: m_Value(V&: ShAmt0)))) || |
2862 | !match(V: Or1, |
2863 | P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: ShVal1), R: m_Value(V&: ShAmt1)))) || |
2864 | Or0->getOpcode() == Or1->getOpcode()) |
2865 | return std::nullopt; |
2866 | |
2867 | // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)). |
2868 | if (Or0->getOpcode() == BinaryOperator::LShr) { |
2869 | std::swap(a&: Or0, b&: Or1); |
2870 | std::swap(a&: ShVal0, b&: ShVal1); |
2871 | std::swap(a&: ShAmt0, b&: ShAmt1); |
2872 | } |
2873 | assert(Or0->getOpcode() == BinaryOperator::Shl && |
2874 | Or1->getOpcode() == BinaryOperator::LShr && |
2875 | "Illegal or(shift,shift) pair" ); |
2876 | |
2877 | // Match the shift amount operands for a funnel shift pattern. This always |
2878 | // matches a subtraction on the R operand. |
2879 | auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { |
2880 | // Check for constant shift amounts that sum to the bitwidth. |
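 | // For example (illustrative, i32): shl %v0, 24 and lshr %v1, 8 sum to the |
 | // bitwidth, so the caller can form fshl(%v0, %v1, 24). |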
2881 | const APInt *LI, *RI; |
2882 | if (match(V: L, P: m_APIntAllowPoison(Res&: LI)) && match(V: R, P: m_APIntAllowPoison(Res&: RI))) |
2883 | if (LI->ult(RHS: Width) && RI->ult(RHS: Width) && (*LI + *RI) == Width) |
2884 | return ConstantInt::get(Ty: L->getType(), V: *LI); |
2885 | |
2886 | Constant *LC, *RC; |
2887 | if (match(V: L, P: m_Constant(C&: LC)) && match(V: R, P: m_Constant(C&: RC)) && |
2888 | match(V: L, |
2889 | P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold: APInt(Width, Width))) && |
2890 | match(V: R, |
2891 | P: m_SpecificInt_ICMP(Predicate: ICmpInst::ICMP_ULT, Threshold: APInt(Width, Width))) && |
2892 | match(V: ConstantExpr::getAdd(C1: LC, C2: RC), P: m_SpecificIntAllowPoison(V: Width))) |
2893 | return ConstantExpr::mergeUndefsWith(C: LC, Other: RC); |
2894 | |
2895 | // (shl ShVal, X) | (lshr ShVal, (Width - X)) iff X < Width. |
2896 | // We limit this to X < Width in case the backend re-expands the |
2897 | // intrinsic, and has to reintroduce a shift modulo operation (InstCombine |
2898 | // might remove it after this fold). This still doesn't guarantee that the |
2899 | // final codegen will match this original pattern. |
2900 | if (match(V: R, P: m_OneUse(SubPattern: m_Sub(L: m_SpecificInt(V: Width), R: m_Specific(V: L))))) { |
2901 | KnownBits KnownL = computeKnownBits(V: L, /*Depth*/ 0, CxtI: &Or); |
2902 | return KnownL.getMaxValue().ult(RHS: Width) ? L : nullptr; |
2903 | } |
2904 | |
2905 | // For non-constant cases, the following patterns currently only work for |
2906 | // rotation patterns. |
2907 | // TODO: Add general funnel-shift compatible patterns. |
2908 | if (ShVal0 != ShVal1) |
2909 | return nullptr; |
2910 | |
2911 | // For non-constant cases we don't support non-pow2 shift masks. |
2912 | // TODO: Is it worth matching urem as well? |
2913 | if (!isPowerOf2_32(Value: Width)) |
2914 | return nullptr; |
2915 | |
2916 | // The shift amount may be masked with negation: |
2917 | // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1))) |
2918 | Value *X; |
2919 | unsigned Mask = Width - 1; |
2920 | if (match(V: L, P: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask))) && |
2921 | match(V: R, P: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask)))) |
2922 | return X; |
2923 | |
2924 | // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1))) |
2925 | if (match(V: R, P: m_And(L: m_Neg(V: m_Specific(V: L)), R: m_SpecificInt(V: Mask)))) |
2926 | return L; |
2927 | |
2928 | // Similar to above, but the shift amount may be extended after masking, |
2929 | // so return the extended value as the parameter for the intrinsic. |
2930 | if (match(V: L, P: m_ZExt(Op: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask)))) && |
2931 | match(V: R, |
2932 | P: m_And(L: m_Neg(V: m_ZExt(Op: m_And(L: m_Specific(V: X), R: m_SpecificInt(V: Mask)))), |
2933 | R: m_SpecificInt(V: Mask)))) |
2934 | return L; |
2935 | |
2936 | if (match(V: L, P: m_ZExt(Op: m_And(L: m_Value(V&: X), R: m_SpecificInt(V: Mask)))) && |
2937 | match(V: R, P: m_ZExt(Op: m_And(L: m_Neg(V: m_Specific(V: X)), R: m_SpecificInt(V: Mask))))) |
2938 | return L; |
2939 | |
2940 | return nullptr; |
2941 | }; |
2942 | |
2943 | Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width); |
2944 | if (!ShAmt) { |
2945 | ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width); |
2946 | IsFshl = false; // Sub on SHL. |
2947 | } |
2948 | if (!ShAmt) |
2949 | return std::nullopt; |
2950 | |
2951 | FShiftArgs = {ShVal0, ShVal1, ShAmt}; |
2952 | } else if (isa<ZExtInst>(Val: Or0) || isa<ZExtInst>(Val: Or1)) { |
2953 | // If there are two 'or' instructions that concat variables in opposite order: |
2954 | // |
2955 | // Slot1 and Slot2 are all zero bits. |
2956 | // | Slot1 | Low | Slot2 | High | |
2957 | // LowHigh = or (shl (zext Low), ZextLowShlAmt), (zext High) |
2958 | // | Slot2 | High | Slot1 | Low | |
2959 | // HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low) |
2960 | // |
2961 | // the latter 'or' can safely be converted to |
2962 | // -> HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt |
2963 | // if ZextLowShlAmt + ZextHighShlAmt == Width. |
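 | // For example (illustrative, i8 halves packed into i32, ZextLowShlAmt == 24, |
 | // ZextHighShlAmt == 8): |
 | //   LowHigh = (zext Low << 24) | zext High |
 | //   HighLow = (zext High << 8) | zext Low == fshl(LowHigh, LowHigh, 8) |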
2964 | if (!isa<ZExtInst>(Val: Or1)) |
2965 | std::swap(a&: Or0, b&: Or1); |
2966 | |
2967 | Value *High, *ZextHigh, *Low; |
2968 | const APInt *ZextHighShlAmt; |
2969 | if (!match(V: Or0, |
2970 | P: m_OneUse(SubPattern: m_Shl(L: m_Value(V&: ZextHigh), R: m_APInt(Res&: ZextHighShlAmt))))) |
2971 | return std::nullopt; |
2972 | |
2973 | if (!match(V: Or1, P: m_ZExt(Op: m_Value(V&: Low))) || |
2974 | !match(V: ZextHigh, P: m_ZExt(Op: m_Value(V&: High)))) |
2975 | return std::nullopt; |
2976 | |
2977 | unsigned HighSize = High->getType()->getScalarSizeInBits(); |
2978 | unsigned LowSize = Low->getType()->getScalarSizeInBits(); |
2979 | // Make sure High does not overlap with Low and most significant bits of |
2980 | // High aren't shifted out. |
2981 | if (ZextHighShlAmt->ult(RHS: LowSize) || ZextHighShlAmt->ugt(RHS: Width - HighSize)) |
2982 | return std::nullopt; |
2983 | |
2984 | for (User *U : ZextHigh->users()) { |
2985 | Value *X, *Y; |
2986 | if (!match(V: U, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
2987 | continue; |
2988 | |
2989 | if (!isa<ZExtInst>(Val: Y)) |
2990 | std::swap(a&: X, b&: Y); |
2991 | |
2992 | const APInt *ZextLowShlAmt; |
2993 | if (!match(V: X, P: m_Shl(L: m_Specific(V: Or1), R: m_APInt(Res&: ZextLowShlAmt))) || |
2994 | !match(V: Y, P: m_Specific(V: ZextHigh)) || !DT.dominates(Def: U, User: &Or)) |
2995 | continue; |
2996 | |
2997 | // HighLow is a good concat. If the sum of the two shift amounts equals |
2998 | // Width, LowHigh must also be a good concat. |
2999 | if (*ZextLowShlAmt + *ZextHighShlAmt != Width) |
3000 | continue; |
3001 | |
3002 | // Low must not overlap with High and most significant bits of Low must |
3003 | // not be shifted out. |
3004 | assert(ZextLowShlAmt->uge(HighSize) && |
3005 | ZextLowShlAmt->ule(Width - LowSize) && "Invalid concat" ); |
3006 | |
3007 | FShiftArgs = {U, U, ConstantInt::get(Ty: Or0->getType(), V: *ZextHighShlAmt)}; |
3008 | break; |
3009 | } |
3010 | } |
3011 | |
3012 | if (FShiftArgs.empty()) |
3013 | return std::nullopt; |
3014 | |
3015 | Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; |
3016 | return std::make_pair(x&: IID, y&: FShiftArgs); |
3017 | } |
3018 | |
3019 | /// Match UB-safe variants of the funnel shift intrinsic. |
3020 | static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) { |
3021 | if (auto Opt = IC.convertOrOfShiftsToFunnelShift(Or)) { |
3022 | auto [IID, FShiftArgs] = *Opt; |
3023 | Function *F = Intrinsic::getDeclaration(M: Or.getModule(), id: IID, Tys: Or.getType()); |
3024 | return CallInst::Create(Func: F, Args: FShiftArgs); |
3025 | } |
3026 | |
3027 | return nullptr; |
3028 | } |
3029 | |
3030 | /// Attempt to combine or(zext(x), shl(zext(y), bw/2)) concat packing patterns. |
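 | /// For example (illustrative, i32 result): |
 | ///   or (zext i16 %x to i32), (shl (zext i16 %y to i32), 16) |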
3031 | static Instruction *matchOrConcat(Instruction &Or, |
3032 | InstCombiner::BuilderTy &Builder) { |
3033 | assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'" ); |
3034 | Value *Op0 = Or.getOperand(i: 0), *Op1 = Or.getOperand(i: 1); |
3035 | Type *Ty = Or.getType(); |
3036 | |
3037 | unsigned Width = Ty->getScalarSizeInBits(); |
3038 | if ((Width & 1) != 0) |
3039 | return nullptr; |
3040 | unsigned HalfWidth = Width / 2; |
3041 | |
3042 | // Canonicalize zext (lower half) to LHS. |
3043 | if (!isa<ZExtInst>(Val: Op0)) |
3044 | std::swap(a&: Op0, b&: Op1); |
3045 | |
3046 | // Find lower/upper half. |
3047 | Value *LowerSrc, *ShlVal, *UpperSrc; |
3048 | const APInt *C; |
3049 | if (!match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: LowerSrc)))) || |
3050 | !match(V: Op1, P: m_OneUse(SubPattern: m_Shl(L: m_Value(V&: ShlVal), R: m_APInt(Res&: C)))) || |
3051 | !match(V: ShlVal, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: UpperSrc))))) |
3052 | return nullptr; |
3053 | if (*C != HalfWidth || LowerSrc->getType() != UpperSrc->getType() || |
3054 | LowerSrc->getType()->getScalarSizeInBits() != HalfWidth) |
3055 | return nullptr; |
3056 | |
3057 | auto ConcatIntrinsicCalls = [&](Intrinsic::ID id, Value *Lo, Value *Hi) { |
3058 | Value *NewLower = Builder.CreateZExt(V: Lo, DestTy: Ty); |
3059 | Value *NewUpper = Builder.CreateZExt(V: Hi, DestTy: Ty); |
3060 | NewUpper = Builder.CreateShl(LHS: NewUpper, RHS: HalfWidth); |
3061 | Value *BinOp = Builder.CreateOr(LHS: NewLower, RHS: NewUpper); |
3062 | Function *F = Intrinsic::getDeclaration(M: Or.getModule(), id, Tys: Ty); |
3063 | return Builder.CreateCall(Callee: F, Args: BinOp); |
3064 | }; |
3065 | |
3066 | // BSWAP: Push the concat down, swapping the lower/upper sources. |
3067 | // concat(bswap(x),bswap(y)) -> bswap(concat(x,y)) |
3068 | Value *LowerBSwap, *UpperBSwap; |
3069 | if (match(V: LowerSrc, P: m_BSwap(Op0: m_Value(V&: LowerBSwap))) && |
3070 | match(V: UpperSrc, P: m_BSwap(Op0: m_Value(V&: UpperBSwap)))) |
3071 | return ConcatIntrinsicCalls(Intrinsic::bswap, UpperBSwap, LowerBSwap); |
3072 | |
3073 | // BITREVERSE: Push the concat down, swapping the lower/upper sources. |
3074 | // concat(bitreverse(x),bitreverse(y)) -> bitreverse(concat(x,y)) |
3075 | Value *LowerBRev, *UpperBRev; |
3076 | if (match(V: LowerSrc, P: m_BitReverse(Op0: m_Value(V&: LowerBRev))) && |
3077 | match(V: UpperSrc, P: m_BitReverse(Op0: m_Value(V&: UpperBRev)))) |
3078 | return ConcatIntrinsicCalls(Intrinsic::bitreverse, UpperBRev, LowerBRev); |
3079 | |
3080 | return nullptr; |
3081 | } |
3082 | |
3083 | /// If all elements of two constant vectors are 0/-1 and inverses, return true. |
3084 | static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) { |
3085 | unsigned NumElts = cast<FixedVectorType>(Val: C1->getType())->getNumElements(); |
3086 | for (unsigned i = 0; i != NumElts; ++i) { |
3087 | Constant *EltC1 = C1->getAggregateElement(Elt: i); |
3088 | Constant *EltC2 = C2->getAggregateElement(Elt: i); |
3089 | if (!EltC1 || !EltC2) |
3090 | return false; |
3091 | |
3092 | // One element must be all ones, and the other must be all zeros. |
3093 | if (!((match(V: EltC1, P: m_Zero()) && match(V: EltC2, P: m_AllOnes())) || |
3094 | (match(V: EltC2, P: m_Zero()) && match(V: EltC1, P: m_AllOnes())))) |
3095 | return false; |
3096 | } |
3097 | return true; |
3098 | } |
3099 | |
3100 | /// We have an expression of the form (A & C) | (B & D). If A is a scalar or |
3101 | /// vector composed of all-zeros or all-ones values and is the bitwise 'not' of |
3102 | /// B, it can be used as the condition operand of a select instruction. |
3103 | /// We will detect (A & C) | ~(B | D) when the flag ABIsTheSame is enabled. |
3104 | Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B, |
3105 | bool ABIsTheSame) { |
3106 | // We may have peeked through bitcasts in the caller. |
3107 | // Exit immediately if we don't have (vector) integer types. |
3108 | Type *Ty = A->getType(); |
3109 | if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy()) |
3110 | return nullptr; |
3111 | |
3112 | // If A is the 'not' operand of B and has enough signbits, we have our answer. |
3113 | if (ABIsTheSame ? (A == B) : match(V: B, P: m_Not(V: m_Specific(V: A)))) { |
3114 | // If these are scalars or vectors of i1, A can be used directly. |
3115 | if (Ty->isIntOrIntVectorTy(BitWidth: 1)) |
3116 | return A; |
3117 | |
3118 | // If we look through a vector bitcast, the caller will bitcast the operands |
3119 | // to match the condition's number of bits (N x i1). |
3120 | // To make this poison-safe, disallow bitcast from wide element to narrow |
3121 | // element. That could allow poison in lanes where it was not present in the |
3122 | // original code. |
3123 | A = peekThroughBitcast(V: A); |
3124 | if (A->getType()->isIntOrIntVectorTy()) { |
3125 | unsigned NumSignBits = ComputeNumSignBits(Op: A); |
3126 | if (NumSignBits == A->getType()->getScalarSizeInBits() && |
3127 | NumSignBits <= Ty->getScalarSizeInBits()) |
3128 | return Builder.CreateTrunc(V: A, DestTy: CmpInst::makeCmpResultType(opnd_type: A->getType())); |
3129 | } |
3130 | return nullptr; |
3131 | } |
3132 | |
3133 | // TODO: add support for sext and constant case |
3134 | if (ABIsTheSame) |
3135 | return nullptr; |
3136 | |
3137 | // If both operands are constants, see if the constants are inverse bitmasks. |
3138 | Constant *AConst, *BConst; |
3139 | if (match(V: A, P: m_Constant(C&: AConst)) && match(V: B, P: m_Constant(C&: BConst))) |
3140 | if (AConst == ConstantExpr::getNot(C: BConst) && |
3141 | ComputeNumSignBits(Op: A) == Ty->getScalarSizeInBits()) |
3142 | return Builder.CreateZExtOrTrunc(V: A, DestTy: CmpInst::makeCmpResultType(opnd_type: Ty)); |
3143 | |
3144 | // Look for more complex patterns. The 'not' op may be hidden behind various |
3145 | // casts. Look through sexts and bitcasts to find the booleans. |
3146 | Value *Cond; |
3147 | Value *NotB; |
3148 | if (match(V: A, P: m_SExt(Op: m_Value(V&: Cond))) && |
3149 | Cond->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
3150 | // A = sext i1 Cond; B = sext (not (i1 Cond)) |
3151 | if (match(V: B, P: m_SExt(Op: m_Not(V: m_Specific(V: Cond))))) |
3152 | return Cond; |
3153 | |
3154 | // A = sext i1 Cond; B = not ({bitcast} (sext (i1 Cond))) |
3155 | // TODO: The one-use checks are unnecessary or misplaced. If the caller |
3156 | // checked for uses on logic ops/casts, that should be enough to |
3157 | // make this transform worthwhile. |
3158 | if (match(V: B, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: NotB))))) { |
3159 | NotB = peekThroughBitcast(V: NotB, OneUseOnly: true); |
3160 | if (match(V: NotB, P: m_SExt(Op: m_Specific(V: Cond)))) |
3161 | return Cond; |
3162 | } |
3163 | } |
3164 | |
3165 | // All scalar (and most vector) possibilities should be handled now. |
3166 | // Try more matches that only apply to non-splat constant vectors. |
3167 | if (!Ty->isVectorTy()) |
3168 | return nullptr; |
3169 | |
3170 | // If both operands are xor'd with constants using the same sexted boolean |
3171 | // operand, see if the constants are inverse bitmasks. |
3172 | // TODO: Use ConstantExpr::getNot()? |
3173 | if (match(V: A, P: (m_Xor(L: m_SExt(Op: m_Value(V&: Cond)), R: m_Constant(C&: AConst)))) && |
3174 | match(V: B, P: (m_Xor(L: m_SExt(Op: m_Specific(V: Cond)), R: m_Constant(C&: BConst)))) && |
3175 | Cond->getType()->isIntOrIntVectorTy(BitWidth: 1) && |
3176 | areInverseVectorBitmasks(C1: AConst, C2: BConst)) { |
3177 | AConst = ConstantExpr::getTrunc(C: AConst, Ty: CmpInst::makeCmpResultType(opnd_type: Ty)); |
3178 | return Builder.CreateXor(LHS: Cond, RHS: AConst); |
3179 | } |
3180 | return nullptr; |
3181 | } |
3182 | |
3183 | /// We have an expression of the form (A & B) | (C & D). Try to simplify this |
3184 | /// to "A' ? B : D", where A' is a boolean or vector of booleans. |
3185 | /// When InvertFalseVal is set to true, we try to match the pattern |
3186 | /// where we have peeked through a 'not' op and A and C are the same: |
3187 | /// (A & B) | ~(A | D) --> (A & B) | (~A & ~D) --> A' ? B : ~D |
3188 | Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *B, Value *C, |
3189 | Value *D, bool InvertFalseVal) { |
3190 | // The potential condition of the select may be bitcasted. In that case, look |
3191 | // through its bitcast and the corresponding bitcast of the 'not' condition. |
3192 | Type *OrigType = A->getType(); |
3193 | A = peekThroughBitcast(V: A, OneUseOnly: true); |
3194 | C = peekThroughBitcast(V: C, OneUseOnly: true); |
3195 | if (Value *Cond = getSelectCondition(A, B: C, ABIsTheSame: InvertFalseVal)) { |
3196 | // ((bc Cond) & B) | ((bc ~Cond) & D) --> bc (select Cond, (bc B), (bc D)) |
3197 | // If this is a vector, we may need to cast to match the condition's length. |
3198 | // The bitcasts will either all exist or all not exist. The builder will |
3199 | // not create unnecessary casts if the types already match. |
3200 | Type *SelTy = A->getType(); |
3201 | if (auto *VecTy = dyn_cast<VectorType>(Val: Cond->getType())) { |
3202 | // For a fixed or scalable vector get N from <{vscale x} N x iM> |
3203 | unsigned Elts = VecTy->getElementCount().getKnownMinValue(); |
3204 | // For a fixed or scalable vector, get the size in bits of N x iM; for a |
3205 | // scalar this is just M. |
3206 | unsigned SelEltSize = SelTy->getPrimitiveSizeInBits().getKnownMinValue(); |
3207 | Type *EltTy = Builder.getIntNTy(N: SelEltSize / Elts); |
3208 | SelTy = VectorType::get(ElementType: EltTy, EC: VecTy->getElementCount()); |
3209 | } |
3210 | Value *BitcastB = Builder.CreateBitCast(V: B, DestTy: SelTy); |
3211 | if (InvertFalseVal) |
3212 | D = Builder.CreateNot(V: D); |
3213 | Value *BitcastD = Builder.CreateBitCast(V: D, DestTy: SelTy); |
3214 | Value *Select = Builder.CreateSelect(C: Cond, True: BitcastB, False: BitcastD); |
3215 | return Builder.CreateBitCast(V: Select, DestTy: OrigType); |
3216 | } |
3217 | |
3218 | return nullptr; |
3219 | } |
3220 | |
3221 | // (icmp eq X, C) | (icmp ult Other, (X - C)) -> (icmp ule Other, (X - (C + 1))) |
3222 | // (icmp ne X, C) & (icmp uge Other, (X - C)) -> (icmp ugt Other, (X - (C + 1))) |
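 | // For example (illustrative, C == 5): |
 | //   (icmp eq X, 5) | (icmp ult Other, (X - 5)) -> (icmp ule Other, (X - 6)) |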
3223 | static Value *foldAndOrOfICmpEqConstantAndICmp(ICmpInst *LHS, ICmpInst *RHS, |
3224 | bool IsAnd, bool IsLogical, |
3225 | IRBuilderBase &Builder) { |
3226 | Value *LHS0 = LHS->getOperand(i_nocapture: 0); |
3227 | Value *RHS0 = RHS->getOperand(i_nocapture: 0); |
3228 | Value *RHS1 = RHS->getOperand(i_nocapture: 1); |
3229 | |
3230 | ICmpInst::Predicate LPred = |
3231 | IsAnd ? LHS->getInversePredicate() : LHS->getPredicate(); |
3232 | ICmpInst::Predicate RPred = |
3233 | IsAnd ? RHS->getInversePredicate() : RHS->getPredicate(); |
3234 | |
3235 | const APInt *CInt; |
3236 | if (LPred != ICmpInst::ICMP_EQ || |
3237 | !match(V: LHS->getOperand(i_nocapture: 1), P: m_APIntAllowPoison(Res&: CInt)) || |
3238 | !LHS0->getType()->isIntOrIntVectorTy() || |
3239 | !(LHS->hasOneUse() || RHS->hasOneUse())) |
3240 | return nullptr; |
3241 | |
3242 | auto MatchRHSOp = [LHS0, CInt](const Value *RHSOp) { |
3243 | return match(V: RHSOp, |
3244 | P: m_Add(L: m_Specific(V: LHS0), R: m_SpecificIntAllowPoison(V: -*CInt))) || |
3245 | (CInt->isZero() && RHSOp == LHS0); |
3246 | }; |
3247 | |
3248 | Value *Other; |
3249 | if (RPred == ICmpInst::ICMP_ULT && MatchRHSOp(RHS1)) |
3250 | Other = RHS0; |
3251 | else if (RPred == ICmpInst::ICMP_UGT && MatchRHSOp(RHS0)) |
3252 | Other = RHS1; |
3253 | else |
3254 | return nullptr; |
3255 | |
3256 | if (IsLogical) |
3257 | Other = Builder.CreateFreeze(V: Other); |
3258 | |
3259 | return Builder.CreateICmp( |
3260 | P: IsAnd ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE, |
3261 | LHS: Builder.CreateSub(LHS: LHS0, RHS: ConstantInt::get(Ty: LHS0->getType(), V: *CInt + 1)), |
3262 | RHS: Other); |
3263 | } |
3264 | |
3265 | /// Fold (icmp)&(icmp) or (icmp)|(icmp) if possible. |
3266 | /// If IsLogical is true, then the and/or is in select form and the transform |
3267 | /// must be poison-safe. |
3268 | Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, |
3269 | Instruction &I, bool IsAnd, |
3270 | bool IsLogical) { |
3271 | const SimplifyQuery Q = SQ.getWithInstruction(I: &I); |
3272 | |
3273 | // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) |
3274 | // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) |
3275 | // if K1 and K2 are one-bit masks. |
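 | // For example (illustrative, K1 == 1, K2 == 2): |
 | //   ((A & 1) == 0) | ((A & 2) == 0) -> (A & 3) != 3 |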
3276 | if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, CxtI: &I, IsAnd, IsLogical)) |
3277 | return V; |
3278 | |
3279 | ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); |
3280 | Value *LHS0 = LHS->getOperand(i_nocapture: 0), *RHS0 = RHS->getOperand(i_nocapture: 0); |
3281 | Value *LHS1 = LHS->getOperand(i_nocapture: 1), *RHS1 = RHS->getOperand(i_nocapture: 1); |
3282 | |
3283 | const APInt *LHSC = nullptr, *RHSC = nullptr; |
3284 | match(V: LHS1, P: m_APInt(Res&: LHSC)); |
3285 | match(V: RHS1, P: m_APInt(Res&: RHSC)); |
3286 | |
3287 | // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) |
3288 | // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) |
3289 | if (predicatesFoldable(P1: PredL, P2: PredR)) { |
3290 | if (LHS0 == RHS1 && LHS1 == RHS0) { |
3291 | PredL = ICmpInst::getSwappedPredicate(pred: PredL); |
3292 | std::swap(a&: LHS0, b&: LHS1); |
3293 | } |
3294 | if (LHS0 == RHS0 && LHS1 == RHS1) { |
3295 | unsigned Code = IsAnd ? getICmpCode(Pred: PredL) & getICmpCode(Pred: PredR) |
3296 | : getICmpCode(Pred: PredL) | getICmpCode(Pred: PredR); |
3297 | bool IsSigned = LHS->isSigned() || RHS->isSigned(); |
3298 | return getNewICmpValue(Code, Sign: IsSigned, LHS: LHS0, RHS: LHS1, Builder); |
3299 | } |
3300 | } |
3301 | |
3302 | // handle (roughly): |
3303 | // (icmp ne (A & B), C) | (icmp ne (A & D), E) |
3304 | // (icmp eq (A & B), C) & (icmp eq (A & D), E) |
3305 | if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder)) |
3306 | return V; |
3307 | |
3308 | if (Value *V = |
3309 | foldAndOrOfICmpEqConstantAndICmp(LHS, RHS, IsAnd, IsLogical, Builder)) |
3310 | return V; |
3311 | // We can treat logical like bitwise here, because both operands are used on |
3312 | // the LHS, and as such poison from both will propagate. |
3313 | if (Value *V = foldAndOrOfICmpEqConstantAndICmp(LHS: RHS, RHS: LHS, IsAnd, |
3314 | /*IsLogical*/ false, Builder)) |
3315 | return V; |
3316 | |
3317 | if (Value *V = |
3318 | foldAndOrOfICmpsWithConstEq(Cmp0: LHS, Cmp1: RHS, IsAnd, IsLogical, Builder, Q)) |
3319 | return V; |
3320 | // We can convert this case to bitwise and, because both operands are used |
3321 | // on the LHS, and as such poison from both will propagate. |
3322 | if (Value *V = foldAndOrOfICmpsWithConstEq(Cmp0: RHS, Cmp1: LHS, IsAnd, |
3323 | /*IsLogical*/ false, Builder, Q)) |
3324 | return V; |
3325 | |
3326 | if (Value *V = foldIsPowerOf2OrZero(Cmp0: LHS, Cmp1: RHS, IsAnd, Builder)) |
3327 | return V; |
3328 | if (Value *V = foldIsPowerOf2OrZero(Cmp0: RHS, Cmp1: LHS, IsAnd, Builder)) |
3329 | return V; |
3330 | |
3331 | // TODO: One of these directions is fine with logical and/or, the other could |
3332 | // be supported by inserting freeze. |
3333 | if (!IsLogical) { |
3334 | // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n |
3335 | // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n |
3336 | if (Value *V = simplifyRangeCheck(Cmp0: LHS, Cmp1: RHS, /*Inverted=*/!IsAnd)) |
3337 | return V; |
3338 | |
3339 | // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n |
3340 | // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n |
3341 | if (Value *V = simplifyRangeCheck(Cmp0: RHS, Cmp1: LHS, /*Inverted=*/!IsAnd)) |
3342 | return V; |
3343 | } |
3344 | |
3345 | // TODO: Add conjugated or fold, check whether it is safe for logical and/or. |
3346 | if (IsAnd && !IsLogical) |
3347 | if (Value *V = foldSignedTruncationCheck(ICmp0: LHS, ICmp1: RHS, CxtI&: I, Builder)) |
3348 | return V; |
3349 | |
3350 | if (Value *V = foldIsPowerOf2(Cmp0: LHS, Cmp1: RHS, JoinedByAnd: IsAnd, Builder)) |
3351 | return V; |
3352 | |
3353 | if (Value *V = foldPowerOf2AndShiftedMask(Cmp0: LHS, Cmp1: RHS, JoinedByAnd: IsAnd, Builder)) |
3354 | return V; |
3355 | |
3356 | // TODO: Verify whether this is safe for logical and/or. |
3357 | if (!IsLogical) { |
3358 | if (Value *X = foldUnsignedUnderflowCheck(ZeroICmp: LHS, UnsignedICmp: RHS, IsAnd, Q, Builder)) |
3359 | return X; |
3360 | if (Value *X = foldUnsignedUnderflowCheck(ZeroICmp: RHS, UnsignedICmp: LHS, IsAnd, Q, Builder)) |
3361 | return X; |
3362 | } |
3363 | |
3364 | if (Value *X = foldEqOfParts(Cmp0: LHS, Cmp1: RHS, IsAnd)) |
3365 | return X; |
3366 | |
3367 | // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) |
3368 | // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) |
3369 | // TODO: Remove this and below when foldLogOpOfMaskedICmps can handle undefs. |
3370 | if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && |
3371 | PredL == PredR && match(V: LHS1, P: m_ZeroInt()) && match(V: RHS1, P: m_ZeroInt()) && |
3372 | LHS0->getType() == RHS0->getType()) { |
3373 | Value *NewOr = Builder.CreateOr(LHS: LHS0, RHS: RHS0); |
3374 | return Builder.CreateICmp(P: PredL, LHS: NewOr, |
3375 | RHS: Constant::getNullValue(Ty: NewOr->getType())); |
3376 | } |
3377 | |
3378 | // (icmp ne A, -1) | (icmp ne B, -1) --> (icmp ne (A&B), -1) |
3379 | // (icmp eq A, -1) & (icmp eq B, -1) --> (icmp eq (A&B), -1) |
3380 | if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && |
3381 | PredL == PredR && match(V: LHS1, P: m_AllOnes()) && match(V: RHS1, P: m_AllOnes()) && |
3382 | LHS0->getType() == RHS0->getType()) { |
3383 | Value *NewAnd = Builder.CreateAnd(LHS: LHS0, RHS: RHS0); |
3384 | return Builder.CreateICmp(P: PredL, LHS: NewAnd, |
3385 | RHS: Constant::getAllOnesValue(Ty: LHS0->getType())); |
3386 | } |
3387 | |
3388 | if (!IsLogical) |
3389 | if (Value *V = |
3390 | foldAndOrOfICmpsWithPow2AndWithZero(Builder, LHS, RHS, IsAnd, Q)) |
3391 | return V; |
3392 | |
3393 | // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). |
3394 | if (!LHSC || !RHSC) |
3395 | return nullptr; |
3396 | |
3397 | // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 |
3398 | // (trunc x) != C1 | (and x, CA) != C2 -> (and x, CA|CMAX) != C1|C2 |
3399 | // where CMAX is the all ones value for the truncated type, |
3400 | // iff the lower bits of C2 and CA are zero. |
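 | // For example (illustrative, i32 x truncated to i8, CA == 0xFF00): |
 | //   (trunc x) == 0x07 & (x & 0xFF00) == 0x1200 -> (x & 0xFFFF) == 0x1207 |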
3401 | if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && |
3402 | PredL == PredR && LHS->hasOneUse() && RHS->hasOneUse()) { |
3403 | Value *V; |
3404 | const APInt *AndC, *SmallC = nullptr, *BigC = nullptr; |
3405 | |
3406 | // (trunc x) == C1 & (and x, CA) == C2 |
3407 | // (and x, CA) == C2 & (trunc x) == C1 |
3408 | if (match(V: RHS0, P: m_Trunc(Op: m_Value(V))) && |
3409 | match(V: LHS0, P: m_And(L: m_Specific(V), R: m_APInt(Res&: AndC)))) { |
3410 | SmallC = RHSC; |
3411 | BigC = LHSC; |
3412 | } else if (match(V: LHS0, P: m_Trunc(Op: m_Value(V))) && |
3413 | match(V: RHS0, P: m_And(L: m_Specific(V), R: m_APInt(Res&: AndC)))) { |
3414 | SmallC = LHSC; |
3415 | BigC = RHSC; |
3416 | } |
3417 | |
3418 | if (SmallC && BigC) { |
3419 | unsigned BigBitSize = BigC->getBitWidth(); |
3420 | unsigned SmallBitSize = SmallC->getBitWidth(); |
3421 | |
3422 | // Check that the low bits are zero. |
3423 | APInt Low = APInt::getLowBitsSet(numBits: BigBitSize, loBitsSet: SmallBitSize); |
3424 | if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) { |
3425 | Value *NewAnd = Builder.CreateAnd(LHS: V, RHS: Low | *AndC); |
3426 | APInt N = SmallC->zext(width: BigBitSize) | *BigC; |
3427 | Value *NewVal = ConstantInt::get(Ty: NewAnd->getType(), V: N); |
3428 | return Builder.CreateICmp(P: PredL, LHS: NewAnd, RHS: NewVal); |
3429 | } |
3430 | } |
3431 | } |
3432 | |
3433 | // Match the naive pattern (and its inverted form) for checking if two values |
3434 | // share the same sign. An example of the pattern: |
3435 | // (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1) -> (icmp sgt (X ^ Y), -1) |
3436 | // Inverted form (example): |
3437 | // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0) |
3438 | bool TrueIfSignedL, TrueIfSignedR; |
3439 | if (isSignBitCheck(Pred: PredL, RHS: *LHSC, TrueIfSigned&: TrueIfSignedL) && |
3440 | isSignBitCheck(Pred: PredR, RHS: *RHSC, TrueIfSigned&: TrueIfSignedR) && |
3441 | (RHS->hasOneUse() || LHS->hasOneUse())) { |
3442 | Value *X, *Y; |
3443 | if (IsAnd) { |
3444 | if ((TrueIfSignedL && !TrueIfSignedR && |
3445 | match(V: LHS0, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))) && |
3446 | match(V: RHS0, P: m_c_And(L: m_Specific(V: X), R: m_Specific(V: Y)))) || |
3447 | (!TrueIfSignedL && TrueIfSignedR && |
3448 | match(V: LHS0, P: m_And(L: m_Value(V&: X), R: m_Value(V&: Y))) && |
3449 | match(V: RHS0, P: m_c_Or(L: m_Specific(V: X), R: m_Specific(V: Y))))) { |
3450 | Value *NewXor = Builder.CreateXor(LHS: X, RHS: Y); |
3451 | return Builder.CreateIsNeg(Arg: NewXor); |
3452 | } |
3453 | } else { |
3454 | if ((TrueIfSignedL && !TrueIfSignedR && |
3455 | match(V: LHS0, P: m_And(L: m_Value(V&: X), R: m_Value(V&: Y))) && |
3456 | match(V: RHS0, P: m_c_Or(L: m_Specific(V: X), R: m_Specific(V: Y)))) || |
3457 | (!TrueIfSignedL && TrueIfSignedR && |
3458 | match(V: LHS0, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))) && |
3459 | match(V: RHS0, P: m_c_And(L: m_Specific(V: X), R: m_Specific(V: Y))))) { |
3460 | Value *NewXor = Builder.CreateXor(LHS: X, RHS: Y); |
3461 | return Builder.CreateIsNotNeg(Arg: NewXor); |
3462 | } |
3463 | } |
3464 | } |
3465 | |
3466 | return foldAndOrOfICmpsUsingRanges(ICmp1: LHS, ICmp2: RHS, IsAnd); |
3467 | } |
3468 | |
3469 | static Value *foldOrOfInversions(BinaryOperator &I, |
3470 | InstCombiner::BuilderTy &Builder) { |
3471 | assert(I.getOpcode() == Instruction::Or && |
3472 | "Simplification only supports or at the moment." ); |
3473 | |
3474 | Value *Cmp1, *Cmp2, *Cmp3, *Cmp4; |
3475 | if (!match(V: I.getOperand(i_nocapture: 0), P: m_And(L: m_Value(V&: Cmp1), R: m_Value(V&: Cmp2))) || |
3476 | !match(V: I.getOperand(i_nocapture: 1), P: m_And(L: m_Value(V&: Cmp3), R: m_Value(V&: Cmp4)))) |
3477 | return nullptr; |
3478 | |
3479 | // Check if any two pairs of the and operations are inversions of each other. |
3480 | if (isKnownInversion(X: Cmp1, Y: Cmp3) && isKnownInversion(X: Cmp2, Y: Cmp4)) |
3481 | return Builder.CreateXor(LHS: Cmp1, RHS: Cmp4); |
3482 | if (isKnownInversion(X: Cmp1, Y: Cmp4) && isKnownInversion(X: Cmp2, Y: Cmp3)) |
3483 | return Builder.CreateXor(LHS: Cmp1, RHS: Cmp3); |
3484 | |
3485 | return nullptr; |
3486 | } |
3487 | |
3488 | // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches |
3489 | // here. We should standardize that construct where it is needed or choose some |
3490 | // other way to ensure that commutated variants of patterns are not missed. |
3491 | Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { |
3492 | if (Value *V = simplifyOrInst(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1), |
3493 | Q: SQ.getWithInstruction(I: &I))) |
3494 | return replaceInstUsesWith(I, V); |
3495 | |
3496 | if (SimplifyAssociativeOrCommutative(I)) |
3497 | return &I; |
3498 | |
3499 | if (Instruction *X = foldVectorBinop(Inst&: I)) |
3500 | return X; |
3501 | |
3502 | if (Instruction *Phi = foldBinopWithPhiOperands(BO&: I)) |
3503 | return Phi; |
3504 | |
3505 | // See if we can simplify any instructions used by the instruction whose sole |
3506 | // purpose is to compute bits we don't care about. |
3507 | if (SimplifyDemandedInstructionBits(Inst&: I)) |
3508 | return &I; |
3509 | |
3510 | // Do this before using distributive laws to catch simple and/or/not patterns. |
3511 | if (Instruction *Xor = foldOrToXor(I, Builder)) |
3512 | return Xor; |
3513 | |
3514 | if (Instruction *X = foldComplexAndOrPatterns(I, Builder)) |
3515 | return X; |
3516 | |
3517 | // (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D |
3518 | // (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C |
3519 | if (Value *V = foldOrOfInversions(I, Builder)) |
3520 | return replaceInstUsesWith(I, V); |
3521 | |
3522 | // (A&B)|(A&C) -> A&(B|C) etc |
3523 | if (Value *V = foldUsingDistributiveLaws(I)) |
3524 | return replaceInstUsesWith(I, V); |
3525 | |
3526 | Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1); |
3527 | Type *Ty = I.getType(); |
3528 | if (Ty->isIntOrIntVectorTy(BitWidth: 1)) { |
3529 | if (auto *SI0 = dyn_cast<SelectInst>(Val: Op0)) { |
3530 | if (auto *R = |
3531 | foldAndOrOfSelectUsingImpliedCond(Op: Op1, SI&: *SI0, /* IsAnd */ false)) |
3532 | return R; |
3533 | } |
3534 | if (auto *SI1 = dyn_cast<SelectInst>(Val: Op1)) { |
3535 | if (auto *R = |
3536 | foldAndOrOfSelectUsingImpliedCond(Op: Op0, SI&: *SI1, /* IsAnd */ false)) |
3537 | return R; |
3538 | } |
3539 | } |
3540 | |
3541 | if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) |
3542 | return FoldedLogic; |
3543 | |
3544 | if (Instruction *BitOp = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true, |
3545 | /*MatchBitReversals*/ true)) |
3546 | return BitOp; |
3547 | |
3548 | if (Instruction *Funnel = matchFunnelShift(Or&: I, IC&: *this)) |
3549 | return Funnel; |
3550 | |
3551 | if (Instruction *Concat = matchOrConcat(Or&: I, Builder)) |
3552 | return replaceInstUsesWith(I, V: Concat); |
3553 | |
3554 | if (Instruction *R = foldBinOpShiftWithShift(I)) |
3555 | return R; |
3556 | |
3557 | if (Instruction *R = tryFoldInstWithCtpopWithNot(I: &I)) |
3558 | return R; |
3559 | |
3560 | Value *X, *Y; |
3561 | const APInt *CV; |
3562 | if (match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: X), R: m_APInt(Res&: CV))), R: m_Value(V&: Y))) && |
3563 | !CV->isAllOnes() && MaskedValueIsZero(V: Y, Mask: *CV, Depth: 0, CxtI: &I)) { |
3564 | // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0 |
3565 | // The check for a 'not' op is for efficiency (if Y is known zero --> ~X). |
3566 | Value *Or = Builder.CreateOr(LHS: X, RHS: Y); |
3567 | return BinaryOperator::CreateXor(V1: Or, V2: ConstantInt::get(Ty, V: *CV)); |
3568 | } |
3569 | |
3570 | // If the operands have no common bits set: |
3571 | // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1) |
3572 | if (match(V: &I, P: m_c_DisjointOr(L: m_OneUse(SubPattern: m_Mul(L: m_Value(V&: X), R: m_Value(V&: Y))), |
3573 | R: m_Deferred(V: X)))) { |
3574 | Value *IncrementY = Builder.CreateAdd(LHS: Y, RHS: ConstantInt::get(Ty, V: 1)); |
3575 | return BinaryOperator::CreateMul(V1: X, V2: IncrementY); |
3576 | } |
3577 | |
3578 | // (A & C) | (B & D) |
3579 | Value *A, *B, *C, *D; |
3580 | if (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: C))) && |
3581 | match(V: Op1, P: m_And(L: m_Value(V&: B), R: m_Value(V&: D)))) { |
3582 | |
3583 | // (A & C0) | (B & C1) |
3584 | const APInt *C0, *C1; |
3585 | if (match(V: C, P: m_APInt(Res&: C0)) && match(V: D, P: m_APInt(Res&: C1))) { |
3586 | Value *X; |
3587 | if (*C0 == ~*C1) { |
3588 | // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B |
3589 | if (match(V: A, P: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: B)))) |
3590 | return BinaryOperator::CreateOr(V1: Builder.CreateAnd(LHS: X, RHS: *C0), V2: B); |
3591 | // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A |
3592 | if (match(V: B, P: m_c_Or(L: m_Specific(V: A), R: m_Value(V&: X)))) |
3593 | return BinaryOperator::CreateOr(V1: Builder.CreateAnd(LHS: X, RHS: *C1), V2: A); |
3594 | |
3595 | // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B |
3596 | if (match(V: A, P: m_c_Xor(L: m_Value(V&: X), R: m_Specific(V: B)))) |
3597 | return BinaryOperator::CreateXor(V1: Builder.CreateAnd(LHS: X, RHS: *C0), V2: B); |
3598 | // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A |
3599 | if (match(V: B, P: m_c_Xor(L: m_Specific(V: A), R: m_Value(V&: X)))) |
3600 | return BinaryOperator::CreateXor(V1: Builder.CreateAnd(LHS: X, RHS: *C1), V2: A); |
3601 | } |
3602 | |
3603 | if ((*C0 & *C1).isZero()) { |
3604 | // ((X | B) & C0) | (B & C1) --> (X | B) & (C0 | C1) |
3605 | // iff (C0 & C1) == 0 and (X & ~C0) == 0 |
3606 | if (match(V: A, P: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: B))) && |
3607 | MaskedValueIsZero(V: X, Mask: ~*C0, Depth: 0, CxtI: &I)) { |
3608 | Constant *C01 = ConstantInt::get(Ty, V: *C0 | *C1); |
3609 | return BinaryOperator::CreateAnd(V1: A, V2: C01); |
3610 | } |
3611 | // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1) |
3612 | // iff (C0 & C1) == 0 and (X & ~C1) == 0 |
3613 | if (match(V: B, P: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: A))) && |
3614 | MaskedValueIsZero(V: X, Mask: ~*C1, Depth: 0, CxtI: &I)) { |
3615 | Constant *C01 = ConstantInt::get(Ty, V: *C0 | *C1); |
3616 | return BinaryOperator::CreateAnd(V1: B, V2: C01); |
3617 | } |
3618 | // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1) |
3619 | // iff (C0 & C1) == 0 and (C2 & ~C0) == 0 and (C3 & ~C1) == 0. |
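// For example (i8): ((X | 0x01) & 0x0F) | ((X | 0x30) & 0xF0)
//                --> (X | 0x31) & 0xFF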
3620 | const APInt *C2, *C3; |
3621 | if (match(V: A, P: m_Or(L: m_Value(V&: X), R: m_APInt(Res&: C2))) && |
3622 | match(V: B, P: m_Or(L: m_Specific(V: X), R: m_APInt(Res&: C3))) && |
3623 | (*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) { |
3624 | Value *Or = Builder.CreateOr(LHS: X, RHS: *C2 | *C3, Name: "bitfield" ); |
3625 | Constant *C01 = ConstantInt::get(Ty, V: *C0 | *C1); |
3626 | return BinaryOperator::CreateAnd(V1: Or, V2: C01); |
3627 | } |
3628 | } |
3629 | } |
3630 | |
3631 | // Don't try to form a select if it's unlikely that we'll get rid of at |
3632 | // least one of the operands. A select is generally more expensive than the |
3633 | // 'or' that it is replacing. |
3634 | if (Op0->hasOneUse() || Op1->hasOneUse()) { |
3635 | // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants. |
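// Note: 'Cond' here has to behave like a boolean splatted to the full bit
// width (e.g. a sext of an i1), so every result bit picks the corresponding
// bit of C or D, which is exactly a select.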
3636 | if (Value *V = matchSelectFromAndOr(A, B: C, C: B, D)) |
3637 | return replaceInstUsesWith(I, V); |
3638 | if (Value *V = matchSelectFromAndOr(A, B: C, C: D, D: B)) |
3639 | return replaceInstUsesWith(I, V); |
3640 | if (Value *V = matchSelectFromAndOr(A: C, B: A, C: B, D)) |
3641 | return replaceInstUsesWith(I, V); |
3642 | if (Value *V = matchSelectFromAndOr(A: C, B: A, C: D, D: B)) |
3643 | return replaceInstUsesWith(I, V); |
3644 | if (Value *V = matchSelectFromAndOr(A: B, B: D, C: A, D: C)) |
3645 | return replaceInstUsesWith(I, V); |
3646 | if (Value *V = matchSelectFromAndOr(A: B, B: D, C, D: A)) |
3647 | return replaceInstUsesWith(I, V); |
3648 | if (Value *V = matchSelectFromAndOr(A: D, B, C: A, D: C)) |
3649 | return replaceInstUsesWith(I, V); |
3650 | if (Value *V = matchSelectFromAndOr(A: D, B, C, D: A)) |
3651 | return replaceInstUsesWith(I, V); |
3652 | } |
3653 | } |
3654 | |
3655 | if (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: C))) && |
3656 | match(V: Op1, P: m_Not(V: m_Or(L: m_Value(V&: B), R: m_Value(V&: D)))) && |
3657 | (Op0->hasOneUse() || Op1->hasOneUse())) { |
3658 | // (Cond & C) | ~(Cond | D) -> Cond ? C : ~D |
3659 | if (Value *V = matchSelectFromAndOr(A, B: C, C: B, D, InvertFalseVal: true)) |
3660 | return replaceInstUsesWith(I, V); |
3661 | if (Value *V = matchSelectFromAndOr(A, B: C, C: D, D: B, InvertFalseVal: true)) |
3662 | return replaceInstUsesWith(I, V); |
3663 | if (Value *V = matchSelectFromAndOr(A: C, B: A, C: B, D, InvertFalseVal: true)) |
3664 | return replaceInstUsesWith(I, V); |
3665 | if (Value *V = matchSelectFromAndOr(A: C, B: A, C: D, D: B, InvertFalseVal: true)) |
3666 | return replaceInstUsesWith(I, V); |
3667 | } |
3668 | |
3669 | // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C |
3670 | if (match(V: Op0, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) |
3671 | if (match(V: Op1, |
3672 | P: m_c_Xor(L: m_c_Xor(L: m_Specific(V: B), R: m_Value(V&: C)), R: m_Specific(V: A))) || |
3673 | match(V: Op1, P: m_c_Xor(L: m_c_Xor(L: m_Specific(V: A), R: m_Value(V&: C)), R: m_Specific(V: B)))) |
3674 | return BinaryOperator::CreateOr(V1: Op0, V2: C); |
3675 | |
3676 | // ((B ^ C) ^ A) | (A ^ B) -> (A ^ B) | C |
3677 | if (match(V: Op1, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) |
3678 | if (match(V: Op0, |
3679 | P: m_c_Xor(L: m_c_Xor(L: m_Specific(V: B), R: m_Value(V&: C)), R: m_Specific(V: A))) || |
3680 | match(V: Op0, P: m_c_Xor(L: m_c_Xor(L: m_Specific(V: A), R: m_Value(V&: C)), R: m_Specific(V: B)))) |
3681 | return BinaryOperator::CreateOr(V1: Op1, V2: C); |
3682 | |
3683 | if (Instruction *DeMorgan = matchDeMorgansLaws(I, IC&: *this)) |
3684 | return DeMorgan; |
3685 | |
3686 | // Canonicalize xor to the RHS. |
3687 | bool SwappedForXor = false; |
3688 | if (match(V: Op0, P: m_Xor(L: m_Value(), R: m_Value()))) { |
3689 | std::swap(a&: Op0, b&: Op1); |
3690 | SwappedForXor = true; |
3691 | } |
3692 | |
3693 | if (match(V: Op1, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
3694 | // (A | ?) | (A ^ B) --> (A | ?) | B |
3695 | // (B | ?) | (A ^ B) --> (B | ?) | A |
3696 | if (match(V: Op0, P: m_c_Or(L: m_Specific(V: A), R: m_Value()))) |
3697 | return BinaryOperator::CreateOr(V1: Op0, V2: B); |
3698 | if (match(V: Op0, P: m_c_Or(L: m_Specific(V: B), R: m_Value()))) |
3699 | return BinaryOperator::CreateOr(V1: Op0, V2: A); |
3700 | |
3701 | // (A & B) | (A ^ B) --> A | B |
3702 | // (B & A) | (A ^ B) --> A | B |
3703 | if (match(V: Op0, P: m_c_And(L: m_Specific(V: A), R: m_Specific(V: B)))) |
3704 | return BinaryOperator::CreateOr(V1: A, V2: B); |
3705 | |
3706 | // ~A | (A ^ B) --> ~(A & B) |
3707 | // ~B | (A ^ B) --> ~(A & B) |
3708 | // The swap above should always make Op0 the 'not'. |
3709 | if ((Op0->hasOneUse() || Op1->hasOneUse()) && |
3710 | (match(V: Op0, P: m_Not(V: m_Specific(V: A))) || match(V: Op0, P: m_Not(V: m_Specific(V: B))))) |
3711 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: A, RHS: B)); |
3712 | |
3713 | // Same as above, but peek through an 'and' to the common operand: |
3714 | // ~(A & ?) | (A ^ B) --> ~((A & ?) & B) |
3715 | // ~(B & ?) | (A ^ B) --> ~((B & ?) & A) |
3716 | Instruction *And; |
3717 | if ((Op0->hasOneUse() || Op1->hasOneUse()) && |
3718 | match(V: Op0, P: m_Not(V: m_CombineAnd(L: m_Instruction(I&: And), |
3719 | R: m_c_And(L: m_Specific(V: A), R: m_Value()))))) |
3720 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: And, RHS: B)); |
3721 | if ((Op0->hasOneUse() || Op1->hasOneUse()) && |
3722 | match(V: Op0, P: m_Not(V: m_CombineAnd(L: m_Instruction(I&: And), |
3723 | R: m_c_And(L: m_Specific(V: B), R: m_Value()))))) |
3724 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: And, RHS: A)); |
3725 | |
3726 | // (~A | C) | (A ^ B) --> ~(A & B) | C |
3727 | // (~B | C) | (A ^ B) --> ~(A & B) | C |
3728 | if (Op0->hasOneUse() && Op1->hasOneUse() && |
3729 | (match(V: Op0, P: m_c_Or(L: m_Not(V: m_Specific(V: A)), R: m_Value(V&: C))) || |
3730 | match(V: Op0, P: m_c_Or(L: m_Not(V: m_Specific(V: B)), R: m_Value(V&: C))))) { |
3731 | Value *Nand = Builder.CreateNot(V: Builder.CreateAnd(LHS: A, RHS: B), Name: "nand" ); |
3732 | return BinaryOperator::CreateOr(V1: Nand, V2: C); |
3733 | } |
3734 | } |
3735 | |
3736 | if (SwappedForXor) |
3737 | std::swap(a&: Op0, b&: Op1); |
3738 | |
3739 | { |
3740 | ICmpInst *LHS = dyn_cast<ICmpInst>(Val: Op0); |
3741 | ICmpInst *RHS = dyn_cast<ICmpInst>(Val: Op1); |
3742 | if (LHS && RHS) |
3743 | if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ false)) |
3744 | return replaceInstUsesWith(I, V: Res); |
3745 | |
3746 | // TODO: Make this recursive; it's a little tricky because an arbitrary |
3747 | // number of 'or' instructions might have to be created. |
3748 | Value *X, *Y; |
3749 | if (LHS && match(V: Op1, P: m_OneUse(SubPattern: m_LogicalOr(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
3750 | bool IsLogical = isa<SelectInst>(Val: Op1); |
3751 | // LHS | (X || Y) --> (LHS || X) || Y |
3752 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: X)) |
3753 | if (Value *Res = |
3754 | foldAndOrOfICmps(LHS, RHS: Cmp, I, /* IsAnd */ false, IsLogical)) |
3755 | return replaceInstUsesWith(I, V: IsLogical |
3756 | ? Builder.CreateLogicalOr(Cond1: Res, Cond2: Y) |
3757 | : Builder.CreateOr(LHS: Res, RHS: Y)); |
3758 | // LHS | (X || Y) --> X || (LHS | Y) |
3759 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: Y)) |
3760 | if (Value *Res = foldAndOrOfICmps(LHS, RHS: Cmp, I, /* IsAnd */ false, |
3761 | /* IsLogical */ false)) |
3762 | return replaceInstUsesWith(I, V: IsLogical |
3763 | ? Builder.CreateLogicalOr(Cond1: X, Cond2: Res) |
3764 | : Builder.CreateOr(LHS: X, RHS: Res)); |
3765 | } |
3766 | if (RHS && match(V: Op0, P: m_OneUse(SubPattern: m_LogicalOr(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
3767 | bool IsLogical = isa<SelectInst>(Val: Op0); |
3768 | // (X || Y) | RHS --> (X || RHS) || Y |
3769 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: X)) |
3770 | if (Value *Res = |
3771 | foldAndOrOfICmps(LHS: Cmp, RHS, I, /* IsAnd */ false, IsLogical)) |
3772 | return replaceInstUsesWith(I, V: IsLogical |
3773 | ? Builder.CreateLogicalOr(Cond1: Res, Cond2: Y) |
3774 | : Builder.CreateOr(LHS: Res, RHS: Y)); |
3775 | // (X || Y) | RHS --> X || (Y | RHS) |
3776 | if (auto *Cmp = dyn_cast<ICmpInst>(Val: Y)) |
3777 | if (Value *Res = foldAndOrOfICmps(LHS: Cmp, RHS, I, /* IsAnd */ false, |
3778 | /* IsLogical */ false)) |
3779 | return replaceInstUsesWith(I, V: IsLogical |
3780 | ? Builder.CreateLogicalOr(Cond1: X, Cond2: Res) |
3781 | : Builder.CreateOr(LHS: X, RHS: Res)); |
3782 | } |
3783 | } |
3784 | |
3785 | if (FCmpInst *LHS = dyn_cast<FCmpInst>(Val: I.getOperand(i_nocapture: 0))) |
3786 | if (FCmpInst *RHS = dyn_cast<FCmpInst>(Val: I.getOperand(i_nocapture: 1))) |
3787 | if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false)) |
3788 | return replaceInstUsesWith(I, V: Res); |
3789 | |
3790 | if (Instruction *FoldedFCmps = reassociateFCmps(BO&: I, Builder)) |
3791 | return FoldedFCmps; |
3792 | |
3793 | if (Instruction *CastedOr = foldCastedBitwiseLogic(I)) |
3794 | return CastedOr; |
3795 | |
3796 | if (Instruction *Sel = foldBinopOfSextBoolToSelect(I)) |
3797 | return Sel; |
3798 | |
3799 | // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>. |
3800 | // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold |
3801 | // with binop identity constant. But creating a select with non-constant |
3802 | // arm may not be reversible due to poison semantics. Is that a good |
3803 | // canonicalization? |
3804 | if (match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: A))), R: m_Value(V&: B))) && |
3805 | A->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
3806 | return SelectInst::Create(C: A, S1: ConstantInt::getAllOnesValue(Ty), S2: B); |
3807 | |
3808 | // Note: If we've gotten to the point of visiting the outer OR, then the |
3809 | // inner one couldn't be simplified. If it was a constant, then it won't |
3810 | // be simplified by a later pass either, so we try swapping the inner/outer |
3811 | // ORs in the hopes that we'll be able to simplify it this way. |
3812 | // (X|C) | V --> (X|V) | C |
3813 | ConstantInt *CI; |
3814 | if (Op0->hasOneUse() && !match(V: Op1, P: m_ConstantInt()) && |
3815 | match(V: Op0, P: m_Or(L: m_Value(V&: A), R: m_ConstantInt(CI)))) { |
3816 | Value *Inner = Builder.CreateOr(LHS: A, RHS: Op1); |
3817 | Inner->takeName(V: Op0); |
3818 | return BinaryOperator::CreateOr(V1: Inner, V2: CI); |
3819 | } |
3820 | |
3821 | // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D)) |
3822 | // Since this OR statement hasn't been optimized further yet, we hope |
3823 | // that this transformation will allow the new ORs to be optimized. |
3824 | { |
3825 | Value *X = nullptr, *Y = nullptr; |
3826 | if (Op0->hasOneUse() && Op1->hasOneUse() && |
3827 | match(V: Op0, P: m_Select(C: m_Value(V&: X), L: m_Value(V&: A), R: m_Value(V&: B))) && |
3828 | match(V: Op1, P: m_Select(C: m_Value(V&: Y), L: m_Value(V&: C), R: m_Value(V&: D))) && X == Y) { |
3829 | Value *orTrue = Builder.CreateOr(LHS: A, RHS: C); |
3830 | Value *orFalse = Builder.CreateOr(LHS: B, RHS: D); |
3831 | return SelectInst::Create(C: X, S1: orTrue, S2: orFalse); |
3832 | } |
3833 | } |
3834 | |
3835 | // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X. |
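// Rationale: 'ashr (Y - X), BW-1' is all-ones when Y - X is negative
// (i.e. X s> Y, given the nsw sub) and zero otherwise, so or'ing with X
// yields -1 or X respectively.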
3836 | { |
3837 | Value *X, *Y; |
3838 | if (match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_AShr( |
3839 | L: m_NSWSub(L: m_Value(V&: Y), R: m_Value(V&: X)), |
3840 | R: m_SpecificInt(V: Ty->getScalarSizeInBits() - 1))), |
3841 | R: m_Deferred(V: X)))) { |
3842 | Value *NewICmpInst = Builder.CreateICmpSGT(LHS: X, RHS: Y); |
3843 | Value *AllOnes = ConstantInt::getAllOnesValue(Ty); |
3844 | return SelectInst::Create(C: NewICmpInst, S1: AllOnes, S2: X); |
3845 | } |
3846 | } |
3847 | |
3848 | { |
3849 | // ((A & B) ^ A) | ((A & B) ^ B) -> A ^ B |
3850 | // (A ^ (A & B)) | (B ^ (A & B)) -> A ^ B |
3851 | // ((A & B) ^ B) | ((A & B) ^ A) -> A ^ B |
3852 | // (B ^ (A & B)) | (A ^ (A & B)) -> A ^ B |
3853 | const auto TryXorOpt = [&](Value *Lhs, Value *Rhs) -> Instruction * { |
3854 | if (match(V: Lhs, P: m_c_Xor(L: m_And(L: m_Value(V&: A), R: m_Value(V&: B)), R: m_Deferred(V: A))) && |
3855 | match(V: Rhs, |
3856 | P: m_c_Xor(L: m_And(L: m_Specific(V: A), R: m_Specific(V: B)), R: m_Specific(V: B)))) { |
3857 | return BinaryOperator::CreateXor(V1: A, V2: B); |
3858 | } |
3859 | return nullptr; |
3860 | }; |
3861 | |
3862 | if (Instruction *Result = TryXorOpt(Op0, Op1)) |
3863 | return Result; |
3864 | if (Instruction *Result = TryXorOpt(Op1, Op0)) |
3865 | return Result; |
3866 | } |
3867 | |
3868 | if (Instruction *V = |
3869 | canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) |
3870 | return V; |
3871 | |
3872 | CmpInst::Predicate Pred; |
3873 | Value *Mul, *Ov, *MulIsNotZero, *UMulWithOv; |
// Check if the OR weakens the overflow condition for umul.with.overflow by
// treating any non-zero result as overflow. In that case, we overflow if both
// umul.with.overflow operands are != 0, as in that case the result can only
// be 0 if the multiplication overflows.
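// For example (illustrative): '(umul.ov A, B).overflow | ((A * B) != 0)' is
// true unless the product is 0 without overflowing, which happens iff A == 0
// or B == 0, so the whole expression folds to '(A != 0) & (B != 0)'.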
3878 | if (match(V: &I, |
3879 | P: m_c_Or(L: m_CombineAnd(L: m_ExtractValue<1>(V: m_Value(V&: UMulWithOv)), |
3880 | R: m_Value(V&: Ov)), |
3881 | R: m_CombineAnd(L: m_ICmp(Pred, |
3882 | L: m_CombineAnd(L: m_ExtractValue<0>( |
3883 | V: m_Deferred(V: UMulWithOv)), |
3884 | R: m_Value(V&: Mul)), |
3885 | R: m_ZeroInt()), |
3886 | R: m_Value(V&: MulIsNotZero)))) && |
3887 | (Ov->hasOneUse() || (MulIsNotZero->hasOneUse() && Mul->hasOneUse())) && |
3888 | Pred == CmpInst::ICMP_NE) { |
3889 | Value *A, *B; |
3890 | if (match(V: UMulWithOv, P: m_Intrinsic<Intrinsic::umul_with_overflow>( |
3891 | Op0: m_Value(V&: A), Op1: m_Value(V&: B)))) { |
3892 | Value *NotNullA = Builder.CreateIsNotNull(Arg: A); |
3893 | Value *NotNullB = Builder.CreateIsNotNull(Arg: B); |
3894 | return BinaryOperator::CreateAnd(V1: NotNullA, V2: NotNullB); |
3895 | } |
3896 | } |
3897 | |
3898 | /// Res, Overflow = xxx_with_overflow X, C1 |
3899 | /// Try to canonicalize the pattern "Overflow | icmp pred Res, C2" into |
3900 | /// "Overflow | icmp pred X, C2 +/- C1". |
3901 | const WithOverflowInst *WO; |
3902 | const Value *WOV; |
3903 | const APInt *C1, *C2; |
3904 | if (match(V: &I, P: m_c_Or(L: m_CombineAnd(L: m_ExtractValue<1>(V: m_CombineAnd( |
3905 | L: m_WithOverflowInst(I&: WO), R: m_Value(V&: WOV))), |
3906 | R: m_Value(V&: Ov)), |
3907 | R: m_OneUse(SubPattern: m_ICmp(Pred, L: m_ExtractValue<0>(V: m_Deferred(V: WOV)), |
3908 | R: m_APInt(Res&: C2))))) && |
3909 | (WO->getBinaryOp() == Instruction::Add || |
3910 | WO->getBinaryOp() == Instruction::Sub) && |
3911 | (ICmpInst::isEquality(P: Pred) || |
3912 | WO->isSigned() == ICmpInst::isSigned(predicate: Pred)) && |
3913 | match(V: WO->getRHS(), P: m_APInt(Res&: C1))) { |
3914 | bool Overflow; |
3915 | APInt NewC = WO->getBinaryOp() == Instruction::Add |
3916 | ? (ICmpInst::isSigned(predicate: Pred) ? C2->ssub_ov(RHS: *C1, Overflow) |
3917 | : C2->usub_ov(RHS: *C1, Overflow)) |
3918 | : (ICmpInst::isSigned(predicate: Pred) ? C2->sadd_ov(RHS: *C1, Overflow) |
3919 | : C2->uadd_ov(RHS: *C1, Overflow)); |
3920 | if (!Overflow || ICmpInst::isEquality(P: Pred)) { |
3921 | Value *NewCmp = Builder.CreateICmp( |
3922 | P: Pred, LHS: WO->getLHS(), RHS: ConstantInt::get(Ty: WO->getLHS()->getType(), V: NewC)); |
3923 | return BinaryOperator::CreateOr(V1: Ov, V2: NewCmp); |
3924 | } |
3925 | } |
3926 | |
3927 | // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions |
3928 | if (sinkNotIntoOtherHandOfLogicalOp(I)) |
3929 | return &I; |
3930 | |
3931 | // Improve "get low bit mask up to and including bit X" pattern: |
3932 | // (1 << X) | ((1 << X) + -1) --> -1 l>> (bitwidth(x) - 1 - X) |
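// For example (i8, X = 3): (1 << 3) | 0b00000111 == 0b00001111, which is
// 0xFF l>> (7 - 3).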
3933 | if (match(V: &I, P: m_c_Or(L: m_Add(L: m_Shl(L: m_One(), R: m_Value(V&: X)), R: m_AllOnes()), |
3934 | R: m_Shl(L: m_One(), R: m_Deferred(V: X)))) && |
3935 | match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_Value()), R: m_Value()))) { |
3936 | Value *Sub = Builder.CreateSub( |
3937 | LHS: ConstantInt::get(Ty, V: Ty->getScalarSizeInBits() - 1), RHS: X); |
3938 | return BinaryOperator::CreateLShr(V1: Constant::getAllOnesValue(Ty), V2: Sub); |
3939 | } |
3940 | |
// An or recurrence w/loop invariant step is equivalent to (or start, step)
3942 | PHINode *PN = nullptr; |
3943 | Value *Start = nullptr, *Step = nullptr; |
3944 | if (matchSimpleRecurrence(I: &I, P&: PN, Start, Step) && DT.dominates(Def: Step, User: PN)) |
3945 | return replaceInstUsesWith(I, V: Builder.CreateOr(LHS: Start, RHS: Step)); |
3946 | |
3947 | // (A & B) | (C | D) or (C | D) | (A & B) |
3948 | // Can be combined if C or D is of type (A/B & X) |
3949 | if (match(V: &I, P: m_c_Or(L: m_OneUse(SubPattern: m_And(L: m_Value(V&: A), R: m_Value(V&: B))), |
3950 | R: m_OneUse(SubPattern: m_Or(L: m_Value(V&: C), R: m_Value(V&: D)))))) { |
// (A & B) | (C | D) -> C | (D | (A & B))
// (C | D) | (A & B) -> C | (D | (A & B))
// iff D is of the form (A & X) or (B & X) (all commuted variants included).
3959 | if (match(V: D, P: m_OneUse(SubPattern: m_c_And(L: m_Specific(V: A), R: m_Value()))) || |
3960 | match(V: D, P: m_OneUse(SubPattern: m_c_And(L: m_Specific(V: B), R: m_Value())))) |
3961 | return BinaryOperator::CreateOr( |
3962 | V1: C, V2: Builder.CreateOr(LHS: D, RHS: Builder.CreateAnd(LHS: A, RHS: B))); |
// (A & B) | (C | D) -> (C | (A & B)) | D
// (C | D) | (A & B) -> (C | (A & B)) | D
// iff C is of the form (A & X) or (B & X) (all commuted variants included).
3971 | if (match(V: C, P: m_OneUse(SubPattern: m_c_And(L: m_Specific(V: A), R: m_Value()))) || |
3972 | match(V: C, P: m_OneUse(SubPattern: m_c_And(L: m_Specific(V: B), R: m_Value())))) |
3973 | return BinaryOperator::CreateOr( |
3974 | V1: Builder.CreateOr(LHS: C, RHS: Builder.CreateAnd(LHS: A, RHS: B)), V2: D); |
3975 | } |
3976 | |
3977 | if (Instruction *R = reassociateForUses(BO&: I, Builder)) |
3978 | return R; |
3979 | |
3980 | if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder)) |
3981 | return Canonicalized; |
3982 | |
3983 | if (Instruction *Folded = foldLogicOfIsFPClass(BO&: I, Op0, Op1)) |
3984 | return Folded; |
3985 | |
3986 | if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) |
3987 | return Res; |
3988 | |
3989 | // If we are setting the sign bit of a floating-point value, convert |
3990 | // this to fneg(fabs), then cast back to integer. |
3991 | // |
3992 | // If the result isn't immediately cast back to a float, this will increase |
3993 | // the number of instructions. This is still probably a better canonical form |
3994 | // as it enables FP value tracking. |
3995 | // |
3996 | // Assumes any IEEE-represented type has the sign bit in the high bit. |
3997 | // |
// This is a generous interpretation of noimplicitfloat; this is not a true
// floating-point operation.
4000 | Value *CastOp; |
4001 | if (match(V: Op0, P: m_ElementWiseBitCast(Op: m_Value(V&: CastOp))) && |
4002 | match(V: Op1, P: m_SignMask()) && |
4003 | !Builder.GetInsertBlock()->getParent()->hasFnAttribute( |
4004 | Kind: Attribute::NoImplicitFloat)) { |
4005 | Type *EltTy = CastOp->getType()->getScalarType(); |
4006 | if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { |
4007 | Value *FAbs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: CastOp); |
4008 | Value *FNegFAbs = Builder.CreateFNeg(V: FAbs); |
4009 | return new BitCastInst(FNegFAbs, I.getType()); |
4010 | } |
4011 | } |
4012 | |
4013 | // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2 |
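// (If every bit of C2 is already known set in X, then X & (C1 | C2) keeps
// those bits, which is the same as or'ing C2 back in after masking with C1.)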
4014 | if (match(V: Op0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_APInt(Res&: C1)))) && |
4015 | match(V: Op1, P: m_APInt(Res&: C2))) { |
4016 | KnownBits KnownX = computeKnownBits(V: X, /*Depth*/ 0, CxtI: &I); |
4017 | if ((KnownX.One & *C2) == *C2) |
4018 | return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty, V: *C1 | *C2)); |
4019 | } |
4020 | |
4021 | if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) |
4022 | return Res; |
4023 | |
4024 | if (Value *V = |
4025 | simplifyAndOrWithOpReplaced(V: Op0, Op: Op1, RepOp: Constant::getNullValue(Ty), |
4026 | /*SimplifyOnly*/ false, IC&: *this)) |
4027 | return BinaryOperator::CreateOr(V1: V, V2: Op1); |
4028 | if (Value *V = |
4029 | simplifyAndOrWithOpReplaced(V: Op1, Op: Op0, RepOp: Constant::getNullValue(Ty), |
4030 | /*SimplifyOnly*/ false, IC&: *this)) |
4031 | return BinaryOperator::CreateOr(V1: Op0, V2: V); |
4032 | |
4033 | if (cast<PossiblyDisjointInst>(Val&: I).isDisjoint()) |
4034 | if (Value *V = SimplifyAddWithRemainder(I)) |
4035 | return replaceInstUsesWith(I, V); |
4036 | |
4037 | return nullptr; |
4038 | } |
4039 | |
4040 | /// A ^ B can be specified using other logic ops in a variety of patterns. We |
4041 | /// can fold these early and efficiently by morphing an existing instruction. |
4042 | static Instruction *foldXorToXor(BinaryOperator &I, |
4043 | InstCombiner::BuilderTy &Builder) { |
4044 | assert(I.getOpcode() == Instruction::Xor); |
4045 | Value *Op0 = I.getOperand(i_nocapture: 0); |
4046 | Value *Op1 = I.getOperand(i_nocapture: 1); |
4047 | Value *A, *B; |
4048 | |
4049 | // There are 4 commuted variants for each of the basic patterns. |
4050 | |
4051 | // (A & B) ^ (A | B) -> A ^ B |
4052 | // (A & B) ^ (B | A) -> A ^ B |
4053 | // (A | B) ^ (A & B) -> A ^ B |
4054 | // (A | B) ^ (B & A) -> A ^ B |
4055 | if (match(V: &I, P: m_c_Xor(L: m_And(L: m_Value(V&: A), R: m_Value(V&: B)), |
4056 | R: m_c_Or(L: m_Deferred(V: A), R: m_Deferred(V: B))))) |
4057 | return BinaryOperator::CreateXor(V1: A, V2: B); |
4058 | |
4059 | // (A | ~B) ^ (~A | B) -> A ^ B |
4060 | // (~B | A) ^ (~A | B) -> A ^ B |
4061 | // (~A | B) ^ (A | ~B) -> A ^ B |
4062 | // (B | ~A) ^ (A | ~B) -> A ^ B |
4063 | if (match(V: &I, P: m_Xor(L: m_c_Or(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B))), |
4064 | R: m_c_Or(L: m_Not(V: m_Deferred(V: A)), R: m_Deferred(V: B))))) |
4065 | return BinaryOperator::CreateXor(V1: A, V2: B); |
4066 | |
4067 | // (A & ~B) ^ (~A & B) -> A ^ B |
4068 | // (~B & A) ^ (~A & B) -> A ^ B |
4069 | // (~A & B) ^ (A & ~B) -> A ^ B |
4070 | // (B & ~A) ^ (A & ~B) -> A ^ B |
4071 | if (match(V: &I, P: m_Xor(L: m_c_And(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B))), |
4072 | R: m_c_And(L: m_Not(V: m_Deferred(V: A)), R: m_Deferred(V: B))))) |
4073 | return BinaryOperator::CreateXor(V1: A, V2: B); |
4074 | |
4075 | // For the remaining cases we need to get rid of one of the operands. |
4076 | if (!Op0->hasOneUse() && !Op1->hasOneUse()) |
4077 | return nullptr; |
4078 | |
4079 | // (A | B) ^ ~(A & B) -> ~(A ^ B) |
4080 | // (A | B) ^ ~(B & A) -> ~(A ^ B) |
4081 | // (A & B) ^ ~(A | B) -> ~(A ^ B) |
4082 | // (A & B) ^ ~(B | A) -> ~(A ^ B) |
4083 | // Complexity sorting ensures the not will be on the right side. |
4084 | if ((match(V: Op0, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4085 | match(V: Op1, P: m_Not(V: m_c_And(L: m_Specific(V: A), R: m_Specific(V: B))))) || |
4086 | (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4087 | match(V: Op1, P: m_Not(V: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))))) |
4088 | return BinaryOperator::CreateNot(Op: Builder.CreateXor(LHS: A, RHS: B)); |
4089 | |
4090 | return nullptr; |
4091 | } |
4092 | |
4093 | Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, |
4094 | BinaryOperator &I) { |
4095 | assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS && |
4096 | I.getOperand(1) == RHS && "Should be 'xor' with these operands" ); |
4097 | |
4098 | ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); |
4099 | Value *LHS0 = LHS->getOperand(i_nocapture: 0), *LHS1 = LHS->getOperand(i_nocapture: 1); |
4100 | Value *RHS0 = RHS->getOperand(i_nocapture: 0), *RHS1 = RHS->getOperand(i_nocapture: 1); |
4101 | |
4102 | if (predicatesFoldable(P1: PredL, P2: PredR)) { |
4103 | if (LHS0 == RHS1 && LHS1 == RHS0) { |
4104 | std::swap(a&: LHS0, b&: LHS1); |
4105 | PredL = ICmpInst::getSwappedPredicate(pred: PredL); |
4106 | } |
4107 | if (LHS0 == RHS0 && LHS1 == RHS1) { |
4108 | // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) |
4109 | unsigned Code = getICmpCode(Pred: PredL) ^ getICmpCode(Pred: PredR); |
4110 | bool IsSigned = LHS->isSigned() || RHS->isSigned(); |
4111 | return getNewICmpValue(Code, Sign: IsSigned, LHS: LHS0, RHS: LHS1, Builder); |
4112 | } |
4113 | } |
4114 | |
4115 | // TODO: This can be generalized to compares of non-signbits using |
4116 | // decomposeBitTestICmp(). It could be enhanced more by using (something like) |
4117 | // foldLogOpOfMaskedICmps(). |
4118 | const APInt *LC, *RC; |
4119 | if (match(V: LHS1, P: m_APInt(Res&: LC)) && match(V: RHS1, P: m_APInt(Res&: RC)) && |
4120 | LHS0->getType() == RHS0->getType() && |
4121 | LHS0->getType()->isIntOrIntVectorTy()) { |
4122 | // Convert xor of signbit tests to signbit test of xor'd values: |
4123 | // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0 |
4124 | // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0 |
4125 | // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1 |
4126 | // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1 |
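// When both compares test the same sign condition, the xor is true exactly
// when the signs of X and Y differ, i.e. when (X ^ Y) is negative; when they
// test opposite conditions, the result is inverted.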
4127 | bool TrueIfSignedL, TrueIfSignedR; |
4128 | if ((LHS->hasOneUse() || RHS->hasOneUse()) && |
4129 | isSignBitCheck(Pred: PredL, RHS: *LC, TrueIfSigned&: TrueIfSignedL) && |
4130 | isSignBitCheck(Pred: PredR, RHS: *RC, TrueIfSigned&: TrueIfSignedR)) { |
4131 | Value *XorLR = Builder.CreateXor(LHS: LHS0, RHS: RHS0); |
4132 | return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(Arg: XorLR) : |
4133 | Builder.CreateIsNotNeg(Arg: XorLR); |
4134 | } |
4135 | |
4136 | // Fold (icmp pred1 X, C1) ^ (icmp pred2 X, C2) |
4137 | // into a single comparison using range-based reasoning. |
4138 | if (LHS0 == RHS0) { |
4139 | ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred: PredL, Other: *LC); |
4140 | ConstantRange CR2 = ConstantRange::makeExactICmpRegion(Pred: PredR, Other: *RC); |
4141 | auto CRUnion = CR1.exactUnionWith(CR: CR2); |
4142 | auto CRIntersect = CR1.exactIntersectWith(CR: CR2); |
4143 | if (CRUnion && CRIntersect) |
4144 | if (auto CR = CRUnion->exactIntersectWith(CR: CRIntersect->inverse())) { |
4145 | if (CR->isFullSet()) |
4146 | return ConstantInt::getTrue(Ty: I.getType()); |
4147 | if (CR->isEmptySet()) |
4148 | return ConstantInt::getFalse(Ty: I.getType()); |
4149 | |
4150 | CmpInst::Predicate NewPred; |
4151 | APInt NewC, Offset; |
4152 | CR->getEquivalentICmp(Pred&: NewPred, RHS&: NewC, Offset); |
4153 | |
4154 | if ((Offset.isZero() && (LHS->hasOneUse() || RHS->hasOneUse())) || |
4155 | (LHS->hasOneUse() && RHS->hasOneUse())) { |
4156 | Value *NewV = LHS0; |
4157 | Type *Ty = LHS0->getType(); |
4158 | if (!Offset.isZero()) |
4159 | NewV = Builder.CreateAdd(LHS: NewV, RHS: ConstantInt::get(Ty, V: Offset)); |
4160 | return Builder.CreateICmp(P: NewPred, LHS: NewV, |
4161 | RHS: ConstantInt::get(Ty, V: NewC)); |
4162 | } |
4163 | } |
4164 | } |
4165 | } |
4166 | |
4167 | // Instead of trying to imitate the folds for and/or, decompose this 'xor' |
4168 | // into those logic ops. That is, try to turn this into an and-of-icmps |
4169 | // because we have many folds for that pattern. |
4170 | // |
4171 | // This is based on a truth table definition of xor: |
4172 | // X ^ Y --> (X | Y) & !(X & Y) |
4173 | if (Value *OrICmp = simplifyBinOp(Opcode: Instruction::Or, LHS, RHS, Q: SQ)) { |
4174 | // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y). |
4175 | // TODO: If OrICmp is false, the whole thing is false (InstSimplify?). |
4176 | if (Value *AndICmp = simplifyBinOp(Opcode: Instruction::And, LHS, RHS, Q: SQ)) { |
4177 | // TODO: Independently handle cases where the 'and' side is a constant. |
4178 | ICmpInst *X = nullptr, *Y = nullptr; |
4179 | if (OrICmp == LHS && AndICmp == RHS) { |
4180 | // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y |
4181 | X = LHS; |
4182 | Y = RHS; |
4183 | } |
4184 | if (OrICmp == RHS && AndICmp == LHS) { |
4185 | // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X |
4186 | X = RHS; |
4187 | Y = LHS; |
4188 | } |
4189 | if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(V: Y, IgnoredUser: &I))) { |
4190 | // Invert the predicate of 'Y', thus inverting its output. |
4191 | Y->setPredicate(Y->getInversePredicate()); |
4192 | // So, are there other uses of Y? |
4193 | if (!Y->hasOneUse()) { |
4194 | // We need to adapt other uses of Y though. Get a value that matches |
4195 | // the original value of Y before inversion. While this increases |
4196 | // immediate instruction count, we have just ensured that all the |
4197 | // users are freely-invertible, so that 'not' *will* get folded away. |
4198 | BuilderTy::InsertPointGuard Guard(Builder); |
4199 | // Set insertion point to right after the Y. |
4200 | Builder.SetInsertPoint(TheBB: Y->getParent(), IP: ++(Y->getIterator())); |
4201 | Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" ); |
4202 | // Replace all uses of Y (excluding the one in NotY!) with NotY. |
4203 | Worklist.pushUsersToWorkList(I&: *Y); |
4204 | Y->replaceUsesWithIf(New: NotY, |
4205 | ShouldReplace: [NotY](Use &U) { return U.getUser() != NotY; }); |
4206 | } |
4207 | // All done. |
4208 | return Builder.CreateAnd(LHS, RHS); |
4209 | } |
4210 | } |
4211 | } |
4212 | |
4213 | return nullptr; |
4214 | } |
4215 | |
4216 | /// If we have a masked merge, in the canonical form of: |
4217 | /// (assuming that A only has one use.) |
///   |        A  |  |B|
///   ((x ^ y) & M) ^ y
///    |  D  |
/// * If M is inverted:
///      |  D  |
///   ((x ^ y) & ~M) ^ y
4224 | /// We can canonicalize by swapping the final xor operand |
4225 | /// to eliminate the 'not' of the mask. |
4226 | /// ((x ^ y) & M) ^ x |
4227 | /// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops |
4228 | /// because that shortens the dependency chain and improves analysis: |
4229 | /// (x & M) | (y & ~M) |
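/// A small worked example (illustrative): x = 0b1010, y = 0b0101, M = 0b1100
/// gives ((x ^ y) & M) ^ y == 0b1001 -- the bits of x where M is set and the
/// bits of y where M is clear.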
4230 | static Instruction *visitMaskedMerge(BinaryOperator &I, |
4231 | InstCombiner::BuilderTy &Builder) { |
4232 | Value *B, *X, *D; |
4233 | Value *M; |
4234 | if (!match(V: &I, P: m_c_Xor(L: m_Value(V&: B), |
4235 | R: m_OneUse(SubPattern: m_c_And( |
4236 | L: m_CombineAnd(L: m_c_Xor(L: m_Deferred(V: B), R: m_Value(V&: X)), |
4237 | R: m_Value(V&: D)), |
4238 | R: m_Value(V&: M)))))) |
4239 | return nullptr; |
4240 | |
4241 | Value *NotM; |
4242 | if (match(V: M, P: m_Not(V: m_Value(V&: NotM)))) { |
4243 | // De-invert the mask and swap the value in B part. |
4244 | Value *NewA = Builder.CreateAnd(LHS: D, RHS: NotM); |
4245 | return BinaryOperator::CreateXor(V1: NewA, V2: X); |
4246 | } |
4247 | |
4248 | Constant *C; |
4249 | if (D->hasOneUse() && match(V: M, P: m_Constant(C))) { |
4250 | // Propagating undef is unsafe. Clamp undef elements to -1. |
4251 | Type *EltTy = C->getType()->getScalarType(); |
4252 | C = Constant::replaceUndefsWith(C, Replacement: ConstantInt::getAllOnesValue(Ty: EltTy)); |
4253 | // Unfold. |
4254 | Value *LHS = Builder.CreateAnd(LHS: X, RHS: C); |
4255 | Value *NotC = Builder.CreateNot(V: C); |
4256 | Value *RHS = Builder.CreateAnd(LHS: B, RHS: NotC); |
4257 | return BinaryOperator::CreateOr(V1: LHS, V2: RHS); |
4258 | } |
4259 | |
4260 | return nullptr; |
4261 | } |
4262 | |
4263 | static Instruction *foldNotXor(BinaryOperator &I, |
4264 | InstCombiner::BuilderTy &Builder) { |
4265 | Value *X, *Y; |
4266 | // FIXME: one-use check is not needed in general, but currently we are unable |
4267 | // to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182) |
4268 | if (!match(V: &I, P: m_Not(V: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: X), R: m_Value(V&: Y)))))) |
4269 | return nullptr; |
4270 | |
4271 | auto hasCommonOperand = [](Value *A, Value *B, Value *C, Value *D) { |
4272 | return A == C || A == D || B == C || B == D; |
4273 | }; |
4274 | |
4275 | Value *A, *B, *C, *D; |
4276 | // Canonicalize ~((A & B) ^ (A | ?)) -> (A & B) | ~(A | ?) |
4277 | // 4 commuted variants |
4278 | if (match(V: X, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4279 | match(V: Y, P: m_Or(L: m_Value(V&: C), R: m_Value(V&: D))) && hasCommonOperand(A, B, C, D)) { |
4280 | Value *NotY = Builder.CreateNot(V: Y); |
4281 | return BinaryOperator::CreateOr(V1: X, V2: NotY); |
4282 | }; |
4283 | |
4284 | // Canonicalize ~((A | ?) ^ (A & B)) -> (A & B) | ~(A | ?) |
4285 | // 4 commuted variants |
4286 | if (match(V: Y, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4287 | match(V: X, P: m_Or(L: m_Value(V&: C), R: m_Value(V&: D))) && hasCommonOperand(A, B, C, D)) { |
4288 | Value *NotX = Builder.CreateNot(V: X); |
4289 | return BinaryOperator::CreateOr(V1: Y, V2: NotX); |
4290 | }; |
4291 | |
4292 | return nullptr; |
4293 | } |
4294 | |
4295 | /// Canonicalize a shifty way to code absolute value to the more common pattern |
4296 | /// that uses negation and select. |
4297 | static Instruction *canonicalizeAbs(BinaryOperator &Xor, |
4298 | InstCombiner::BuilderTy &Builder) { |
4299 | assert(Xor.getOpcode() == Instruction::Xor && "Expected an xor instruction." ); |
4300 | |
4301 | // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1. |
4302 | // We're relying on the fact that we only do this transform when the shift has |
4303 | // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase |
4304 | // instructions). |
4305 | Value *Op0 = Xor.getOperand(i_nocapture: 0), *Op1 = Xor.getOperand(i_nocapture: 1); |
4306 | if (Op0->hasNUses(N: 2)) |
4307 | std::swap(a&: Op0, b&: Op1); |
4308 | |
4309 | Type *Ty = Xor.getType(); |
4310 | Value *A; |
4311 | const APInt *ShAmt; |
4312 | if (match(V: Op1, P: m_AShr(L: m_Value(V&: A), R: m_APInt(Res&: ShAmt))) && |
4313 | Op1->hasNUses(N: 2) && *ShAmt == Ty->getScalarSizeInBits() - 1 && |
4314 | match(V: Op0, P: m_OneUse(SubPattern: m_c_Add(L: m_Specific(V: A), R: m_Specific(V: Op1))))) { |
4315 | // Op1 = ashr i32 A, 31 ; smear the sign bit |
4316 | // xor (add A, Op1), Op1 ; add -1 and flip bits if negative |
4317 | // --> (A < 0) ? -A : A |
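// For example (i32): A = -5 gives Op1 = -1, the add produces -6, and
// xor'ing -6 with -1 gives 5.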
4318 | Value *IsNeg = Builder.CreateIsNeg(Arg: A); |
4319 | // Copy the nsw flags from the add to the negate. |
4320 | auto *Add = cast<BinaryOperator>(Val: Op0); |
4321 | Value *NegA = Add->hasNoUnsignedWrap() |
4322 | ? Constant::getNullValue(Ty: A->getType()) |
4323 | : Builder.CreateNeg(V: A, Name: "" , HasNSW: Add->hasNoSignedWrap()); |
4324 | return SelectInst::Create(C: IsNeg, S1: NegA, S2: A); |
4325 | } |
4326 | return nullptr; |
4327 | } |
4328 | |
4329 | static bool canFreelyInvert(InstCombiner &IC, Value *Op, |
4330 | Instruction *IgnoredUser) { |
4331 | auto *I = dyn_cast<Instruction>(Val: Op); |
4332 | return I && IC.isFreeToInvert(V: I, /*WillInvertAllUses=*/true) && |
4333 | IC.canFreelyInvertAllUsersOf(V: I, IgnoredUser); |
4334 | } |
4335 | |
4336 | static Value *freelyInvert(InstCombinerImpl &IC, Value *Op, |
4337 | Instruction *IgnoredUser) { |
4338 | auto *I = cast<Instruction>(Val: Op); |
4339 | IC.Builder.SetInsertPoint(*I->getInsertionPointAfterDef()); |
4340 | Value *NotOp = IC.Builder.CreateNot(V: Op, Name: Op->getName() + ".not" ); |
4341 | Op->replaceUsesWithIf(New: NotOp, |
4342 | ShouldReplace: [NotOp](Use &U) { return U.getUser() != NotOp; }); |
4343 | IC.freelyInvertAllUsersOf(V: NotOp, IgnoredUser); |
4344 | return NotOp; |
4345 | } |
4346 | |
4347 | // Transform |
4348 | // z = ~(x &/| y) |
4349 | // into: |
4350 | // z = ((~x) |/& (~y)) |
4351 | // iff both x and y are free to invert and all uses of z can be freely updated. |
4352 | bool InstCombinerImpl::sinkNotIntoLogicalOp(Instruction &I) { |
4353 | Value *Op0, *Op1; |
4354 | if (!match(V: &I, P: m_LogicalOp(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) |
4355 | return false; |
4356 | |
4357 | // If this logic op has not been simplified yet, just bail out and let that |
4358 | // happen first. Otherwise, the code below may wrongly invert. |
4359 | if (Op0 == Op1) |
4360 | return false; |
4361 | |
4362 | Instruction::BinaryOps NewOpc = |
4363 | match(V: &I, P: m_LogicalAnd()) ? Instruction::Or : Instruction::And; |
4364 | bool IsBinaryOp = isa<BinaryOperator>(Val: I); |
4365 | |
4366 | // Can our users be adapted? |
4367 | if (!InstCombiner::canFreelyInvertAllUsersOf(V: &I, /*IgnoredUser=*/nullptr)) |
4368 | return false; |
4369 | |
4370 | // And can the operands be adapted? |
4371 | if (!canFreelyInvert(IC&: *this, Op: Op0, IgnoredUser: &I) || !canFreelyInvert(IC&: *this, Op: Op1, IgnoredUser: &I)) |
4372 | return false; |
4373 | |
4374 | Op0 = freelyInvert(IC&: *this, Op: Op0, IgnoredUser: &I); |
4375 | Op1 = freelyInvert(IC&: *this, Op: Op1, IgnoredUser: &I); |
4376 | |
4377 | Builder.SetInsertPoint(*I.getInsertionPointAfterDef()); |
4378 | Value *NewLogicOp; |
4379 | if (IsBinaryOp) |
4380 | NewLogicOp = Builder.CreateBinOp(Opc: NewOpc, LHS: Op0, RHS: Op1, Name: I.getName() + ".not" ); |
4381 | else |
4382 | NewLogicOp = |
4383 | Builder.CreateLogicalOp(Opc: NewOpc, Cond1: Op0, Cond2: Op1, Name: I.getName() + ".not" ); |
4384 | |
4385 | replaceInstUsesWith(I, V: NewLogicOp); |
// We cannot just create an outer `not`; it would most likely be immediately
// folded back, reconstructing our initial pattern and causing an infinite
// combine loop, so manually fold it away right now.
4389 | freelyInvertAllUsersOf(V: NewLogicOp); |
4390 | return true; |
4391 | } |
4392 | |
4393 | // Transform |
4394 | // z = (~x) &/| y |
4395 | // into: |
4396 | // z = ~(x |/& (~y)) |
4397 | // iff y is free to invert and all uses of z can be freely updated. |
4398 | bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) { |
4399 | Value *Op0, *Op1; |
4400 | if (!match(V: &I, P: m_LogicalOp(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) |
4401 | return false; |
4402 | Instruction::BinaryOps NewOpc = |
4403 | match(V: &I, P: m_LogicalAnd()) ? Instruction::Or : Instruction::And; |
4404 | bool IsBinaryOp = isa<BinaryOperator>(Val: I); |
4405 | |
4406 | Value *NotOp0 = nullptr; |
4407 | Value *NotOp1 = nullptr; |
4408 | Value **OpToInvert = nullptr; |
4409 | if (match(V: Op0, P: m_Not(V: m_Value(V&: NotOp0))) && canFreelyInvert(IC&: *this, Op: Op1, IgnoredUser: &I)) { |
4410 | Op0 = NotOp0; |
4411 | OpToInvert = &Op1; |
4412 | } else if (match(V: Op1, P: m_Not(V: m_Value(V&: NotOp1))) && |
4413 | canFreelyInvert(IC&: *this, Op: Op0, IgnoredUser: &I)) { |
4414 | Op1 = NotOp1; |
4415 | OpToInvert = &Op0; |
4416 | } else |
4417 | return false; |
4418 | |
4419 | // And can our users be adapted? |
4420 | if (!InstCombiner::canFreelyInvertAllUsersOf(V: &I, /*IgnoredUser=*/nullptr)) |
4421 | return false; |
4422 | |
4423 | *OpToInvert = freelyInvert(IC&: *this, Op: *OpToInvert, IgnoredUser: &I); |
4424 | |
4425 | Builder.SetInsertPoint(*I.getInsertionPointAfterDef()); |
4426 | Value *NewBinOp; |
4427 | if (IsBinaryOp) |
4428 | NewBinOp = Builder.CreateBinOp(Opc: NewOpc, LHS: Op0, RHS: Op1, Name: I.getName() + ".not" ); |
4429 | else |
4430 | NewBinOp = Builder.CreateLogicalOp(Opc: NewOpc, Cond1: Op0, Cond2: Op1, Name: I.getName() + ".not" ); |
4431 | replaceInstUsesWith(I, V: NewBinOp); |
// We cannot just create an outer `not`; it would most likely be immediately
// folded back, reconstructing our initial pattern and causing an infinite
// combine loop, so manually fold it away right now.
4435 | freelyInvertAllUsersOf(V: NewBinOp); |
4436 | return true; |
4437 | } |
4438 | |
4439 | Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) { |
4440 | Value *NotOp; |
4441 | if (!match(V: &I, P: m_Not(V: m_Value(V&: NotOp)))) |
4442 | return nullptr; |
4443 | |
4444 | // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. |
4445 | // We must eliminate the and/or (one-use) for these transforms to not increase |
4446 | // the instruction count. |
4447 | // |
4448 | // ~(~X & Y) --> (X | ~Y) |
4449 | // ~(Y & ~X) --> (X | ~Y) |
4450 | // |
4451 | // Note: The logical matches do not check for the commuted patterns because |
4452 | // those are handled via SimplifySelectsFeedingBinaryOp(). |
4453 | Type *Ty = I.getType(); |
4454 | Value *X, *Y; |
4455 | if (match(V: NotOp, P: m_OneUse(SubPattern: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))) { |
4456 | Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" ); |
4457 | return BinaryOperator::CreateOr(V1: X, V2: NotY); |
4458 | } |
4459 | if (match(V: NotOp, P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))) { |
4460 | Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" ); |
4461 | return SelectInst::Create(C: X, S1: ConstantInt::getTrue(Ty), S2: NotY); |
4462 | } |
4463 | |
4464 | // ~(~X | Y) --> (X & ~Y) |
4465 | // ~(Y | ~X) --> (X & ~Y) |
4466 | if (match(V: NotOp, P: m_OneUse(SubPattern: m_c_Or(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))) { |
4467 | Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" ); |
4468 | return BinaryOperator::CreateAnd(V1: X, V2: NotY); |
4469 | } |
4470 | if (match(V: NotOp, P: m_OneUse(SubPattern: m_LogicalOr(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))))) { |
4471 | Value *NotY = Builder.CreateNot(V: Y, Name: Y->getName() + ".not" ); |
4472 | return SelectInst::Create(C: X, S1: NotY, S2: ConstantInt::getFalse(Ty)); |
4473 | } |
4474 | |
4475 | // Is this a 'not' (~) fed by a binary operator? |
4476 | BinaryOperator *NotVal; |
4477 | if (match(V: NotOp, P: m_BinOp(I&: NotVal))) { |
4478 | // ~((-X) | Y) --> (X - 1) & (~Y) |
4479 | if (match(V: NotVal, |
4480 | P: m_OneUse(SubPattern: m_c_Or(L: m_OneUse(SubPattern: m_Neg(V: m_Value(V&: X))), R: m_Value(V&: Y))))) { |
4481 | Value *DecX = Builder.CreateAdd(LHS: X, RHS: ConstantInt::getAllOnesValue(Ty)); |
4482 | Value *NotY = Builder.CreateNot(V: Y); |
4483 | return BinaryOperator::CreateAnd(V1: DecX, V2: NotY); |
4484 | } |
4485 | |
4486 | // ~(~X >>s Y) --> (X >>s Y) |
4487 | if (match(V: NotVal, P: m_AShr(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y)))) |
4488 | return BinaryOperator::CreateAShr(V1: X, V2: Y); |
4489 | |
4490 | // Treat lshr with non-negative operand as ashr. |
4491 | // ~(~X >>u Y) --> (X >>s Y) iff X is known negative |
4492 | if (match(V: NotVal, P: m_LShr(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y))) && |
4493 | isKnownNegative(V: X, DL: SQ.getWithInstruction(I: NotVal))) |
4494 | return BinaryOperator::CreateAShr(V1: X, V2: Y); |
4495 | |
4496 | // Bit-hack form of a signbit test for iN type: |
4497 | // ~(X >>s (N - 1)) --> sext i1 (X > -1) to iN |
4498 | unsigned FullShift = Ty->getScalarSizeInBits() - 1; |
4499 | if (match(V: NotVal, P: m_OneUse(SubPattern: m_AShr(L: m_Value(V&: X), R: m_SpecificInt(V: FullShift))))) { |
4500 | Value *IsNotNeg = Builder.CreateIsNotNeg(Arg: X, Name: "isnotneg" ); |
4501 | return new SExtInst(IsNotNeg, Ty); |
4502 | } |
4503 | |
4504 | // If we are inverting a right-shifted constant, we may be able to eliminate |
4505 | // the 'not' by inverting the constant and using the opposite shift type. |
4506 | // Canonicalization rules ensure that only a negative constant uses 'ashr', |
4507 | // but we must check that in case that transform has not fired yet. |
4508 | |
4509 | // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits) |
4510 | Constant *C; |
4511 | if (match(V: NotVal, P: m_AShr(L: m_Constant(C), R: m_Value(V&: Y))) && |
4512 | match(V: C, P: m_Negative())) |
4513 | return BinaryOperator::CreateLShr(V1: ConstantExpr::getNot(C), V2: Y); |
4514 | |
4515 | // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits) |
4516 | if (match(V: NotVal, P: m_LShr(L: m_Constant(C), R: m_Value(V&: Y))) && |
4517 | match(V: C, P: m_NonNegative())) |
4518 | return BinaryOperator::CreateAShr(V1: ConstantExpr::getNot(C), V2: Y); |
4519 | |
4520 | // ~(X + C) --> ~C - X |
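// (using ~V == -1 - V:  ~(X + C) == -1 - X - C == (~C) - X)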
4521 | if (match(V: NotVal, P: m_Add(L: m_Value(V&: X), R: m_ImmConstant(C)))) |
4522 | return BinaryOperator::CreateSub(V1: ConstantExpr::getNot(C), V2: X); |
4523 | |
4524 | // ~(X - Y) --> ~X + Y |
4525 | // FIXME: is it really beneficial to sink the `not` here? |
4526 | if (match(V: NotVal, P: m_Sub(L: m_Value(V&: X), R: m_Value(V&: Y)))) |
4527 | if (isa<Constant>(Val: X) || NotVal->hasOneUse()) |
4528 | return BinaryOperator::CreateAdd(V1: Builder.CreateNot(V: X), V2: Y); |
4529 | |
4530 | // ~(~X + Y) --> X - Y |
4531 | if (match(V: NotVal, P: m_c_Add(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y)))) |
4532 | return BinaryOperator::CreateWithCopiedFlags(Opc: Instruction::Sub, V1: X, V2: Y, |
4533 | CopyO: NotVal); |
4534 | } |
4535 | |
4536 | // not (cmp A, B) = !cmp A, B |
4537 | CmpInst::Predicate Pred; |
4538 | if (match(V: NotOp, P: m_Cmp(Pred, L: m_Value(), R: m_Value())) && |
4539 | (NotOp->hasOneUse() || |
4540 | InstCombiner::canFreelyInvertAllUsersOf(V: cast<Instruction>(Val: NotOp), |
4541 | /*IgnoredUser=*/nullptr))) { |
4542 | cast<CmpInst>(Val: NotOp)->setPredicate(CmpInst::getInversePredicate(pred: Pred)); |
4543 | freelyInvertAllUsersOf(V: NotOp); |
4544 | return &I; |
4545 | } |
4546 | |
4547 | // Move a 'not' ahead of casts of a bool to enable logic reduction: |
4548 | // not (bitcast (sext i1 X)) --> bitcast (sext (not i1 X)) |
if (match(V: NotOp, P: m_OneUse(SubPattern: m_BitCast(Op: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X)))))) &&
    X->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
4550 | Type *SextTy = cast<BitCastOperator>(Val: NotOp)->getSrcTy(); |
4551 | Value *NotX = Builder.CreateNot(V: X); |
4552 | Value *Sext = Builder.CreateSExt(V: NotX, DestTy: SextTy); |
4553 | return CastInst::CreateBitOrPointerCast(S: Sext, Ty); |
4554 | } |
4555 | |
4556 | if (auto *NotOpI = dyn_cast<Instruction>(Val: NotOp)) |
4557 | if (sinkNotIntoLogicalOp(I&: *NotOpI)) |
4558 | return &I; |
4559 | |
4560 | // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max: |
4561 | // ~min(~X, ~Y) --> max(X, Y) |
4562 | // ~max(~X, Y) --> min(X, ~Y) |
4563 | auto *II = dyn_cast<IntrinsicInst>(Val: NotOp); |
4564 | if (II && II->hasOneUse()) { |
4565 | if (match(V: NotOp, P: m_c_MaxOrMin(L: m_Not(V: m_Value(V&: X)), R: m_Value(V&: Y)))) { |
4566 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()); |
4567 | Value *NotY = Builder.CreateNot(V: Y); |
4568 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: NotY); |
4569 | return replaceInstUsesWith(I, V: InvMaxMin); |
4570 | } |
4571 | |
4572 | if (II->getIntrinsicID() == Intrinsic::is_fpclass) { |
4573 | ConstantInt *ClassMask = cast<ConstantInt>(Val: II->getArgOperand(i: 1)); |
4574 | II->setArgOperand( |
4575 | i: 1, v: ConstantInt::get(Ty: ClassMask->getType(), |
4576 | V: ~ClassMask->getZExtValue() & fcAllFlags)); |
4577 | return replaceInstUsesWith(I, V: II); |
4578 | } |
4579 | } |
4580 | |
4581 | if (NotOp->hasOneUse()) { |
4582 | // Pull 'not' into operands of select if both operands are one-use compares |
4583 | // or one is one-use compare and the other one is a constant. |
4584 | // Inverting the predicates eliminates the 'not' operation. |
4585 | // Example: |
// not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?)) -->
// select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
// not (select ?, (cmp TPred, ?, ?), true) -->
// select ?, (cmp InvTPred, ?, ?), false
4590 | if (auto *Sel = dyn_cast<SelectInst>(Val: NotOp)) { |
4591 | Value *TV = Sel->getTrueValue(); |
4592 | Value *FV = Sel->getFalseValue(); |
4593 | auto *CmpT = dyn_cast<CmpInst>(Val: TV); |
4594 | auto *CmpF = dyn_cast<CmpInst>(Val: FV); |
4595 | bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(Val: TV); |
4596 | bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(Val: FV); |
4597 | if (InvertibleT && InvertibleF) { |
4598 | if (CmpT) |
4599 | CmpT->setPredicate(CmpT->getInversePredicate()); |
4600 | else |
4601 | Sel->setTrueValue(ConstantExpr::getNot(C: cast<Constant>(Val: TV))); |
4602 | if (CmpF) |
4603 | CmpF->setPredicate(CmpF->getInversePredicate()); |
4604 | else |
4605 | Sel->setFalseValue(ConstantExpr::getNot(C: cast<Constant>(Val: FV))); |
4606 | return replaceInstUsesWith(I, V: Sel); |
4607 | } |
4608 | } |
4609 | } |
4610 | |
4611 | if (Instruction *NewXor = foldNotXor(I, Builder)) |
4612 | return NewXor; |
4613 | |
4614 | // TODO: Could handle multi-use better by checking if all uses of NotOp (other |
4615 | // than I) can be inverted. |
4616 | if (Value *R = getFreelyInverted(V: NotOp, WillInvertAllUses: NotOp->hasOneUse(), Builder: &Builder)) |
4617 | return replaceInstUsesWith(I, V: R); |
4618 | |
4619 | return nullptr; |
4620 | } |
4621 | |
4622 | // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches |
4623 | // here. We should standardize that construct where it is needed or choose some |
4624 | // other way to ensure that commutated variants of patterns are not missed. |
4625 | Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { |
4626 | if (Value *V = simplifyXorInst(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1), |
4627 | Q: SQ.getWithInstruction(I: &I))) |
4628 | return replaceInstUsesWith(I, V); |
4629 | |
4630 | if (SimplifyAssociativeOrCommutative(I)) |
4631 | return &I; |
4632 | |
4633 | if (Instruction *X = foldVectorBinop(Inst&: I)) |
4634 | return X; |
4635 | |
4636 | if (Instruction *Phi = foldBinopWithPhiOperands(BO&: I)) |
4637 | return Phi; |
4638 | |
4639 | if (Instruction *NewXor = foldXorToXor(I, Builder)) |
4640 | return NewXor; |
4641 | |
4642 | // (A&B)^(A&C) -> A&(B^C) etc |
4643 | if (Value *V = foldUsingDistributiveLaws(I)) |
4644 | return replaceInstUsesWith(I, V); |
4645 | |
4646 | // See if we can simplify any instructions used by the instruction whose sole |
4647 | // purpose is to compute bits we don't care about. |
4648 | if (SimplifyDemandedInstructionBits(Inst&: I)) |
4649 | return &I; |
4650 | |
4651 | if (Instruction *R = foldNot(I)) |
4652 | return R; |
4653 | |
4654 | if (Instruction *R = foldBinOpShiftWithShift(I)) |
4655 | return R; |
4656 | |
4657 | // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M) |
// This is a special case in haveNoCommonBitsSet, but the computeKnownBits
4659 | // calls in there are unnecessary as SimplifyDemandedInstructionBits should |
4660 | // have already taken care of those cases. |
4661 | Value *Op0 = I.getOperand(i_nocapture: 0), *Op1 = I.getOperand(i_nocapture: 1); |
4662 | Value *M; |
4663 | if (match(V: &I, P: m_c_Xor(L: m_c_And(L: m_Not(V: m_Value(V&: M)), R: m_Value()), |
4664 | R: m_c_And(L: m_Deferred(V: M), R: m_Value())))) { |
4665 | if (isGuaranteedNotToBeUndef(V: M)) |
4666 | return BinaryOperator::CreateDisjointOr(V1: Op0, V2: Op1); |
4667 | else |
4668 | return BinaryOperator::CreateOr(V1: Op0, V2: Op1); |
4669 | } |
4670 | |
4671 | if (Instruction *Xor = visitMaskedMerge(I, Builder)) |
4672 | return Xor; |
4673 | |
4674 | Value *X, *Y; |
4675 | Constant *C1; |
4676 | if (match(V: Op1, P: m_Constant(C&: C1))) { |
4677 | Constant *C2; |
4678 | |
4679 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: X), R: m_ImmConstant(C&: C2)))) && |
4680 | match(V: C1, P: m_ImmConstant())) { |
4681 | // (X | C2) ^ C1 --> (X & ~C2) ^ (C1^C2) |
4682 | C2 = Constant::replaceUndefsWith( |
4683 | C: C2, Replacement: Constant::getAllOnesValue(Ty: C2->getType()->getScalarType())); |
4684 | Value *And = Builder.CreateAnd( |
4685 | LHS: X, RHS: Constant::mergeUndefsWith(C: ConstantExpr::getNot(C: C2), Other: C1)); |
4686 | return BinaryOperator::CreateXor( |
4687 | V1: And, V2: Constant::mergeUndefsWith(C: ConstantExpr::getXor(C1, C2), Other: C1)); |
4688 | } |
4689 | |
4690 | // Use DeMorgan and reassociation to eliminate a 'not' op. |
4691 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Not(V: m_Value(V&: X)), R: m_Constant(C&: C2))))) { |
4692 | // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1 |
4693 | Value *And = Builder.CreateAnd(LHS: X, RHS: ConstantExpr::getNot(C: C2)); |
4694 | return BinaryOperator::CreateXor(V1: And, V2: ConstantExpr::getNot(C: C1)); |
4695 | } |
4696 | if (match(V: Op0, P: m_OneUse(SubPattern: m_And(L: m_Not(V: m_Value(V&: X)), R: m_Constant(C&: C2))))) { |
4697 | // (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1 |
4698 | Value *Or = Builder.CreateOr(LHS: X, RHS: ConstantExpr::getNot(C: C2)); |
4699 | return BinaryOperator::CreateXor(V1: Or, V2: ConstantExpr::getNot(C: C1)); |
4700 | } |
4701 | |
// Convert xor ([trunc] (ashr X, BW-1)), C =>
//   select(X >s -1, C, ~C)
// The ashr produces an all-zeros or all-ones value, which then optionally
// inverts the constant depending on whether the input is less than 0.
4706 | const APInt *CA; |
4707 | if (match(V: Op0, P: m_OneUse(SubPattern: m_TruncOrSelf( |
4708 | Op: m_AShr(L: m_Value(V&: X), R: m_APIntAllowPoison(Res&: CA))))) && |
4709 | *CA == X->getType()->getScalarSizeInBits() - 1 && |
4710 | !match(V: C1, P: m_AllOnes())) { |
4711 | assert(!C1->isZeroValue() && "Unexpected xor with 0" ); |
4712 | Value *IsNotNeg = Builder.CreateIsNotNeg(Arg: X); |
4713 | return SelectInst::Create(C: IsNotNeg, S1: Op1, S2: Builder.CreateNot(V: Op1)); |
4714 | } |
4715 | } |
4716 | |
4717 | Type *Ty = I.getType(); |
4718 | { |
4719 | const APInt *RHSC; |
4720 | if (match(V: Op1, P: m_APInt(Res&: RHSC))) { |
4721 | Value *X; |
4722 | const APInt *C; |
4723 | // (C - X) ^ signmaskC --> (C + signmaskC) - X |
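// (xor'ing the sign mask is the same as adding it: only the top bit changes
// and any carry out is discarded, so it folds into the constant)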
4724 | if (RHSC->isSignMask() && match(V: Op0, P: m_Sub(L: m_APInt(Res&: C), R: m_Value(V&: X)))) |
4725 | return BinaryOperator::CreateSub(V1: ConstantInt::get(Ty, V: *C + *RHSC), V2: X); |
4726 | |
4727 | // (X + C) ^ signmaskC --> X + (C + signmaskC) |
4728 | if (RHSC->isSignMask() && match(V: Op0, P: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C)))) |
4729 | return BinaryOperator::CreateAdd(V1: X, V2: ConstantInt::get(Ty, V: *C + *RHSC)); |
4730 | |
4731 | // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0 |
4732 | if (match(V: Op0, P: m_Or(L: m_Value(V&: X), R: m_APInt(Res&: C))) && |
4733 | MaskedValueIsZero(V: X, Mask: *C, Depth: 0, CxtI: &I)) |
4734 | return BinaryOperator::CreateXor(V1: X, V2: ConstantInt::get(Ty, V: *C ^ *RHSC)); |
4735 | |
4736 | // When X is a power-of-two or zero and zero input is poison: |
4737 | // ctlz(i32 X) ^ 31 --> cttz(X) |
4738 | // cttz(i32 X) ^ 31 --> ctlz(X) |
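// (illustrative reasoning: for X == 2^k in iN, ctlz gives N-1-k and cttz
// gives k; since k <= N-1, 'xor N-1' is the same as 'N-1 minus', swapping
// the two counts)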
4739 | auto *II = dyn_cast<IntrinsicInst>(Val: Op0); |
4740 | if (II && II->hasOneUse() && *RHSC == Ty->getScalarSizeInBits() - 1) { |
4741 | Intrinsic::ID IID = II->getIntrinsicID(); |
4742 | if ((IID == Intrinsic::ctlz || IID == Intrinsic::cttz) && |
4743 | match(V: II->getArgOperand(i: 1), P: m_One()) && |
4744 | isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 0), /*OrZero */ true)) { |
4745 | IID = (IID == Intrinsic::ctlz) ? Intrinsic::cttz : Intrinsic::ctlz; |
4746 | Function *F = Intrinsic::getDeclaration(M: II->getModule(), id: IID, Tys: Ty); |
4747 | return CallInst::Create(Func: F, Args: {II->getArgOperand(i: 0), Builder.getTrue()}); |
4748 | } |
4749 | } |
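// Illustrative example (assumes %p is known to be a power of two or zero,
// with a zero input already marked poison by the 'true' flag):
//   %z = call i32 @llvm.ctlz.i32(i32 %p, i1 true)
//   %r = xor i32 %z, 31
// -->
//   %r = call i32 @llvm.cttz.i32(i32 %p, i1 true)
// For %p == 8: ctlz is 28 and 28 ^ 31 == 3 == cttz.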
4750 | |
4751 | // If RHSC inverts exactly the bits of X that survive the shift,
4752 | // canonicalize to a 'not' before the shift to help SCEV and codegen:
4753 | // (X << C) ^ RHSC --> ~X << C |
4754 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Shl(L: m_Value(V&: X), R: m_APInt(Res&: C)))) && |
4755 | *RHSC == APInt::getAllOnes(numBits: Ty->getScalarSizeInBits()).shl(ShiftAmt: *C)) { |
4756 | Value *NotX = Builder.CreateNot(V: X); |
4757 | return BinaryOperator::CreateShl(V1: NotX, V2: ConstantInt::get(Ty, V: *C)); |
4758 | } |
4759 | // (X >>u C) ^ RHSC --> ~X >>u C |
4760 | if (match(V: Op0, P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_APInt(Res&: C)))) && |
4761 | *RHSC == APInt::getAllOnes(numBits: Ty->getScalarSizeInBits()).lshr(ShiftAmt: *C)) { |
4762 | Value *NotX = Builder.CreateNot(V: X); |
4763 | return BinaryOperator::CreateLShr(V1: NotX, V2: ConstantInt::get(Ty, V: *C)); |
4764 | } |
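// Sketch of the shl case on arbitrary i8 constants (the lshr case is the
// mirror image, with the mask covering the low bits instead):
//   %s = shl i8 %x, 3
//   %r = xor i8 %s, -8        ; -8 == 0b11111000 == -1 << 3
// -->
//   %n = xor i8 %x, -1        ; ~X
//   %r = shl i8 %n, 3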
4765 | // TODO: We could handle 'ashr' here as well. That would be matching |
4766 | // a 'not' op and moving it before the shift. Doing that requires |
4767 | // preventing the inverse fold in canShiftBinOpWithConstantRHS(). |
4768 | } |
4769 | |
4770 | // If we are XORing the sign bit of a floating-point value, convert |
4771 | // this to fneg, then cast back to integer. |
4772 | // |
4773 | // This is a generous interpretation of noimplicitfloat, since this is not
4774 | // a true floating-point operation.
4775 | // |
4776 | // Assumes any IEEE-represented type has the sign bit in the high bit. |
4777 | // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt |
4778 | Value *CastOp; |
4779 | if (match(V: Op0, P: m_ElementWiseBitCast(Op: m_Value(V&: CastOp))) && |
4780 | match(V: Op1, P: m_SignMask()) && |
4781 | !Builder.GetInsertBlock()->getParent()->hasFnAttribute( |
4782 | Kind: Attribute::NoImplicitFloat)) { |
4783 | Type *EltTy = CastOp->getType()->getScalarType(); |
4784 | if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { |
4785 | Value *FNeg = Builder.CreateFNeg(V: CastOp); |
4786 | return new BitCastInst(FNeg, I.getType()); |
4787 | } |
4788 | } |
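// Illustrative scalar sketch (the same applies element-wise to vectors, and
// only when the function is not marked noimplicitfloat):
//   %i = bitcast float %f to i32
//   %r = xor i32 %i, -2147483648      ; flip the IEEE sign bit
// -->
//   %n = fneg float %f
//   %r = bitcast float %n to i32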
4789 | } |
4790 | |
4791 | // FIXME: This should not be limited to scalar (pull into APInt match above). |
4792 | { |
4793 | Value *X; |
4794 | ConstantInt *C1, *C2, *C3; |
4795 | // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3) |
4796 | if (match(V: Op1, P: m_ConstantInt(CI&: C3)) && |
4797 | match(V: Op0, P: m_LShr(L: m_Xor(L: m_Value(V&: X), R: m_ConstantInt(CI&: C1)), |
4798 | R: m_ConstantInt(CI&: C2))) && |
4799 | Op0->hasOneUse()) { |
4800 | // fold (C1 >> C2) ^ C3 |
4801 | APInt FoldConst = C1->getValue().lshr(ShiftAmt: C2->getValue()); |
4802 | FoldConst ^= C3->getValue(); |
4803 | // Prepare the two operands. |
4804 | auto *Opnd0 = Builder.CreateLShr(LHS: X, RHS: C2); |
4805 | Opnd0->takeName(V: Op0); |
4806 | return BinaryOperator::CreateXor(V1: Opnd0, V2: ConstantInt::get(Ty, V: FoldConst)); |
4807 | } |
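// Hand-worked i8 example for the fold above (constants are arbitrary):
//   %a = xor  i8 %x, -16      ; C1 = 0xF0
//   %b = lshr i8 %a, 4        ; C2 = 4
//   %r = xor  i8 %b, 1        ; C3 = 1
// -->
//   %b = lshr i8 %x, 4
//   %r = xor  i8 %b, 14       ; (0xF0 u>> 4) ^ 1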
4808 | } |
4809 | |
4810 | if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) |
4811 | return FoldedLogic; |
4812 | |
4813 | // Y ^ (X | Y) --> X & ~Y |
4814 | // Y ^ (Y | X) --> X & ~Y |
4815 | if (match(V: Op1, P: m_OneUse(SubPattern: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: Op0))))) |
4816 | return BinaryOperator::CreateAnd(V1: X, V2: Builder.CreateNot(V: Op0)); |
4817 | // (X | Y) ^ Y --> X & ~Y |
4818 | // (Y | X) ^ Y --> X & ~Y |
4819 | if (match(V: Op0, P: m_OneUse(SubPattern: m_c_Or(L: m_Value(V&: X), R: m_Specific(V: Op1))))) |
4820 | return BinaryOperator::CreateAnd(V1: X, V2: Builder.CreateNot(V: Op1)); |
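// Quick bit-level check of the folds above with X = 0b0011, Y = 0b0101:
// X | Y == 0b0111, Y ^ (X | Y) == 0b0010, and X & ~Y == 0b0011 & 0b1010
// == 0b0010, as expected.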
4821 | |
4822 | // Y ^ (X & Y) --> ~X & Y |
4823 | // Y ^ (Y & X) --> ~X & Y |
4824 | if (match(V: Op1, P: m_OneUse(SubPattern: m_c_And(L: m_Value(V&: X), R: m_Specific(V: Op0))))) |
4825 | return BinaryOperator::CreateAnd(V1: Op0, V2: Builder.CreateNot(V: X)); |
4826 | // (X & Y) ^ Y --> ~X & Y |
4827 | // (Y & X) ^ Y --> ~X & Y |
4828 | // Canonical form is (X & C) ^ C; don't touch that. |
4829 | // TODO: A 'not' op is better for analysis and codegen, but demanded bits must |
4830 | // be fixed to prefer that (otherwise we get infinite looping). |
4831 | if (!match(V: Op1, P: m_Constant()) && |
4832 | match(V: Op0, P: m_OneUse(SubPattern: m_c_And(L: m_Value(V&: X), R: m_Specific(V: Op1))))) |
4833 | return BinaryOperator::CreateAnd(V1: Op1, V2: Builder.CreateNot(V: X)); |
4834 | |
4835 | Value *A, *B, *C; |
4836 | // (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants. |
4837 | if (match(V: &I, P: m_c_Xor(L: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))), |
4838 | R: m_OneUse(SubPattern: m_c_Or(L: m_Deferred(V: A), R: m_Value(V&: C)))))) |
4839 | return BinaryOperator::CreateXor( |
4840 | V1: Builder.CreateAnd(LHS: Builder.CreateNot(V: A), RHS: C), V2: B); |
4841 | |
4842 | // (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants. |
4843 | if (match(V: &I, P: m_c_Xor(L: m_OneUse(SubPattern: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))), |
4844 | R: m_OneUse(SubPattern: m_c_Or(L: m_Deferred(V: B), R: m_Value(V&: C)))))) |
4845 | return BinaryOperator::CreateXor( |
4846 | V1: Builder.CreateAnd(LHS: Builder.CreateNot(V: B), RHS: C), V2: A); |
4847 | |
4848 | // (A & B) ^ (A ^ B) -> (A | B) |
4849 | if (match(V: Op0, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4850 | match(V: Op1, P: m_c_Xor(L: m_Specific(V: A), R: m_Specific(V: B)))) |
4851 | return BinaryOperator::CreateOr(V1: A, V2: B); |
4852 | // (A ^ B) ^ (A & B) -> (A | B) |
4853 | if (match(V: Op0, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B))) && |
4854 | match(V: Op1, P: m_c_And(L: m_Specific(V: A), R: m_Specific(V: B)))) |
4855 | return BinaryOperator::CreateOr(V1: A, V2: B); |
4856 | |
4857 | // (A & ~B) ^ ~A -> ~(A & B) |
4858 | // (~B & A) ^ ~A -> ~(A & B) |
4859 | if (match(V: Op0, P: m_c_And(L: m_Value(V&: A), R: m_Not(V: m_Value(V&: B)))) && |
4860 | match(V: Op1, P: m_Not(V: m_Specific(V: A)))) |
4861 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: A, RHS: B)); |
4862 | |
4863 | // (~A & B) ^ A --> A | B -- There are 4 commuted variants. |
4864 | if (match(V: &I, P: m_c_Xor(L: m_c_And(L: m_Not(V: m_Value(V&: A)), R: m_Value(V&: B)), R: m_Deferred(V: A)))) |
4865 | return BinaryOperator::CreateOr(V1: A, V2: B); |
4866 | |
4867 | // (~A | B) ^ A --> ~(A & B) |
4868 | if (match(V: Op0, P: m_OneUse(SubPattern: m_c_Or(L: m_Not(V: m_Specific(V: Op1)), R: m_Value(V&: B))))) |
4869 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Op1, RHS: B)); |
4870 | |
4871 | // A ^ (~A | B) --> ~(A & B) |
4872 | if (match(V: Op1, P: m_OneUse(SubPattern: m_c_Or(L: m_Not(V: m_Specific(V: Op0)), R: m_Value(V&: B))))) |
4873 | return BinaryOperator::CreateNot(Op: Builder.CreateAnd(LHS: Op0, RHS: B)); |
4874 | |
4875 | // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants. |
4876 | // TODO: Loosen one-use restriction if common operand is a constant. |
4877 | Value *D; |
4878 | if (match(V: Op0, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)))) && |
4879 | match(V: Op1, P: m_OneUse(SubPattern: m_Or(L: m_Value(V&: C), R: m_Value(V&: D))))) { |
4880 | if (B == C || B == D) |
4881 | std::swap(a&: A, b&: B); |
4882 | if (A == C) |
4883 | std::swap(a&: C, b&: D); |
4884 | if (A == D) { |
4885 | Value *NotA = Builder.CreateNot(V: A); |
4886 | return BinaryOperator::CreateAnd(V1: Builder.CreateXor(LHS: B, RHS: C), V2: NotA); |
4887 | } |
4888 | } |
4889 | |
4890 | // (A & B) ^ (A | C) --> A ? ~B : C -- There are 4 commuted variants. |
4891 | if (I.getType()->isIntOrIntVectorTy(BitWidth: 1) && |
4892 | match(V: Op0, P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) && |
4893 | match(V: Op1, P: m_OneUse(SubPattern: m_LogicalOr(L: m_Value(V&: C), R: m_Value(V&: D))))) { |
4894 | bool NeedFreeze = isa<SelectInst>(Val: Op0) && isa<SelectInst>(Val: Op1) && B == D; |
4895 | if (B == C || B == D) |
4896 | std::swap(a&: A, b&: B); |
4897 | if (A == C) |
4898 | std::swap(a&: C, b&: D); |
4899 | if (A == D) { |
4900 | if (NeedFreeze) |
4901 | A = Builder.CreateFreeze(V: A); |
4902 | Value *NotB = Builder.CreateNot(V: B); |
4903 | return SelectInst::Create(C: A, S1: NotB, S2: C); |
4904 | } |
4905 | } |
4906 | |
4907 | if (auto *LHS = dyn_cast<ICmpInst>(Val: I.getOperand(i_nocapture: 0))) |
4908 | if (auto *RHS = dyn_cast<ICmpInst>(Val: I.getOperand(i_nocapture: 1))) |
4909 | if (Value *V = foldXorOfICmps(LHS, RHS, I)) |
4910 | return replaceInstUsesWith(I, V); |
4911 | |
4912 | if (Instruction *CastedXor = foldCastedBitwiseLogic(I)) |
4913 | return CastedXor; |
4914 | |
4915 | if (Instruction *Abs = canonicalizeAbs(Xor&: I, Builder)) |
4916 | return Abs; |
4917 | |
4918 | // Otherwise, if all else failed, try to hoist the xor-by-constant: |
4919 | // (X ^ C) ^ Y --> (X ^ Y) ^ C |
4920 | // As we do in other places, we avoid this fold entirely for constant
4921 | // expressions, primarily to avoid an endless combine loop.
4922 | if (match(V: &I, P: m_c_Xor(L: m_OneUse(SubPattern: m_Xor(L: m_CombineAnd(L: m_Value(V&: X), |
4923 | R: m_Unless(M: m_ConstantExpr())), |
4924 | R: m_ImmConstant(C&: C1))), |
4925 | R: m_Value(V&: Y)))) |
4926 | return BinaryOperator::CreateXor(V1: Builder.CreateXor(LHS: X, RHS: Y), V2: C1); |
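// Illustrative reassociation (arbitrary i32 constant):
//   %a = xor i32 %x, 7
//   %r = xor i32 %a, %y
// -->
//   %t = xor i32 %x, %y
//   %r = xor i32 %t, 7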
4927 | |
4928 | if (Instruction *R = reassociateForUses(BO&: I, Builder)) |
4929 | return R; |
4930 | |
4931 | if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder)) |
4932 | return Canonicalized; |
4933 | |
4934 | if (Instruction *Folded = foldLogicOfIsFPClass(BO&: I, Op0, Op1)) |
4935 | return Folded; |
4936 | |
4937 | if (Instruction *Folded = canonicalizeConditionalNegationViaMathToSelect(I)) |
4938 | return Folded; |
4939 | |
4940 | if (Instruction *Res = foldBinOpOfDisplacedShifts(I)) |
4941 | return Res; |
4942 | |
4943 | if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) |
4944 | return Res; |
4945 | |
4946 | return nullptr; |
4947 | } |
4948 | |