1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Analysis/IVDescriptors.h"
14#include "llvm/Analysis/DemandedBits.h"
15#include "llvm/Analysis/LoopInfo.h"
16#include "llvm/Analysis/ScalarEvolution.h"
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
19#include "llvm/Analysis/ValueTracking.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/Instructions.h"
22#include "llvm/IR/PatternMatch.h"
23#include "llvm/IR/ValueHandle.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/KnownBits.h"
26
27using namespace llvm;
28using namespace llvm::PatternMatch;
29using namespace llvm::SCEVPatternMatch;
30
31#define DEBUG_TYPE "iv-descriptors"
32
33bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
34 SmallPtrSetImpl<Instruction *> &Set) {
35 for (const Use &Use : I->operands())
36 if (!Set.count(Ptr: dyn_cast<Instruction>(Val: Use)))
37 return false;
38 return true;
39}
40
41bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
42 switch (Kind) {
43 default:
44 break;
45 case RecurKind::AddChainWithSubs:
46 case RecurKind::Sub:
47 case RecurKind::Add:
48 case RecurKind::Mul:
49 case RecurKind::Or:
50 case RecurKind::And:
51 case RecurKind::Xor:
52 case RecurKind::SMax:
53 case RecurKind::SMin:
54 case RecurKind::UMax:
55 case RecurKind::UMin:
56 case RecurKind::AnyOf:
57 case RecurKind::FindFirstIVSMin:
58 case RecurKind::FindFirstIVUMin:
59 case RecurKind::FindLastIVSMax:
60 case RecurKind::FindLastIVUMax:
61 // TODO: Make type-agnostic.
62 case RecurKind::FindLast:
63 return true;
64 }
65 return false;
66}
67
68bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) {
69 return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
70}
71
72/// Determines if Phi may have been type-promoted. If Phi has a single user
73/// that ANDs the Phi with a type mask, return the user. RT is updated to
74/// account for the narrower bit width represented by the mask, and the AND
75/// instruction is added to CI.
76static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
77 SmallPtrSetImpl<Instruction *> &Visited,
78 SmallPtrSetImpl<Instruction *> &CI) {
79 if (!Phi->hasOneUse())
80 return Phi;
81
82 const APInt *M = nullptr;
83 Instruction *I, *J = cast<Instruction>(Val: Phi->use_begin()->getUser());
84
85 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
86 // with a new integer type of the corresponding bit width.
87 if (match(V: J, P: m_And(L: m_Instruction(I), R: m_APInt(Res&: M)))) {
88 int32_t Bits = (*M + 1).exactLogBase2();
89 if (Bits > 0) {
90 RT = IntegerType::get(C&: Phi->getContext(), NumBits: Bits);
91 Visited.insert(Ptr: Phi);
92 CI.insert(Ptr: J);
93 return J;
94 }
95 }
96 return Phi;
97}
98
99/// Compute the minimal bit width needed to represent a reduction whose exit
100/// instruction is given by Exit.
101static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
102 DemandedBits *DB,
103 AssumptionCache *AC,
104 DominatorTree *DT) {
105 bool IsSigned = false;
106 const DataLayout &DL = Exit->getDataLayout();
107 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Ty: Exit->getType());
108
109 if (DB) {
110 // Use the demanded bits analysis to determine the bits that are live out
111 // of the exit instruction, rounding up to the nearest power of two. If the
112 // use of demanded bits results in a smaller bit width, we know the value
113 // must be positive (i.e., IsSigned = false), because if this were not the
114 // case, the sign bit would have been demanded.
115 auto Mask = DB->getDemandedBits(I: Exit);
116 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
117 }
118
119 if (MaxBitWidth == DL.getTypeSizeInBits(Ty: Exit->getType()) && AC && DT) {
120 // If demanded bits wasn't able to limit the bit width, we can try to use
121 // value tracking instead. This can be the case, for example, if the value
122 // may be negative.
123 auto NumSignBits = ComputeNumSignBits(Op: Exit, DL, AC, CxtI: nullptr, DT);
124 auto NumTypeBits = DL.getTypeSizeInBits(Ty: Exit->getType());
125 MaxBitWidth = NumTypeBits - NumSignBits;
126 KnownBits Bits = computeKnownBits(V: Exit, DL);
127 if (!Bits.isNonNegative()) {
128 // If the value is not known to be non-negative, we set IsSigned to true,
129 // meaning that we will use sext instructions instead of zext
130 // instructions to restore the original type.
131 IsSigned = true;
132 // Make sure at least one sign bit is included in the result, so it
133 // will get properly sign-extended.
134 ++MaxBitWidth;
135 }
136 }
137 MaxBitWidth = llvm::bit_ceil(Value: MaxBitWidth);
138
139 return std::make_pair(x: Type::getIntNTy(C&: Exit->getContext(), N: MaxBitWidth),
140 y&: IsSigned);
141}
142
143/// Collect cast instructions that can be ignored in the vectorizer's cost
144/// model, given a reduction exit value and the minimal type in which the
145// reduction can be represented. Also search casts to the recurrence type
146// to find the minimum width used by the recurrence.
147static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
148 Type *RecurrenceType,
149 SmallPtrSetImpl<Instruction *> &Casts,
150 unsigned &MinWidthCastToRecurTy) {
151
152 SmallVector<Instruction *, 8> Worklist;
153 SmallPtrSet<Instruction *, 8> Visited;
154 Worklist.push_back(Elt: Exit);
155 MinWidthCastToRecurTy = -1U;
156
157 while (!Worklist.empty()) {
158 Instruction *Val = Worklist.pop_back_val();
159 Visited.insert(Ptr: Val);
160 if (auto *Cast = dyn_cast<CastInst>(Val)) {
161 if (Cast->getSrcTy() == RecurrenceType) {
162 // If the source type of a cast instruction is equal to the recurrence
163 // type, it will be eliminated, and should be ignored in the vectorizer
164 // cost model.
165 Casts.insert(Ptr: Cast);
166 continue;
167 }
168 if (Cast->getDestTy() == RecurrenceType) {
169 // The minimum width used by the recurrence is found by checking for
170 // casts on its operands. The minimum width is used by the vectorizer
171 // when finding the widest type for in-loop reductions without any
172 // loads/stores.
173 MinWidthCastToRecurTy = std::min<unsigned>(
174 a: MinWidthCastToRecurTy, b: Cast->getSrcTy()->getScalarSizeInBits());
175 continue;
176 }
177 }
178 // Add all operands to the work list if they are loop-varying values that
179 // we haven't yet visited.
180 for (Value *O : cast<User>(Val)->operands())
181 if (auto *I = dyn_cast<Instruction>(Val: O))
182 if (TheLoop->contains(Inst: I) && !Visited.count(Ptr: I))
183 Worklist.push_back(Elt: I);
184 }
185}
186
187// Check if a given Phi node can be recognized as an ordered reduction for
188// vectorizing floating point operations without unsafe math.
189static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
190 Instruction *Exit, PHINode *Phi) {
191 // Currently only FAdd and FMulAdd are supported.
192 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
193 return false;
194
195 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
196 return false;
197
198 if (Kind == RecurKind::FMulAdd &&
199 !RecurrenceDescriptor::isFMulAddIntrinsic(I: Exit))
200 return false;
201
202 // Ensure the exit instruction has only one user other than the reduction PHI
203 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(N: 3))
204 return false;
205
206 // The only pattern accepted is the one in which the reduction PHI
207 // is used as one of the operands of the exit instruction
208 auto *Op0 = Exit->getOperand(i: 0);
209 auto *Op1 = Exit->getOperand(i: 1);
210 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
211 return false;
212 if (Kind == RecurKind::FMulAdd && Exit->getOperand(i: 2) != Phi)
213 return false;
214
215 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
216 << ", ExitInst: " << *Exit << "\n");
217
218 return true;
219}
220
221/// Returns true if \p Phi is a min/max reduction matching \p Kind where \p Phi
222/// is used outside the reduction chain. This is common for loops selecting the
223/// index of a minimum/maximum value (argmin/argmax).
224static bool isMinMaxReductionPhiWithUsersOutsideReductionChain(
225 PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes) {
226 BasicBlock *Latch = TheLoop->getLoopLatch();
227 if (!Latch)
228 return false;
229
230 assert(Phi->getNumIncomingValues() == 2 && "phi must have 2 incoming values");
231 Value *Inc = Phi->getIncomingValueForBlock(BB: Latch);
232 if (Phi->hasOneUse() || !Inc->hasOneUse() ||
233 !RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind))
234 return false;
235
236 Value *A, *B;
237 bool IsMinMax = [&]() {
238 switch (Kind) {
239 case RecurKind::UMax:
240 return match(V: Inc, P: m_UMax(L: m_Value(V&: A), R: m_Value(V&: B)));
241 case RecurKind::UMin:
242 return match(V: Inc, P: m_UMin(L: m_Value(V&: A), R: m_Value(V&: B)));
243 case RecurKind::SMax:
244 return match(V: Inc, P: m_SMax(L: m_Value(V&: A), R: m_Value(V&: B)));
245 case RecurKind::SMin:
246 return match(V: Inc, P: m_SMin(L: m_Value(V&: A), R: m_Value(V&: B)));
247 default:
248 llvm_unreachable("all min/max kinds must be handled");
249 }
250 }();
251 if (!IsMinMax)
252 return false;
253
254 if (A == B || (A != Phi && B != Phi))
255 return false;
256
257 SmallPtrSet<Instruction *, 4> CastInsts;
258 Value *RdxStart = Phi->getIncomingValueForBlock(BB: TheLoop->getLoopPreheader());
259 RedDes =
260 RecurrenceDescriptor(RdxStart, /*Exit=*/nullptr, /*Store=*/nullptr, Kind,
261 FastMathFlags(), /*ExactFP=*/nullptr, Phi->getType(),
262 /*Signed=*/false, /*Ordered=*/false, CastInsts,
263 /*MinWidthCastToRecurTy=*/-1U, /*PhiMultiUse=*/true);
264 return true;
265}
266
267bool RecurrenceDescriptor::AddReductionVar(
268 PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
269 RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC,
270 DominatorTree *DT, ScalarEvolution *SE) {
271 if (Phi->getNumIncomingValues() != 2)
272 return false;
273
274 // Reduction variables are only found in the loop header block.
275 if (Phi->getParent() != TheLoop->getHeader())
276 return false;
277
278 // Check for min/max reduction variables that feed other users in the loop.
279 if (isMinMaxReductionPhiWithUsersOutsideReductionChain(Phi, Kind, TheLoop,
280 RedDes))
281 return true;
282
283 // Obtain the reduction start value from the value that comes from the loop
284 // preheader.
285 Value *RdxStart = Phi->getIncomingValueForBlock(BB: TheLoop->getLoopPreheader());
286
287 // ExitInstruction is the single value which is used outside the loop.
288 // We only allow for a single reduction value to be used outside the loop.
289 // This includes users of the reduction, variables (which form a cycle
290 // which ends in the phi node).
291 Instruction *ExitInstruction = nullptr;
292
293 // Variable to keep last visited store instruction. By the end of the
294 // algorithm this variable will be either empty or having intermediate
295 // reduction value stored in invariant address.
296 StoreInst *IntermediateStore = nullptr;
297
298 // Indicates that we found a reduction operation in our scan.
299 bool FoundReduxOp = false;
300
301 // We start with the PHI node and scan for all of the users of this
302 // instruction. All users must be instructions that can be used as reduction
303 // variables (such as ADD). We must have a single out-of-block user. The cycle
304 // must include the original PHI.
305 bool FoundStartPHI = false;
306
307 // To recognize min/max patterns formed by a icmp select sequence, we store
308 // the number of instruction we saw from the recognized min/max pattern,
309 // to make sure we only see exactly the two instructions.
310 unsigned NumCmpSelectPatternInst = 0;
311 InstDesc ReduxDesc(false, nullptr);
312
313 // Data used for determining if the recurrence has been type-promoted.
314 Type *RecurrenceType = Phi->getType();
315 SmallPtrSet<Instruction *, 4> CastInsts;
316 unsigned MinWidthCastToRecurrenceType;
317 Instruction *Start = Phi;
318 bool IsSigned = false;
319
320 SmallPtrSet<Instruction *, 8> VisitedInsts;
321 SmallVector<Instruction *, 8> Worklist;
322
323 // Return early if the recurrence kind does not match the type of Phi. If the
324 // recurrence kind is arithmetic, we attempt to look through AND operations
325 // resulting from the type promotion performed by InstCombine. Vector
326 // operations are not limited to the legal integer widths, so we may be able
327 // to evaluate the reduction in the narrower width.
328 // Check the scalar type to handle both scalar and vector types.
329 Type *ScalarTy = RecurrenceType->getScalarType();
330 if (ScalarTy->isFloatingPointTy()) {
331 if (!isFloatingPointRecurrenceKind(Kind))
332 return false;
333 } else if (ScalarTy->isIntegerTy()) {
334 if (!isIntegerRecurrenceKind(Kind))
335 return false;
336 if (!isMinMaxRecurrenceKind(Kind))
337 Start = lookThroughAnd(Phi, RT&: RecurrenceType, Visited&: VisitedInsts, CI&: CastInsts);
338 } else {
339 // Pointer min/max may exist, but it is not supported as a reduction op.
340 return false;
341 }
342
343 Worklist.push_back(Elt: Start);
344 VisitedInsts.insert(Ptr: Start);
345
346 // Start with all flags set because we will intersect this with the reduction
347 // flags from all the reduction operations.
348 FastMathFlags FMF = FastMathFlags::getFast();
349
350 // The first instruction in the use-def chain of the Phi node that requires
351 // exact floating point operations.
352 Instruction *ExactFPMathInst = nullptr;
353
354 // A value in the reduction can be used:
355 // - By the reduction:
356 // - Reduction operation:
357 // - One use of reduction value (safe).
358 // - Multiple use of reduction value (not safe).
359 // - PHI:
360 // - All uses of the PHI must be the reduction (safe).
361 // - Otherwise, not safe.
362 // - By instructions outside of the loop (safe).
363 // * One value may have several outside users, but all outside
364 // uses must be of the same value.
365 // - By store instructions with a loop invariant address (safe with
366 // the following restrictions):
367 // * If there are several stores, all must have the same address.
368 // * Final value should be stored in that loop invariant address.
369 // - By an instruction that is not part of the reduction (not safe).
370 // This is either:
371 // * An instruction type other than PHI or the reduction operation.
372 // * A PHI in the header other than the initial PHI.
373 while (!Worklist.empty()) {
374 Instruction *Cur = Worklist.pop_back_val();
375
376 // Store instructions are allowed iff it is the store of the reduction
377 // value to the same loop invariant memory location.
378 if (auto *SI = dyn_cast<StoreInst>(Val: Cur)) {
379 if (!SE) {
380 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
381 << "Scalar Evolution Analysis\n");
382 return false;
383 }
384
385 const SCEV *PtrScev = SE->getSCEV(V: SI->getPointerOperand());
386 // Check it is the same address as previous stores
387 if (IntermediateStore) {
388 const SCEV *OtherScev =
389 SE->getSCEV(V: IntermediateStore->getPointerOperand());
390
391 if (OtherScev != PtrScev) {
392 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
393 << "inside the loop: " << *SI->getPointerOperand()
394 << " and "
395 << *IntermediateStore->getPointerOperand() << '\n');
396 return false;
397 }
398 }
399
400 // Check the pointer is loop invariant
401 if (!SE->isLoopInvariant(S: PtrScev, L: TheLoop)) {
402 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
403 << "inside the loop: " << *SI->getPointerOperand()
404 << '\n');
405 return false;
406 }
407
408 // IntermediateStore is always the last store in the loop.
409 IntermediateStore = SI;
410 continue;
411 }
412
413 // No Users.
414 // If the instruction has no users then this is a broken chain and can't be
415 // a reduction variable.
416 if (Cur->use_empty())
417 return false;
418
419 bool IsAPhi = isa<PHINode>(Val: Cur);
420
421 // A header PHI use other than the original PHI.
422 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
423 return false;
424
425 // Reductions of instructions such as Div, and Sub is only possible if the
426 // LHS is the reduction variable.
427 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Val: Cur) &&
428 !isa<ICmpInst>(Val: Cur) && !isa<FCmpInst>(Val: Cur) &&
429 !VisitedInsts.count(Ptr: dyn_cast<Instruction>(Val: Cur->getOperand(i: 0))))
430 return false;
431
432 // Any reduction instruction must be of one of the allowed kinds. We ignore
433 // the starting value (the Phi or an AND instruction if the Phi has been
434 // type-promoted).
435 if (Cur != Start) {
436 ReduxDesc =
437 isRecurrenceInstr(L: TheLoop, Phi, I: Cur, Kind, Prev&: ReduxDesc, FuncFMF, SE);
438 ExactFPMathInst = ExactFPMathInst == nullptr
439 ? ReduxDesc.getExactFPMathInst()
440 : ExactFPMathInst;
441 if (!ReduxDesc.isRecurrence())
442 return false;
443 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
444 if (isa<FPMathOperator>(Val: ReduxDesc.getPatternInst()) && !IsAPhi) {
445 FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
446 if (auto *Sel = dyn_cast<SelectInst>(Val: ReduxDesc.getPatternInst())) {
447 // Accept FMF on either fcmp or select of a min/max idiom.
448 // TODO: This is a hack to work-around the fact that FMF may not be
449 // assigned/propagated correctly. If that problem is fixed or we
450 // standardize on fmin/fmax via intrinsics, this can be removed.
451 if (auto *FCmp = dyn_cast<FCmpInst>(Val: Sel->getCondition()))
452 CurFMF |= FCmp->getFastMathFlags();
453 }
454 FMF &= CurFMF;
455 }
456 // Update this reduction kind if we matched a new instruction.
457 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
458 // state accurate while processing the worklist?
459 if (ReduxDesc.getRecKind() != RecurKind::None)
460 Kind = ReduxDesc.getRecKind();
461 }
462
463 bool IsASelect = isa<SelectInst>(Val: Cur);
464
465 // A conditional reduction operation must only have 2 or less uses in
466 // VisitedInsts.
467 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
468 hasMultipleUsesOf(I: Cur, Insts&: VisitedInsts, MaxNumUses: 2))
469 return false;
470
471 // A reduction operation must only have one use of the reduction value.
472 if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
473 !isAnyOfRecurrenceKind(Kind) && hasMultipleUsesOf(I: Cur, Insts&: VisitedInsts, MaxNumUses: 1))
474 return false;
475
476 // All inputs to a PHI node must be a reduction value.
477 if (IsAPhi && Cur != Phi && !areAllUsesIn(I: Cur, Set&: VisitedInsts))
478 return false;
479
480 if (isIntMinMaxRecurrenceKind(Kind) && (isa<ICmpInst>(Val: Cur) || IsASelect))
481 ++NumCmpSelectPatternInst;
482 if (isFPMinMaxRecurrenceKind(Kind) && (isa<FCmpInst>(Val: Cur) || IsASelect))
483 ++NumCmpSelectPatternInst;
484 if (isAnyOfRecurrenceKind(Kind) && IsASelect)
485 ++NumCmpSelectPatternInst;
486
487 // Check whether we found a reduction operator.
488 FoundReduxOp |= !IsAPhi && Cur != Start;
489
490 // Process users of current instruction. Push non-PHI nodes after PHI nodes
491 // onto the stack. This way we are going to have seen all inputs to PHI
492 // nodes once we get to them.
493 SmallVector<Instruction *, 8> NonPHIs;
494 SmallVector<Instruction *, 8> PHIs;
495 for (User *U : Cur->users()) {
496 Instruction *UI = cast<Instruction>(Val: U);
497
498 // If the user is a call to llvm.fmuladd then the instruction can only be
499 // the final operand.
500 if (isFMulAddIntrinsic(I: UI))
501 if (Cur == UI->getOperand(i: 0) || Cur == UI->getOperand(i: 1))
502 return false;
503
504 // Check if we found the exit user.
505 BasicBlock *Parent = UI->getParent();
506 if (!TheLoop->contains(BB: Parent)) {
507 // If we already know this instruction is used externally, move on to
508 // the next user.
509 if (ExitInstruction == Cur)
510 continue;
511
512 // Exit if you find multiple values used outside or if the header phi
513 // node is being used. In this case the user uses the value of the
514 // previous iteration, in which case we would loose "VF-1" iterations of
515 // the reduction operation if we vectorize.
516 if (ExitInstruction != nullptr || Cur == Phi)
517 return false;
518
519 // The instruction used by an outside user must be the last instruction
520 // before we feed back to the reduction phi. Otherwise, we loose VF-1
521 // operations on the value.
522 if (!is_contained(Range: Phi->operands(), Element: Cur))
523 return false;
524
525 ExitInstruction = Cur;
526 continue;
527 }
528
529 // Process instructions only once (termination). Each reduction cycle
530 // value must only be used once, except by phi nodes and min/max
531 // reductions which are represented as a cmp followed by a select.
532 InstDesc IgnoredVal(false, nullptr);
533 if (VisitedInsts.insert(Ptr: UI).second) {
534 if (isa<PHINode>(Val: UI)) {
535 PHIs.push_back(Elt: UI);
536 } else {
537 StoreInst *SI = dyn_cast<StoreInst>(Val: UI);
538 if (SI && SI->getPointerOperand() == Cur) {
539 // Reduction variable chain can only be stored somewhere but it
540 // can't be used as an address.
541 return false;
542 }
543 NonPHIs.push_back(Elt: UI);
544 }
545 } else if (!isa<PHINode>(Val: UI) &&
546 ((!isa<FCmpInst>(Val: UI) && !isa<ICmpInst>(Val: UI) &&
547 !isa<SelectInst>(Val: UI)) ||
548 (!isConditionalRdxPattern(I: UI).isRecurrence() &&
549 !isAnyOfPattern(Loop: TheLoop, OrigPhi: Phi, I: UI, Prev&: IgnoredVal)
550 .isRecurrence() &&
551 !isMinMaxPattern(I: UI, Kind, Prev: IgnoredVal).isRecurrence())))
552 return false;
553
554 // Remember that we completed the cycle.
555 if (UI == Phi)
556 FoundStartPHI = true;
557 }
558 Worklist.append(in_start: PHIs.begin(), in_end: PHIs.end());
559 Worklist.append(in_start: NonPHIs.begin(), in_end: NonPHIs.end());
560 }
561
562 // This means we have seen one but not the other instruction of the
563 // pattern or more than just a select and cmp. Zero implies that we saw a
564 // llvm.min/max intrinsic, which is always OK.
565 if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
566 NumCmpSelectPatternInst != 0)
567 return false;
568
569 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
570 return false;
571
572 if (IntermediateStore) {
573 // Check that stored value goes to the phi node again. This way we make sure
574 // that the value stored in IntermediateStore is indeed the final reduction
575 // value.
576 if (!is_contained(Range: Phi->operands(), Element: IntermediateStore->getValueOperand())) {
577 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
578 << *IntermediateStore << '\n');
579 return false;
580 }
581
582 // If there is an exit instruction it's value should be stored in
583 // IntermediateStore
584 if (ExitInstruction &&
585 IntermediateStore->getValueOperand() != ExitInstruction) {
586 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
587 "store last calculated value of the reduction: "
588 << *IntermediateStore << '\n');
589 return false;
590 }
591
592 // If all uses are inside the loop (intermediate stores), then the
593 // reduction value after the loop will be the one used in the last store.
594 if (!ExitInstruction)
595 ExitInstruction = cast<Instruction>(Val: IntermediateStore->getValueOperand());
596 }
597
598 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
599 return false;
600
601 const bool IsOrdered =
602 checkOrderedReduction(Kind, ExactFPMathInst, Exit: ExitInstruction, Phi);
603
604 if (Start != Phi) {
605 // If the starting value is not the same as the phi node, we speculatively
606 // looked through an 'and' instruction when evaluating a potential
607 // arithmetic reduction to determine if it may have been type-promoted.
608 //
609 // We now compute the minimal bit width that is required to represent the
610 // reduction. If this is the same width that was indicated by the 'and', we
611 // can represent the reduction in the smaller type. The 'and' instruction
612 // will be eliminated since it will essentially be a cast instruction that
613 // can be ignore in the cost model. If we compute a different type than we
614 // did when evaluating the 'and', the 'and' will not be eliminated, and we
615 // will end up with different kinds of operations in the recurrence
616 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
617 // the case.
618 //
619 // The vectorizer relies on InstCombine to perform the actual
620 // type-shrinking. It does this by inserting instructions to truncate the
621 // exit value of the reduction to the width indicated by RecurrenceType and
622 // then extend this value back to the original width. If IsSigned is false,
623 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
624 // used.
625 //
626 // TODO: We should not rely on InstCombine to rewrite the reduction in the
627 // smaller type. We should just generate a correctly typed expression
628 // to begin with.
629 Type *ComputedType;
630 std::tie(args&: ComputedType, args&: IsSigned) =
631 computeRecurrenceType(Exit: ExitInstruction, DB, AC, DT);
632 if (ComputedType != RecurrenceType)
633 return false;
634 }
635
636 // Collect cast instructions and the minimum width used by the recurrence.
637 // If the starting value is not the same as the phi node and the computed
638 // recurrence type is equal to the recurrence type, the recurrence expression
639 // will be represented in a narrower or wider type. If there are any cast
640 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
641 // Note that the 'and' instruction was already included in this list.
642 //
643 // TODO: A better way to represent this may be to tag in some way all the
644 // instructions that are a part of the reduction. The vectorizer cost
645 // model could then apply the recurrence type to these instructions,
646 // without needing a white list of instructions to ignore.
647 // This may also be useful for the inloop reductions, if it can be
648 // kept simple enough.
649 collectCastInstrs(TheLoop, Exit: ExitInstruction, RecurrenceType, Casts&: CastInsts,
650 MinWidthCastToRecurTy&: MinWidthCastToRecurrenceType);
651
652 // We found a reduction var if we have reached the original phi node and we
653 // only have a single instruction with out-of-loop users.
654
655 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
656 // is saved as part of the RecurrenceDescriptor.
657
658 // Save the description of this reduction variable.
659 RedDes =
660 RecurrenceDescriptor(RdxStart, ExitInstruction, IntermediateStore, Kind,
661 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
662 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
663 return true;
664}
665
666// We are looking for loops that do something like this:
667// int r = 0;
668// for (int i = 0; i < n; i++) {
669// if (src[i] > 3)
670// r = 3;
671// }
672// where the reduction value (r) only has two states, in this example 0 or 3.
673// The generated LLVM IR for this type of loop will be like this:
674// for.body:
675// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
676// ...
677// %cmp = icmp sgt i32 %5, 3
678// %spec.select = select i1 %cmp, i32 3, i32 %r
679// ...
680// In general we can support vectorization of loops where 'r' flips between
681// any two non-constants, provided they are loop invariant. The only thing
682// we actually care about at the end of the loop is whether or not any lane
683// in the selected vector is different from the start value. The final
684// across-vector reduction after the loop simply involves choosing the start
685// value if nothing changed (0 in the example above) or the other selected
686// value (3 in the example above).
687RecurrenceDescriptor::InstDesc
688RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
689 Instruction *I, InstDesc &Prev) {
690 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
691 // the select.
692 if (match(V: I, P: m_OneUse(SubPattern: m_Cmp()))) {
693 if (auto *Select = dyn_cast<SelectInst>(Val: *I->user_begin()))
694 return InstDesc(Select, Prev.getRecKind());
695 }
696
697 if (!match(V: I, P: m_Select(C: m_Cmp(), L: m_Value(), R: m_Value())))
698 return InstDesc(false, I);
699
700 SelectInst *SI = cast<SelectInst>(Val: I);
701 Value *NonPhi = nullptr;
702
703 if (OrigPhi == dyn_cast<PHINode>(Val: SI->getTrueValue()))
704 NonPhi = SI->getFalseValue();
705 else if (OrigPhi == dyn_cast<PHINode>(Val: SI->getFalseValue()))
706 NonPhi = SI->getTrueValue();
707 else
708 return InstDesc(false, I);
709
710 // We are looking for selects of the form:
711 // select(cmp(), phi, loop_invariant) or
712 // select(cmp(), loop_invariant, phi)
713 if (!Loop->isLoopInvariant(V: NonPhi))
714 return InstDesc(false, I);
715
716 return InstDesc(I, RecurKind::AnyOf);
717}
718
719// We are looking for loops that do something like this:
720// int r = 0;
721// for (int i = 0; i < n; i++) {
722// if (src[i] > 3)
723// r = i;
724// }
725// or like this:
726// int r = 0;
727// for (int i = 0; i < n; i++) {
728// if (src[i] > 3)
729// r = <loop-varying value>;
730// }
731// The reduction value (r) is derived from either the values of an induction
732// variable (i) sequence, an arbitrary loop-varying value, or from the start
733// value (0). The LLVM IR generated for such loops would be as follows:
734// for.body:
735// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
736// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
737// ...
738// %cmp = icmp sgt i32 %5, 3
739// %spec.select = select i1 %cmp, i32 %i, i32 %r
740// %inc = add nsw i32 %i, 1
741// ...
742// When searching for an induction variable (i), the reduction value after the
743// loop will be the maximum (increasing induction) or minimum (decreasing
744// induction) value of 'i' that the condition (src[i] > 3) is satisfied, or the
745// start value (0 in the example above). When the start value of the induction
746// variable 'i' is greater than the minimum (increasing induction) or maximum
747// (decreasing induction) value of the data type, we can use the minimum
748// (increasing induction) or maximum (decreasing induction) value of the data
749// type as a sentinel value to replace the start value. This allows us to
750// perform a single reduction max (increasing induction) or min (decreasing
751// induction) operation to obtain the final reduction result.
752// TODO: It is possible to solve the case where the start value is the minimum
753// value of the data type or a non-constant value by using mask and multiple
754// reduction operations.
755//
756// When searching for an arbitrary loop-varying value, the reduction value will
757// either be the initial value (0) if the condition was never met, or the value
758// of the loop-varying value in the most recent loop iteration where the
759// condition was met.
760RecurrenceDescriptor::InstDesc
761RecurrenceDescriptor::isFindPattern(Loop *TheLoop, PHINode *OrigPhi,
762 Instruction *I, ScalarEvolution &SE) {
763 // TODO: Support the vectorization of FindLastIV when the reduction phi is
764 // used by more than one select instruction. This vectorization is only
765 // performed when the SCEV of each increasing induction variable used by the
766 // select instructions is identical.
767 if (!OrigPhi->hasOneUse())
768 return InstDesc(false, I);
769
770 // We are looking for selects of the form:
771 // select(cmp(), phi, value) or
772 // select(cmp(), value, phi)
773 // where 'value' must be a loop induction variable
774 // (for FindFirstIV/FindLastIV) or an arbitrary value (for FindLast).
775 // TODO: Match selects with multi-use cmp conditions.
776 Value *NonRdxPhi = nullptr;
777 if (!match(V: I, P: m_CombineOr(L: m_Select(C: m_OneUse(SubPattern: m_Cmp()), L: m_Value(V&: NonRdxPhi),
778 R: m_Specific(V: OrigPhi)),
779 R: m_Select(C: m_OneUse(SubPattern: m_Cmp()), L: m_Specific(V: OrigPhi),
780 R: m_Value(V&: NonRdxPhi)))))
781 return InstDesc(false, I);
782
783 // Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
784 // std::nullopt.
785 auto GetFindFirstLastIVRecurKind = [&](Value *V) -> std::optional<RecurKind> {
786 Type *Ty = V->getType();
787 if (!SE.isSCEVable(Ty))
788 return std::nullopt;
789
790 auto *AR = SE.getSCEV(V);
791 const SCEV *Step;
792 if (!match(S: AR, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV(V&: Step),
793 L: m_SpecificLoop(L: TheLoop))))
794 return std::nullopt;
795
796 // We must have a known positive or negative step for FindIV
797 const bool PositiveStep = SE.isKnownPositive(S: Step);
798 if (!SE.isKnownNonZero(S: Step))
799 return std::nullopt;
800
801 // Check if the minimum (FindLast) or maximum (FindFirst) value of the
802 // recurrence type can be used as a sentinel value. The maximum acceptable
803 // range for the induction variable, called the valid range will exclude
804 // <sentinel value>, where <sentinel value> is
805 // [Signed|Unsigned]Min(<recurrence type>) for FindLastIV or
806 // [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV.
807 // TODO: This range restriction can be lifted by adding an additional
808 // virtual OR reduction.
809 auto CheckRange = [&](bool IsSigned) {
810 const ConstantRange IVRange =
811 IsSigned ? SE.getSignedRange(S: AR) : SE.getUnsignedRange(S: AR);
812 unsigned NumBits = Ty->getIntegerBitWidth();
813 APInt Sentinel;
814 if (PositiveStep) {
815 Sentinel = IsSigned ? APInt::getSignedMinValue(numBits: NumBits)
816 : APInt::getMinValue(numBits: NumBits);
817 } else {
818 Sentinel = IsSigned ? APInt::getSignedMaxValue(numBits: NumBits)
819 : APInt::getMaxValue(numBits: NumBits);
820 }
821 ConstantRange ValidRange = ConstantRange(Sentinel).inverse();
822
823 LLVM_DEBUG(
824 dbgs() << "LV: " << (PositiveStep ? "FindLastIV" : "FindFirstIV")
825 << " valid range is " << ValidRange << ", and the range of "
826 << *AR << " is " << IVRange << "\n");
827
828 // Ensure the induction variable does not wrap around by verifying that
829 // its range is fully contained within the valid range.
830 return ValidRange.contains(CR: IVRange);
831 };
832 if (PositiveStep) {
833 if (CheckRange(true))
834 return RecurKind::FindLastIVSMax;
835 if (CheckRange(false))
836 return RecurKind::FindLastIVUMax;
837 return std::nullopt;
838 }
839
840 if (CheckRange(true))
841 return RecurKind::FindFirstIVSMin;
842 if (CheckRange(false))
843 return RecurKind::FindFirstIVUMin;
844 return std::nullopt;
845 };
846
847 if (auto RK = GetFindFirstLastIVRecurKind(NonRdxPhi))
848 return InstDesc(I, *RK);
849
850 // If the recurrence is not specific to an IV, return a generic FindLast.
851 return InstDesc(I, RecurKind::FindLast);
852}
853
854RecurrenceDescriptor::InstDesc
855RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
856 const InstDesc &Prev) {
857 assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&
858 "Expected a cmp or select or call instruction");
859 if (!isMinMaxRecurrenceKind(Kind))
860 return InstDesc(false, I);
861
862 // We must handle the select(cmp()) as a single instruction. Advance to the
863 // select.
864 if (match(V: I, P: m_OneUse(SubPattern: m_Cmp()))) {
865 if (auto *Select = dyn_cast<SelectInst>(Val: *I->user_begin()))
866 return InstDesc(Select, Prev.getRecKind());
867 }
868
869 // Only match select with single use cmp condition, or a min/max intrinsic.
870 if (!isa<IntrinsicInst>(Val: I) &&
871 !match(V: I, P: m_Select(C: m_OneUse(SubPattern: m_Cmp()), L: m_Value(), R: m_Value())))
872 return InstDesc(false, I);
873
874 // Look for a min/max pattern.
875 if (match(V: I, P: m_UMin(L: m_Value(), R: m_Value())))
876 return InstDesc(Kind == RecurKind::UMin, I);
877 if (match(V: I, P: m_UMax(L: m_Value(), R: m_Value())))
878 return InstDesc(Kind == RecurKind::UMax, I);
879 if (match(V: I, P: m_SMax(L: m_Value(), R: m_Value())))
880 return InstDesc(Kind == RecurKind::SMax, I);
881 if (match(V: I, P: m_SMin(L: m_Value(), R: m_Value())))
882 return InstDesc(Kind == RecurKind::SMin, I);
883 if (match(V: I, P: m_OrdOrUnordFMin(L: m_Value(), R: m_Value())))
884 return InstDesc(Kind == RecurKind::FMin, I);
885 if (match(V: I, P: m_OrdOrUnordFMax(L: m_Value(), R: m_Value())))
886 return InstDesc(Kind == RecurKind::FMax, I);
887 if (match(V: I, P: m_FMinNum(Op0: m_Value(), Op1: m_Value())))
888 return InstDesc(Kind == RecurKind::FMin, I);
889 if (match(V: I, P: m_FMaxNum(Op0: m_Value(), Op1: m_Value())))
890 return InstDesc(Kind == RecurKind::FMax, I);
891 if (match(V: I, P: m_FMinimumNum(Op0: m_Value(), Op1: m_Value())))
892 return InstDesc(Kind == RecurKind::FMinimumNum, I);
893 if (match(V: I, P: m_FMaximumNum(Op0: m_Value(), Op1: m_Value())))
894 return InstDesc(Kind == RecurKind::FMaximumNum, I);
895 if (match(V: I, P: m_FMinimum(Op0: m_Value(), Op1: m_Value())))
896 return InstDesc(Kind == RecurKind::FMinimum, I);
897 if (match(V: I, P: m_FMaximum(Op0: m_Value(), Op1: m_Value())))
898 return InstDesc(Kind == RecurKind::FMaximum, I);
899
900 return InstDesc(false, I);
901}
902
903/// Returns true if the select instruction has users in the compare-and-add
904/// reduction pattern below. The select instruction argument is the last one
905/// in the sequence.
906///
907/// %sum.1 = phi ...
908/// ...
909/// %cmp = fcmp pred %0, %CFP
910/// %add = fadd %0, %sum.1
911/// %sum.2 = select %cmp, %add, %sum.1
912RecurrenceDescriptor::InstDesc
913RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) {
914 Value *TrueVal, *FalseVal;
915 // Only handle single use cases for now.
916 if (!match(V: I,
917 P: m_Select(C: m_OneUse(SubPattern: m_Cmp()), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal))))
918 return InstDesc(false, I);
919
920 // Handle only when either of operands of select instruction is a PHI
921 // node for now.
922 if ((isa<PHINode>(Val: TrueVal) && isa<PHINode>(Val: FalseVal)) ||
923 (!isa<PHINode>(Val: TrueVal) && !isa<PHINode>(Val: FalseVal)))
924 return InstDesc(false, I);
925
926 Instruction *I1 = isa<PHINode>(Val: TrueVal) ? dyn_cast<Instruction>(Val: FalseVal)
927 : dyn_cast<Instruction>(Val: TrueVal);
928 if (!I1 || !I1->isBinaryOp())
929 return InstDesc(false, I);
930
931 Value *Op1, *Op2;
932 if (!(((m_FAdd(L: m_Value(V&: Op1), R: m_Value(V&: Op2)).match(V: I1) ||
933 m_FSub(L: m_Value(V&: Op1), R: m_Value(V&: Op2)).match(V: I1)) &&
934 I1->isFast()) ||
935 (m_FMul(L: m_Value(V&: Op1), R: m_Value(V&: Op2)).match(V: I1) && (I1->isFast())) ||
936 ((m_Add(L: m_Value(V&: Op1), R: m_Value(V&: Op2)).match(V: I1) ||
937 m_Sub(L: m_Value(V&: Op1), R: m_Value(V&: Op2)).match(V: I1))) ||
938 (m_Mul(L: m_Value(V&: Op1), R: m_Value(V&: Op2)).match(V: I1))))
939 return InstDesc(false, I);
940
941 Instruction *IPhi = isa<PHINode>(Val: Op1) ? dyn_cast<Instruction>(Val: Op1)
942 : dyn_cast<Instruction>(Val: Op2);
943 if (!IPhi || IPhi != FalseVal)
944 return InstDesc(false, I);
945
946 return InstDesc(true, I);
947}
948
949RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
950 Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
951 FastMathFlags FuncFMF, ScalarEvolution *SE) {
952 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
953 switch (I->getOpcode()) {
954 default:
955 return InstDesc(false, I);
956 case Instruction::PHI:
957 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
958 case Instruction::Sub:
959 return InstDesc(
960 Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs, I);
961 case Instruction::Add:
962 return InstDesc(
963 Kind == RecurKind::Add || Kind == RecurKind::AddChainWithSubs, I);
964 case Instruction::Mul:
965 return InstDesc(Kind == RecurKind::Mul, I);
966 case Instruction::And:
967 return InstDesc(Kind == RecurKind::And, I);
968 case Instruction::Or:
969 return InstDesc(Kind == RecurKind::Or, I);
970 case Instruction::Xor:
971 return InstDesc(Kind == RecurKind::Xor, I);
972 case Instruction::FDiv:
973 case Instruction::FMul:
974 return InstDesc(Kind == RecurKind::FMul, I,
975 I->hasAllowReassoc() ? nullptr : I);
976 case Instruction::FSub:
977 case Instruction::FAdd:
978 return InstDesc(Kind == RecurKind::FAdd, I,
979 I->hasAllowReassoc() ? nullptr : I);
980 case Instruction::Select:
981 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
982 Kind == RecurKind::Add || Kind == RecurKind::Mul ||
983 Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs)
984 return isConditionalRdxPattern(I);
985 if (isFindRecurrenceKind(Kind) && SE)
986 return isFindPattern(TheLoop: L, OrigPhi, I, SE&: *SE);
987 [[fallthrough]];
988 case Instruction::FCmp:
989 case Instruction::ICmp:
990 case Instruction::Call:
991 if (isAnyOfRecurrenceKind(Kind))
992 return isAnyOfPattern(Loop: L, OrigPhi, I, Prev);
993 auto HasRequiredFMF = [&]() {
994 if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
995 return true;
996 if (isa<FPMathOperator>(Val: I) && I->hasNoNaNs() && I->hasNoSignedZeros())
997 return true;
998 // minimum/minnum and maximum/maxnum intrinsics do not require nsz and nnan
999 // flags since NaN and signed zeroes are propagated in the intrinsic
1000 // implementation.
1001 return match(V: I, P: m_Intrinsic<Intrinsic::minimum>(Op0: m_Value(), Op1: m_Value())) ||
1002 match(V: I, P: m_Intrinsic<Intrinsic::maximum>(Op0: m_Value(), Op1: m_Value())) ||
1003 match(V: I,
1004 P: m_Intrinsic<Intrinsic::minimumnum>(Op0: m_Value(), Op1: m_Value())) ||
1005 match(V: I, P: m_Intrinsic<Intrinsic::maximumnum>(Op0: m_Value(), Op1: m_Value()));
1006 };
1007 if (isIntMinMaxRecurrenceKind(Kind))
1008 return isMinMaxPattern(I, Kind, Prev);
1009 if (isFPMinMaxRecurrenceKind(Kind)) {
1010 InstDesc Res = isMinMaxPattern(I, Kind, Prev);
1011 if (!Res.isRecurrence())
1012 return InstDesc(false, I);
1013 if (HasRequiredFMF())
1014 return Res;
1015 // We may be able to vectorize FMax/FMin reductions using maxnum/minnum
1016 // intrinsics with extra checks ensuring the vector loop handles only
1017 // non-NaN inputs.
1018 if (match(V: I, P: m_Intrinsic<Intrinsic::maxnum>(Op0: m_Value(), Op1: m_Value()))) {
1019 assert(Kind == RecurKind::FMax &&
1020 "unexpected recurrence kind for maxnum");
1021 return InstDesc(I, RecurKind::FMaxNum);
1022 }
1023 if (match(V: I, P: m_Intrinsic<Intrinsic::minnum>(Op0: m_Value(), Op1: m_Value()))) {
1024 assert(Kind == RecurKind::FMin &&
1025 "unexpected recurrence kind for minnum");
1026 return InstDesc(I, RecurKind::FMinNum);
1027 }
1028 return InstDesc(false, I);
1029 }
1030 if (isFMulAddIntrinsic(I))
1031 return InstDesc(Kind == RecurKind::FMulAdd, I,
1032 I->hasAllowReassoc() ? nullptr : I);
1033 return InstDesc(false, I);
1034 }
1035}
1036
1037bool RecurrenceDescriptor::hasMultipleUsesOf(
1038 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,
1039 unsigned MaxNumUses) {
1040 unsigned NumUses = 0;
1041 for (const Use &U : I->operands()) {
1042 if (Insts.count(Ptr: dyn_cast<Instruction>(Val: U)))
1043 ++NumUses;
1044 if (NumUses > MaxNumUses)
1045 return true;
1046 }
1047
1048 return false;
1049}
1050
1051bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
1052 RecurrenceDescriptor &RedDes,
1053 DemandedBits *DB, AssumptionCache *AC,
1054 DominatorTree *DT,
1055 ScalarEvolution *SE) {
1056 BasicBlock *Header = TheLoop->getHeader();
1057 Function &F = *Header->getParent();
1058 FastMathFlags FMF;
1059 FMF.setNoNaNs(
1060 F.getFnAttribute(Kind: "no-nans-fp-math").getValueAsBool());
1061 FMF.setNoSignedZeros(
1062 F.getFnAttribute(Kind: "no-signed-zeros-fp-math").getValueAsBool());
1063
1064 if (AddReductionVar(Phi, Kind: RecurKind::Add, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1065 SE)) {
1066 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
1067 return true;
1068 }
1069 if (AddReductionVar(Phi, Kind: RecurKind::Sub, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1070 SE)) {
1071 LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n");
1072 return true;
1073 }
1074 if (AddReductionVar(Phi, Kind: RecurKind::AddChainWithSubs, TheLoop, FuncFMF: FMF, RedDes,
1075 DB, AC, DT, SE)) {
1076 LLVM_DEBUG(dbgs() << "Found a chained ADD-SUB reduction PHI." << *Phi
1077 << "\n");
1078 return true;
1079 }
1080 if (AddReductionVar(Phi, Kind: RecurKind::Mul, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1081 SE)) {
1082 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
1083 return true;
1084 }
1085 if (AddReductionVar(Phi, Kind: RecurKind::Or, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1086 SE)) {
1087 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
1088 return true;
1089 }
1090 if (AddReductionVar(Phi, Kind: RecurKind::And, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1091 SE)) {
1092 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
1093 return true;
1094 }
1095 if (AddReductionVar(Phi, Kind: RecurKind::Xor, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1096 SE)) {
1097 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
1098 return true;
1099 }
1100 if (AddReductionVar(Phi, Kind: RecurKind::SMax, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1101 SE)) {
1102 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
1103 return true;
1104 }
1105 if (AddReductionVar(Phi, Kind: RecurKind::SMin, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1106 SE)) {
1107 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
1108 return true;
1109 }
1110 if (AddReductionVar(Phi, Kind: RecurKind::UMax, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1111 SE)) {
1112 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
1113 return true;
1114 }
1115 if (AddReductionVar(Phi, Kind: RecurKind::UMin, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1116 SE)) {
1117 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
1118 return true;
1119 }
1120 if (AddReductionVar(Phi, Kind: RecurKind::AnyOf, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1121 SE)) {
1122 LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi
1123 << "\n");
1124 return true;
1125 }
1126 if (AddReductionVar(Phi, Kind: RecurKind::FindLast, TheLoop, FuncFMF: FMF, RedDes, DB, AC,
1127 DT, SE)) {
1128 LLVM_DEBUG(dbgs() << "Found a Find reduction PHI." << *Phi << "\n");
1129 return true;
1130 }
1131 if (AddReductionVar(Phi, Kind: RecurKind::FMul, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1132 SE)) {
1133 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
1134 return true;
1135 }
1136 if (AddReductionVar(Phi, Kind: RecurKind::FAdd, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1137 SE)) {
1138 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
1139 return true;
1140 }
1141 if (AddReductionVar(Phi, Kind: RecurKind::FMax, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1142 SE)) {
1143 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
1144 return true;
1145 }
1146 if (AddReductionVar(Phi, Kind: RecurKind::FMin, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1147 SE)) {
1148 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
1149 return true;
1150 }
1151 if (AddReductionVar(Phi, Kind: RecurKind::FMulAdd, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1152 SE)) {
1153 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
1154 return true;
1155 }
1156 if (AddReductionVar(Phi, Kind: RecurKind::FMaximum, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1157 SE)) {
1158 LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
1159 return true;
1160 }
1161 if (AddReductionVar(Phi, Kind: RecurKind::FMinimum, TheLoop, FuncFMF: FMF, RedDes, DB, AC, DT,
1162 SE)) {
1163 LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
1164 return true;
1165 }
1166 if (AddReductionVar(Phi, Kind: RecurKind::FMaximumNum, TheLoop, FuncFMF: FMF, RedDes, DB, AC,
1167 DT, SE)) {
1168 LLVM_DEBUG(dbgs() << "Found a float MAXIMUMNUM reduction PHI." << *Phi
1169 << "\n");
1170 return true;
1171 }
1172 if (AddReductionVar(Phi, Kind: RecurKind::FMinimumNum, TheLoop, FuncFMF: FMF, RedDes, DB, AC,
1173 DT, SE)) {
1174 LLVM_DEBUG(dbgs() << "Found a float MINIMUMNUM reduction PHI." << *Phi
1175 << "\n");
1176 return true;
1177 }
1178 // Not a reduction of known type.
1179 return false;
1180}
1181
1182bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
1183 DominatorTree *DT) {
1184
1185 // Ensure the phi node is in the loop header and has two incoming values.
1186 if (Phi->getParent() != TheLoop->getHeader() ||
1187 Phi->getNumIncomingValues() != 2)
1188 return false;
1189
1190 // Ensure the loop has a preheader and a single latch block. The loop
1191 // vectorizer will need the latch to set up the next iteration of the loop.
1192 auto *Preheader = TheLoop->getLoopPreheader();
1193 auto *Latch = TheLoop->getLoopLatch();
1194 if (!Preheader || !Latch)
1195 return false;
1196
1197 // Ensure the phi node's incoming blocks are the loop preheader and latch.
1198 if (Phi->getBasicBlockIndex(BB: Preheader) < 0 ||
1199 Phi->getBasicBlockIndex(BB: Latch) < 0)
1200 return false;
1201
1202 // Get the previous value. The previous value comes from the latch edge while
1203 // the initial value comes from the preheader edge.
1204 auto *Previous = dyn_cast<Instruction>(Val: Phi->getIncomingValueForBlock(BB: Latch));
1205
1206 // If Previous is a phi in the header, go through incoming values from the
1207 // latch until we find a non-phi value. Use this as the new Previous, all uses
1208 // in the header will be dominated by the original phi, but need to be moved
1209 // after the non-phi previous value.
1210 SmallPtrSet<PHINode *, 4> SeenPhis;
1211 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Val: Previous)) {
1212 if (PrevPhi->getParent() != Phi->getParent())
1213 return false;
1214 if (!SeenPhis.insert(Ptr: PrevPhi).second)
1215 return false;
1216 Previous = dyn_cast<Instruction>(Val: PrevPhi->getIncomingValueForBlock(BB: Latch));
1217 }
1218
1219 if (!Previous || !TheLoop->contains(Inst: Previous) || isa<PHINode>(Val: Previous))
1220 return false;
1221
1222 // Ensure every user of the phi node (recursively) is dominated by the
1223 // previous value. The dominance requirement ensures the loop vectorizer will
1224 // not need to vectorize the initial value prior to the first iteration of the
1225 // loop.
1226 // TODO: Consider extending this sinking to handle memory instructions.
1227
1228 SmallPtrSet<Value *, 8> Seen;
1229 BasicBlock *PhiBB = Phi->getParent();
1230 SmallVector<Instruction *, 8> WorkList;
1231 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
1232 // Cyclic dependence.
1233 if (Previous == SinkCandidate)
1234 return false;
1235
1236 if (!Seen.insert(Ptr: SinkCandidate).second)
1237 return true;
1238 if (DT->dominates(Def: Previous,
1239 User: SinkCandidate)) // We already are good w/o sinking.
1240 return true;
1241
1242 if (SinkCandidate->getParent() != PhiBB ||
1243 SinkCandidate->mayHaveSideEffects() ||
1244 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1245 return false;
1246
1247 // If we reach a PHI node that is not dominated by Previous, we reached a
1248 // header PHI. No need for sinking.
1249 if (isa<PHINode>(Val: SinkCandidate))
1250 return true;
1251
1252 // Sink User tentatively and check its users
1253 WorkList.push_back(Elt: SinkCandidate);
1254 return true;
1255 };
1256
1257 WorkList.push_back(Elt: Phi);
1258 // Try to recursively sink instructions and their users after Previous.
1259 while (!WorkList.empty()) {
1260 Instruction *Current = WorkList.pop_back_val();
1261 for (User *User : Current->users()) {
1262 if (!TryToPushSinkCandidate(cast<Instruction>(Val: User)))
1263 return false;
1264 }
1265 }
1266
1267 return true;
1268}
1269
1270unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
1271 switch (Kind) {
1272 case RecurKind::Sub:
1273 return Instruction::Sub;
1274 case RecurKind::AddChainWithSubs:
1275 case RecurKind::Add:
1276 return Instruction::Add;
1277 case RecurKind::Mul:
1278 return Instruction::Mul;
1279 case RecurKind::Or:
1280 return Instruction::Or;
1281 case RecurKind::And:
1282 return Instruction::And;
1283 case RecurKind::Xor:
1284 return Instruction::Xor;
1285 case RecurKind::FMul:
1286 return Instruction::FMul;
1287 case RecurKind::FMulAdd:
1288 case RecurKind::FAdd:
1289 return Instruction::FAdd;
1290 case RecurKind::SMax:
1291 case RecurKind::SMin:
1292 case RecurKind::UMax:
1293 case RecurKind::UMin:
1294 return Instruction::ICmp;
1295 case RecurKind::FMax:
1296 case RecurKind::FMin:
1297 case RecurKind::FMaximum:
1298 case RecurKind::FMinimum:
1299 case RecurKind::FMaximumNum:
1300 case RecurKind::FMinimumNum:
1301 return Instruction::FCmp;
1302 case RecurKind::FindLast:
1303 case RecurKind::AnyOf:
1304 case RecurKind::FindFirstIVSMin:
1305 case RecurKind::FindFirstIVUMin:
1306 case RecurKind::FindLastIVSMax:
1307 case RecurKind::FindLastIVUMax:
1308 // TODO: Set AnyOf and FindIV to Instruction::Select once in-loop reductions
1309 // are supported.
1310 default:
1311 llvm_unreachable("Unknown recurrence operation");
1312 }
1313}
1314
1315SmallVector<Instruction *, 4>
1316RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
1317 SmallVector<Instruction *, 4> ReductionOperations;
1318 const bool IsMinMax = isMinMaxRecurrenceKind(Kind);
1319
1320 // Search down from the Phi to the LoopExitInstr, looking for instructions
1321 // with a single user of the correct type for the reduction.
1322
1323 // Note that we check that the type of the operand is correct for each item in
1324 // the chain, including the last (the loop exit value). This can come up from
1325 // sub, which would otherwise be treated as an add reduction. MinMax also need
1326 // to check for a pair of icmp/select, for which we use getNextInstruction and
1327 // isCorrectOpcode functions to step the right number of instruction, and
1328 // check the icmp/select pair.
1329 // FIXME: We also do not attempt to look through Select's yet, which might
1330 // be part of the reduction chain, or attempt to looks through And's to find a
1331 // smaller bitwidth. Subs are also currently not allowed (which are usually
1332 // treated as part of a add reduction) as they are expected to generally be
1333 // more expensive than out-of-loop reductions, and need to be costed more
1334 // carefully.
1335 unsigned ExpectedUses = 1;
1336 if (IsMinMax)
1337 ExpectedUses = 2;
1338
1339 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1340 for (auto *User : Cur->users()) {
1341 Instruction *UI = cast<Instruction>(Val: User);
1342 if (isa<PHINode>(Val: UI))
1343 continue;
1344 if (IsMinMax) {
1345 // We are expecting a icmp/select pair, which we go to the next select
1346 // instruction if we can. We already know that Cur has 2 uses.
1347 if (isa<SelectInst>(Val: UI))
1348 return UI;
1349 continue;
1350 }
1351 return UI;
1352 }
1353 return nullptr;
1354 };
1355 auto isCorrectOpcode = [&](Instruction *Cur) {
1356 if (IsMinMax) {
1357 Value *LHS, *RHS;
1358 return SelectPatternResult::isMinOrMax(
1359 SPF: matchSelectPattern(V: Cur, LHS, RHS).Flavor);
1360 }
1361 // Recognize a call to the llvm.fmuladd intrinsic.
1362 if (isFMulAddIntrinsic(I: Cur))
1363 return true;
1364
1365 if (Cur->getOpcode() == Instruction::Sub &&
1366 Kind == RecurKind::AddChainWithSubs)
1367 return true;
1368
1369 return Cur->getOpcode() == getOpcode();
1370 };
1371
1372 // Attempt to look through Phis which are part of the reduction chain
1373 unsigned ExtraPhiUses = 0;
1374 Instruction *RdxInstr = LoopExitInstr;
1375 if (auto ExitPhi = dyn_cast<PHINode>(Val: LoopExitInstr)) {
1376 if (ExitPhi->getNumIncomingValues() != 2)
1377 return {};
1378
1379 Instruction *Inc0 = dyn_cast<Instruction>(Val: ExitPhi->getIncomingValue(i: 0));
1380 Instruction *Inc1 = dyn_cast<Instruction>(Val: ExitPhi->getIncomingValue(i: 1));
1381
1382 Instruction *Chain = nullptr;
1383 if (Inc0 == Phi)
1384 Chain = Inc1;
1385 else if (Inc1 == Phi)
1386 Chain = Inc0;
1387 else
1388 return {};
1389
1390 RdxInstr = Chain;
1391 ExtraPhiUses = 1;
1392 }
1393
1394 // The loop exit instruction we check first (as a quick test) but add last. We
1395 // check the opcode is correct (and dont allow them to be Subs) and that they
1396 // have expected to have the expected number of uses. They will have one use
1397 // from the phi and one from a LCSSA value, no matter the type.
1398 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(N: 2))
1399 return {};
1400
1401 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1402 // for conditional reductions.
1403 if (!Phi->hasNUses(N: ExpectedUses + ExtraPhiUses))
1404 return {};
1405
1406 Instruction *Cur = getNextInstruction(Phi);
1407
1408 // Each other instruction in the chain should have the expected number of uses
1409 // and be the correct opcode.
1410 while (Cur != RdxInstr) {
1411 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(N: ExpectedUses))
1412 return {};
1413
1414 ReductionOperations.push_back(Elt: Cur);
1415 Cur = getNextInstruction(Cur);
1416 }
1417
1418 ReductionOperations.push_back(Elt: Cur);
1419 return ReductionOperations;
1420}
1421
1422InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1423 const SCEV *Step, BinaryOperator *BOp,
1424 SmallVectorImpl<Instruction *> *Casts)
1425 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
1426 assert(IK != IK_NoInduction && "Not an induction");
1427
1428 // Start value type should match the induction kind and the value
1429 // itself should not be null.
1430 assert(StartValue && "StartValue is null");
1431 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1432 "StartValue is not a pointer for pointer induction");
1433 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1434 "StartValue is not an integer for integer induction");
1435
1436 // Check the Step Value. It should be non-zero integer value.
1437 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1438 "Step value is zero");
1439
1440 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1441 "StepValue is not an integer");
1442
1443 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1444 "StepValue is not FP for FpInduction");
1445 assert((IK != IK_FpInduction ||
1446 (InductionBinOp &&
1447 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1448 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1449 "Binary opcode should be specified for FP induction");
1450
1451 if (Casts)
1452 llvm::append_range(C&: RedundantCasts, R&: *Casts);
1453}
1454
1455ConstantInt *InductionDescriptor::getConstIntStepValue() const {
1456 if (isa<SCEVConstant>(Val: Step))
1457 return dyn_cast<ConstantInt>(Val: cast<SCEVConstant>(Val: Step)->getValue());
1458 return nullptr;
1459}
1460
1461bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
1462 ScalarEvolution *SE,
1463 InductionDescriptor &D) {
1464
1465 // Here we only handle FP induction variables.
1466 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1467
1468 if (TheLoop->getHeader() != Phi->getParent())
1469 return false;
1470
1471 // The loop may have multiple entrances or multiple exits; we can analyze
1472 // this phi if it has a unique entry value and a unique backedge value.
1473 if (Phi->getNumIncomingValues() != 2)
1474 return false;
1475 Value *BEValue = nullptr, *StartValue = nullptr;
1476 if (TheLoop->contains(BB: Phi->getIncomingBlock(i: 0))) {
1477 BEValue = Phi->getIncomingValue(i: 0);
1478 StartValue = Phi->getIncomingValue(i: 1);
1479 } else {
1480 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1481 "Unexpected Phi node in the loop");
1482 BEValue = Phi->getIncomingValue(i: 1);
1483 StartValue = Phi->getIncomingValue(i: 0);
1484 }
1485
1486 BinaryOperator *BOp = dyn_cast<BinaryOperator>(Val: BEValue);
1487 if (!BOp)
1488 return false;
1489
1490 Value *Addend = nullptr;
1491 if (BOp->getOpcode() == Instruction::FAdd) {
1492 if (BOp->getOperand(i_nocapture: 0) == Phi)
1493 Addend = BOp->getOperand(i_nocapture: 1);
1494 else if (BOp->getOperand(i_nocapture: 1) == Phi)
1495 Addend = BOp->getOperand(i_nocapture: 0);
1496 } else if (BOp->getOpcode() == Instruction::FSub)
1497 if (BOp->getOperand(i_nocapture: 0) == Phi)
1498 Addend = BOp->getOperand(i_nocapture: 1);
1499
1500 if (!Addend)
1501 return false;
1502
1503 // The addend should be loop invariant
1504 if (auto *I = dyn_cast<Instruction>(Val: Addend))
1505 if (TheLoop->contains(Inst: I))
1506 return false;
1507
1508 // FP Step has unknown SCEV
1509 const SCEV *Step = SE->getUnknown(V: Addend);
1510 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1511 return true;
1512}
1513
1514/// This function is called when we suspect that the update-chain of a phi node
1515/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1516/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1517/// predicate P under which the SCEV expression for the phi can be the
1518/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1519/// cast instructions that are involved in the update-chain of this induction.
1520/// A caller that adds the required runtime predicate can be free to drop these
1521/// cast instructions, and compute the phi using \p AR (instead of some scev
1522/// expression with casts).
1523///
1524/// For example, without a predicate the scev expression can take the following
1525/// form:
1526/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1527///
1528/// It corresponds to the following IR sequence:
1529/// %for.body:
1530/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1531/// %casted_phi = "ExtTrunc i64 %x"
1532/// %add = add i64 %casted_phi, %step
1533///
1534/// where %x is given in \p PN,
1535/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1536/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1537/// several forms, for example, such as:
1538/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1539/// or:
1540/// ExtTrunc2: %t = shl %x, m
1541/// %casted_phi = ashr %t, m
1542///
1543/// If we are able to find such sequence, we return the instructions
1544/// we found, namely %casted_phi and the instructions on its use-def chain up
1545/// to the phi (not including the phi).
1546static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
1547 const SCEVUnknown *PhiScev,
1548 const SCEVAddRecExpr *AR,
1549 SmallVectorImpl<Instruction *> &CastInsts) {
1550
1551 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1552 auto *PN = cast<PHINode>(Val: PhiScev->getValue());
1553 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1554 const Loop *L = AR->getLoop();
1555
1556 // Find any cast instructions that participate in the def-use chain of
1557 // PhiScev in the loop.
1558 // FORNOW/TODO: We currently expect the def-use chain to include only
1559 // two-operand instructions, where one of the operands is an invariant.
1560 // createAddRecFromPHIWithCasts() currently does not support anything more
1561 // involved than that, so we keep the search simple. This can be
1562 // extended/generalized as needed.
1563
1564 auto getDef = [&](const Value *Val) -> Value * {
1565 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1566 if (!BinOp)
1567 return nullptr;
1568 Value *Op0 = BinOp->getOperand(i_nocapture: 0);
1569 Value *Op1 = BinOp->getOperand(i_nocapture: 1);
1570 Value *Def = nullptr;
1571 if (L->isLoopInvariant(V: Op0))
1572 Def = Op1;
1573 else if (L->isLoopInvariant(V: Op1))
1574 Def = Op0;
1575 return Def;
1576 };
1577
1578 // Look for the instruction that defines the induction via the
1579 // loop backedge.
1580 BasicBlock *Latch = L->getLoopLatch();
1581 if (!Latch)
1582 return false;
1583 Value *Val = PN->getIncomingValueForBlock(BB: Latch);
1584 if (!Val)
1585 return false;
1586
1587 // Follow the def-use chain until the induction phi is reached.
1588 // If on the way we encounter a Value that has the same SCEV Expr as the
1589 // phi node, we can consider the instructions we visit from that point
1590 // as part of the cast-sequence that can be ignored.
1591 bool InCastSequence = false;
1592 auto *Inst = dyn_cast<Instruction>(Val);
1593 while (Val != PN) {
1594 // If we encountered a phi node other than PN, or if we left the loop,
1595 // we bail out.
1596 if (!Inst || !L->contains(Inst)) {
1597 return false;
1598 }
1599 auto *AddRec = dyn_cast<SCEVAddRecExpr>(Val: PSE.getSCEV(V: Val));
1600 if (AddRec && PSE.areAddRecsEqualWithPreds(AR1: AddRec, AR2: AR))
1601 InCastSequence = true;
1602 if (InCastSequence) {
1603 // Only the last instruction in the cast sequence is expected to have
1604 // uses outside the induction def-use chain.
1605 if (!CastInsts.empty())
1606 if (!Inst->hasOneUse())
1607 return false;
1608 CastInsts.push_back(Elt: Inst);
1609 }
1610 Val = getDef(Val);
1611 if (!Val)
1612 return false;
1613 Inst = dyn_cast<Instruction>(Val);
1614 }
1615
1616 return InCastSequence;
1617}
1618
1619bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
1620 PredicatedScalarEvolution &PSE,
1621 InductionDescriptor &D, bool Assume) {
1622 Type *PhiTy = Phi->getType();
1623
1624 // Handle integer and pointer inductions variables.
1625 // Now we handle also FP induction but not trying to make a
1626 // recurrent expression from the PHI node in-place.
1627
1628 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1629 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1630 return false;
1631
1632 if (PhiTy->isFloatingPointTy())
1633 return isFPInductionPHI(Phi, TheLoop, SE: PSE.getSE(), D);
1634
1635 const SCEV *PhiScev = PSE.getSCEV(V: Phi);
1636 const auto *AR = dyn_cast<SCEVAddRecExpr>(Val: PhiScev);
1637
1638 // We need this expression to be an AddRecExpr.
1639 if (Assume && !AR)
1640 AR = PSE.getAsAddRec(V: Phi);
1641
1642 if (!AR) {
1643 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1644 return false;
1645 }
1646
1647 // Record any Cast instructions that participate in the induction update
1648 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(Val: PhiScev);
1649 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1650 // only after enabling Assume with PSCEV, this means we may have encountered
1651 // cast instructions that required adding a runtime check in order to
1652 // guarantee the correctness of the AddRecurrence respresentation of the
1653 // induction.
1654 if (PhiScev != AR && SymbolicPhi) {
1655 SmallVector<Instruction *, 2> Casts;
1656 if (getCastsForInductionPHI(PSE, PhiScev: SymbolicPhi, AR, CastInsts&: Casts))
1657 return isInductionPHI(Phi, L: TheLoop, SE: PSE.getSE(), D, Expr: AR, CastsToIgnore: &Casts);
1658 }
1659
1660 return isInductionPHI(Phi, L: TheLoop, SE: PSE.getSE(), D, Expr: AR);
1661}
1662
1663bool InductionDescriptor::isInductionPHI(
1664 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1665 InductionDescriptor &D, const SCEV *Expr,
1666 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1667 Type *PhiTy = Phi->getType();
1668 // isSCEVable returns true for integer and pointer types.
1669 if (!SE->isSCEVable(Ty: PhiTy))
1670 return false;
1671
1672 // Check that the PHI is consecutive.
1673 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(V: Phi);
1674 const SCEV *Step;
1675
1676 // FIXME: We are currently matching the specific loop TheLoop; if it doesn't
1677 // match, we should treat it as a uniform. Unfortunately, we don't currently
1678 // know how to handled uniform PHIs.
1679 if (!match(S: PhiScev, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV(V&: Step),
1680 L: m_SpecificLoop(L: TheLoop)))) {
1681 LLVM_DEBUG(
1682 dbgs() << "LV: PHI is not a poly recurrence for requested loop.\n");
1683 return false;
1684 }
1685
1686 // This function assumes that InductionPhi is called only on Phi nodes
1687 // present inside loop headers. Check for the same, and throw an assert if
1688 // the current Phi is not present inside the loop header.
1689 assert(Phi->getParent() == TheLoop->getHeader() &&
1690 "Invalid Phi node, not present in loop header");
1691
1692 Value *StartValue =
1693 Phi->getIncomingValueForBlock(BB: TheLoop->getLoopPreheader());
1694
1695 BasicBlock *Latch = TheLoop->getLoopLatch();
1696 if (!Latch)
1697 return false;
1698
1699 if (PhiTy->isIntegerTy()) {
1700 BinaryOperator *BOp =
1701 dyn_cast<BinaryOperator>(Val: Phi->getIncomingValueForBlock(BB: Latch));
1702 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1703 CastsToIgnore);
1704 return true;
1705 }
1706
1707 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1708
1709 // This allows induction variables w/non-constant steps.
1710 D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
1711 return true;
1712}
1713