| 1 | //===- HashRecognize.cpp ----------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // The HashRecognize analysis recognizes unoptimized polynomial hash functions |
| 10 | // with operations over a Galois field of characteristic 2, also called binary |
| 11 | // fields, or GF(2^n). 2^n is termed the order of the Galois field. This class |
| 12 | // of hash functions can be optimized using a lookup-table-driven |
| 13 | // implementation, or with target-specific instructions. |
| 14 | // |
| 15 | // Examples: |
| 16 | // |
| 17 | // 1. Cyclic redundancy check (CRC), which is a polynomial division in GF(2). |
| 18 | // 2. Rabin fingerprint, a component of the Rabin-Karp algorithm, which is a |
| 19 | // rolling hash polynomial division in GF(2). |
| 20 | // 3. Rijndael MixColumns, a step in AES computation, which is a polynomial |
| 21 | // multiplication in GF(2^3). |
| 22 | // 4. GHASH, the authentication mechanism in AES Galois/Counter Mode (GCM), |
| 23 | // which is a polynomial evaluation in GF(2^128). |
| 24 | // |
| 25 | // All of them use an irreducible generating polynomial of degree m, |
| 26 | // |
| 27 | // c_m * x^m + c_(m-1) * x^(m-1) + ... + c_0 * x^0 |
| 28 | // |
| 29 | // where each coefficient c is can take values 0 or 1. The polynomial is simply |
| 30 | // represented by m+1 bits, corresponding to the coefficients. The different |
| 31 | // variants of CRC are named by degree of generating polynomial used: so CRC-32 |
| 32 | // would use a polynomial of degree 32. |
| 33 | // |
| 34 | // The reason algorithms on GF(2^n) can be optimized with a lookup-table is the |
| 35 | // following: in such fields, polynomial addition and subtraction are identical |
| 36 | // and equivalent to XOR, polynomial multiplication is an AND, and polynomial |
| 37 | // division is identity: the XOR and AND operations in unoptimized |
| 38 | // implementations are performed bit-wise, and can be optimized to be performed |
| 39 | // chunk-wise, by interleaving copies of the generating polynomial, and storing |
| 40 | // the pre-computed values in a table. |
| 41 | // |
| 42 | // A generating polynomial of m bits always has the MSB set, so we usually |
| 43 | // omit it. An example of a 16-bit polynomial is the CRC-16-CCITT polynomial: |
| 44 | // |
| 45 | // (x^16) + x^12 + x^5 + 1 = (1) 0001 0000 0010 0001 = 0x1021 |
| 46 | // |
| 47 | // Transmissions are either in big-endian or little-endian form, and hash |
| 48 | // algorithms are written according to this. For example, IEEE 802 and RS-232 |
| 49 | // specify little-endian transmission. |
| 50 | // |
| 51 | //===----------------------------------------------------------------------===// |
| 52 | // |
| 53 | // At the moment, we only recognize the CRC algorithm. |
| 54 | // Documentation on CRC32 from the kernel: |
| 55 | // https://www.kernel.org/doc/Documentation/crc32.txt |
| 56 | // |
| 57 | // |
| 58 | //===----------------------------------------------------------------------===// |
| 59 | |
| 60 | #include "llvm/Analysis/HashRecognize.h" |
| 61 | #include "llvm/ADT/APInt.h" |
| 62 | #include "llvm/Analysis/LoopAnalysisManager.h" |
| 63 | #include "llvm/Analysis/LoopInfo.h" |
| 64 | #include "llvm/Analysis/ScalarEvolution.h" |
| 65 | #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" |
| 66 | #include "llvm/Analysis/ValueTracking.h" |
| 67 | #include "llvm/IR/PatternMatch.h" |
| 68 | #include "llvm/Support/KnownBits.h" |
| 69 | |
| 70 | using namespace llvm; |
| 71 | using namespace PatternMatch; |
| 72 | using namespace SCEVPatternMatch; |
| 73 | |
| 74 | #define DEBUG_TYPE "hash-recognize" |
| 75 | |
| 76 | /// Checks if there's a stray instruction in the loop \p L outside of the |
| 77 | /// use-def chains from \p Roots, or if we escape the loop during the use-def |
| 78 | /// walk. |
| 79 | static bool containsUnreachable(const Loop &L, |
| 80 | ArrayRef<const Instruction *> Roots) { |
| 81 | SmallPtrSet<const Instruction *, 16> Visited; |
| 82 | BasicBlock *Latch = L.getLoopLatch(); |
| 83 | |
| 84 | SmallVector<const Instruction *, 16> Worklist(Roots); |
| 85 | while (!Worklist.empty()) { |
| 86 | const Instruction *I = Worklist.pop_back_val(); |
| 87 | Visited.insert(Ptr: I); |
| 88 | |
| 89 | if (isa<PHINode>(Val: I)) |
| 90 | continue; |
| 91 | |
| 92 | for (const Use &U : I->operands()) { |
| 93 | if (auto *UI = dyn_cast<Instruction>(Val: U)) { |
| 94 | if (!L.contains(Inst: UI)) |
| 95 | return true; |
| 96 | Worklist.push_back(Elt: UI); |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | return Latch->size() != Visited.size(); |
| 101 | } |
| 102 | |
| 103 | /// A structure that can hold either a Simple Recurrence or a Conditional |
| 104 | /// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand |
| 105 | /// of the BO, while in a Conditional Recurrence, it is a SelectInst. |
| 106 | struct RecurrenceInfo { |
| 107 | const Loop &L; |
| 108 | const PHINode *Phi = nullptr; |
| 109 | BinaryOperator *BO = nullptr; |
| 110 | Value *Start = nullptr; |
| 111 | Value *Step = nullptr; |
| 112 | std::optional<APInt> ; |
| 113 | |
| 114 | RecurrenceInfo(const Loop &L) : L(L) {} |
| 115 | operator bool() const { return BO; } |
| 116 | |
| 117 | void print(raw_ostream &OS, unsigned Indent = 0) const { |
| 118 | OS.indent(NumSpaces: Indent) << "Phi: " ; |
| 119 | Phi->print(O&: OS); |
| 120 | OS << "\n" ; |
| 121 | OS.indent(NumSpaces: Indent) << "BinaryOperator: " ; |
| 122 | BO->print(O&: OS); |
| 123 | OS << "\n" ; |
| 124 | OS.indent(NumSpaces: Indent) << "Start: " ; |
| 125 | Start->print(O&: OS); |
| 126 | OS << "\n" ; |
| 127 | OS.indent(NumSpaces: Indent) << "Step: " ; |
| 128 | Step->print(O&: OS); |
| 129 | OS << "\n" ; |
| 130 | if (ExtraConst) { |
| 131 | OS.indent(NumSpaces: Indent) << "ExtraConst: " ; |
| 132 | ExtraConst->print(OS, isSigned: false); |
| 133 | OS << "\n" ; |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 138 | LLVM_DUMP_METHOD void dump() const { print(dbgs()); } |
| 139 | #endif |
| 140 | |
| 141 | bool matchSimpleRecurrence(const PHINode *P); |
| 142 | bool matchConditionalRecurrence( |
| 143 | const PHINode *P, |
| 144 | Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd); |
| 145 | |
| 146 | private: |
| 147 | BinaryOperator *digRecurrence( |
| 148 | Instruction *V, |
| 149 | Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd); |
| 150 | }; |
| 151 | |
| 152 | /// Check the well-formedness of the (most|least) significant bit check given \p |
| 153 | /// ConditionalRecurrence, \p SimpleRecurrence, depending on \p |
| 154 | /// ByteOrderSwapped. We check that ConditionalRecurrence.Step is a |
| 155 | /// Select(Cmp()) where the compare is `>= 0` in the big-endian case, and `== 0` |
| 156 | /// in the little-endian case (or the inverse, in which case the branches of the |
| 157 | /// compare are swapped). We check that the LHS is (ConditionalRecurrence.Phi |
| 158 | /// [xor SimpleRecurrence.Phi]) in the big-endian case, and additionally check |
| 159 | /// for an AND with one in the little-endian case. We then check AllowedByR |
| 160 | /// against CheckAllowedByR, which is [0, smin) in the big-endian case, and is |
| 161 | /// [0, 1) in the little-endian case. CheckAllowedByR checks for |
| 162 | /// significant-bit-clear, and we match the corresponding arms of the select |
| 163 | /// against bit-shift and bit-shift-and-xor-gen-poly. |
| 164 | static bool |
| 165 | isSignificantBitCheckWellFormed(const RecurrenceInfo &ConditionalRecurrence, |
| 166 | const RecurrenceInfo &SimpleRecurrence, |
| 167 | bool ByteOrderSwapped) { |
| 168 | auto *SI = cast<SelectInst>(Val: ConditionalRecurrence.Step); |
| 169 | CmpPredicate Pred; |
| 170 | const Value *L; |
| 171 | const APInt *R; |
| 172 | Instruction *TV, *FV; |
| 173 | if (!match(V: SI, P: m_Select(C: m_ICmp(Pred, L: m_Value(V&: L), R: m_APInt(Res&: R)), |
| 174 | L: m_Instruction(I&: TV), R: m_Instruction(I&: FV)))) |
| 175 | return false; |
| 176 | |
| 177 | // Match predicate with or without a SimpleRecurrence (the corresponding data |
| 178 | // is LHSAux). |
| 179 | auto MatchPred = m_CombineOr( |
| 180 | L: m_Specific(V: ConditionalRecurrence.Phi), |
| 181 | R: m_c_Xor(L: m_ZExtOrTruncOrSelf(Op: m_Specific(V: ConditionalRecurrence.Phi)), |
| 182 | R: m_ZExtOrTruncOrSelf(Op: m_Specific(V: SimpleRecurrence.Phi)))); |
| 183 | bool LWellFormed = ByteOrderSwapped ? match(V: L, P: MatchPred) |
| 184 | : match(V: L, P: m_c_And(L: MatchPred, R: m_One())); |
| 185 | if (!LWellFormed) |
| 186 | return false; |
| 187 | |
| 188 | KnownBits KnownR = KnownBits::makeConstant(C: *R); |
| 189 | unsigned BW = KnownR.getBitWidth(); |
| 190 | auto RCR = ConstantRange::fromKnownBits(Known: KnownR, IsSigned: false); |
| 191 | auto AllowedByR = ConstantRange::makeAllowedICmpRegion(Pred, Other: RCR); |
| 192 | ConstantRange CheckAllowedByR(APInt::getZero(numBits: BW), |
| 193 | ByteOrderSwapped ? APInt::getSignedMinValue(numBits: BW) |
| 194 | : APInt(BW, 1)); |
| 195 | |
| 196 | BinaryOperator *BitShift = ConditionalRecurrence.BO; |
| 197 | if (AllowedByR == CheckAllowedByR) |
| 198 | return TV == BitShift && |
| 199 | match(V: FV, P: m_c_Xor(L: m_Specific(V: BitShift), |
| 200 | R: m_SpecificInt(V: *ConditionalRecurrence.ExtraConst))); |
| 201 | if (AllowedByR.inverse() == CheckAllowedByR) |
| 202 | return FV == BitShift && |
| 203 | match(V: TV, P: m_c_Xor(L: m_Specific(V: BitShift), |
| 204 | R: m_SpecificInt(V: *ConditionalRecurrence.ExtraConst))); |
| 205 | return false; |
| 206 | } |
| 207 | |
| 208 | /// Wraps llvm::matchSimpleRecurrence. Match a simple first order recurrence |
| 209 | /// cycle of the form: |
| 210 | /// |
| 211 | /// loop: |
| 212 | /// %rec = phi [%start, %entry], [%BO, %loop] |
| 213 | /// ... |
| 214 | /// %BO = binop %rec, %step |
| 215 | /// |
| 216 | /// or |
| 217 | /// |
| 218 | /// loop: |
| 219 | /// %rec = phi [%start, %entry], [%BO, %loop] |
| 220 | /// ... |
| 221 | /// %BO = binop %step, %rec |
| 222 | /// |
| 223 | bool RecurrenceInfo::matchSimpleRecurrence(const PHINode *P) { |
| 224 | if (llvm::matchSimpleRecurrence(P, BO, Start, Step)) { |
| 225 | Phi = P; |
| 226 | return true; |
| 227 | } |
| 228 | return false; |
| 229 | } |
| 230 | |
| 231 | /// Digs for a recurrence starting with \p V hitting the PHI node in a use-def |
| 232 | /// chain. Used by matchConditionalRecurrence. |
| 233 | BinaryOperator * |
| 234 | RecurrenceInfo::digRecurrence(Instruction *V, |
| 235 | Instruction::BinaryOps BOWithConstOpToMatch) { |
| 236 | SmallVector<Instruction *> Worklist; |
| 237 | Worklist.push_back(Elt: V); |
| 238 | while (!Worklist.empty()) { |
| 239 | Instruction *I = Worklist.pop_back_val(); |
| 240 | |
| 241 | // Don't add a PHI's operands to the Worklist. |
| 242 | if (isa<PHINode>(Val: I)) |
| 243 | continue; |
| 244 | |
| 245 | // Find a recurrence over a BinOp, by matching either of its operands |
| 246 | // with with the PHINode. |
| 247 | if (match(V: I, P: m_c_BinOp(L: m_Value(), R: m_Specific(V: Phi)))) |
| 248 | return cast<BinaryOperator>(Val: I); |
| 249 | |
| 250 | // Bind to ExtraConst, if we match exactly one. |
| 251 | if (I->getOpcode() == BOWithConstOpToMatch) { |
| 252 | if (ExtraConst) |
| 253 | return nullptr; |
| 254 | const APInt *C = nullptr; |
| 255 | if (match(V: I, P: m_c_BinOp(L: m_APInt(Res&: C), R: m_Value()))) |
| 256 | ExtraConst = *C; |
| 257 | } |
| 258 | |
| 259 | // Continue along the use-def chain. |
| 260 | for (Use &U : I->operands()) |
| 261 | if (auto *UI = dyn_cast<Instruction>(Val&: U)) |
| 262 | if (L.contains(Inst: UI)) |
| 263 | Worklist.push_back(Elt: UI); |
| 264 | } |
| 265 | return nullptr; |
| 266 | } |
| 267 | |
| 268 | /// A Conditional Recurrence is a recurrence of the form: |
| 269 | /// |
| 270 | /// loop: |
| 271 | /// %rec = phi [%start, %entry], [%step, %loop] |
| 272 | /// ... |
| 273 | /// %step = select _, %tv, %fv |
| 274 | /// |
| 275 | /// where %tv and %fv ultimately end up using %rec via the same %BO instruction, |
| 276 | /// after digging through the use-def chain. |
| 277 | /// |
| 278 | /// ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when digging |
| 279 | /// the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is matched, |
| 280 | /// and ExtraConst is a constant operand of that BinOp. This peculiarity exists, |
| 281 | /// because in a CRC algorithm, the \p BOWithConstOpToMatch is an XOR, and the |
| 282 | /// ExtraConst ends up being the generating polynomial. |
| 283 | bool RecurrenceInfo::matchConditionalRecurrence( |
| 284 | const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch) { |
| 285 | Phi = P; |
| 286 | if (Phi->getNumIncomingValues() != 2) |
| 287 | return false; |
| 288 | |
| 289 | for (unsigned Idx = 0; Idx != 2; ++Idx) { |
| 290 | Value *FoundStep = Phi->getIncomingValue(i: Idx); |
| 291 | Value *FoundStart = Phi->getIncomingValue(i: !Idx); |
| 292 | |
| 293 | Instruction *TV, *FV; |
| 294 | if (!match(V: FoundStep, |
| 295 | P: m_Select(C: m_Cmp(), L: m_Instruction(I&: TV), R: m_Instruction(I&: FV)))) |
| 296 | continue; |
| 297 | |
| 298 | // For a conditional recurrence, both the true and false values of the |
| 299 | // select must ultimately end up in the same recurrent BinOp. |
| 300 | BinaryOperator *FoundBO = digRecurrence(V: TV, BOWithConstOpToMatch); |
| 301 | BinaryOperator *AltBO = digRecurrence(V: FV, BOWithConstOpToMatch); |
| 302 | if (!FoundBO || FoundBO != AltBO) |
| 303 | return false; |
| 304 | |
| 305 | if (BOWithConstOpToMatch != Instruction::BinaryOpsEnd && !ExtraConst) { |
| 306 | LLVM_DEBUG(dbgs() << "HashRecognize: Unable to match single BinaryOp " |
| 307 | "with constant in conditional recurrence\n" ); |
| 308 | return false; |
| 309 | } |
| 310 | |
| 311 | BO = FoundBO; |
| 312 | Start = FoundStart; |
| 313 | Step = FoundStep; |
| 314 | return true; |
| 315 | } |
| 316 | return false; |
| 317 | } |
| 318 | |
| 319 | /// Iterates over all the phis in \p LoopLatch, and attempts to extract a |
| 320 | /// Conditional Recurrence and an optional Simple Recurrence. |
| 321 | static std::optional<std::pair<RecurrenceInfo, RecurrenceInfo>> |
| 322 | getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) { |
| 323 | auto Phis = LoopLatch->phis(); |
| 324 | unsigned NumPhis = std::distance(first: Phis.begin(), last: Phis.end()); |
| 325 | if (NumPhis != 2 && NumPhis != 3) |
| 326 | return {}; |
| 327 | |
| 328 | RecurrenceInfo SimpleRecurrence(L); |
| 329 | RecurrenceInfo ConditionalRecurrence(L); |
| 330 | for (PHINode &P : Phis) { |
| 331 | if (&P == IndVar) |
| 332 | continue; |
| 333 | if (!SimpleRecurrence) |
| 334 | SimpleRecurrence.matchSimpleRecurrence(P: &P); |
| 335 | if (!ConditionalRecurrence) |
| 336 | ConditionalRecurrence.matchConditionalRecurrence( |
| 337 | P: &P, BOWithConstOpToMatch: Instruction::BinaryOps::Xor); |
| 338 | } |
| 339 | if (NumPhis == 3 && (!SimpleRecurrence || !ConditionalRecurrence)) |
| 340 | return {}; |
| 341 | return std::make_pair(x&: SimpleRecurrence, y&: ConditionalRecurrence); |
| 342 | } |
| 343 | |
| 344 | PolynomialInfo::PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS, |
| 345 | Value *ComputedValue, bool ByteOrderSwapped, |
| 346 | Value *LHSAux) |
| 347 | : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue), |
| 348 | ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {} |
| 349 | |
| 350 | /// Generate a lookup table of 256 entries by interleaving the generating |
| 351 | /// polynomial. The optimization technique of table-lookup for CRC is also |
| 352 | /// called the Sarwate algorithm. |
| 353 | CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly, |
| 354 | bool ByteOrderSwapped) { |
| 355 | unsigned BW = GenPoly.getBitWidth(); |
| 356 | CRCTable Table; |
| 357 | Table[0] = APInt::getZero(numBits: BW); |
| 358 | |
| 359 | if (ByteOrderSwapped) { |
| 360 | APInt CRCInit = APInt::getSignedMinValue(numBits: BW); |
| 361 | for (unsigned I = 1; I < 256; I <<= 1) { |
| 362 | CRCInit = CRCInit.shl(shiftAmt: 1) ^ |
| 363 | (CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(numBits: BW)); |
| 364 | for (unsigned J = 0; J < I; ++J) |
| 365 | Table[I + J] = CRCInit ^ Table[J]; |
| 366 | } |
| 367 | return Table; |
| 368 | } |
| 369 | |
| 370 | APInt CRCInit(BW, 1); |
| 371 | for (unsigned I = 128; I; I >>= 1) { |
| 372 | CRCInit = CRCInit.lshr(shiftAmt: 1) ^ (CRCInit[0] ? GenPoly : APInt::getZero(numBits: BW)); |
| 373 | for (unsigned J = 0; J < 256; J += (I << 1)) |
| 374 | Table[I + J] = CRCInit ^ Table[J]; |
| 375 | } |
| 376 | return Table; |
| 377 | } |
| 378 | |
| 379 | /// Checks that \p P1 and \p P2 are used together in an XOR in the use-def chain |
| 380 | /// of \p SI's condition, ignoring any casts. The purpose of this function is to |
| 381 | /// ensure that LHSAux from the SimpleRecurrence is used correctly in the CRC |
| 382 | /// computation. |
| 383 | /// |
| 384 | /// In other words, it checks for the following pattern: |
| 385 | /// |
| 386 | /// loop: |
| 387 | /// %P1 = phi [_, %entry], [%P1.next, %loop] |
| 388 | /// %P2 = phi [_, %entry], [%P2.next, %loop] |
| 389 | /// ... |
| 390 | /// %xor = xor (CastOrSelf %P1), (CastOrSelf %P2) |
| 391 | /// |
| 392 | /// where %xor is in the use-def chain of \p SI's condition. |
| 393 | static bool isConditionalOnXorOfPHIs(const SelectInst *SI, const PHINode *P1, |
| 394 | const PHINode *P2, const Loop &L) { |
| 395 | SmallVector<const Instruction *> Worklist; |
| 396 | |
| 397 | // matchConditionalRecurrence has already ensured that the SelectInst's |
| 398 | // condition is an Instruction. |
| 399 | Worklist.push_back(Elt: cast<Instruction>(Val: SI->getCondition())); |
| 400 | |
| 401 | while (!Worklist.empty()) { |
| 402 | const Instruction *I = Worklist.pop_back_val(); |
| 403 | |
| 404 | // Don't add a PHI's operands to the Worklist. |
| 405 | if (isa<PHINode>(Val: I)) |
| 406 | continue; |
| 407 | |
| 408 | // If we match an XOR of the two PHIs ignoring casts, we're done. |
| 409 | if (match(V: I, P: m_c_Xor(L: m_ZExtOrTruncOrSelf(Op: m_Specific(V: P1)), |
| 410 | R: m_ZExtOrTruncOrSelf(Op: m_Specific(V: P2))))) |
| 411 | return true; |
| 412 | |
| 413 | // Continue along the use-def chain. |
| 414 | for (const Use &U : I->operands()) |
| 415 | if (auto *UI = dyn_cast<Instruction>(Val: U)) |
| 416 | if (L.contains(Inst: UI)) |
| 417 | Worklist.push_back(Elt: UI); |
| 418 | } |
| 419 | return false; |
| 420 | } |
| 421 | |
| 422 | // Recognizes a multiplication or division by the constant two, using SCEV. By |
| 423 | // doing this, we're immune to whether the IR expression is mul/udiv or |
| 424 | // equivalently shl/lshr. Return false when it is a UDiv, true when it is a Mul, |
| 425 | // and std::nullopt otherwise. |
| 426 | static std::optional<bool> isBigEndianBitShift(Value *V, ScalarEvolution &SE) { |
| 427 | if (!V->getType()->isIntegerTy()) |
| 428 | return {}; |
| 429 | |
| 430 | const SCEV *E = SE.getSCEV(V); |
| 431 | if (match(S: E, P: m_scev_UDiv(Op0: m_SCEV(), Op1: m_scev_SpecificInt(V: 2)))) |
| 432 | return false; |
| 433 | if (match(S: E, P: m_scev_Mul(Op0: m_scev_SpecificInt(V: 2), Op1: m_SCEV()))) |
| 434 | return true; |
| 435 | return {}; |
| 436 | } |
| 437 | |
| 438 | /// The main entry point for analyzing a loop and recognizing the CRC algorithm. |
| 439 | /// Returns a PolynomialInfo on success, and a StringRef on failure. |
| 440 | std::variant<PolynomialInfo, StringRef> HashRecognize::recognizeCRC() const { |
| 441 | if (!L.isInnermost()) |
| 442 | return "Loop is not innermost" ; |
| 443 | BasicBlock *Latch = L.getLoopLatch(); |
| 444 | BasicBlock *Exit = L.getExitBlock(); |
| 445 | const PHINode *IndVar = L.getCanonicalInductionVariable(); |
| 446 | if (!Latch || !Exit || !IndVar || L.getNumBlocks() != 1) |
| 447 | return "Loop not in canonical form" ; |
| 448 | unsigned TC = SE.getSmallConstantTripCount(L: &L); |
| 449 | if (!TC || TC % 8) |
| 450 | return "Unable to find a small constant byte-multiple trip count" ; |
| 451 | |
| 452 | auto R = getRecurrences(LoopLatch: Latch, IndVar, L); |
| 453 | if (!R) |
| 454 | return "Found stray PHI" ; |
| 455 | auto [SimpleRecurrence, ConditionalRecurrence] = *R; |
| 456 | if (!ConditionalRecurrence) |
| 457 | return "Unable to find conditional recurrence" ; |
| 458 | |
| 459 | // Make sure that all recurrences are either all SCEVMul with two or SCEVDiv |
| 460 | // with two, or in other words, that they're single bit-shifts. |
| 461 | std::optional<bool> ByteOrderSwapped = |
| 462 | isBigEndianBitShift(V: ConditionalRecurrence.BO, SE); |
| 463 | if (!ByteOrderSwapped) |
| 464 | return "Loop with non-unit bitshifts" ; |
| 465 | if (SimpleRecurrence) { |
| 466 | if (isBigEndianBitShift(V: SimpleRecurrence.BO, SE) != ByteOrderSwapped) |
| 467 | return "Loop with non-unit bitshifts" ; |
| 468 | |
| 469 | // Ensure that the PHIs have exactly two uses: |
| 470 | // the bit-shift, and the XOR (or a cast feeding into the XOR). |
| 471 | // Also ensure that the SimpleRecurrence's evolution doesn't have stray |
| 472 | // users. |
| 473 | if (!ConditionalRecurrence.Phi->hasNUses(N: 2) || |
| 474 | !SimpleRecurrence.Phi->hasNUses(N: 2) || |
| 475 | SimpleRecurrence.BO->getUniqueUndroppableUser() != SimpleRecurrence.Phi) |
| 476 | return "Recurrences have stray uses" ; |
| 477 | |
| 478 | // Check that the SelectInst ConditionalRecurrence.Step is conditional on |
| 479 | // the XOR of SimpleRecurrence.Phi and ConditionalRecurrence.Phi. |
| 480 | if (!isConditionalOnXorOfPHIs(SI: cast<SelectInst>(Val: ConditionalRecurrence.Step), |
| 481 | P1: SimpleRecurrence.Phi, |
| 482 | P2: ConditionalRecurrence.Phi, L)) |
| 483 | return "Recurrences not intertwined with XOR" ; |
| 484 | } |
| 485 | |
| 486 | // Make sure that the TC doesn't exceed the bitwidth of LHSAux, or LHS. |
| 487 | Value *LHS = ConditionalRecurrence.Start; |
| 488 | Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr; |
| 489 | if (TC > (LHSAux ? LHSAux->getType()->getIntegerBitWidth() |
| 490 | : LHS->getType()->getIntegerBitWidth())) |
| 491 | return "Loop iterations exceed bitwidth of data" ; |
| 492 | |
| 493 | // Make sure that the computed value is used in the exit block: this should be |
| 494 | // true even if it is only really used in an outer loop's exit block, since |
| 495 | // the loop is in LCSSA form. |
| 496 | auto *ComputedValue = cast<SelectInst>(Val: ConditionalRecurrence.Step); |
| 497 | if (none_of(Range: ComputedValue->users(), P: [Exit](User *U) { |
| 498 | auto *UI = dyn_cast<Instruction>(Val: U); |
| 499 | return UI && UI->getParent() == Exit; |
| 500 | })) |
| 501 | return "Unable to find use of computed value in loop exit block" ; |
| 502 | |
| 503 | assert(ConditionalRecurrence.ExtraConst && |
| 504 | "Expected ExtraConst in conditional recurrence" ); |
| 505 | const APInt &GenPoly = *ConditionalRecurrence.ExtraConst; |
| 506 | |
| 507 | if (!isSignificantBitCheckWellFormed(ConditionalRecurrence, SimpleRecurrence, |
| 508 | ByteOrderSwapped: *ByteOrderSwapped)) |
| 509 | return "Malformed significant-bit check" ; |
| 510 | |
| 511 | SmallVector<const Instruction *> Roots( |
| 512 | {ComputedValue, |
| 513 | cast<Instruction>(Val: IndVar->getIncomingValueForBlock(BB: Latch)), |
| 514 | L.getLatchCmpInst(), Latch->getTerminator()}); |
| 515 | if (SimpleRecurrence) |
| 516 | Roots.push_back(Elt: SimpleRecurrence.BO); |
| 517 | if (containsUnreachable(L, Roots)) |
| 518 | return "Found stray unvisited instructions" ; |
| 519 | |
| 520 | return PolynomialInfo(TC, LHS, GenPoly, ComputedValue, *ByteOrderSwapped, |
| 521 | LHSAux); |
| 522 | } |
| 523 | |
| 524 | void CRCTable::print(raw_ostream &OS) const { |
| 525 | for (unsigned I = 0; I < 256; I++) { |
| 526 | (*this)[I].print(OS, isSigned: false); |
| 527 | OS << (I % 16 == 15 ? '\n' : ' '); |
| 528 | } |
| 529 | } |
| 530 | |
| 531 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 532 | void CRCTable::dump() const { print(dbgs()); } |
| 533 | #endif |
| 534 | |
| 535 | void HashRecognize::print(raw_ostream &OS) const { |
| 536 | if (!L.isInnermost()) |
| 537 | return; |
| 538 | OS << "HashRecognize: Checking a loop in '" |
| 539 | << L.getHeader()->getParent()->getName() << "' from " << L.getLocStr() |
| 540 | << "\n" ; |
| 541 | auto Ret = recognizeCRC(); |
| 542 | if (!std::holds_alternative<PolynomialInfo>(v: Ret)) { |
| 543 | OS << "Did not find a hash algorithm\n" ; |
| 544 | if (std::holds_alternative<StringRef>(v: Ret)) |
| 545 | OS << "Reason: " << std::get<StringRef>(v&: Ret) << "\n" ; |
| 546 | return; |
| 547 | } |
| 548 | |
| 549 | auto Info = std::get<PolynomialInfo>(v&: Ret); |
| 550 | OS << "Found" << (Info.ByteOrderSwapped ? " big-endian " : " little-endian " ) |
| 551 | << "CRC-" << Info.RHS.getBitWidth() << " loop with trip count " |
| 552 | << Info.TripCount << "\n" ; |
| 553 | OS.indent(NumSpaces: 2) << "Initial CRC: " ; |
| 554 | Info.LHS->print(O&: OS); |
| 555 | OS << "\n" ; |
| 556 | OS.indent(NumSpaces: 2) << "Generating polynomial: " ; |
| 557 | Info.RHS.print(OS, isSigned: false); |
| 558 | OS << "\n" ; |
| 559 | OS.indent(NumSpaces: 2) << "Computed CRC: " ; |
| 560 | Info.ComputedValue->print(O&: OS); |
| 561 | OS << "\n" ; |
| 562 | if (Info.LHSAux) { |
| 563 | OS.indent(NumSpaces: 2) << "Auxiliary data: " ; |
| 564 | Info.LHSAux->print(O&: OS); |
| 565 | OS << "\n" ; |
| 566 | } |
| 567 | OS.indent(NumSpaces: 2) << "Computed CRC lookup table:\n" ; |
| 568 | genSarwateTable(GenPoly: Info.RHS, ByteOrderSwapped: Info.ByteOrderSwapped).print(OS); |
| 569 | } |
| 570 | |
| 571 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 572 | void HashRecognize::dump() const { print(dbgs()); } |
| 573 | #endif |
| 574 | |
| 575 | std::optional<PolynomialInfo> HashRecognize::getResult() const { |
| 576 | auto Res = HashRecognize(L, SE).recognizeCRC(); |
| 577 | if (std::holds_alternative<PolynomialInfo>(v: Res)) |
| 578 | return std::get<PolynomialInfo>(v&: Res); |
| 579 | return std::nullopt; |
| 580 | } |
| 581 | |
| 582 | HashRecognize::HashRecognize(const Loop &L, ScalarEvolution &SE) |
| 583 | : L(L), SE(SE) {} |
| 584 | |
| 585 | PreservedAnalyses HashRecognizePrinterPass::run(Loop &L, |
| 586 | LoopAnalysisManager &AM, |
| 587 | LoopStandardAnalysisResults &AR, |
| 588 | LPMUpdater &) { |
| 589 | HashRecognize(L, AR.SE).print(OS); |
| 590 | return PreservedAnalyses::all(); |
| 591 | } |
| 592 | |