| 1 | //===- HashRecognize.cpp ----------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // The HashRecognize analysis recognizes unoptimized polynomial hash functions |
| 10 | // with operations over a Galois field of characteristic 2, also called binary |
| 11 | // fields, or GF(2^n): this class of hash functions can be optimized using a |
| 12 | // lookup-table-driven implementation, or with target-specific instructions. |
| 13 | // Examples: |
| 14 | // |
| 15 | // 1. Cyclic redundancy check (CRC), which is a polynomial division in GF(2). |
| 16 | // 2. Rabin fingerprint, a component of the Rabin-Karp algorithm, which is a |
| 17 | // rolling hash polynomial division in GF(2). |
| 18 | // 3. Rijndael MixColumns, a step in AES computation, which is a polynomial |
| 19 | // multiplication in GF(2^3). |
| 20 | // 4. GHASH, the authentication mechanism in AES Galois/Counter Mode (GCM), |
| 21 | // which is a polynomial evaluation in GF(2^128). |
| 22 | // |
| 23 | // All of them use an irreducible generating polynomial of degree m, |
| 24 | // |
| 25 | // c_m * x^m + c_(m-1) * x^(m-1) + ... + c_0 * x^0 |
| 26 | // |
| 27 | // where each coefficient c is can take values in GF(2^n), where 2^n is termed |
| 28 | // the order of the Galois field. For GF(2), each coefficient can take values |
| 29 | // either 0 or 1, and the polynomial is simply represented by m+1 bits, |
| 30 | // corresponding to the coefficients. The different variants of CRC are named by |
| 31 | // degree of generating polynomial used: so CRC-32 would use a polynomial of |
| 32 | // degree 32. |
| 33 | // |
| 34 | // The reason algorithms on GF(2^n) can be optimized with a lookup-table is the |
| 35 | // following: in such fields, polynomial addition and subtraction are identical |
| 36 | // and equivalent to XOR, polynomial multiplication is an AND, and polynomial |
| 37 | // division is identity: the XOR and AND operations in unoptimized |
| 38 | // implementations are performed bit-wise, and can be optimized to be performed |
| 39 | // chunk-wise, by interleaving copies of the generating polynomial, and storing |
| 40 | // the pre-computed values in a table. |
| 41 | // |
| 42 | // A generating polynomial of m bits always has the MSB set, so we usually |
| 43 | // omit it. An example of a 16-bit polynomial is the CRC-16-CCITT polynomial: |
| 44 | // |
| 45 | // (x^16) + x^12 + x^5 + 1 = (1) 0001 0000 0010 0001 = 0x1021 |
| 46 | // |
| 47 | // Transmissions are either in big-endian or little-endian form, and hash |
| 48 | // algorithms are written according to this. For example, IEEE 802 and RS-232 |
| 49 | // specify little-endian transmission. |
| 50 | // |
| 51 | //===----------------------------------------------------------------------===// |
| 52 | // |
| 53 | // At the moment, we only recognize the CRC algorithm. |
| 54 | // Documentation on CRC32 from the kernel: |
| 55 | // https://www.kernel.org/doc/Documentation/crc32.txt |
| 56 | // |
| 57 | // |
| 58 | //===----------------------------------------------------------------------===// |
| 59 | |
| 60 | #include "llvm/Analysis/HashRecognize.h" |
| 61 | #include "llvm/ADT/APInt.h" |
| 62 | #include "llvm/Analysis/LoopAnalysisManager.h" |
| 63 | #include "llvm/Analysis/LoopInfo.h" |
| 64 | #include "llvm/Analysis/ScalarEvolution.h" |
| 65 | #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" |
| 66 | #include "llvm/Analysis/ValueTracking.h" |
| 67 | #include "llvm/IR/PatternMatch.h" |
| 68 | #include "llvm/Support/KnownBits.h" |
| 69 | |
| 70 | using namespace llvm; |
| 71 | using namespace PatternMatch; |
| 72 | using namespace SCEVPatternMatch; |
| 73 | |
| 74 | #define DEBUG_TYPE "hash-recognize" |
| 75 | |
| 76 | // KnownBits for a PHI node. There are at most two PHI nodes, corresponding to |
| 77 | // the Simple Recurrence and Conditional Recurrence. The IndVar PHI is not |
| 78 | // relevant. |
| 79 | using KnownPhiMap = SmallDenseMap<const PHINode *, KnownBits, 2>; |
| 80 | |
| 81 | // A pair of a PHI node along with its incoming value from within a loop. |
| 82 | using PhiStepPair = std::pair<const PHINode *, const Instruction *>; |
| 83 | |
| 84 | /// A much simpler version of ValueTracking, in that it computes KnownBits of |
| 85 | /// values, except that it computes the evolution of KnownBits in a loop with a |
| 86 | /// given trip count, and predication is specialized for a significant-bit |
| 87 | /// check. |
| 88 | class ValueEvolution { |
| 89 | const unsigned TripCount; |
| 90 | const bool ByteOrderSwapped; |
| 91 | APInt GenPoly; |
| 92 | StringRef ErrStr; |
| 93 | |
| 94 | // Compute the KnownBits of a BinaryOperator. |
| 95 | KnownBits computeBinOp(const BinaryOperator *I); |
| 96 | |
| 97 | // Compute the KnownBits of an Instruction. |
| 98 | KnownBits computeInstr(const Instruction *I); |
| 99 | |
| 100 | // Compute the KnownBits of a Value. |
| 101 | KnownBits compute(const Value *V); |
| 102 | |
| 103 | public: |
| 104 | // ValueEvolution is meant to be constructed with the TripCount of the loop, |
| 105 | // and whether the polynomial algorithm is big-endian, for the significant-bit |
| 106 | // check. |
| 107 | ValueEvolution(unsigned TripCount, bool ByteOrderSwapped); |
| 108 | |
| 109 | // Given a list of PHI nodes along with their incoming value from within the |
| 110 | // loop, computeEvolutions computes the KnownBits of each of the PHI nodes on |
| 111 | // the final iteration. Returns true on success and false on error. |
| 112 | bool computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions); |
| 113 | |
| 114 | // In case ValueEvolution encounters an error, this is meant to be used for a |
| 115 | // precise error message. |
| 116 | StringRef getError() const { return ErrStr; } |
| 117 | |
| 118 | // The computed KnownBits for each PHI node, which is populated after |
| 119 | // computeEvolutions is called. |
| 120 | KnownPhiMap KnownPhis; |
| 121 | }; |
| 122 | |
| 123 | ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped) |
| 124 | : TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {} |
| 125 | |
| 126 | KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I) { |
| 127 | KnownBits KnownL(compute(V: I->getOperand(i_nocapture: 0))); |
| 128 | KnownBits KnownR(compute(V: I->getOperand(i_nocapture: 1))); |
| 129 | |
| 130 | switch (I->getOpcode()) { |
| 131 | case Instruction::BinaryOps::And: |
| 132 | return KnownL & KnownR; |
| 133 | case Instruction::BinaryOps::Or: |
| 134 | return KnownL | KnownR; |
| 135 | case Instruction::BinaryOps::Xor: |
| 136 | return KnownL ^ KnownR; |
| 137 | case Instruction::BinaryOps::Shl: { |
| 138 | auto *OBO = cast<OverflowingBinaryOperator>(Val: I); |
| 139 | return KnownBits::shl(LHS: KnownL, RHS: KnownR, NUW: OBO->hasNoUnsignedWrap(), |
| 140 | NSW: OBO->hasNoSignedWrap()); |
| 141 | } |
| 142 | case Instruction::BinaryOps::LShr: |
| 143 | return KnownBits::lshr(LHS: KnownL, RHS: KnownR); |
| 144 | case Instruction::BinaryOps::AShr: |
| 145 | return KnownBits::ashr(LHS: KnownL, RHS: KnownR); |
| 146 | case Instruction::BinaryOps::Add: { |
| 147 | auto *OBO = cast<OverflowingBinaryOperator>(Val: I); |
| 148 | return KnownBits::add(LHS: KnownL, RHS: KnownR, NSW: OBO->hasNoUnsignedWrap(), |
| 149 | NUW: OBO->hasNoSignedWrap()); |
| 150 | } |
| 151 | case Instruction::BinaryOps::Sub: { |
| 152 | auto *OBO = cast<OverflowingBinaryOperator>(Val: I); |
| 153 | return KnownBits::sub(LHS: KnownL, RHS: KnownR, NSW: OBO->hasNoUnsignedWrap(), |
| 154 | NUW: OBO->hasNoSignedWrap()); |
| 155 | } |
| 156 | case Instruction::BinaryOps::Mul: { |
| 157 | Value *Op0 = I->getOperand(i_nocapture: 0); |
| 158 | Value *Op1 = I->getOperand(i_nocapture: 1); |
| 159 | bool SelfMultiply = Op0 == Op1 && isGuaranteedNotToBeUndef(V: Op0); |
| 160 | return KnownBits::mul(LHS: KnownL, RHS: KnownR, NoUndefSelfMultiply: SelfMultiply); |
| 161 | } |
| 162 | case Instruction::BinaryOps::UDiv: |
| 163 | return KnownBits::udiv(LHS: KnownL, RHS: KnownR); |
| 164 | case Instruction::BinaryOps::SDiv: |
| 165 | return KnownBits::sdiv(LHS: KnownL, RHS: KnownR); |
| 166 | case Instruction::BinaryOps::URem: |
| 167 | return KnownBits::urem(LHS: KnownL, RHS: KnownR); |
| 168 | case Instruction::BinaryOps::SRem: |
| 169 | return KnownBits::srem(LHS: KnownL, RHS: KnownR); |
| 170 | default: |
| 171 | ErrStr = "Unknown BinaryOperator" ; |
| 172 | unsigned BitWidth = I->getType()->getScalarSizeInBits(); |
| 173 | return {BitWidth}; |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | KnownBits ValueEvolution::computeInstr(const Instruction *I) { |
| 178 | unsigned BitWidth = I->getType()->getScalarSizeInBits(); |
| 179 | |
| 180 | // We look up in the map that contains the KnownBits of the PHI from the |
| 181 | // previous iteration. |
| 182 | if (const PHINode *P = dyn_cast<PHINode>(Val: I)) |
| 183 | return KnownPhis.lookup_or(Val: P, Default&: BitWidth); |
| 184 | |
| 185 | // Compute the KnownBits for a Select(Cmp()), forcing it to take the branch |
| 186 | // that is predicated on the (least|most)-significant-bit check. |
| 187 | CmpPredicate Pred; |
| 188 | Value *L, *R, *TV, *FV; |
| 189 | if (match(V: I, P: m_Select(C: m_ICmp(Pred, L: m_Value(V&: L), R: m_Value(V&: R)), L: m_Value(V&: TV), |
| 190 | R: m_Value(V&: FV)))) { |
| 191 | // We need to check LCR against [0, 2) in the little-endian case, because |
| 192 | // the RCR check is insufficient: it is simply [0, 1). |
| 193 | if (!ByteOrderSwapped) { |
| 194 | KnownBits KnownL = compute(V: L); |
| 195 | unsigned ICmpBW = KnownL.getBitWidth(); |
| 196 | auto LCR = ConstantRange::fromKnownBits(Known: KnownL, IsSigned: false); |
| 197 | auto CheckLCR = ConstantRange(APInt::getZero(numBits: ICmpBW), APInt(ICmpBW, 2)); |
| 198 | if (LCR != CheckLCR) { |
| 199 | ErrStr = "Bad LHS of significant-bit-check" ; |
| 200 | return {BitWidth}; |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | // Check that the predication is on (most|least) significant bit. |
| 205 | KnownBits KnownR = compute(V: R); |
| 206 | unsigned ICmpBW = KnownR.getBitWidth(); |
| 207 | auto RCR = ConstantRange::fromKnownBits(Known: KnownR, IsSigned: false); |
| 208 | auto AllowedR = ConstantRange::makeAllowedICmpRegion(Pred, Other: RCR); |
| 209 | ConstantRange CheckRCR(APInt::getZero(numBits: ICmpBW), |
| 210 | ByteOrderSwapped ? APInt::getSignedMinValue(numBits: ICmpBW) |
| 211 | : APInt(ICmpBW, 1)); |
| 212 | if (AllowedR == CheckRCR) |
| 213 | return compute(V: TV); |
| 214 | if (AllowedR.inverse() == CheckRCR) |
| 215 | return compute(V: FV); |
| 216 | |
| 217 | ErrStr = "Bad RHS of significant-bit-check" ; |
| 218 | return {BitWidth}; |
| 219 | } |
| 220 | |
| 221 | if (auto *BO = dyn_cast<BinaryOperator>(Val: I)) |
| 222 | return computeBinOp(I: BO); |
| 223 | |
| 224 | switch (I->getOpcode()) { |
| 225 | case Instruction::CastOps::Trunc: |
| 226 | return compute(V: I->getOperand(i: 0)).trunc(BitWidth); |
| 227 | case Instruction::CastOps::ZExt: |
| 228 | return compute(V: I->getOperand(i: 0)).zext(BitWidth); |
| 229 | case Instruction::CastOps::SExt: |
| 230 | return compute(V: I->getOperand(i: 0)).sext(BitWidth); |
| 231 | default: |
| 232 | ErrStr = "Unknown Instruction" ; |
| 233 | return {BitWidth}; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | KnownBits ValueEvolution::compute(const Value *V) { |
| 238 | if (auto *CI = dyn_cast<ConstantInt>(Val: V)) |
| 239 | return KnownBits::makeConstant(C: CI->getValue()); |
| 240 | |
| 241 | if (auto *I = dyn_cast<Instruction>(Val: V)) |
| 242 | return computeInstr(I); |
| 243 | |
| 244 | ErrStr = "Unknown Value" ; |
| 245 | unsigned BitWidth = V->getType()->getScalarSizeInBits(); |
| 246 | return {BitWidth}; |
| 247 | } |
| 248 | |
| 249 | bool ValueEvolution::computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions) { |
| 250 | for (unsigned I = 0; I < TripCount; ++I) |
| 251 | for (auto [Phi, Step] : PhiEvolutions) |
| 252 | KnownPhis.emplace_or_assign(Key: Phi, Args: computeInstr(I: Step)); |
| 253 | |
| 254 | return ErrStr.empty(); |
| 255 | } |
| 256 | |
| 257 | /// A structure that can hold either a Simple Recurrence or a Conditional |
| 258 | /// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand |
| 259 | /// of the BO, while in a Conditional Recurrence, it is a SelectInst. |
| 260 | struct RecurrenceInfo { |
| 261 | const Loop &L; |
| 262 | const PHINode *Phi = nullptr; |
| 263 | BinaryOperator *BO = nullptr; |
| 264 | Value *Start = nullptr; |
| 265 | Value *Step = nullptr; |
| 266 | std::optional<APInt> ; |
| 267 | |
| 268 | RecurrenceInfo(const Loop &L) : L(L) {} |
| 269 | operator bool() const { return BO; } |
| 270 | |
| 271 | void print(raw_ostream &OS, unsigned Indent = 0) const { |
| 272 | OS.indent(NumSpaces: Indent) << "Phi: " ; |
| 273 | Phi->print(O&: OS); |
| 274 | OS << "\n" ; |
| 275 | OS.indent(NumSpaces: Indent) << "BinaryOperator: " ; |
| 276 | BO->print(O&: OS); |
| 277 | OS << "\n" ; |
| 278 | OS.indent(NumSpaces: Indent) << "Start: " ; |
| 279 | Start->print(O&: OS); |
| 280 | OS << "\n" ; |
| 281 | OS.indent(NumSpaces: Indent) << "Step: " ; |
| 282 | Step->print(O&: OS); |
| 283 | OS << "\n" ; |
| 284 | if (ExtraConst) { |
| 285 | OS.indent(NumSpaces: Indent) << "ExtraConst: " ; |
| 286 | ExtraConst->print(OS, isSigned: false); |
| 287 | OS << "\n" ; |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 292 | LLVM_DUMP_METHOD void dump() const { print(dbgs()); } |
| 293 | #endif |
| 294 | |
| 295 | bool matchSimpleRecurrence(const PHINode *P); |
| 296 | bool matchConditionalRecurrence( |
| 297 | const PHINode *P, |
| 298 | Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd); |
| 299 | |
| 300 | private: |
| 301 | BinaryOperator *digRecurrence( |
| 302 | Instruction *V, |
| 303 | Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd); |
| 304 | }; |
| 305 | |
| 306 | /// Wraps llvm::matchSimpleRecurrence. Match a simple first order recurrence |
| 307 | /// cycle of the form: |
| 308 | /// |
| 309 | /// loop: |
| 310 | /// %rec = phi [%start, %entry], [%BO, %loop] |
| 311 | /// ... |
| 312 | /// %BO = binop %rec, %step |
| 313 | /// |
| 314 | /// or |
| 315 | /// |
| 316 | /// loop: |
| 317 | /// %rec = phi [%start, %entry], [%BO, %loop] |
| 318 | /// ... |
| 319 | /// %BO = binop %step, %rec |
| 320 | /// |
| 321 | bool RecurrenceInfo::matchSimpleRecurrence(const PHINode *P) { |
| 322 | Phi = P; |
| 323 | return llvm::matchSimpleRecurrence(P: Phi, BO, Start, Step); |
| 324 | } |
| 325 | |
| 326 | /// Digs for a recurrence starting with \p V hitting the PHI node in a use-def |
| 327 | /// chain. Used by matchConditionalRecurrence. |
| 328 | BinaryOperator * |
| 329 | RecurrenceInfo::digRecurrence(Instruction *V, |
| 330 | Instruction::BinaryOps BOWithConstOpToMatch) { |
| 331 | SmallVector<Instruction *> Worklist; |
| 332 | Worklist.push_back(Elt: V); |
| 333 | while (!Worklist.empty()) { |
| 334 | Instruction *I = Worklist.pop_back_val(); |
| 335 | |
| 336 | // Don't add a PHI's operands to the Worklist. |
| 337 | if (isa<PHINode>(Val: I)) |
| 338 | continue; |
| 339 | |
| 340 | // Find a recurrence over a BinOp, by matching either of its operands |
| 341 | // with with the PHINode. |
| 342 | if (match(V: I, P: m_c_BinOp(L: m_Value(), R: m_Specific(V: Phi)))) |
| 343 | return cast<BinaryOperator>(Val: I); |
| 344 | |
| 345 | // Bind to ExtraConst, if we match exactly one. |
| 346 | if (I->getOpcode() == BOWithConstOpToMatch) { |
| 347 | if (ExtraConst) |
| 348 | return nullptr; |
| 349 | const APInt *C = nullptr; |
| 350 | if (match(V: I, P: m_c_BinOp(L: m_APInt(Res&: C), R: m_Value()))) |
| 351 | ExtraConst = *C; |
| 352 | } |
| 353 | |
| 354 | // Continue along the use-def chain. |
| 355 | for (Use &U : I->operands()) |
| 356 | if (auto *UI = dyn_cast<Instruction>(Val&: U)) |
| 357 | if (L.contains(Inst: UI)) |
| 358 | Worklist.push_back(Elt: UI); |
| 359 | } |
| 360 | return nullptr; |
| 361 | } |
| 362 | |
| 363 | /// A Conditional Recurrence is a recurrence of the form: |
| 364 | /// |
| 365 | /// loop: |
| 366 | /// %rec = phi [%start, %entry], [%step, %loop] |
| 367 | /// ... |
| 368 | /// %step = select _, %tv, %fv |
| 369 | /// |
| 370 | /// where %tv and %fv ultimately end up using %rec via the same %BO instruction, |
| 371 | /// after digging through the use-def chain. |
| 372 | /// |
| 373 | /// ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when digging |
| 374 | /// the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is matched, |
| 375 | /// and ExtraConst is a constant operand of that BinOp. This peculiarity exists, |
| 376 | /// because in a CRC algorithm, the \p BOWithConstOpToMatch is an XOR, and the |
| 377 | /// ExtraConst ends up being the generating polynomial. |
| 378 | bool RecurrenceInfo::matchConditionalRecurrence( |
| 379 | const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch) { |
| 380 | Phi = P; |
| 381 | if (Phi->getNumIncomingValues() != 2) |
| 382 | return false; |
| 383 | |
| 384 | for (unsigned Idx = 0; Idx != 2; ++Idx) { |
| 385 | Value *FoundStep = Phi->getIncomingValue(i: Idx); |
| 386 | Value *FoundStart = Phi->getIncomingValue(i: !Idx); |
| 387 | |
| 388 | Instruction *TV, *FV; |
| 389 | if (!match(V: FoundStep, |
| 390 | P: m_Select(C: m_Cmp(), L: m_Instruction(I&: TV), R: m_Instruction(I&: FV)))) |
| 391 | continue; |
| 392 | |
| 393 | // For a conditional recurrence, both the true and false values of the |
| 394 | // select must ultimately end up in the same recurrent BinOp. |
| 395 | BinaryOperator *FoundBO = digRecurrence(V: TV, BOWithConstOpToMatch); |
| 396 | BinaryOperator *AltBO = digRecurrence(V: FV, BOWithConstOpToMatch); |
| 397 | if (!FoundBO || FoundBO != AltBO) |
| 398 | return false; |
| 399 | |
| 400 | if (BOWithConstOpToMatch != Instruction::BinaryOpsEnd && !ExtraConst) { |
| 401 | LLVM_DEBUG(dbgs() << "HashRecognize: Unable to match single BinaryOp " |
| 402 | "with constant in conditional recurrence\n" ); |
| 403 | return false; |
| 404 | } |
| 405 | |
| 406 | BO = FoundBO; |
| 407 | Start = FoundStart; |
| 408 | Step = FoundStep; |
| 409 | return true; |
| 410 | } |
| 411 | return false; |
| 412 | } |
| 413 | |
| 414 | /// Iterates over all the phis in \p LoopLatch, and attempts to extract a |
| 415 | /// Conditional Recurrence and an optional Simple Recurrence. |
| 416 | static std::optional<std::pair<RecurrenceInfo, RecurrenceInfo>> |
| 417 | getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) { |
| 418 | auto Phis = LoopLatch->phis(); |
| 419 | unsigned NumPhis = std::distance(first: Phis.begin(), last: Phis.end()); |
| 420 | if (NumPhis != 2 && NumPhis != 3) |
| 421 | return {}; |
| 422 | |
| 423 | RecurrenceInfo SimpleRecurrence(L); |
| 424 | RecurrenceInfo ConditionalRecurrence(L); |
| 425 | for (PHINode &P : Phis) { |
| 426 | if (&P == IndVar) |
| 427 | continue; |
| 428 | if (!SimpleRecurrence) |
| 429 | SimpleRecurrence.matchSimpleRecurrence(P: &P); |
| 430 | if (!ConditionalRecurrence) |
| 431 | ConditionalRecurrence.matchConditionalRecurrence( |
| 432 | P: &P, BOWithConstOpToMatch: Instruction::BinaryOps::Xor); |
| 433 | } |
| 434 | if (NumPhis == 3 && (!SimpleRecurrence || !ConditionalRecurrence)) |
| 435 | return {}; |
| 436 | return std::make_pair(x&: SimpleRecurrence, y&: ConditionalRecurrence); |
| 437 | } |
| 438 | |
| 439 | PolynomialInfo::PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS, |
| 440 | Value *ComputedValue, bool ByteOrderSwapped, |
| 441 | Value *LHSAux) |
| 442 | : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue), |
| 443 | ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {} |
| 444 | |
| 445 | /// In the big-endian case, checks the bottom N bits against CheckFn, and that |
| 446 | /// the rest are unknown. In the little-endian case, checks the top N bits |
| 447 | /// against CheckFn, and that the rest are unknown. Callers usually call this |
| 448 | /// function with N = TripCount, and CheckFn checking that the remainder bits of |
| 449 | /// the CRC polynomial division are zero. |
| 450 | static bool (const KnownBits &Known, unsigned N, |
| 451 | function_ref<bool(const KnownBits &)> CheckFn, |
| 452 | bool ByteOrderSwapped) { |
| 453 | // Check that the entire thing is a constant. |
| 454 | if (N == Known.getBitWidth()) |
| 455 | return CheckFn(Known.extractBits(NumBits: N, BitPosition: 0)); |
| 456 | |
| 457 | // Check that the {top, bottom} N bits are not unknown and that the {bottom, |
| 458 | // top} N bits are known. |
| 459 | unsigned BitPos = ByteOrderSwapped ? 0 : Known.getBitWidth() - N; |
| 460 | unsigned SwappedBitPos = ByteOrderSwapped ? N : 0; |
| 461 | return CheckFn(Known.extractBits(NumBits: N, BitPosition: BitPos)) && |
| 462 | Known.extractBits(NumBits: Known.getBitWidth() - N, BitPosition: SwappedBitPos).isUnknown(); |
| 463 | } |
| 464 | |
| 465 | /// Generate a lookup table of 256 entries by interleaving the generating |
| 466 | /// polynomial. The optimization technique of table-lookup for CRC is also |
| 467 | /// called the Sarwate algorithm. |
| 468 | CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly, |
| 469 | bool ByteOrderSwapped) { |
| 470 | unsigned BW = GenPoly.getBitWidth(); |
| 471 | CRCTable Table; |
| 472 | Table[0] = APInt::getZero(numBits: BW); |
| 473 | |
| 474 | if (ByteOrderSwapped) { |
| 475 | APInt CRCInit = APInt::getSignedMinValue(numBits: BW); |
| 476 | for (unsigned I = 1; I < 256; I <<= 1) { |
| 477 | CRCInit = CRCInit.shl(shiftAmt: 1) ^ |
| 478 | (CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(numBits: BW)); |
| 479 | for (unsigned J = 0; J < I; ++J) |
| 480 | Table[I + J] = CRCInit ^ Table[J]; |
| 481 | } |
| 482 | return Table; |
| 483 | } |
| 484 | |
| 485 | APInt CRCInit(BW, 1); |
| 486 | for (unsigned I = 128; I; I >>= 1) { |
| 487 | CRCInit = CRCInit.lshr(shiftAmt: 1) ^ (CRCInit[0] ? GenPoly : APInt::getZero(numBits: BW)); |
| 488 | for (unsigned J = 0; J < 256; J += (I << 1)) |
| 489 | Table[I + J] = CRCInit ^ Table[J]; |
| 490 | } |
| 491 | return Table; |
| 492 | } |
| 493 | |
| 494 | /// Checks that \p P1 and \p P2 are used together in an XOR in the use-def chain |
| 495 | /// of \p SI's condition, ignoring any casts. The purpose of this function is to |
| 496 | /// ensure that LHSAux from the SimpleRecurrence is used correctly in the CRC |
| 497 | /// computation. We cannot check the correctness of casts at this point, and |
| 498 | /// rely on the KnownBits propagation to check correctness of the CRC |
| 499 | /// computation. |
| 500 | /// |
| 501 | /// In other words, it checks for the following pattern: |
| 502 | /// |
| 503 | /// loop: |
| 504 | /// %P1 = phi [_, %entry], [%P1.next, %loop] |
| 505 | /// %P2 = phi [_, %entry], [%P2.next, %loop] |
| 506 | /// ... |
| 507 | /// %xor = xor (CastOrSelf %P1), (CastOrSelf %P2) |
| 508 | /// |
| 509 | /// where %xor is in the use-def chain of \p SI's condition. |
| 510 | static bool isConditionalOnXorOfPHIs(const SelectInst *SI, const PHINode *P1, |
| 511 | const PHINode *P2, const Loop &L) { |
| 512 | SmallVector<const Instruction *> Worklist; |
| 513 | |
| 514 | // matchConditionalRecurrence has already ensured that the SelectInst's |
| 515 | // condition is an Instruction. |
| 516 | Worklist.push_back(Elt: cast<Instruction>(Val: SI->getCondition())); |
| 517 | |
| 518 | while (!Worklist.empty()) { |
| 519 | const Instruction *I = Worklist.pop_back_val(); |
| 520 | |
| 521 | // Don't add a PHI's operands to the Worklist. |
| 522 | if (isa<PHINode>(Val: I)) |
| 523 | continue; |
| 524 | |
| 525 | // If we match an XOR of the two PHIs ignoring casts, we're done. |
| 526 | if (match(V: I, P: m_c_Xor(L: m_CastOrSelf(Op: m_Specific(V: P1)), |
| 527 | R: m_CastOrSelf(Op: m_Specific(V: P2))))) |
| 528 | return true; |
| 529 | |
| 530 | // Continue along the use-def chain. |
| 531 | for (const Use &U : I->operands()) |
| 532 | if (auto *UI = dyn_cast<Instruction>(Val: U)) |
| 533 | if (L.contains(Inst: UI)) |
| 534 | Worklist.push_back(Elt: UI); |
| 535 | } |
| 536 | return false; |
| 537 | } |
| 538 | |
| 539 | // Recognizes a multiplication or division by the constant two, using SCEV. By |
| 540 | // doing this, we're immune to whether the IR expression is mul/udiv or |
| 541 | // equivalently shl/lshr. Return false when it is a UDiv, true when it is a Mul, |
| 542 | // and std::nullopt otherwise. |
| 543 | static std::optional<bool> isBigEndianBitShift(Value *V, ScalarEvolution &SE) { |
| 544 | if (!V->getType()->isIntegerTy()) |
| 545 | return {}; |
| 546 | |
| 547 | const SCEV *E = SE.getSCEV(V); |
| 548 | if (match(S: E, P: m_scev_UDiv(Op0: m_SCEV(), Op1: m_scev_SpecificInt(V: 2)))) |
| 549 | return false; |
| 550 | if (match(S: E, P: m_scev_Mul(Op0: m_scev_SpecificInt(V: 2), Op1: m_SCEV()))) |
| 551 | return true; |
| 552 | return {}; |
| 553 | } |
| 554 | |
| 555 | /// The main entry point for analyzing a loop and recognizing the CRC algorithm. |
| 556 | /// Returns a PolynomialInfo on success, and either an ErrBits or a StringRef on |
| 557 | /// failure. |
| 558 | std::variant<PolynomialInfo, ErrBits, StringRef> |
| 559 | HashRecognize::recognizeCRC() const { |
| 560 | if (!L.isInnermost()) |
| 561 | return "Loop is not innermost" ; |
| 562 | BasicBlock *Latch = L.getLoopLatch(); |
| 563 | BasicBlock *Exit = L.getExitBlock(); |
| 564 | const PHINode *IndVar = L.getCanonicalInductionVariable(); |
| 565 | if (!Latch || !Exit || !IndVar || L.getNumBlocks() != 1) |
| 566 | return "Loop not in canonical form" ; |
| 567 | unsigned TC = SE.getSmallConstantTripCount(L: &L); |
| 568 | if (!TC || TC > 256 || TC % 8) |
| 569 | return "Unable to find a small constant byte-multiple trip count" ; |
| 570 | |
| 571 | auto R = getRecurrences(LoopLatch: Latch, IndVar, L); |
| 572 | if (!R) |
| 573 | return "Found stray PHI" ; |
| 574 | auto [SimpleRecurrence, ConditionalRecurrence] = *R; |
| 575 | if (!ConditionalRecurrence) |
| 576 | return "Unable to find conditional recurrence" ; |
| 577 | |
| 578 | // Make sure that all recurrences are either all SCEVMul with two or SCEVDiv |
| 579 | // with two, or in other words, that they're single bit-shifts. |
| 580 | std::optional<bool> ByteOrderSwapped = |
| 581 | isBigEndianBitShift(V: ConditionalRecurrence.BO, SE); |
| 582 | if (!ByteOrderSwapped) |
| 583 | return "Loop with non-unit bitshifts" ; |
| 584 | if (SimpleRecurrence) { |
| 585 | if (isBigEndianBitShift(V: SimpleRecurrence.BO, SE) != ByteOrderSwapped) |
| 586 | return "Loop with non-unit bitshifts" ; |
| 587 | |
| 588 | // Ensure that the PHIs have exactly two uses: |
| 589 | // the bit-shift, and the XOR (or a cast feeding into the XOR). |
| 590 | if (!ConditionalRecurrence.Phi->hasNUses(N: 2) || |
| 591 | !SimpleRecurrence.Phi->hasNUses(N: 2)) |
| 592 | return "Recurrences have stray uses" ; |
| 593 | |
| 594 | // Check that the SelectInst ConditionalRecurrence.Step is conditional on |
| 595 | // the XOR of SimpleRecurrence.Phi and ConditionalRecurrence.Phi. |
| 596 | if (!isConditionalOnXorOfPHIs(SI: cast<SelectInst>(Val: ConditionalRecurrence.Step), |
| 597 | P1: SimpleRecurrence.Phi, |
| 598 | P2: ConditionalRecurrence.Phi, L)) |
| 599 | return "Recurrences not intertwined with XOR" ; |
| 600 | } |
| 601 | |
| 602 | // Make sure that the TC doesn't exceed the bitwidth of LHSAux, or LHS. |
| 603 | Value *LHS = ConditionalRecurrence.Start; |
| 604 | Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr; |
| 605 | if (TC > (LHSAux ? LHSAux->getType()->getIntegerBitWidth() |
| 606 | : LHS->getType()->getIntegerBitWidth())) |
| 607 | return "Loop iterations exceed bitwidth of data" ; |
| 608 | |
| 609 | // Make sure that the computed value is used in the exit block: this should be |
| 610 | // true even if it is only really used in an outer loop's exit block, since |
| 611 | // the loop is in LCSSA form. |
| 612 | auto *ComputedValue = cast<SelectInst>(Val: ConditionalRecurrence.Step); |
| 613 | if (none_of(Range: ComputedValue->users(), P: [Exit](User *U) { |
| 614 | auto *UI = dyn_cast<Instruction>(Val: U); |
| 615 | return UI && UI->getParent() == Exit; |
| 616 | })) |
| 617 | return "Unable to find use of computed value in loop exit block" ; |
| 618 | |
| 619 | assert(ConditionalRecurrence.ExtraConst && |
| 620 | "Expected ExtraConst in conditional recurrence" ); |
| 621 | const APInt &GenPoly = *ConditionalRecurrence.ExtraConst; |
| 622 | |
| 623 | // PhiEvolutions are pairs of PHINodes along with their incoming value from |
| 624 | // within the loop, which we term as their step. Note that in the case of a |
| 625 | // Simple Recurrence, Step is an operand of the BO, while in a Conditional |
| 626 | // Recurrence, it is a SelectInst. |
| 627 | SmallVector<PhiStepPair, 2> PhiEvolutions; |
| 628 | PhiEvolutions.emplace_back(Args&: ConditionalRecurrence.Phi, Args&: ComputedValue); |
| 629 | if (SimpleRecurrence) |
| 630 | PhiEvolutions.emplace_back(Args&: SimpleRecurrence.Phi, Args&: SimpleRecurrence.BO); |
| 631 | |
| 632 | ValueEvolution VE(TC, *ByteOrderSwapped); |
| 633 | if (!VE.computeEvolutions(PhiEvolutions)) |
| 634 | return VE.getError(); |
| 635 | KnownBits ResultBits = VE.KnownPhis.at(Val: ConditionalRecurrence.Phi); |
| 636 | |
| 637 | unsigned N = std::min(a: TC, b: ResultBits.getBitWidth()); |
| 638 | auto IsZero = [](const KnownBits &K) { return K.isZero(); }; |
| 639 | if (!checkExtractBits(Known: ResultBits, N, CheckFn: IsZero, ByteOrderSwapped: *ByteOrderSwapped)) |
| 640 | return ErrBits(ResultBits, TC, *ByteOrderSwapped); |
| 641 | |
| 642 | return PolynomialInfo(TC, LHS, GenPoly, ComputedValue, *ByteOrderSwapped, |
| 643 | LHSAux); |
| 644 | } |
| 645 | |
| 646 | void CRCTable::print(raw_ostream &OS) const { |
| 647 | for (unsigned I = 0; I < 256; I++) { |
| 648 | (*this)[I].print(OS, isSigned: false); |
| 649 | OS << (I % 16 == 15 ? '\n' : ' '); |
| 650 | } |
| 651 | } |
| 652 | |
| 653 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 654 | void CRCTable::dump() const { print(dbgs()); } |
| 655 | #endif |
| 656 | |
| 657 | void HashRecognize::print(raw_ostream &OS) const { |
| 658 | if (!L.isInnermost()) |
| 659 | return; |
| 660 | OS << "HashRecognize: Checking a loop in '" |
| 661 | << L.getHeader()->getParent()->getName() << "' from " << L.getLocStr() |
| 662 | << "\n" ; |
| 663 | auto Ret = recognizeCRC(); |
| 664 | if (!std::holds_alternative<PolynomialInfo>(v: Ret)) { |
| 665 | OS << "Did not find a hash algorithm\n" ; |
| 666 | if (std::holds_alternative<StringRef>(v: Ret)) |
| 667 | OS << "Reason: " << std::get<StringRef>(v&: Ret) << "\n" ; |
| 668 | if (std::holds_alternative<ErrBits>(v: Ret)) { |
| 669 | auto [Actual, Iter, ByteOrderSwapped] = std::get<ErrBits>(v&: Ret); |
| 670 | OS << "Reason: Expected " << (ByteOrderSwapped ? "bottom " : "top " ) |
| 671 | << Iter << " bits zero (" ; |
| 672 | Actual.print(OS); |
| 673 | OS << ")\n" ; |
| 674 | } |
| 675 | return; |
| 676 | } |
| 677 | |
| 678 | auto Info = std::get<PolynomialInfo>(v&: Ret); |
| 679 | OS << "Found" << (Info.ByteOrderSwapped ? " big-endian " : " little-endian " ) |
| 680 | << "CRC-" << Info.RHS.getBitWidth() << " loop with trip count " |
| 681 | << Info.TripCount << "\n" ; |
| 682 | OS.indent(NumSpaces: 2) << "Initial CRC: " ; |
| 683 | Info.LHS->print(O&: OS); |
| 684 | OS << "\n" ; |
| 685 | OS.indent(NumSpaces: 2) << "Generating polynomial: " ; |
| 686 | Info.RHS.print(OS, isSigned: false); |
| 687 | OS << "\n" ; |
| 688 | OS.indent(NumSpaces: 2) << "Computed CRC: " ; |
| 689 | Info.ComputedValue->print(O&: OS); |
| 690 | OS << "\n" ; |
| 691 | if (Info.LHSAux) { |
| 692 | OS.indent(NumSpaces: 2) << "Auxiliary data: " ; |
| 693 | Info.LHSAux->print(O&: OS); |
| 694 | OS << "\n" ; |
| 695 | } |
| 696 | OS.indent(NumSpaces: 2) << "Computed CRC lookup table:\n" ; |
| 697 | genSarwateTable(GenPoly: Info.RHS, ByteOrderSwapped: Info.ByteOrderSwapped).print(OS); |
| 698 | } |
| 699 | |
| 700 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 701 | void HashRecognize::dump() const { print(dbgs()); } |
| 702 | #endif |
| 703 | |
| 704 | std::optional<PolynomialInfo> HashRecognize::getResult() const { |
| 705 | auto Res = HashRecognize(L, SE).recognizeCRC(); |
| 706 | if (std::holds_alternative<PolynomialInfo>(v: Res)) |
| 707 | return std::get<PolynomialInfo>(v&: Res); |
| 708 | return std::nullopt; |
| 709 | } |
| 710 | |
| 711 | HashRecognize::HashRecognize(const Loop &L, ScalarEvolution &SE) |
| 712 | : L(L), SE(SE) {} |
| 713 | |
| 714 | PreservedAnalyses HashRecognizePrinterPass::run(Loop &L, |
| 715 | LoopAnalysisManager &AM, |
| 716 | LoopStandardAnalysisResults &AR, |
| 717 | LPMUpdater &) { |
| 718 | HashRecognize(L, AR.SE).print(OS); |
| 719 | return PreservedAnalyses::all(); |
| 720 | } |
| 721 | |