1 | //===- HashRecognize.cpp ----------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The HashRecognize analysis recognizes unoptimized polynomial hash functions |
10 | // with operations over a Galois field of characteristic 2, also called binary |
11 | // fields, or GF(2^n): this class of hash functions can be optimized using a |
12 | // lookup-table-driven implementation, or with target-specific instructions. |
13 | // Examples: |
14 | // |
15 | // 1. Cyclic redundancy check (CRC), which is a polynomial division in GF(2). |
16 | // 2. Rabin fingerprint, a component of the Rabin-Karp algorithm, which is a |
17 | // rolling hash polynomial division in GF(2). |
18 | // 3. Rijndael MixColumns, a step in AES computation, which is a polynomial |
19 | // multiplication in GF(2^3). |
20 | // 4. GHASH, the authentication mechanism in AES Galois/Counter Mode (GCM), |
21 | // which is a polynomial evaluation in GF(2^128). |
22 | // |
23 | // All of them use an irreducible generating polynomial of degree m, |
24 | // |
25 | // c_m * x^m + c_(m-1) * x^(m-1) + ... + c_0 * x^0 |
26 | // |
27 | // where each coefficient c is can take values in GF(2^n), where 2^n is termed |
28 | // the order of the Galois field. For GF(2), each coefficient can take values |
29 | // either 0 or 1, and the polynomial is simply represented by m+1 bits, |
30 | // corresponding to the coefficients. The different variants of CRC are named by |
31 | // degree of generating polynomial used: so CRC-32 would use a polynomial of |
32 | // degree 32. |
33 | // |
34 | // The reason algorithms on GF(2^n) can be optimized with a lookup-table is the |
35 | // following: in such fields, polynomial addition and subtraction are identical |
36 | // and equivalent to XOR, polynomial multiplication is an AND, and polynomial |
37 | // division is identity: the XOR and AND operations in unoptimized |
38 | // implementations are performed bit-wise, and can be optimized to be performed |
39 | // chunk-wise, by interleaving copies of the generating polynomial, and storing |
40 | // the pre-computed values in a table. |
41 | // |
42 | // A generating polynomial of m bits always has the MSB set, so we usually |
43 | // omit it. An example of a 16-bit polynomial is the CRC-16-CCITT polynomial: |
44 | // |
45 | // (x^16) + x^12 + x^5 + 1 = (1) 0001 0000 0010 0001 = 0x1021 |
46 | // |
47 | // Transmissions are either in big-endian or little-endian form, and hash |
48 | // algorithms are written according to this. For example, IEEE 802 and RS-232 |
49 | // specify little-endian transmission. |
50 | // |
51 | //===----------------------------------------------------------------------===// |
52 | // |
53 | // At the moment, we only recognize the CRC algorithm. |
54 | // Documentation on CRC32 from the kernel: |
55 | // https://www.kernel.org/doc/Documentation/crc32.txt |
56 | // |
57 | // |
58 | //===----------------------------------------------------------------------===// |
59 | |
60 | #include "llvm/Analysis/HashRecognize.h" |
61 | #include "llvm/ADT/APInt.h" |
62 | #include "llvm/Analysis/LoopAnalysisManager.h" |
63 | #include "llvm/Analysis/LoopInfo.h" |
64 | #include "llvm/Analysis/ScalarEvolution.h" |
65 | #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" |
66 | #include "llvm/Analysis/ValueTracking.h" |
67 | #include "llvm/IR/PatternMatch.h" |
68 | #include "llvm/Support/KnownBits.h" |
69 | |
70 | using namespace llvm; |
71 | using namespace PatternMatch; |
72 | using namespace SCEVPatternMatch; |
73 | |
74 | #define DEBUG_TYPE "hash-recognize" |
75 | |
76 | // KnownBits for a PHI node. There are at most two PHI nodes, corresponding to |
77 | // the Simple Recurrence and Conditional Recurrence. The IndVar PHI is not |
78 | // relevant. |
79 | using KnownPhiMap = SmallDenseMap<const PHINode *, KnownBits, 2>; |
80 | |
81 | // A pair of a PHI node along with its incoming value from within a loop. |
82 | using PhiStepPair = std::pair<const PHINode *, const Instruction *>; |
83 | |
84 | /// A much simpler version of ValueTracking, in that it computes KnownBits of |
85 | /// values, except that it computes the evolution of KnownBits in a loop with a |
86 | /// given trip count, and predication is specialized for a significant-bit |
87 | /// check. |
88 | class ValueEvolution { |
89 | const unsigned TripCount; |
90 | const bool ByteOrderSwapped; |
91 | APInt GenPoly; |
92 | StringRef ErrStr; |
93 | |
94 | // Compute the KnownBits of a BinaryOperator. |
95 | KnownBits computeBinOp(const BinaryOperator *I); |
96 | |
97 | // Compute the KnownBits of an Instruction. |
98 | KnownBits computeInstr(const Instruction *I); |
99 | |
100 | // Compute the KnownBits of a Value. |
101 | KnownBits compute(const Value *V); |
102 | |
103 | public: |
104 | // ValueEvolution is meant to be constructed with the TripCount of the loop, |
105 | // and whether the polynomial algorithm is big-endian, for the significant-bit |
106 | // check. |
107 | ValueEvolution(unsigned TripCount, bool ByteOrderSwapped); |
108 | |
109 | // Given a list of PHI nodes along with their incoming value from within the |
110 | // loop, computeEvolutions computes the KnownBits of each of the PHI nodes on |
111 | // the final iteration. Returns true on success and false on error. |
112 | bool computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions); |
113 | |
114 | // In case ValueEvolution encounters an error, this is meant to be used for a |
115 | // precise error message. |
116 | StringRef getError() const { return ErrStr; } |
117 | |
118 | // The computed KnownBits for each PHI node, which is populated after |
119 | // computeEvolutions is called. |
120 | KnownPhiMap KnownPhis; |
121 | }; |
122 | |
123 | ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped) |
124 | : TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {} |
125 | |
126 | KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I) { |
127 | KnownBits KnownL(compute(V: I->getOperand(i_nocapture: 0))); |
128 | KnownBits KnownR(compute(V: I->getOperand(i_nocapture: 1))); |
129 | |
130 | switch (I->getOpcode()) { |
131 | case Instruction::BinaryOps::And: |
132 | return KnownL & KnownR; |
133 | case Instruction::BinaryOps::Or: |
134 | return KnownL | KnownR; |
135 | case Instruction::BinaryOps::Xor: |
136 | return KnownL ^ KnownR; |
137 | case Instruction::BinaryOps::Shl: { |
138 | auto *OBO = cast<OverflowingBinaryOperator>(Val: I); |
139 | return KnownBits::shl(LHS: KnownL, RHS: KnownR, NUW: OBO->hasNoUnsignedWrap(), |
140 | NSW: OBO->hasNoSignedWrap()); |
141 | } |
142 | case Instruction::BinaryOps::LShr: |
143 | return KnownBits::lshr(LHS: KnownL, RHS: KnownR); |
144 | case Instruction::BinaryOps::AShr: |
145 | return KnownBits::ashr(LHS: KnownL, RHS: KnownR); |
146 | case Instruction::BinaryOps::Add: { |
147 | auto *OBO = cast<OverflowingBinaryOperator>(Val: I); |
148 | return KnownBits::add(LHS: KnownL, RHS: KnownR, NSW: OBO->hasNoUnsignedWrap(), |
149 | NUW: OBO->hasNoSignedWrap()); |
150 | } |
151 | case Instruction::BinaryOps::Sub: { |
152 | auto *OBO = cast<OverflowingBinaryOperator>(Val: I); |
153 | return KnownBits::sub(LHS: KnownL, RHS: KnownR, NSW: OBO->hasNoUnsignedWrap(), |
154 | NUW: OBO->hasNoSignedWrap()); |
155 | } |
156 | case Instruction::BinaryOps::Mul: { |
157 | Value *Op0 = I->getOperand(i_nocapture: 0); |
158 | Value *Op1 = I->getOperand(i_nocapture: 1); |
159 | bool SelfMultiply = Op0 == Op1 && isGuaranteedNotToBeUndef(V: Op0); |
160 | return KnownBits::mul(LHS: KnownL, RHS: KnownR, NoUndefSelfMultiply: SelfMultiply); |
161 | } |
162 | case Instruction::BinaryOps::UDiv: |
163 | return KnownBits::udiv(LHS: KnownL, RHS: KnownR); |
164 | case Instruction::BinaryOps::SDiv: |
165 | return KnownBits::sdiv(LHS: KnownL, RHS: KnownR); |
166 | case Instruction::BinaryOps::URem: |
167 | return KnownBits::urem(LHS: KnownL, RHS: KnownR); |
168 | case Instruction::BinaryOps::SRem: |
169 | return KnownBits::srem(LHS: KnownL, RHS: KnownR); |
170 | default: |
171 | ErrStr = "Unknown BinaryOperator" ; |
172 | unsigned BitWidth = I->getType()->getScalarSizeInBits(); |
173 | return {BitWidth}; |
174 | } |
175 | } |
176 | |
177 | KnownBits ValueEvolution::computeInstr(const Instruction *I) { |
178 | unsigned BitWidth = I->getType()->getScalarSizeInBits(); |
179 | |
180 | // We look up in the map that contains the KnownBits of the PHI from the |
181 | // previous iteration. |
182 | if (const PHINode *P = dyn_cast<PHINode>(Val: I)) |
183 | return KnownPhis.lookup_or(Val: P, Default&: BitWidth); |
184 | |
185 | // Compute the KnownBits for a Select(Cmp()), forcing it to take the branch |
186 | // that is predicated on the (least|most)-significant-bit check. |
187 | CmpPredicate Pred; |
188 | Value *L, *R, *TV, *FV; |
189 | if (match(V: I, P: m_Select(C: m_ICmp(Pred, L: m_Value(V&: L), R: m_Value(V&: R)), L: m_Value(V&: TV), |
190 | R: m_Value(V&: FV)))) { |
191 | // We need to check LCR against [0, 2) in the little-endian case, because |
192 | // the RCR check is insufficient: it is simply [0, 1). |
193 | if (!ByteOrderSwapped) { |
194 | KnownBits KnownL = compute(V: L); |
195 | unsigned ICmpBW = KnownL.getBitWidth(); |
196 | auto LCR = ConstantRange::fromKnownBits(Known: KnownL, IsSigned: false); |
197 | auto CheckLCR = ConstantRange(APInt::getZero(numBits: ICmpBW), APInt(ICmpBW, 2)); |
198 | if (LCR != CheckLCR) { |
199 | ErrStr = "Bad LHS of significant-bit-check" ; |
200 | return {BitWidth}; |
201 | } |
202 | } |
203 | |
204 | // Check that the predication is on (most|least) significant bit. |
205 | KnownBits KnownR = compute(V: R); |
206 | unsigned ICmpBW = KnownR.getBitWidth(); |
207 | auto RCR = ConstantRange::fromKnownBits(Known: KnownR, IsSigned: false); |
208 | auto AllowedR = ConstantRange::makeAllowedICmpRegion(Pred, Other: RCR); |
209 | ConstantRange CheckRCR(APInt::getZero(numBits: ICmpBW), |
210 | ByteOrderSwapped ? APInt::getSignedMinValue(numBits: ICmpBW) |
211 | : APInt(ICmpBW, 1)); |
212 | if (AllowedR == CheckRCR) |
213 | return compute(V: TV); |
214 | if (AllowedR.inverse() == CheckRCR) |
215 | return compute(V: FV); |
216 | |
217 | ErrStr = "Bad RHS of significant-bit-check" ; |
218 | return {BitWidth}; |
219 | } |
220 | |
221 | if (auto *BO = dyn_cast<BinaryOperator>(Val: I)) |
222 | return computeBinOp(I: BO); |
223 | |
224 | switch (I->getOpcode()) { |
225 | case Instruction::CastOps::Trunc: |
226 | return compute(V: I->getOperand(i: 0)).trunc(BitWidth); |
227 | case Instruction::CastOps::ZExt: |
228 | return compute(V: I->getOperand(i: 0)).zext(BitWidth); |
229 | case Instruction::CastOps::SExt: |
230 | return compute(V: I->getOperand(i: 0)).sext(BitWidth); |
231 | default: |
232 | ErrStr = "Unknown Instruction" ; |
233 | return {BitWidth}; |
234 | } |
235 | } |
236 | |
237 | KnownBits ValueEvolution::compute(const Value *V) { |
238 | if (auto *CI = dyn_cast<ConstantInt>(Val: V)) |
239 | return KnownBits::makeConstant(C: CI->getValue()); |
240 | |
241 | if (auto *I = dyn_cast<Instruction>(Val: V)) |
242 | return computeInstr(I); |
243 | |
244 | ErrStr = "Unknown Value" ; |
245 | unsigned BitWidth = V->getType()->getScalarSizeInBits(); |
246 | return {BitWidth}; |
247 | } |
248 | |
249 | bool ValueEvolution::computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions) { |
250 | for (unsigned I = 0; I < TripCount; ++I) |
251 | for (auto [Phi, Step] : PhiEvolutions) |
252 | KnownPhis.emplace_or_assign(Key: Phi, Args: computeInstr(I: Step)); |
253 | |
254 | return ErrStr.empty(); |
255 | } |
256 | |
257 | /// A structure that can hold either a Simple Recurrence or a Conditional |
258 | /// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand |
259 | /// of the BO, while in a Conditional Recurrence, it is a SelectInst. |
260 | struct RecurrenceInfo { |
261 | const Loop &L; |
262 | const PHINode *Phi = nullptr; |
263 | BinaryOperator *BO = nullptr; |
264 | Value *Start = nullptr; |
265 | Value *Step = nullptr; |
266 | std::optional<APInt> ; |
267 | |
268 | RecurrenceInfo(const Loop &L) : L(L) {} |
269 | operator bool() const { return BO; } |
270 | |
271 | void print(raw_ostream &OS, unsigned Indent = 0) const { |
272 | OS.indent(NumSpaces: Indent) << "Phi: " ; |
273 | Phi->print(O&: OS); |
274 | OS << "\n" ; |
275 | OS.indent(NumSpaces: Indent) << "BinaryOperator: " ; |
276 | BO->print(O&: OS); |
277 | OS << "\n" ; |
278 | OS.indent(NumSpaces: Indent) << "Start: " ; |
279 | Start->print(O&: OS); |
280 | OS << "\n" ; |
281 | OS.indent(NumSpaces: Indent) << "Step: " ; |
282 | Step->print(O&: OS); |
283 | OS << "\n" ; |
284 | if (ExtraConst) { |
285 | OS.indent(NumSpaces: Indent) << "ExtraConst: " ; |
286 | ExtraConst->print(OS, isSigned: false); |
287 | OS << "\n" ; |
288 | } |
289 | } |
290 | |
291 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
292 | LLVM_DUMP_METHOD void dump() const { print(dbgs()); } |
293 | #endif |
294 | |
295 | bool matchSimpleRecurrence(const PHINode *P); |
296 | bool matchConditionalRecurrence( |
297 | const PHINode *P, |
298 | Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd); |
299 | |
300 | private: |
301 | BinaryOperator *digRecurrence( |
302 | Instruction *V, |
303 | Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd); |
304 | }; |
305 | |
306 | /// Wraps llvm::matchSimpleRecurrence. Match a simple first order recurrence |
307 | /// cycle of the form: |
308 | /// |
309 | /// loop: |
310 | /// %rec = phi [%start, %entry], [%BO, %loop] |
311 | /// ... |
312 | /// %BO = binop %rec, %step |
313 | /// |
314 | /// or |
315 | /// |
316 | /// loop: |
317 | /// %rec = phi [%start, %entry], [%BO, %loop] |
318 | /// ... |
319 | /// %BO = binop %step, %rec |
320 | /// |
321 | bool RecurrenceInfo::matchSimpleRecurrence(const PHINode *P) { |
322 | Phi = P; |
323 | return llvm::matchSimpleRecurrence(P: Phi, BO, Start, Step); |
324 | } |
325 | |
326 | /// Digs for a recurrence starting with \p V hitting the PHI node in a use-def |
327 | /// chain. Used by matchConditionalRecurrence. |
328 | BinaryOperator * |
329 | RecurrenceInfo::digRecurrence(Instruction *V, |
330 | Instruction::BinaryOps BOWithConstOpToMatch) { |
331 | SmallVector<Instruction *> Worklist; |
332 | Worklist.push_back(Elt: V); |
333 | while (!Worklist.empty()) { |
334 | Instruction *I = Worklist.pop_back_val(); |
335 | |
336 | // Don't add a PHI's operands to the Worklist. |
337 | if (isa<PHINode>(Val: I)) |
338 | continue; |
339 | |
340 | // Find a recurrence over a BinOp, by matching either of its operands |
341 | // with with the PHINode. |
342 | if (match(V: I, P: m_c_BinOp(L: m_Value(), R: m_Specific(V: Phi)))) |
343 | return cast<BinaryOperator>(Val: I); |
344 | |
345 | // Bind to ExtraConst, if we match exactly one. |
346 | if (I->getOpcode() == BOWithConstOpToMatch) { |
347 | if (ExtraConst) |
348 | return nullptr; |
349 | const APInt *C = nullptr; |
350 | if (match(V: I, P: m_c_BinOp(L: m_APInt(Res&: C), R: m_Value()))) |
351 | ExtraConst = *C; |
352 | } |
353 | |
354 | // Continue along the use-def chain. |
355 | for (Use &U : I->operands()) |
356 | if (auto *UI = dyn_cast<Instruction>(Val&: U)) |
357 | if (L.contains(Inst: UI)) |
358 | Worklist.push_back(Elt: UI); |
359 | } |
360 | return nullptr; |
361 | } |
362 | |
363 | /// A Conditional Recurrence is a recurrence of the form: |
364 | /// |
365 | /// loop: |
366 | /// %rec = phi [%start, %entry], [%step, %loop] |
367 | /// ... |
368 | /// %step = select _, %tv, %fv |
369 | /// |
370 | /// where %tv and %fv ultimately end up using %rec via the same %BO instruction, |
371 | /// after digging through the use-def chain. |
372 | /// |
373 | /// ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when digging |
374 | /// the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is matched, |
375 | /// and ExtraConst is a constant operand of that BinOp. This peculiarity exists, |
376 | /// because in a CRC algorithm, the \p BOWithConstOpToMatch is an XOR, and the |
377 | /// ExtraConst ends up being the generating polynomial. |
378 | bool RecurrenceInfo::matchConditionalRecurrence( |
379 | const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch) { |
380 | Phi = P; |
381 | if (Phi->getNumIncomingValues() != 2) |
382 | return false; |
383 | |
384 | for (unsigned Idx = 0; Idx != 2; ++Idx) { |
385 | Value *FoundStep = Phi->getIncomingValue(i: Idx); |
386 | Value *FoundStart = Phi->getIncomingValue(i: !Idx); |
387 | |
388 | Instruction *TV, *FV; |
389 | if (!match(V: FoundStep, |
390 | P: m_Select(C: m_Cmp(), L: m_Instruction(I&: TV), R: m_Instruction(I&: FV)))) |
391 | continue; |
392 | |
393 | // For a conditional recurrence, both the true and false values of the |
394 | // select must ultimately end up in the same recurrent BinOp. |
395 | BinaryOperator *FoundBO = digRecurrence(V: TV, BOWithConstOpToMatch); |
396 | BinaryOperator *AltBO = digRecurrence(V: FV, BOWithConstOpToMatch); |
397 | if (!FoundBO || FoundBO != AltBO) |
398 | return false; |
399 | |
400 | if (BOWithConstOpToMatch != Instruction::BinaryOpsEnd && !ExtraConst) { |
401 | LLVM_DEBUG(dbgs() << "HashRecognize: Unable to match single BinaryOp " |
402 | "with constant in conditional recurrence\n" ); |
403 | return false; |
404 | } |
405 | |
406 | BO = FoundBO; |
407 | Start = FoundStart; |
408 | Step = FoundStep; |
409 | return true; |
410 | } |
411 | return false; |
412 | } |
413 | |
414 | /// Iterates over all the phis in \p LoopLatch, and attempts to extract a |
415 | /// Conditional Recurrence and an optional Simple Recurrence. |
416 | static std::optional<std::pair<RecurrenceInfo, RecurrenceInfo>> |
417 | getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) { |
418 | auto Phis = LoopLatch->phis(); |
419 | unsigned NumPhis = std::distance(first: Phis.begin(), last: Phis.end()); |
420 | if (NumPhis != 2 && NumPhis != 3) |
421 | return {}; |
422 | |
423 | RecurrenceInfo SimpleRecurrence(L); |
424 | RecurrenceInfo ConditionalRecurrence(L); |
425 | for (PHINode &P : Phis) { |
426 | if (&P == IndVar) |
427 | continue; |
428 | if (!SimpleRecurrence) |
429 | SimpleRecurrence.matchSimpleRecurrence(P: &P); |
430 | if (!ConditionalRecurrence) |
431 | ConditionalRecurrence.matchConditionalRecurrence( |
432 | P: &P, BOWithConstOpToMatch: Instruction::BinaryOps::Xor); |
433 | } |
434 | if (NumPhis == 3 && (!SimpleRecurrence || !ConditionalRecurrence)) |
435 | return {}; |
436 | return std::make_pair(x&: SimpleRecurrence, y&: ConditionalRecurrence); |
437 | } |
438 | |
439 | PolynomialInfo::PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS, |
440 | Value *ComputedValue, bool ByteOrderSwapped, |
441 | Value *LHSAux) |
442 | : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue), |
443 | ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {} |
444 | |
445 | /// In the big-endian case, checks the bottom N bits against CheckFn, and that |
446 | /// the rest are unknown. In the little-endian case, checks the top N bits |
447 | /// against CheckFn, and that the rest are unknown. Callers usually call this |
448 | /// function with N = TripCount, and CheckFn checking that the remainder bits of |
449 | /// the CRC polynomial division are zero. |
450 | static bool (const KnownBits &Known, unsigned N, |
451 | function_ref<bool(const KnownBits &)> CheckFn, |
452 | bool ByteOrderSwapped) { |
453 | // Check that the entire thing is a constant. |
454 | if (N == Known.getBitWidth()) |
455 | return CheckFn(Known.extractBits(NumBits: N, BitPosition: 0)); |
456 | |
457 | // Check that the {top, bottom} N bits are not unknown and that the {bottom, |
458 | // top} N bits are known. |
459 | unsigned BitPos = ByteOrderSwapped ? 0 : Known.getBitWidth() - N; |
460 | unsigned SwappedBitPos = ByteOrderSwapped ? N : 0; |
461 | return CheckFn(Known.extractBits(NumBits: N, BitPosition: BitPos)) && |
462 | Known.extractBits(NumBits: Known.getBitWidth() - N, BitPosition: SwappedBitPos).isUnknown(); |
463 | } |
464 | |
465 | /// Generate a lookup table of 256 entries by interleaving the generating |
466 | /// polynomial. The optimization technique of table-lookup for CRC is also |
467 | /// called the Sarwate algorithm. |
468 | CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly, |
469 | bool ByteOrderSwapped) { |
470 | unsigned BW = GenPoly.getBitWidth(); |
471 | CRCTable Table; |
472 | Table[0] = APInt::getZero(numBits: BW); |
473 | |
474 | if (ByteOrderSwapped) { |
475 | APInt CRCInit = APInt::getSignedMinValue(numBits: BW); |
476 | for (unsigned I = 1; I < 256; I <<= 1) { |
477 | CRCInit = CRCInit.shl(shiftAmt: 1) ^ |
478 | (CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(numBits: BW)); |
479 | for (unsigned J = 0; J < I; ++J) |
480 | Table[I + J] = CRCInit ^ Table[J]; |
481 | } |
482 | return Table; |
483 | } |
484 | |
485 | APInt CRCInit(BW, 1); |
486 | for (unsigned I = 128; I; I >>= 1) { |
487 | CRCInit = CRCInit.lshr(shiftAmt: 1) ^ (CRCInit[0] ? GenPoly : APInt::getZero(numBits: BW)); |
488 | for (unsigned J = 0; J < 256; J += (I << 1)) |
489 | Table[I + J] = CRCInit ^ Table[J]; |
490 | } |
491 | return Table; |
492 | } |
493 | |
494 | /// Checks that \p P1 and \p P2 are used together in an XOR in the use-def chain |
495 | /// of \p SI's condition, ignoring any casts. The purpose of this function is to |
496 | /// ensure that LHSAux from the SimpleRecurrence is used correctly in the CRC |
497 | /// computation. We cannot check the correctness of casts at this point, and |
498 | /// rely on the KnownBits propagation to check correctness of the CRC |
499 | /// computation. |
500 | /// |
501 | /// In other words, it checks for the following pattern: |
502 | /// |
503 | /// loop: |
504 | /// %P1 = phi [_, %entry], [%P1.next, %loop] |
505 | /// %P2 = phi [_, %entry], [%P2.next, %loop] |
506 | /// ... |
507 | /// %xor = xor (CastOrSelf %P1), (CastOrSelf %P2) |
508 | /// |
509 | /// where %xor is in the use-def chain of \p SI's condition. |
510 | static bool isConditionalOnXorOfPHIs(const SelectInst *SI, const PHINode *P1, |
511 | const PHINode *P2, const Loop &L) { |
512 | SmallVector<const Instruction *> Worklist; |
513 | |
514 | // matchConditionalRecurrence has already ensured that the SelectInst's |
515 | // condition is an Instruction. |
516 | Worklist.push_back(Elt: cast<Instruction>(Val: SI->getCondition())); |
517 | |
518 | while (!Worklist.empty()) { |
519 | const Instruction *I = Worklist.pop_back_val(); |
520 | |
521 | // Don't add a PHI's operands to the Worklist. |
522 | if (isa<PHINode>(Val: I)) |
523 | continue; |
524 | |
525 | // If we match an XOR of the two PHIs ignoring casts, we're done. |
526 | if (match(V: I, P: m_c_Xor(L: m_CastOrSelf(Op: m_Specific(V: P1)), |
527 | R: m_CastOrSelf(Op: m_Specific(V: P2))))) |
528 | return true; |
529 | |
530 | // Continue along the use-def chain. |
531 | for (const Use &U : I->operands()) |
532 | if (auto *UI = dyn_cast<Instruction>(Val: U)) |
533 | if (L.contains(Inst: UI)) |
534 | Worklist.push_back(Elt: UI); |
535 | } |
536 | return false; |
537 | } |
538 | |
539 | // Recognizes a multiplication or division by the constant two, using SCEV. By |
540 | // doing this, we're immune to whether the IR expression is mul/udiv or |
541 | // equivalently shl/lshr. Return false when it is a UDiv, true when it is a Mul, |
542 | // and std::nullopt otherwise. |
543 | static std::optional<bool> isBigEndianBitShift(Value *V, ScalarEvolution &SE) { |
544 | if (!V->getType()->isIntegerTy()) |
545 | return {}; |
546 | |
547 | const SCEV *E = SE.getSCEV(V); |
548 | if (match(S: E, P: m_scev_UDiv(Op0: m_SCEV(), Op1: m_scev_SpecificInt(V: 2)))) |
549 | return false; |
550 | if (match(S: E, P: m_scev_Mul(Op0: m_scev_SpecificInt(V: 2), Op1: m_SCEV()))) |
551 | return true; |
552 | return {}; |
553 | } |
554 | |
555 | /// The main entry point for analyzing a loop and recognizing the CRC algorithm. |
556 | /// Returns a PolynomialInfo on success, and either an ErrBits or a StringRef on |
557 | /// failure. |
558 | std::variant<PolynomialInfo, ErrBits, StringRef> |
559 | HashRecognize::recognizeCRC() const { |
560 | if (!L.isInnermost()) |
561 | return "Loop is not innermost" ; |
562 | BasicBlock *Latch = L.getLoopLatch(); |
563 | BasicBlock *Exit = L.getExitBlock(); |
564 | const PHINode *IndVar = L.getCanonicalInductionVariable(); |
565 | if (!Latch || !Exit || !IndVar || L.getNumBlocks() != 1) |
566 | return "Loop not in canonical form" ; |
567 | unsigned TC = SE.getSmallConstantTripCount(L: &L); |
568 | if (!TC || TC > 256 || TC % 8) |
569 | return "Unable to find a small constant byte-multiple trip count" ; |
570 | |
571 | auto R = getRecurrences(LoopLatch: Latch, IndVar, L); |
572 | if (!R) |
573 | return "Found stray PHI" ; |
574 | auto [SimpleRecurrence, ConditionalRecurrence] = *R; |
575 | if (!ConditionalRecurrence) |
576 | return "Unable to find conditional recurrence" ; |
577 | |
578 | // Make sure that all recurrences are either all SCEVMul with two or SCEVDiv |
579 | // with two, or in other words, that they're single bit-shifts. |
580 | std::optional<bool> ByteOrderSwapped = |
581 | isBigEndianBitShift(V: ConditionalRecurrence.BO, SE); |
582 | if (!ByteOrderSwapped) |
583 | return "Loop with non-unit bitshifts" ; |
584 | if (SimpleRecurrence) { |
585 | if (isBigEndianBitShift(V: SimpleRecurrence.BO, SE) != ByteOrderSwapped) |
586 | return "Loop with non-unit bitshifts" ; |
587 | |
588 | // Ensure that the PHIs have exactly two uses: |
589 | // the bit-shift, and the XOR (or a cast feeding into the XOR). |
590 | if (!ConditionalRecurrence.Phi->hasNUses(N: 2) || |
591 | !SimpleRecurrence.Phi->hasNUses(N: 2)) |
592 | return "Recurrences have stray uses" ; |
593 | |
594 | // Check that the SelectInst ConditionalRecurrence.Step is conditional on |
595 | // the XOR of SimpleRecurrence.Phi and ConditionalRecurrence.Phi. |
596 | if (!isConditionalOnXorOfPHIs(SI: cast<SelectInst>(Val: ConditionalRecurrence.Step), |
597 | P1: SimpleRecurrence.Phi, |
598 | P2: ConditionalRecurrence.Phi, L)) |
599 | return "Recurrences not intertwined with XOR" ; |
600 | } |
601 | |
602 | // Make sure that the TC doesn't exceed the bitwidth of LHSAux, or LHS. |
603 | Value *LHS = ConditionalRecurrence.Start; |
604 | Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr; |
605 | if (TC > (LHSAux ? LHSAux->getType()->getIntegerBitWidth() |
606 | : LHS->getType()->getIntegerBitWidth())) |
607 | return "Loop iterations exceed bitwidth of data" ; |
608 | |
609 | // Make sure that the computed value is used in the exit block: this should be |
610 | // true even if it is only really used in an outer loop's exit block, since |
611 | // the loop is in LCSSA form. |
612 | auto *ComputedValue = cast<SelectInst>(Val: ConditionalRecurrence.Step); |
613 | if (none_of(Range: ComputedValue->users(), P: [Exit](User *U) { |
614 | auto *UI = dyn_cast<Instruction>(Val: U); |
615 | return UI && UI->getParent() == Exit; |
616 | })) |
617 | return "Unable to find use of computed value in loop exit block" ; |
618 | |
619 | assert(ConditionalRecurrence.ExtraConst && |
620 | "Expected ExtraConst in conditional recurrence" ); |
621 | const APInt &GenPoly = *ConditionalRecurrence.ExtraConst; |
622 | |
623 | // PhiEvolutions are pairs of PHINodes along with their incoming value from |
624 | // within the loop, which we term as their step. Note that in the case of a |
625 | // Simple Recurrence, Step is an operand of the BO, while in a Conditional |
626 | // Recurrence, it is a SelectInst. |
627 | SmallVector<PhiStepPair, 2> PhiEvolutions; |
628 | PhiEvolutions.emplace_back(Args&: ConditionalRecurrence.Phi, Args&: ComputedValue); |
629 | if (SimpleRecurrence) |
630 | PhiEvolutions.emplace_back(Args&: SimpleRecurrence.Phi, Args&: SimpleRecurrence.BO); |
631 | |
632 | ValueEvolution VE(TC, *ByteOrderSwapped); |
633 | if (!VE.computeEvolutions(PhiEvolutions)) |
634 | return VE.getError(); |
635 | KnownBits ResultBits = VE.KnownPhis.at(Val: ConditionalRecurrence.Phi); |
636 | |
637 | unsigned N = std::min(a: TC, b: ResultBits.getBitWidth()); |
638 | auto IsZero = [](const KnownBits &K) { return K.isZero(); }; |
639 | if (!checkExtractBits(Known: ResultBits, N, CheckFn: IsZero, ByteOrderSwapped: *ByteOrderSwapped)) |
640 | return ErrBits(ResultBits, TC, *ByteOrderSwapped); |
641 | |
642 | return PolynomialInfo(TC, LHS, GenPoly, ComputedValue, *ByteOrderSwapped, |
643 | LHSAux); |
644 | } |
645 | |
646 | void CRCTable::print(raw_ostream &OS) const { |
647 | for (unsigned I = 0; I < 256; I++) { |
648 | (*this)[I].print(OS, isSigned: false); |
649 | OS << (I % 16 == 15 ? '\n' : ' '); |
650 | } |
651 | } |
652 | |
653 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
654 | void CRCTable::dump() const { print(dbgs()); } |
655 | #endif |
656 | |
657 | void HashRecognize::print(raw_ostream &OS) const { |
658 | if (!L.isInnermost()) |
659 | return; |
660 | OS << "HashRecognize: Checking a loop in '" |
661 | << L.getHeader()->getParent()->getName() << "' from " << L.getLocStr() |
662 | << "\n" ; |
663 | auto Ret = recognizeCRC(); |
664 | if (!std::holds_alternative<PolynomialInfo>(v: Ret)) { |
665 | OS << "Did not find a hash algorithm\n" ; |
666 | if (std::holds_alternative<StringRef>(v: Ret)) |
667 | OS << "Reason: " << std::get<StringRef>(v&: Ret) << "\n" ; |
668 | if (std::holds_alternative<ErrBits>(v: Ret)) { |
669 | auto [Actual, Iter, ByteOrderSwapped] = std::get<ErrBits>(v&: Ret); |
670 | OS << "Reason: Expected " << (ByteOrderSwapped ? "bottom " : "top " ) |
671 | << Iter << " bits zero (" ; |
672 | Actual.print(OS); |
673 | OS << ")\n" ; |
674 | } |
675 | return; |
676 | } |
677 | |
678 | auto Info = std::get<PolynomialInfo>(v&: Ret); |
679 | OS << "Found" << (Info.ByteOrderSwapped ? " big-endian " : " little-endian " ) |
680 | << "CRC-" << Info.RHS.getBitWidth() << " loop with trip count " |
681 | << Info.TripCount << "\n" ; |
682 | OS.indent(NumSpaces: 2) << "Initial CRC: " ; |
683 | Info.LHS->print(O&: OS); |
684 | OS << "\n" ; |
685 | OS.indent(NumSpaces: 2) << "Generating polynomial: " ; |
686 | Info.RHS.print(OS, isSigned: false); |
687 | OS << "\n" ; |
688 | OS.indent(NumSpaces: 2) << "Computed CRC: " ; |
689 | Info.ComputedValue->print(O&: OS); |
690 | OS << "\n" ; |
691 | if (Info.LHSAux) { |
692 | OS.indent(NumSpaces: 2) << "Auxiliary data: " ; |
693 | Info.LHSAux->print(O&: OS); |
694 | OS << "\n" ; |
695 | } |
696 | OS.indent(NumSpaces: 2) << "Computed CRC lookup table:\n" ; |
697 | genSarwateTable(GenPoly: Info.RHS, ByteOrderSwapped: Info.ByteOrderSwapped).print(OS); |
698 | } |
699 | |
700 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
701 | void HashRecognize::dump() const { print(dbgs()); } |
702 | #endif |
703 | |
704 | std::optional<PolynomialInfo> HashRecognize::getResult() const { |
705 | auto Res = HashRecognize(L, SE).recognizeCRC(); |
706 | if (std::holds_alternative<PolynomialInfo>(v: Res)) |
707 | return std::get<PolynomialInfo>(v&: Res); |
708 | return std::nullopt; |
709 | } |
710 | |
711 | HashRecognize::HashRecognize(const Loop &L, ScalarEvolution &SE) |
712 | : L(L), SE(SE) {} |
713 | |
714 | PreservedAnalyses HashRecognizePrinterPass::run(Loop &L, |
715 | LoopAnalysisManager &AM, |
716 | LoopStandardAnalysisResults &AR, |
717 | LPMUpdater &) { |
718 | HashRecognize(L, AR.SE).print(OS); |
719 | return PreservedAnalyses::all(); |
720 | } |
721 | |