| 1 | #include "blake3_impl.h" |
| 2 | #include <string.h> |
| 3 | |
| 4 | INLINE uint32_t rotr32(uint32_t w, uint32_t c) { |
| 5 | return (w >> c) | (w << (32 - c)); |
| 6 | } |
| 7 | |
| 8 | INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d, |
| 9 | uint32_t x, uint32_t y) { |
| 10 | state[a] = state[a] + state[b] + x; |
| 11 | state[d] = rotr32(w: state[d] ^ state[a], c: 16); |
| 12 | state[c] = state[c] + state[d]; |
| 13 | state[b] = rotr32(w: state[b] ^ state[c], c: 12); |
| 14 | state[a] = state[a] + state[b] + y; |
| 15 | state[d] = rotr32(w: state[d] ^ state[a], c: 8); |
| 16 | state[c] = state[c] + state[d]; |
| 17 | state[b] = rotr32(w: state[b] ^ state[c], c: 7); |
| 18 | } |
| 19 | |
| 20 | INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) { |
| 21 | // Select the message schedule based on the round. |
| 22 | const uint8_t *schedule = MSG_SCHEDULE[round]; |
| 23 | |
| 24 | // Mix the columns. |
| 25 | g(state, a: 0, b: 4, c: 8, d: 12, x: msg[schedule[0]], y: msg[schedule[1]]); |
| 26 | g(state, a: 1, b: 5, c: 9, d: 13, x: msg[schedule[2]], y: msg[schedule[3]]); |
| 27 | g(state, a: 2, b: 6, c: 10, d: 14, x: msg[schedule[4]], y: msg[schedule[5]]); |
| 28 | g(state, a: 3, b: 7, c: 11, d: 15, x: msg[schedule[6]], y: msg[schedule[7]]); |
| 29 | |
| 30 | // Mix the rows. |
| 31 | g(state, a: 0, b: 5, c: 10, d: 15, x: msg[schedule[8]], y: msg[schedule[9]]); |
| 32 | g(state, a: 1, b: 6, c: 11, d: 12, x: msg[schedule[10]], y: msg[schedule[11]]); |
| 33 | g(state, a: 2, b: 7, c: 8, d: 13, x: msg[schedule[12]], y: msg[schedule[13]]); |
| 34 | g(state, a: 3, b: 4, c: 9, d: 14, x: msg[schedule[14]], y: msg[schedule[15]]); |
| 35 | } |
| 36 | |
| 37 | INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8], |
| 38 | const uint8_t block[BLAKE3_BLOCK_LEN], |
| 39 | uint8_t block_len, uint64_t counter, uint8_t flags) { |
| 40 | uint32_t block_words[16]; |
| 41 | block_words[0] = load32(src: block + 4 * 0); |
| 42 | block_words[1] = load32(src: block + 4 * 1); |
| 43 | block_words[2] = load32(src: block + 4 * 2); |
| 44 | block_words[3] = load32(src: block + 4 * 3); |
| 45 | block_words[4] = load32(src: block + 4 * 4); |
| 46 | block_words[5] = load32(src: block + 4 * 5); |
| 47 | block_words[6] = load32(src: block + 4 * 6); |
| 48 | block_words[7] = load32(src: block + 4 * 7); |
| 49 | block_words[8] = load32(src: block + 4 * 8); |
| 50 | block_words[9] = load32(src: block + 4 * 9); |
| 51 | block_words[10] = load32(src: block + 4 * 10); |
| 52 | block_words[11] = load32(src: block + 4 * 11); |
| 53 | block_words[12] = load32(src: block + 4 * 12); |
| 54 | block_words[13] = load32(src: block + 4 * 13); |
| 55 | block_words[14] = load32(src: block + 4 * 14); |
| 56 | block_words[15] = load32(src: block + 4 * 15); |
| 57 | |
| 58 | state[0] = cv[0]; |
| 59 | state[1] = cv[1]; |
| 60 | state[2] = cv[2]; |
| 61 | state[3] = cv[3]; |
| 62 | state[4] = cv[4]; |
| 63 | state[5] = cv[5]; |
| 64 | state[6] = cv[6]; |
| 65 | state[7] = cv[7]; |
| 66 | state[8] = IV[0]; |
| 67 | state[9] = IV[1]; |
| 68 | state[10] = IV[2]; |
| 69 | state[11] = IV[3]; |
| 70 | state[12] = counter_low(counter); |
| 71 | state[13] = counter_high(counter); |
| 72 | state[14] = (uint32_t)block_len; |
| 73 | state[15] = (uint32_t)flags; |
| 74 | |
| 75 | round_fn(state, msg: &block_words[0], round: 0); |
| 76 | round_fn(state, msg: &block_words[0], round: 1); |
| 77 | round_fn(state, msg: &block_words[0], round: 2); |
| 78 | round_fn(state, msg: &block_words[0], round: 3); |
| 79 | round_fn(state, msg: &block_words[0], round: 4); |
| 80 | round_fn(state, msg: &block_words[0], round: 5); |
| 81 | round_fn(state, msg: &block_words[0], round: 6); |
| 82 | } |
| 83 | |
| 84 | void blake3_compress_in_place_portable(uint32_t cv[8], |
| 85 | const uint8_t block[BLAKE3_BLOCK_LEN], |
| 86 | uint8_t block_len, uint64_t counter, |
| 87 | uint8_t flags) { |
| 88 | uint32_t state[16]; |
| 89 | compress_pre(state, cv, block, block_len, counter, flags); |
| 90 | cv[0] = state[0] ^ state[8]; |
| 91 | cv[1] = state[1] ^ state[9]; |
| 92 | cv[2] = state[2] ^ state[10]; |
| 93 | cv[3] = state[3] ^ state[11]; |
| 94 | cv[4] = state[4] ^ state[12]; |
| 95 | cv[5] = state[5] ^ state[13]; |
| 96 | cv[6] = state[6] ^ state[14]; |
| 97 | cv[7] = state[7] ^ state[15]; |
| 98 | } |
| 99 | |
| 100 | void blake3_compress_xof_portable(const uint32_t cv[8], |
| 101 | const uint8_t block[BLAKE3_BLOCK_LEN], |
| 102 | uint8_t block_len, uint64_t counter, |
| 103 | uint8_t flags, uint8_t out[64]) { |
| 104 | uint32_t state[16]; |
| 105 | compress_pre(state, cv, block, block_len, counter, flags); |
| 106 | |
| 107 | store32(dst: &out[0 * 4], w: state[0] ^ state[8]); |
| 108 | store32(dst: &out[1 * 4], w: state[1] ^ state[9]); |
| 109 | store32(dst: &out[2 * 4], w: state[2] ^ state[10]); |
| 110 | store32(dst: &out[3 * 4], w: state[3] ^ state[11]); |
| 111 | store32(dst: &out[4 * 4], w: state[4] ^ state[12]); |
| 112 | store32(dst: &out[5 * 4], w: state[5] ^ state[13]); |
| 113 | store32(dst: &out[6 * 4], w: state[6] ^ state[14]); |
| 114 | store32(dst: &out[7 * 4], w: state[7] ^ state[15]); |
| 115 | store32(dst: &out[8 * 4], w: state[8] ^ cv[0]); |
| 116 | store32(dst: &out[9 * 4], w: state[9] ^ cv[1]); |
| 117 | store32(dst: &out[10 * 4], w: state[10] ^ cv[2]); |
| 118 | store32(dst: &out[11 * 4], w: state[11] ^ cv[3]); |
| 119 | store32(dst: &out[12 * 4], w: state[12] ^ cv[4]); |
| 120 | store32(dst: &out[13 * 4], w: state[13] ^ cv[5]); |
| 121 | store32(dst: &out[14 * 4], w: state[14] ^ cv[6]); |
| 122 | store32(dst: &out[15 * 4], w: state[15] ^ cv[7]); |
| 123 | } |
| 124 | |
| 125 | INLINE void hash_one_portable(const uint8_t *input, size_t blocks, |
| 126 | const uint32_t key[8], uint64_t counter, |
| 127 | uint8_t flags, uint8_t flags_start, |
| 128 | uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) { |
| 129 | uint32_t cv[8]; |
| 130 | memcpy(dest: cv, src: key, BLAKE3_KEY_LEN); |
| 131 | uint8_t block_flags = flags | flags_start; |
| 132 | while (blocks > 0) { |
| 133 | if (blocks == 1) { |
| 134 | block_flags |= flags_end; |
| 135 | } |
| 136 | blake3_compress_in_place_portable(cv, block: input, BLAKE3_BLOCK_LEN, counter, |
| 137 | flags: block_flags); |
| 138 | input = &input[BLAKE3_BLOCK_LEN]; |
| 139 | blocks -= 1; |
| 140 | block_flags = flags; |
| 141 | } |
| 142 | store_cv_words(bytes_out: out, cv_words: cv); |
| 143 | } |
| 144 | |
| 145 | void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, |
| 146 | size_t blocks, const uint32_t key[8], |
| 147 | uint64_t counter, bool increment_counter, |
| 148 | uint8_t flags, uint8_t flags_start, |
| 149 | uint8_t flags_end, uint8_t *out) { |
| 150 | while (num_inputs > 0) { |
| 151 | hash_one_portable(input: inputs[0], blocks, key, counter, flags, flags_start, |
| 152 | flags_end, out); |
| 153 | if (increment_counter) { |
| 154 | counter += 1; |
| 155 | } |
| 156 | inputs += 1; |
| 157 | num_inputs -= 1; |
| 158 | out = &out[BLAKE3_OUT_LEN]; |
| 159 | } |
| 160 | } |
| 161 | |