1 | #include "blake3_impl.h" |
2 | #include <string.h> |
3 | |
4 | INLINE uint32_t rotr32(uint32_t w, uint32_t c) { |
5 | return (w >> c) | (w << (32 - c)); |
6 | } |
7 | |
8 | INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d, |
9 | uint32_t x, uint32_t y) { |
10 | state[a] = state[a] + state[b] + x; |
11 | state[d] = rotr32(w: state[d] ^ state[a], c: 16); |
12 | state[c] = state[c] + state[d]; |
13 | state[b] = rotr32(w: state[b] ^ state[c], c: 12); |
14 | state[a] = state[a] + state[b] + y; |
15 | state[d] = rotr32(w: state[d] ^ state[a], c: 8); |
16 | state[c] = state[c] + state[d]; |
17 | state[b] = rotr32(w: state[b] ^ state[c], c: 7); |
18 | } |
19 | |
20 | INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) { |
21 | // Select the message schedule based on the round. |
22 | const uint8_t *schedule = MSG_SCHEDULE[round]; |
23 | |
24 | // Mix the columns. |
25 | g(state, a: 0, b: 4, c: 8, d: 12, x: msg[schedule[0]], y: msg[schedule[1]]); |
26 | g(state, a: 1, b: 5, c: 9, d: 13, x: msg[schedule[2]], y: msg[schedule[3]]); |
27 | g(state, a: 2, b: 6, c: 10, d: 14, x: msg[schedule[4]], y: msg[schedule[5]]); |
28 | g(state, a: 3, b: 7, c: 11, d: 15, x: msg[schedule[6]], y: msg[schedule[7]]); |
29 | |
30 | // Mix the rows. |
31 | g(state, a: 0, b: 5, c: 10, d: 15, x: msg[schedule[8]], y: msg[schedule[9]]); |
32 | g(state, a: 1, b: 6, c: 11, d: 12, x: msg[schedule[10]], y: msg[schedule[11]]); |
33 | g(state, a: 2, b: 7, c: 8, d: 13, x: msg[schedule[12]], y: msg[schedule[13]]); |
34 | g(state, a: 3, b: 4, c: 9, d: 14, x: msg[schedule[14]], y: msg[schedule[15]]); |
35 | } |
36 | |
37 | INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8], |
38 | const uint8_t block[BLAKE3_BLOCK_LEN], |
39 | uint8_t block_len, uint64_t counter, uint8_t flags) { |
40 | uint32_t block_words[16]; |
41 | block_words[0] = load32(src: block + 4 * 0); |
42 | block_words[1] = load32(src: block + 4 * 1); |
43 | block_words[2] = load32(src: block + 4 * 2); |
44 | block_words[3] = load32(src: block + 4 * 3); |
45 | block_words[4] = load32(src: block + 4 * 4); |
46 | block_words[5] = load32(src: block + 4 * 5); |
47 | block_words[6] = load32(src: block + 4 * 6); |
48 | block_words[7] = load32(src: block + 4 * 7); |
49 | block_words[8] = load32(src: block + 4 * 8); |
50 | block_words[9] = load32(src: block + 4 * 9); |
51 | block_words[10] = load32(src: block + 4 * 10); |
52 | block_words[11] = load32(src: block + 4 * 11); |
53 | block_words[12] = load32(src: block + 4 * 12); |
54 | block_words[13] = load32(src: block + 4 * 13); |
55 | block_words[14] = load32(src: block + 4 * 14); |
56 | block_words[15] = load32(src: block + 4 * 15); |
57 | |
58 | state[0] = cv[0]; |
59 | state[1] = cv[1]; |
60 | state[2] = cv[2]; |
61 | state[3] = cv[3]; |
62 | state[4] = cv[4]; |
63 | state[5] = cv[5]; |
64 | state[6] = cv[6]; |
65 | state[7] = cv[7]; |
66 | state[8] = IV[0]; |
67 | state[9] = IV[1]; |
68 | state[10] = IV[2]; |
69 | state[11] = IV[3]; |
70 | state[12] = counter_low(counter); |
71 | state[13] = counter_high(counter); |
72 | state[14] = (uint32_t)block_len; |
73 | state[15] = (uint32_t)flags; |
74 | |
75 | round_fn(state, msg: &block_words[0], round: 0); |
76 | round_fn(state, msg: &block_words[0], round: 1); |
77 | round_fn(state, msg: &block_words[0], round: 2); |
78 | round_fn(state, msg: &block_words[0], round: 3); |
79 | round_fn(state, msg: &block_words[0], round: 4); |
80 | round_fn(state, msg: &block_words[0], round: 5); |
81 | round_fn(state, msg: &block_words[0], round: 6); |
82 | } |
83 | |
84 | void blake3_compress_in_place_portable(uint32_t cv[8], |
85 | const uint8_t block[BLAKE3_BLOCK_LEN], |
86 | uint8_t block_len, uint64_t counter, |
87 | uint8_t flags) { |
88 | uint32_t state[16]; |
89 | compress_pre(state, cv, block, block_len, counter, flags); |
90 | cv[0] = state[0] ^ state[8]; |
91 | cv[1] = state[1] ^ state[9]; |
92 | cv[2] = state[2] ^ state[10]; |
93 | cv[3] = state[3] ^ state[11]; |
94 | cv[4] = state[4] ^ state[12]; |
95 | cv[5] = state[5] ^ state[13]; |
96 | cv[6] = state[6] ^ state[14]; |
97 | cv[7] = state[7] ^ state[15]; |
98 | } |
99 | |
100 | void blake3_compress_xof_portable(const uint32_t cv[8], |
101 | const uint8_t block[BLAKE3_BLOCK_LEN], |
102 | uint8_t block_len, uint64_t counter, |
103 | uint8_t flags, uint8_t out[64]) { |
104 | uint32_t state[16]; |
105 | compress_pre(state, cv, block, block_len, counter, flags); |
106 | |
107 | store32(dst: &out[0 * 4], w: state[0] ^ state[8]); |
108 | store32(dst: &out[1 * 4], w: state[1] ^ state[9]); |
109 | store32(dst: &out[2 * 4], w: state[2] ^ state[10]); |
110 | store32(dst: &out[3 * 4], w: state[3] ^ state[11]); |
111 | store32(dst: &out[4 * 4], w: state[4] ^ state[12]); |
112 | store32(dst: &out[5 * 4], w: state[5] ^ state[13]); |
113 | store32(dst: &out[6 * 4], w: state[6] ^ state[14]); |
114 | store32(dst: &out[7 * 4], w: state[7] ^ state[15]); |
115 | store32(dst: &out[8 * 4], w: state[8] ^ cv[0]); |
116 | store32(dst: &out[9 * 4], w: state[9] ^ cv[1]); |
117 | store32(dst: &out[10 * 4], w: state[10] ^ cv[2]); |
118 | store32(dst: &out[11 * 4], w: state[11] ^ cv[3]); |
119 | store32(dst: &out[12 * 4], w: state[12] ^ cv[4]); |
120 | store32(dst: &out[13 * 4], w: state[13] ^ cv[5]); |
121 | store32(dst: &out[14 * 4], w: state[14] ^ cv[6]); |
122 | store32(dst: &out[15 * 4], w: state[15] ^ cv[7]); |
123 | } |
124 | |
125 | INLINE void hash_one_portable(const uint8_t *input, size_t blocks, |
126 | const uint32_t key[8], uint64_t counter, |
127 | uint8_t flags, uint8_t flags_start, |
128 | uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) { |
129 | uint32_t cv[8]; |
130 | memcpy(dest: cv, src: key, BLAKE3_KEY_LEN); |
131 | uint8_t block_flags = flags | flags_start; |
132 | while (blocks > 0) { |
133 | if (blocks == 1) { |
134 | block_flags |= flags_end; |
135 | } |
136 | blake3_compress_in_place_portable(cv, block: input, BLAKE3_BLOCK_LEN, counter, |
137 | flags: block_flags); |
138 | input = &input[BLAKE3_BLOCK_LEN]; |
139 | blocks -= 1; |
140 | block_flags = flags; |
141 | } |
142 | store_cv_words(bytes_out: out, cv_words: cv); |
143 | } |
144 | |
145 | void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, |
146 | size_t blocks, const uint32_t key[8], |
147 | uint64_t counter, bool increment_counter, |
148 | uint8_t flags, uint8_t flags_start, |
149 | uint8_t flags_end, uint8_t *out) { |
150 | while (num_inputs > 0) { |
151 | hash_one_portable(input: inputs[0], blocks, key, counter, flags, flags_start, |
152 | flags_end, out); |
153 | if (increment_counter) { |
154 | counter += 1; |
155 | } |
156 | inputs += 1; |
157 | num_inputs -= 1; |
158 | out = &out[BLAKE3_OUT_LEN]; |
159 | } |
160 | } |
161 | |