/*
 * Build-time configuration for this assembly unit. The file is run through
 * the C preprocessor before it is assembled.
 *
 * Everything below is assembled only when targeting x86-64; the #endif that
 * closes this guard is not in this part of the file.
 */
1 | #if defined(__x86_64__) |
2 | |
/*
 * Project-local header; its contents are not visible here.
 * NOTE(review): presumably it renames the blake3_* symbols with an LLVM
 * prefix - confirm against the header.
 */
3 | #include "llvm_blake3_prefix.h" |
4 | |
/*
 * On ELF targets, except when both __sun__ and __svr4__ are defined, emit an
 * empty .note.GNU-stack section with no flags. By GNU toolchain convention
 * this tells the linker that the object does not need an executable stack.
 */
5 | #if defined(__ELF__) && !(defined(__sun__) && defined(__svr4__)) |
6 | .section .note.GNU-stack,"" ,%progbits |
7 | #endif |
8 | |
/*
 * Pull in <cet.h> only when all of these hold: ELF target, the compiler
 * defines __CET__, __has_include is supported, and the header exists.
 * The header is expected to supply _CET_ENDBR.
 */
9 | #if defined(__ELF__) && defined(__CET__) && defined(__has_include) |
10 | #if __has_include(<cet.h>) |
11 | #include <cet.h> |
12 | #endif |
13 | #endif |
14 | |
/*
 * Fallback: if nothing above defined _CET_ENDBR, define it as empty so the
 * _CET_ENDBR placed at a function entry expands to nothing.
 */
15 | #if !defined(_CET_ENDBR) |
16 | #define _CET_ENDBR |
17 | #endif |
18 | |
/*
 * HIDDEN is the platform's directive for hiding a symbol:
 * .private_extern when __APPLE__ is defined, .hidden otherwise.
 */
19 | #ifdef __APPLE__ |
20 | #define HIDDEN .private_extern |
21 | #else |
22 | #define HIDDEN .hidden |
23 | #endif |
24 | |
/*
 * Assembler setup: syntax mode, symbol visibility and binding, and the code
 * section.
 *
 * The rest of the file uses Intel operand order (destination first) with
 * register names written without a % prefix.
 */
25 | .intel_syntax noprefix |
/*
 * Each of the three routines is declared under two names, with and without a
 * leading underscore. For blake3_hash_many_sse2 both labels are defined at
 * the same address right after this block.
 * NOTE(review): the labels for the other two routines are not in this part of
 * the file; they presumably follow the same pattern - confirm.
 * HIDDEN is a preprocessor macro for the platform's symbol-hiding directive.
 */
26 | HIDDEN blake3_hash_many_sse2 |
27 | HIDDEN _blake3_hash_many_sse2 |
28 | HIDDEN blake3_compress_in_place_sse2 |
29 | HIDDEN _blake3_compress_in_place_sse2 |
30 | HIDDEN blake3_compress_xof_sse2 |
31 | HIDDEN _blake3_compress_xof_sse2 |
/*
 * Give the same six names global binding so other object files can link
 * against them. Together with HIDDEN above, they are usable inside the
 * linked module but are not exported from it.
 */
32 | .global blake3_hash_many_sse2 |
33 | .global _blake3_hash_many_sse2 |
34 | .global blake3_compress_in_place_sse2 |
35 | .global _blake3_compress_in_place_sse2 |
36 | .global blake3_compress_xof_sse2 |
37 | .global _blake3_compress_xof_sse2 |
/*
 * Switch to the code section: the bare .text directive when __APPLE__ is
 * defined, ".section .text" otherwise.
 */
38 | #ifdef __APPLE__ |
39 | .text |
40 | #else |
41 | .section .text |
42 | #endif |
/*
 * Align the next location to 2^6 = 64 bytes; the entry labels of
 * blake3_hash_many_sse2 follow immediately.
 */
43 | .p2align 6 |
44 | _blake3_hash_many_sse2: |
45 | blake3_hash_many_sse2: |
46 | _CET_ENDBR |
47 | push r15 |
48 | push r14 |
49 | push r13 |
50 | push r12 |
51 | push rbx |
52 | push rbp |
53 | mov rbp, rsp |
54 | sub rsp, 360 |
55 | and rsp, 0xFFFFFFFFFFFFFFC0 |
56 | neg r9d |
57 | movd xmm0, r9d |
58 | pshufd xmm0, xmm0, 0x00 |
59 | movdqa xmmword ptr [rsp+0x130], xmm0 |
60 | movdqa xmm1, xmm0 |
61 | pand xmm1, xmmword ptr [ADD0+rip] |
62 | pand xmm0, xmmword ptr [ADD1+rip] |
63 | movdqa xmmword ptr [rsp+0x150], xmm0 |
64 | movd xmm0, r8d |
65 | pshufd xmm0, xmm0, 0x00 |
66 | paddd xmm0, xmm1 |
67 | movdqa xmmword ptr [rsp+0x110], xmm0 |
68 | pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] |
69 | pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] |
70 | pcmpgtd xmm1, xmm0 |
71 | shr r8, 32 |
72 | movd xmm2, r8d |
73 | pshufd xmm2, xmm2, 0x00 |
74 | psubd xmm2, xmm1 |
75 | movdqa xmmword ptr [rsp+0x120], xmm2 |
76 | mov rbx, qword ptr [rbp+0x50] |
77 | mov r15, rdx |
78 | shl r15, 6 |
79 | movzx r13d, byte ptr [rbp+0x38] |
80 | movzx r12d, byte ptr [rbp+0x48] |
81 | cmp rsi, 4 |
82 | jc 3f |
83 | 2: |
84 | movdqu xmm3, xmmword ptr [rcx] |
85 | pshufd xmm0, xmm3, 0x00 |
86 | pshufd xmm1, xmm3, 0x55 |
87 | pshufd xmm2, xmm3, 0xAA |
88 | pshufd xmm3, xmm3, 0xFF |
89 | movdqu xmm7, xmmword ptr [rcx+0x10] |
90 | pshufd xmm4, xmm7, 0x00 |
91 | pshufd xmm5, xmm7, 0x55 |
92 | pshufd xmm6, xmm7, 0xAA |
93 | pshufd xmm7, xmm7, 0xFF |
94 | mov r8, qword ptr [rdi] |
95 | mov r9, qword ptr [rdi+0x8] |
96 | mov r10, qword ptr [rdi+0x10] |
97 | mov r11, qword ptr [rdi+0x18] |
98 | movzx eax, byte ptr [rbp+0x40] |
99 | or eax, r13d |
100 | xor edx, edx |
101 | 9: |
102 | mov r14d, eax |
103 | or eax, r12d |
104 | add rdx, 64 |
105 | cmp rdx, r15 |
106 | cmovne eax, r14d |
107 | movdqu xmm8, xmmword ptr [r8+rdx-0x40] |
108 | movdqu xmm9, xmmword ptr [r9+rdx-0x40] |
109 | movdqu xmm10, xmmword ptr [r10+rdx-0x40] |
110 | movdqu xmm11, xmmword ptr [r11+rdx-0x40] |
111 | movdqa xmm12, xmm8 |
112 | punpckldq xmm8, xmm9 |
113 | punpckhdq xmm12, xmm9 |
114 | movdqa xmm14, xmm10 |
115 | punpckldq xmm10, xmm11 |
116 | punpckhdq xmm14, xmm11 |
117 | movdqa xmm9, xmm8 |
118 | punpcklqdq xmm8, xmm10 |
119 | punpckhqdq xmm9, xmm10 |
120 | movdqa xmm13, xmm12 |
121 | punpcklqdq xmm12, xmm14 |
122 | punpckhqdq xmm13, xmm14 |
123 | movdqa xmmword ptr [rsp], xmm8 |
124 | movdqa xmmword ptr [rsp+0x10], xmm9 |
125 | movdqa xmmword ptr [rsp+0x20], xmm12 |
126 | movdqa xmmword ptr [rsp+0x30], xmm13 |
127 | movdqu xmm8, xmmword ptr [r8+rdx-0x30] |
128 | movdqu xmm9, xmmword ptr [r9+rdx-0x30] |
129 | movdqu xmm10, xmmword ptr [r10+rdx-0x30] |
130 | movdqu xmm11, xmmword ptr [r11+rdx-0x30] |
131 | movdqa xmm12, xmm8 |
132 | punpckldq xmm8, xmm9 |
133 | punpckhdq xmm12, xmm9 |
134 | movdqa xmm14, xmm10 |
135 | punpckldq xmm10, xmm11 |
136 | punpckhdq xmm14, xmm11 |
137 | movdqa xmm9, xmm8 |
138 | punpcklqdq xmm8, xmm10 |
139 | punpckhqdq xmm9, xmm10 |
140 | movdqa xmm13, xmm12 |
141 | punpcklqdq xmm12, xmm14 |
142 | punpckhqdq xmm13, xmm14 |
143 | movdqa xmmword ptr [rsp+0x40], xmm8 |
144 | movdqa xmmword ptr [rsp+0x50], xmm9 |
145 | movdqa xmmword ptr [rsp+0x60], xmm12 |
146 | movdqa xmmword ptr [rsp+0x70], xmm13 |
147 | movdqu xmm8, xmmword ptr [r8+rdx-0x20] |
148 | movdqu xmm9, xmmword ptr [r9+rdx-0x20] |
149 | movdqu xmm10, xmmword ptr [r10+rdx-0x20] |
150 | movdqu xmm11, xmmword ptr [r11+rdx-0x20] |
151 | movdqa xmm12, xmm8 |
152 | punpckldq xmm8, xmm9 |
153 | punpckhdq xmm12, xmm9 |
154 | movdqa xmm14, xmm10 |
155 | punpckldq xmm10, xmm11 |
156 | punpckhdq xmm14, xmm11 |
157 | movdqa xmm9, xmm8 |
158 | punpcklqdq xmm8, xmm10 |
159 | punpckhqdq xmm9, xmm10 |
160 | movdqa xmm13, xmm12 |
161 | punpcklqdq xmm12, xmm14 |
162 | punpckhqdq xmm13, xmm14 |
163 | movdqa xmmword ptr [rsp+0x80], xmm8 |
164 | movdqa xmmword ptr [rsp+0x90], xmm9 |
165 | movdqa xmmword ptr [rsp+0xA0], xmm12 |
166 | movdqa xmmword ptr [rsp+0xB0], xmm13 |
167 | movdqu xmm8, xmmword ptr [r8+rdx-0x10] |
168 | movdqu xmm9, xmmword ptr [r9+rdx-0x10] |
169 | movdqu xmm10, xmmword ptr [r10+rdx-0x10] |
170 | movdqu xmm11, xmmword ptr [r11+rdx-0x10] |
171 | movdqa xmm12, xmm8 |
172 | punpckldq xmm8, xmm9 |
173 | punpckhdq xmm12, xmm9 |
174 | movdqa xmm14, xmm10 |
175 | punpckldq xmm10, xmm11 |
176 | punpckhdq xmm14, xmm11 |
177 | movdqa xmm9, xmm8 |
178 | punpcklqdq xmm8, xmm10 |
179 | punpckhqdq xmm9, xmm10 |
180 | movdqa xmm13, xmm12 |
181 | punpcklqdq xmm12, xmm14 |
182 | punpckhqdq xmm13, xmm14 |
183 | movdqa xmmword ptr [rsp+0xC0], xmm8 |
184 | movdqa xmmword ptr [rsp+0xD0], xmm9 |
185 | movdqa xmmword ptr [rsp+0xE0], xmm12 |
186 | movdqa xmmword ptr [rsp+0xF0], xmm13 |
187 | movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] |
188 | movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] |
189 | movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] |
190 | movdqa xmm12, xmmword ptr [rsp+0x110] |
191 | movdqa xmm13, xmmword ptr [rsp+0x120] |
192 | movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] |
193 | movd xmm15, eax |
194 | pshufd xmm15, xmm15, 0x00 |
195 | prefetcht0 [r8+rdx+0x80] |
196 | prefetcht0 [r9+rdx+0x80] |
197 | prefetcht0 [r10+rdx+0x80] |
198 | prefetcht0 [r11+rdx+0x80] |
199 | paddd xmm0, xmmword ptr [rsp] |
200 | paddd xmm1, xmmword ptr [rsp+0x20] |
201 | paddd xmm2, xmmword ptr [rsp+0x40] |
202 | paddd xmm3, xmmword ptr [rsp+0x60] |
203 | paddd xmm0, xmm4 |
204 | paddd xmm1, xmm5 |
205 | paddd xmm2, xmm6 |
206 | paddd xmm3, xmm7 |
207 | pxor xmm12, xmm0 |
208 | pxor xmm13, xmm1 |
209 | pxor xmm14, xmm2 |
210 | pxor xmm15, xmm3 |
211 | pshuflw xmm12, xmm12, 0xB1 |
212 | pshufhw xmm12, xmm12, 0xB1 |
213 | pshuflw xmm13, xmm13, 0xB1 |
214 | pshufhw xmm13, xmm13, 0xB1 |
215 | pshuflw xmm14, xmm14, 0xB1 |
216 | pshufhw xmm14, xmm14, 0xB1 |
217 | pshuflw xmm15, xmm15, 0xB1 |
218 | pshufhw xmm15, xmm15, 0xB1 |
219 | movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] |
220 | paddd xmm8, xmm12 |
221 | paddd xmm9, xmm13 |
222 | paddd xmm10, xmm14 |
223 | paddd xmm11, xmm15 |
224 | pxor xmm4, xmm8 |
225 | pxor xmm5, xmm9 |
226 | pxor xmm6, xmm10 |
227 | pxor xmm7, xmm11 |
228 | movdqa xmmword ptr [rsp+0x100], xmm8 |
229 | movdqa xmm8, xmm4 |
230 | psrld xmm8, 12 |
231 | pslld xmm4, 20 |
232 | por xmm4, xmm8 |
233 | movdqa xmm8, xmm5 |
234 | psrld xmm8, 12 |
235 | pslld xmm5, 20 |
236 | por xmm5, xmm8 |
237 | movdqa xmm8, xmm6 |
238 | psrld xmm8, 12 |
239 | pslld xmm6, 20 |
240 | por xmm6, xmm8 |
241 | movdqa xmm8, xmm7 |
242 | psrld xmm8, 12 |
243 | pslld xmm7, 20 |
244 | por xmm7, xmm8 |
245 | paddd xmm0, xmmword ptr [rsp+0x10] |
246 | paddd xmm1, xmmword ptr [rsp+0x30] |
247 | paddd xmm2, xmmword ptr [rsp+0x50] |
248 | paddd xmm3, xmmword ptr [rsp+0x70] |
249 | paddd xmm0, xmm4 |
250 | paddd xmm1, xmm5 |
251 | paddd xmm2, xmm6 |
252 | paddd xmm3, xmm7 |
253 | pxor xmm12, xmm0 |
254 | pxor xmm13, xmm1 |
255 | pxor xmm14, xmm2 |
256 | pxor xmm15, xmm3 |
257 | movdqa xmm8, xmm12 |
258 | psrld xmm12, 8 |
259 | pslld xmm8, 24 |
260 | pxor xmm12, xmm8 |
261 | movdqa xmm8, xmm13 |
262 | psrld xmm13, 8 |
263 | pslld xmm8, 24 |
264 | pxor xmm13, xmm8 |
265 | movdqa xmm8, xmm14 |
266 | psrld xmm14, 8 |
267 | pslld xmm8, 24 |
268 | pxor xmm14, xmm8 |
269 | movdqa xmm8, xmm15 |
270 | psrld xmm15, 8 |
271 | pslld xmm8, 24 |
272 | pxor xmm15, xmm8 |
273 | movdqa xmm8, xmmword ptr [rsp+0x100] |
274 | paddd xmm8, xmm12 |
275 | paddd xmm9, xmm13 |
276 | paddd xmm10, xmm14 |
277 | paddd xmm11, xmm15 |
278 | pxor xmm4, xmm8 |
279 | pxor xmm5, xmm9 |
280 | pxor xmm6, xmm10 |
281 | pxor xmm7, xmm11 |
282 | movdqa xmmword ptr [rsp+0x100], xmm8 |
283 | movdqa xmm8, xmm4 |
284 | psrld xmm8, 7 |
285 | pslld xmm4, 25 |
286 | por xmm4, xmm8 |
287 | movdqa xmm8, xmm5 |
288 | psrld xmm8, 7 |
289 | pslld xmm5, 25 |
290 | por xmm5, xmm8 |
291 | movdqa xmm8, xmm6 |
292 | psrld xmm8, 7 |
293 | pslld xmm6, 25 |
294 | por xmm6, xmm8 |
295 | movdqa xmm8, xmm7 |
296 | psrld xmm8, 7 |
297 | pslld xmm7, 25 |
298 | por xmm7, xmm8 |
299 | paddd xmm0, xmmword ptr [rsp+0x80] |
300 | paddd xmm1, xmmword ptr [rsp+0xA0] |
301 | paddd xmm2, xmmword ptr [rsp+0xC0] |
302 | paddd xmm3, xmmword ptr [rsp+0xE0] |
303 | paddd xmm0, xmm5 |
304 | paddd xmm1, xmm6 |
305 | paddd xmm2, xmm7 |
306 | paddd xmm3, xmm4 |
307 | pxor xmm15, xmm0 |
308 | pxor xmm12, xmm1 |
309 | pxor xmm13, xmm2 |
310 | pxor xmm14, xmm3 |
311 | pshuflw xmm15, xmm15, 0xB1 |
312 | pshufhw xmm15, xmm15, 0xB1 |
313 | pshuflw xmm12, xmm12, 0xB1 |
314 | pshufhw xmm12, xmm12, 0xB1 |
315 | pshuflw xmm13, xmm13, 0xB1 |
316 | pshufhw xmm13, xmm13, 0xB1 |
317 | pshuflw xmm14, xmm14, 0xB1 |
318 | pshufhw xmm14, xmm14, 0xB1 |
319 | paddd xmm10, xmm15 |
320 | paddd xmm11, xmm12 |
321 | movdqa xmm8, xmmword ptr [rsp+0x100] |
322 | paddd xmm8, xmm13 |
323 | paddd xmm9, xmm14 |
324 | pxor xmm5, xmm10 |
325 | pxor xmm6, xmm11 |
326 | pxor xmm7, xmm8 |
327 | pxor xmm4, xmm9 |
328 | movdqa xmmword ptr [rsp+0x100], xmm8 |
329 | movdqa xmm8, xmm5 |
330 | psrld xmm8, 12 |
331 | pslld xmm5, 20 |
332 | por xmm5, xmm8 |
333 | movdqa xmm8, xmm6 |
334 | psrld xmm8, 12 |
335 | pslld xmm6, 20 |
336 | por xmm6, xmm8 |
337 | movdqa xmm8, xmm7 |
338 | psrld xmm8, 12 |
339 | pslld xmm7, 20 |
340 | por xmm7, xmm8 |
341 | movdqa xmm8, xmm4 |
342 | psrld xmm8, 12 |
343 | pslld xmm4, 20 |
344 | por xmm4, xmm8 |
345 | paddd xmm0, xmmword ptr [rsp+0x90] |
346 | paddd xmm1, xmmword ptr [rsp+0xB0] |
347 | paddd xmm2, xmmword ptr [rsp+0xD0] |
348 | paddd xmm3, xmmword ptr [rsp+0xF0] |
349 | paddd xmm0, xmm5 |
350 | paddd xmm1, xmm6 |
351 | paddd xmm2, xmm7 |
352 | paddd xmm3, xmm4 |
353 | pxor xmm15, xmm0 |
354 | pxor xmm12, xmm1 |
355 | pxor xmm13, xmm2 |
356 | pxor xmm14, xmm3 |
357 | movdqa xmm8, xmm15 |
358 | psrld xmm15, 8 |
359 | pslld xmm8, 24 |
360 | pxor xmm15, xmm8 |
361 | movdqa xmm8, xmm12 |
362 | psrld xmm12, 8 |
363 | pslld xmm8, 24 |
364 | pxor xmm12, xmm8 |
365 | movdqa xmm8, xmm13 |
366 | psrld xmm13, 8 |
367 | pslld xmm8, 24 |
368 | pxor xmm13, xmm8 |
369 | movdqa xmm8, xmm14 |
370 | psrld xmm14, 8 |
371 | pslld xmm8, 24 |
372 | pxor xmm14, xmm8 |
373 | paddd xmm10, xmm15 |
374 | paddd xmm11, xmm12 |
375 | movdqa xmm8, xmmword ptr [rsp+0x100] |
376 | paddd xmm8, xmm13 |
377 | paddd xmm9, xmm14 |
378 | pxor xmm5, xmm10 |
379 | pxor xmm6, xmm11 |
380 | pxor xmm7, xmm8 |
381 | pxor xmm4, xmm9 |
382 | movdqa xmmword ptr [rsp+0x100], xmm8 |
383 | movdqa xmm8, xmm5 |
384 | psrld xmm8, 7 |
385 | pslld xmm5, 25 |
386 | por xmm5, xmm8 |
387 | movdqa xmm8, xmm6 |
388 | psrld xmm8, 7 |
389 | pslld xmm6, 25 |
390 | por xmm6, xmm8 |
391 | movdqa xmm8, xmm7 |
392 | psrld xmm8, 7 |
393 | pslld xmm7, 25 |
394 | por xmm7, xmm8 |
395 | movdqa xmm8, xmm4 |
396 | psrld xmm8, 7 |
397 | pslld xmm4, 25 |
398 | por xmm4, xmm8 |
399 | paddd xmm0, xmmword ptr [rsp+0x20] |
400 | paddd xmm1, xmmword ptr [rsp+0x30] |
401 | paddd xmm2, xmmword ptr [rsp+0x70] |
402 | paddd xmm3, xmmword ptr [rsp+0x40] |
403 | paddd xmm0, xmm4 |
404 | paddd xmm1, xmm5 |
405 | paddd xmm2, xmm6 |
406 | paddd xmm3, xmm7 |
407 | pxor xmm12, xmm0 |
408 | pxor xmm13, xmm1 |
409 | pxor xmm14, xmm2 |
410 | pxor xmm15, xmm3 |
411 | pshuflw xmm12, xmm12, 0xB1 |
412 | pshufhw xmm12, xmm12, 0xB1 |
413 | pshuflw xmm13, xmm13, 0xB1 |
414 | pshufhw xmm13, xmm13, 0xB1 |
415 | pshuflw xmm14, xmm14, 0xB1 |
416 | pshufhw xmm14, xmm14, 0xB1 |
417 | pshuflw xmm15, xmm15, 0xB1 |
418 | pshufhw xmm15, xmm15, 0xB1 |
419 | movdqa xmm8, xmmword ptr [rsp+0x100] |
420 | paddd xmm8, xmm12 |
421 | paddd xmm9, xmm13 |
422 | paddd xmm10, xmm14 |
423 | paddd xmm11, xmm15 |
424 | pxor xmm4, xmm8 |
425 | pxor xmm5, xmm9 |
426 | pxor xmm6, xmm10 |
427 | pxor xmm7, xmm11 |
428 | movdqa xmmword ptr [rsp+0x100], xmm8 |
429 | movdqa xmm8, xmm4 |
430 | psrld xmm8, 12 |
431 | pslld xmm4, 20 |
432 | por xmm4, xmm8 |
433 | movdqa xmm8, xmm5 |
434 | psrld xmm8, 12 |
435 | pslld xmm5, 20 |
436 | por xmm5, xmm8 |
437 | movdqa xmm8, xmm6 |
438 | psrld xmm8, 12 |
439 | pslld xmm6, 20 |
440 | por xmm6, xmm8 |
441 | movdqa xmm8, xmm7 |
442 | psrld xmm8, 12 |
443 | pslld xmm7, 20 |
444 | por xmm7, xmm8 |
445 | paddd xmm0, xmmword ptr [rsp+0x60] |
446 | paddd xmm1, xmmword ptr [rsp+0xA0] |
447 | paddd xmm2, xmmword ptr [rsp] |
448 | paddd xmm3, xmmword ptr [rsp+0xD0] |
449 | paddd xmm0, xmm4 |
450 | paddd xmm1, xmm5 |
451 | paddd xmm2, xmm6 |
452 | paddd xmm3, xmm7 |
453 | pxor xmm12, xmm0 |
454 | pxor xmm13, xmm1 |
455 | pxor xmm14, xmm2 |
456 | pxor xmm15, xmm3 |
457 | movdqa xmm8, xmm12 |
458 | psrld xmm12, 8 |
459 | pslld xmm8, 24 |
460 | pxor xmm12, xmm8 |
461 | movdqa xmm8, xmm13 |
462 | psrld xmm13, 8 |
463 | pslld xmm8, 24 |
464 | pxor xmm13, xmm8 |
465 | movdqa xmm8, xmm14 |
466 | psrld xmm14, 8 |
467 | pslld xmm8, 24 |
468 | pxor xmm14, xmm8 |
469 | movdqa xmm8, xmm15 |
470 | psrld xmm15, 8 |
471 | pslld xmm8, 24 |
472 | pxor xmm15, xmm8 |
473 | movdqa xmm8, xmmword ptr [rsp+0x100] |
474 | paddd xmm8, xmm12 |
475 | paddd xmm9, xmm13 |
476 | paddd xmm10, xmm14 |
477 | paddd xmm11, xmm15 |
478 | pxor xmm4, xmm8 |
479 | pxor xmm5, xmm9 |
480 | pxor xmm6, xmm10 |
481 | pxor xmm7, xmm11 |
482 | movdqa xmmword ptr [rsp+0x100], xmm8 |
483 | movdqa xmm8, xmm4 |
484 | psrld xmm8, 7 |
485 | pslld xmm4, 25 |
486 | por xmm4, xmm8 |
487 | movdqa xmm8, xmm5 |
488 | psrld xmm8, 7 |
489 | pslld xmm5, 25 |
490 | por xmm5, xmm8 |
491 | movdqa xmm8, xmm6 |
492 | psrld xmm8, 7 |
493 | pslld xmm6, 25 |
494 | por xmm6, xmm8 |
495 | movdqa xmm8, xmm7 |
496 | psrld xmm8, 7 |
497 | pslld xmm7, 25 |
498 | por xmm7, xmm8 |
499 | paddd xmm0, xmmword ptr [rsp+0x10] |
500 | paddd xmm1, xmmword ptr [rsp+0xC0] |
501 | paddd xmm2, xmmword ptr [rsp+0x90] |
502 | paddd xmm3, xmmword ptr [rsp+0xF0] |
503 | paddd xmm0, xmm5 |
504 | paddd xmm1, xmm6 |
505 | paddd xmm2, xmm7 |
506 | paddd xmm3, xmm4 |
507 | pxor xmm15, xmm0 |
508 | pxor xmm12, xmm1 |
509 | pxor xmm13, xmm2 |
510 | pxor xmm14, xmm3 |
511 | pshuflw xmm15, xmm15, 0xB1 |
512 | pshufhw xmm15, xmm15, 0xB1 |
513 | pshuflw xmm12, xmm12, 0xB1 |
514 | pshufhw xmm12, xmm12, 0xB1 |
515 | pshuflw xmm13, xmm13, 0xB1 |
516 | pshufhw xmm13, xmm13, 0xB1 |
517 | pshuflw xmm14, xmm14, 0xB1 |
518 | pshufhw xmm14, xmm14, 0xB1 |
519 | paddd xmm10, xmm15 |
520 | paddd xmm11, xmm12 |
521 | movdqa xmm8, xmmword ptr [rsp+0x100] |
522 | paddd xmm8, xmm13 |
523 | paddd xmm9, xmm14 |
524 | pxor xmm5, xmm10 |
525 | pxor xmm6, xmm11 |
526 | pxor xmm7, xmm8 |
527 | pxor xmm4, xmm9 |
528 | movdqa xmmword ptr [rsp+0x100], xmm8 |
529 | movdqa xmm8, xmm5 |
530 | psrld xmm8, 12 |
531 | pslld xmm5, 20 |
532 | por xmm5, xmm8 |
533 | movdqa xmm8, xmm6 |
534 | psrld xmm8, 12 |
535 | pslld xmm6, 20 |
536 | por xmm6, xmm8 |
537 | movdqa xmm8, xmm7 |
538 | psrld xmm8, 12 |
539 | pslld xmm7, 20 |
540 | por xmm7, xmm8 |
541 | movdqa xmm8, xmm4 |
542 | psrld xmm8, 12 |
543 | pslld xmm4, 20 |
544 | por xmm4, xmm8 |
545 | paddd xmm0, xmmword ptr [rsp+0xB0] |
546 | paddd xmm1, xmmword ptr [rsp+0x50] |
547 | paddd xmm2, xmmword ptr [rsp+0xE0] |
548 | paddd xmm3, xmmword ptr [rsp+0x80] |
549 | paddd xmm0, xmm5 |
550 | paddd xmm1, xmm6 |
551 | paddd xmm2, xmm7 |
552 | paddd xmm3, xmm4 |
553 | pxor xmm15, xmm0 |
554 | pxor xmm12, xmm1 |
555 | pxor xmm13, xmm2 |
556 | pxor xmm14, xmm3 |
557 | movdqa xmm8, xmm15 |
558 | psrld xmm15, 8 |
559 | pslld xmm8, 24 |
560 | pxor xmm15, xmm8 |
561 | movdqa xmm8, xmm12 |
562 | psrld xmm12, 8 |
563 | pslld xmm8, 24 |
564 | pxor xmm12, xmm8 |
565 | movdqa xmm8, xmm13 |
566 | psrld xmm13, 8 |
567 | pslld xmm8, 24 |
568 | pxor xmm13, xmm8 |
569 | movdqa xmm8, xmm14 |
570 | psrld xmm14, 8 |
571 | pslld xmm8, 24 |
572 | pxor xmm14, xmm8 |
573 | paddd xmm10, xmm15 |
574 | paddd xmm11, xmm12 |
575 | movdqa xmm8, xmmword ptr [rsp+0x100] |
576 | paddd xmm8, xmm13 |
577 | paddd xmm9, xmm14 |
578 | pxor xmm5, xmm10 |
579 | pxor xmm6, xmm11 |
580 | pxor xmm7, xmm8 |
581 | pxor xmm4, xmm9 |
582 | movdqa xmmword ptr [rsp+0x100], xmm8 |
583 | movdqa xmm8, xmm5 |
584 | psrld xmm8, 7 |
585 | pslld xmm5, 25 |
586 | por xmm5, xmm8 |
587 | movdqa xmm8, xmm6 |
588 | psrld xmm8, 7 |
589 | pslld xmm6, 25 |
590 | por xmm6, xmm8 |
591 | movdqa xmm8, xmm7 |
592 | psrld xmm8, 7 |
593 | pslld xmm7, 25 |
594 | por xmm7, xmm8 |
595 | movdqa xmm8, xmm4 |
596 | psrld xmm8, 7 |
597 | pslld xmm4, 25 |
598 | por xmm4, xmm8 |
599 | paddd xmm0, xmmword ptr [rsp+0x30] |
600 | paddd xmm1, xmmword ptr [rsp+0xA0] |
601 | paddd xmm2, xmmword ptr [rsp+0xD0] |
602 | paddd xmm3, xmmword ptr [rsp+0x70] |
603 | paddd xmm0, xmm4 |
604 | paddd xmm1, xmm5 |
605 | paddd xmm2, xmm6 |
606 | paddd xmm3, xmm7 |
607 | pxor xmm12, xmm0 |
608 | pxor xmm13, xmm1 |
609 | pxor xmm14, xmm2 |
610 | pxor xmm15, xmm3 |
611 | pshuflw xmm12, xmm12, 0xB1 |
612 | pshufhw xmm12, xmm12, 0xB1 |
613 | pshuflw xmm13, xmm13, 0xB1 |
614 | pshufhw xmm13, xmm13, 0xB1 |
615 | pshuflw xmm14, xmm14, 0xB1 |
616 | pshufhw xmm14, xmm14, 0xB1 |
617 | pshuflw xmm15, xmm15, 0xB1 |
618 | pshufhw xmm15, xmm15, 0xB1 |
619 | movdqa xmm8, xmmword ptr [rsp+0x100] |
620 | paddd xmm8, xmm12 |
621 | paddd xmm9, xmm13 |
622 | paddd xmm10, xmm14 |
623 | paddd xmm11, xmm15 |
624 | pxor xmm4, xmm8 |
625 | pxor xmm5, xmm9 |
626 | pxor xmm6, xmm10 |
627 | pxor xmm7, xmm11 |
628 | movdqa xmmword ptr [rsp+0x100], xmm8 |
629 | movdqa xmm8, xmm4 |
630 | psrld xmm8, 12 |
631 | pslld xmm4, 20 |
632 | por xmm4, xmm8 |
633 | movdqa xmm8, xmm5 |
634 | psrld xmm8, 12 |
635 | pslld xmm5, 20 |
636 | por xmm5, xmm8 |
637 | movdqa xmm8, xmm6 |
638 | psrld xmm8, 12 |
639 | pslld xmm6, 20 |
640 | por xmm6, xmm8 |
641 | movdqa xmm8, xmm7 |
642 | psrld xmm8, 12 |
643 | pslld xmm7, 20 |
644 | por xmm7, xmm8 |
645 | paddd xmm0, xmmword ptr [rsp+0x40] |
646 | paddd xmm1, xmmword ptr [rsp+0xC0] |
647 | paddd xmm2, xmmword ptr [rsp+0x20] |
648 | paddd xmm3, xmmword ptr [rsp+0xE0] |
649 | paddd xmm0, xmm4 |
650 | paddd xmm1, xmm5 |
651 | paddd xmm2, xmm6 |
652 | paddd xmm3, xmm7 |
653 | pxor xmm12, xmm0 |
654 | pxor xmm13, xmm1 |
655 | pxor xmm14, xmm2 |
656 | pxor xmm15, xmm3 |
657 | movdqa xmm8, xmm12 |
658 | psrld xmm12, 8 |
659 | pslld xmm8, 24 |
660 | pxor xmm12, xmm8 |
661 | movdqa xmm8, xmm13 |
662 | psrld xmm13, 8 |
663 | pslld xmm8, 24 |
664 | pxor xmm13, xmm8 |
665 | movdqa xmm8, xmm14 |
666 | psrld xmm14, 8 |
667 | pslld xmm8, 24 |
668 | pxor xmm14, xmm8 |
669 | movdqa xmm8, xmm15 |
670 | psrld xmm15, 8 |
671 | pslld xmm8, 24 |
672 | pxor xmm15, xmm8 |
673 | movdqa xmm8, xmmword ptr [rsp+0x100] |
674 | paddd xmm8, xmm12 |
675 | paddd xmm9, xmm13 |
676 | paddd xmm10, xmm14 |
677 | paddd xmm11, xmm15 |
678 | pxor xmm4, xmm8 |
679 | pxor xmm5, xmm9 |
680 | pxor xmm6, xmm10 |
681 | pxor xmm7, xmm11 |
682 | movdqa xmmword ptr [rsp+0x100], xmm8 |
683 | movdqa xmm8, xmm4 |
684 | psrld xmm8, 7 |
685 | pslld xmm4, 25 |
686 | por xmm4, xmm8 |
687 | movdqa xmm8, xmm5 |
688 | psrld xmm8, 7 |
689 | pslld xmm5, 25 |
690 | por xmm5, xmm8 |
691 | movdqa xmm8, xmm6 |
692 | psrld xmm8, 7 |
693 | pslld xmm6, 25 |
694 | por xmm6, xmm8 |
695 | movdqa xmm8, xmm7 |
696 | psrld xmm8, 7 |
697 | pslld xmm7, 25 |
698 | por xmm7, xmm8 |
699 | paddd xmm0, xmmword ptr [rsp+0x60] |
700 | paddd xmm1, xmmword ptr [rsp+0x90] |
701 | paddd xmm2, xmmword ptr [rsp+0xB0] |
702 | paddd xmm3, xmmword ptr [rsp+0x80] |
703 | paddd xmm0, xmm5 |
704 | paddd xmm1, xmm6 |
705 | paddd xmm2, xmm7 |
706 | paddd xmm3, xmm4 |
707 | pxor xmm15, xmm0 |
708 | pxor xmm12, xmm1 |
709 | pxor xmm13, xmm2 |
710 | pxor xmm14, xmm3 |
711 | pshuflw xmm15, xmm15, 0xB1 |
712 | pshufhw xmm15, xmm15, 0xB1 |
713 | pshuflw xmm12, xmm12, 0xB1 |
714 | pshufhw xmm12, xmm12, 0xB1 |
715 | pshuflw xmm13, xmm13, 0xB1 |
716 | pshufhw xmm13, xmm13, 0xB1 |
717 | pshuflw xmm14, xmm14, 0xB1 |
718 | pshufhw xmm14, xmm14, 0xB1 |
719 | paddd xmm10, xmm15 |
720 | paddd xmm11, xmm12 |
721 | movdqa xmm8, xmmword ptr [rsp+0x100] |
722 | paddd xmm8, xmm13 |
723 | paddd xmm9, xmm14 |
724 | pxor xmm5, xmm10 |
725 | pxor xmm6, xmm11 |
726 | pxor xmm7, xmm8 |
727 | pxor xmm4, xmm9 |
728 | movdqa xmmword ptr [rsp+0x100], xmm8 |
729 | movdqa xmm8, xmm5 |
730 | psrld xmm8, 12 |
731 | pslld xmm5, 20 |
732 | por xmm5, xmm8 |
733 | movdqa xmm8, xmm6 |
734 | psrld xmm8, 12 |
735 | pslld xmm6, 20 |
736 | por xmm6, xmm8 |
737 | movdqa xmm8, xmm7 |
738 | psrld xmm8, 12 |
739 | pslld xmm7, 20 |
740 | por xmm7, xmm8 |
741 | movdqa xmm8, xmm4 |
742 | psrld xmm8, 12 |
743 | pslld xmm4, 20 |
744 | por xmm4, xmm8 |
745 | paddd xmm0, xmmword ptr [rsp+0x50] |
746 | paddd xmm1, xmmword ptr [rsp] |
747 | paddd xmm2, xmmword ptr [rsp+0xF0] |
748 | paddd xmm3, xmmword ptr [rsp+0x10] |
749 | paddd xmm0, xmm5 |
750 | paddd xmm1, xmm6 |
751 | paddd xmm2, xmm7 |
752 | paddd xmm3, xmm4 |
753 | pxor xmm15, xmm0 |
754 | pxor xmm12, xmm1 |
755 | pxor xmm13, xmm2 |
756 | pxor xmm14, xmm3 |
757 | movdqa xmm8, xmm15 |
758 | psrld xmm15, 8 |
759 | pslld xmm8, 24 |
760 | pxor xmm15, xmm8 |
761 | movdqa xmm8, xmm12 |
762 | psrld xmm12, 8 |
763 | pslld xmm8, 24 |
764 | pxor xmm12, xmm8 |
765 | movdqa xmm8, xmm13 |
766 | psrld xmm13, 8 |
767 | pslld xmm8, 24 |
768 | pxor xmm13, xmm8 |
769 | movdqa xmm8, xmm14 |
770 | psrld xmm14, 8 |
771 | pslld xmm8, 24 |
772 | pxor xmm14, xmm8 |
773 | paddd xmm10, xmm15 |
774 | paddd xmm11, xmm12 |
775 | movdqa xmm8, xmmword ptr [rsp+0x100] |
776 | paddd xmm8, xmm13 |
777 | paddd xmm9, xmm14 |
778 | pxor xmm5, xmm10 |
779 | pxor xmm6, xmm11 |
780 | pxor xmm7, xmm8 |
781 | pxor xmm4, xmm9 |
782 | movdqa xmmword ptr [rsp+0x100], xmm8 |
783 | movdqa xmm8, xmm5 |
784 | psrld xmm8, 7 |
785 | pslld xmm5, 25 |
786 | por xmm5, xmm8 |
787 | movdqa xmm8, xmm6 |
788 | psrld xmm8, 7 |
789 | pslld xmm6, 25 |
790 | por xmm6, xmm8 |
791 | movdqa xmm8, xmm7 |
792 | psrld xmm8, 7 |
793 | pslld xmm7, 25 |
794 | por xmm7, xmm8 |
795 | movdqa xmm8, xmm4 |
796 | psrld xmm8, 7 |
797 | pslld xmm4, 25 |
798 | por xmm4, xmm8 |
799 | paddd xmm0, xmmword ptr [rsp+0xA0] |
800 | paddd xmm1, xmmword ptr [rsp+0xC0] |
801 | paddd xmm2, xmmword ptr [rsp+0xE0] |
802 | paddd xmm3, xmmword ptr [rsp+0xD0] |
803 | paddd xmm0, xmm4 |
804 | paddd xmm1, xmm5 |
805 | paddd xmm2, xmm6 |
806 | paddd xmm3, xmm7 |
807 | pxor xmm12, xmm0 |
808 | pxor xmm13, xmm1 |
809 | pxor xmm14, xmm2 |
810 | pxor xmm15, xmm3 |
811 | pshuflw xmm12, xmm12, 0xB1 |
812 | pshufhw xmm12, xmm12, 0xB1 |
813 | pshuflw xmm13, xmm13, 0xB1 |
814 | pshufhw xmm13, xmm13, 0xB1 |
815 | pshuflw xmm14, xmm14, 0xB1 |
816 | pshufhw xmm14, xmm14, 0xB1 |
817 | pshuflw xmm15, xmm15, 0xB1 |
818 | pshufhw xmm15, xmm15, 0xB1 |
819 | movdqa xmm8, xmmword ptr [rsp+0x100] |
820 | paddd xmm8, xmm12 |
821 | paddd xmm9, xmm13 |
822 | paddd xmm10, xmm14 |
823 | paddd xmm11, xmm15 |
824 | pxor xmm4, xmm8 |
825 | pxor xmm5, xmm9 |
826 | pxor xmm6, xmm10 |
827 | pxor xmm7, xmm11 |
828 | movdqa xmmword ptr [rsp+0x100], xmm8 |
829 | movdqa xmm8, xmm4 |
830 | psrld xmm8, 12 |
831 | pslld xmm4, 20 |
832 | por xmm4, xmm8 |
833 | movdqa xmm8, xmm5 |
834 | psrld xmm8, 12 |
835 | pslld xmm5, 20 |
836 | por xmm5, xmm8 |
837 | movdqa xmm8, xmm6 |
838 | psrld xmm8, 12 |
839 | pslld xmm6, 20 |
840 | por xmm6, xmm8 |
841 | movdqa xmm8, xmm7 |
842 | psrld xmm8, 12 |
843 | pslld xmm7, 20 |
844 | por xmm7, xmm8 |
845 | paddd xmm0, xmmword ptr [rsp+0x70] |
846 | paddd xmm1, xmmword ptr [rsp+0x90] |
847 | paddd xmm2, xmmword ptr [rsp+0x30] |
848 | paddd xmm3, xmmword ptr [rsp+0xF0] |
849 | paddd xmm0, xmm4 |
850 | paddd xmm1, xmm5 |
851 | paddd xmm2, xmm6 |
852 | paddd xmm3, xmm7 |
853 | pxor xmm12, xmm0 |
854 | pxor xmm13, xmm1 |
855 | pxor xmm14, xmm2 |
856 | pxor xmm15, xmm3 |
857 | movdqa xmm8, xmm12 |
858 | psrld xmm12, 8 |
859 | pslld xmm8, 24 |
860 | pxor xmm12, xmm8 |
861 | movdqa xmm8, xmm13 |
862 | psrld xmm13, 8 |
863 | pslld xmm8, 24 |
864 | pxor xmm13, xmm8 |
865 | movdqa xmm8, xmm14 |
866 | psrld xmm14, 8 |
867 | pslld xmm8, 24 |
868 | pxor xmm14, xmm8 |
869 | movdqa xmm8, xmm15 |
870 | psrld xmm15, 8 |
871 | pslld xmm8, 24 |
872 | pxor xmm15, xmm8 |
873 | movdqa xmm8, xmmword ptr [rsp+0x100] |
874 | paddd xmm8, xmm12 |
875 | paddd xmm9, xmm13 |
876 | paddd xmm10, xmm14 |
877 | paddd xmm11, xmm15 |
878 | pxor xmm4, xmm8 |
879 | pxor xmm5, xmm9 |
880 | pxor xmm6, xmm10 |
881 | pxor xmm7, xmm11 |
882 | movdqa xmmword ptr [rsp+0x100], xmm8 |
883 | movdqa xmm8, xmm4 |
884 | psrld xmm8, 7 |
885 | pslld xmm4, 25 |
886 | por xmm4, xmm8 |
887 | movdqa xmm8, xmm5 |
888 | psrld xmm8, 7 |
889 | pslld xmm5, 25 |
890 | por xmm5, xmm8 |
891 | movdqa xmm8, xmm6 |
892 | psrld xmm8, 7 |
893 | pslld xmm6, 25 |
894 | por xmm6, xmm8 |
895 | movdqa xmm8, xmm7 |
896 | psrld xmm8, 7 |
897 | pslld xmm7, 25 |
898 | por xmm7, xmm8 |
899 | paddd xmm0, xmmword ptr [rsp+0x40] |
900 | paddd xmm1, xmmword ptr [rsp+0xB0] |
901 | paddd xmm2, xmmword ptr [rsp+0x50] |
902 | paddd xmm3, xmmword ptr [rsp+0x10] |
903 | paddd xmm0, xmm5 |
904 | paddd xmm1, xmm6 |
905 | paddd xmm2, xmm7 |
906 | paddd xmm3, xmm4 |
907 | pxor xmm15, xmm0 |
908 | pxor xmm12, xmm1 |
909 | pxor xmm13, xmm2 |
910 | pxor xmm14, xmm3 |
911 | pshuflw xmm15, xmm15, 0xB1 |
912 | pshufhw xmm15, xmm15, 0xB1 |
913 | pshuflw xmm12, xmm12, 0xB1 |
914 | pshufhw xmm12, xmm12, 0xB1 |
915 | pshuflw xmm13, xmm13, 0xB1 |
916 | pshufhw xmm13, xmm13, 0xB1 |
917 | pshuflw xmm14, xmm14, 0xB1 |
918 | pshufhw xmm14, xmm14, 0xB1 |
919 | paddd xmm10, xmm15 |
920 | paddd xmm11, xmm12 |
921 | movdqa xmm8, xmmword ptr [rsp+0x100] |
922 | paddd xmm8, xmm13 |
923 | paddd xmm9, xmm14 |
924 | pxor xmm5, xmm10 |
925 | pxor xmm6, xmm11 |
926 | pxor xmm7, xmm8 |
927 | pxor xmm4, xmm9 |
928 | movdqa xmmword ptr [rsp+0x100], xmm8 |
929 | movdqa xmm8, xmm5 |
930 | psrld xmm8, 12 |
931 | pslld xmm5, 20 |
932 | por xmm5, xmm8 |
933 | movdqa xmm8, xmm6 |
934 | psrld xmm8, 12 |
935 | pslld xmm6, 20 |
936 | por xmm6, xmm8 |
937 | movdqa xmm8, xmm7 |
938 | psrld xmm8, 12 |
939 | pslld xmm7, 20 |
940 | por xmm7, xmm8 |
941 | movdqa xmm8, xmm4 |
942 | psrld xmm8, 12 |
943 | pslld xmm4, 20 |
944 | por xmm4, xmm8 |
945 | paddd xmm0, xmmword ptr [rsp] |
946 | paddd xmm1, xmmword ptr [rsp+0x20] |
947 | paddd xmm2, xmmword ptr [rsp+0x80] |
948 | paddd xmm3, xmmword ptr [rsp+0x60] |
949 | paddd xmm0, xmm5 |
950 | paddd xmm1, xmm6 |
951 | paddd xmm2, xmm7 |
952 | paddd xmm3, xmm4 |
953 | pxor xmm15, xmm0 |
954 | pxor xmm12, xmm1 |
955 | pxor xmm13, xmm2 |
956 | pxor xmm14, xmm3 |
957 | movdqa xmm8, xmm15 |
958 | psrld xmm15, 8 |
959 | pslld xmm8, 24 |
960 | pxor xmm15, xmm8 |
961 | movdqa xmm8, xmm12 |
962 | psrld xmm12, 8 |
963 | pslld xmm8, 24 |
964 | pxor xmm12, xmm8 |
965 | movdqa xmm8, xmm13 |
966 | psrld xmm13, 8 |
967 | pslld xmm8, 24 |
968 | pxor xmm13, xmm8 |
969 | movdqa xmm8, xmm14 |
970 | psrld xmm14, 8 |
971 | pslld xmm8, 24 |
972 | pxor xmm14, xmm8 |
973 | paddd xmm10, xmm15 |
974 | paddd xmm11, xmm12 |
975 | movdqa xmm8, xmmword ptr [rsp+0x100] |
976 | paddd xmm8, xmm13 |
977 | paddd xmm9, xmm14 |
978 | pxor xmm5, xmm10 |
979 | pxor xmm6, xmm11 |
980 | pxor xmm7, xmm8 |
981 | pxor xmm4, xmm9 |
982 | movdqa xmmword ptr [rsp+0x100], xmm8 |
983 | movdqa xmm8, xmm5 |
984 | psrld xmm8, 7 |
985 | pslld xmm5, 25 |
986 | por xmm5, xmm8 |
987 | movdqa xmm8, xmm6 |
988 | psrld xmm8, 7 |
989 | pslld xmm6, 25 |
990 | por xmm6, xmm8 |
991 | movdqa xmm8, xmm7 |
992 | psrld xmm8, 7 |
993 | pslld xmm7, 25 |
994 | por xmm7, xmm8 |
995 | movdqa xmm8, xmm4 |
996 | psrld xmm8, 7 |
997 | pslld xmm4, 25 |
998 | por xmm4, xmm8 |
999 | paddd xmm0, xmmword ptr [rsp+0xC0] |
1000 | paddd xmm1, xmmword ptr [rsp+0x90] |
1001 | paddd xmm2, xmmword ptr [rsp+0xF0] |
1002 | paddd xmm3, xmmword ptr [rsp+0xE0] |
1003 | paddd xmm0, xmm4 |
1004 | paddd xmm1, xmm5 |
1005 | paddd xmm2, xmm6 |
1006 | paddd xmm3, xmm7 |
1007 | pxor xmm12, xmm0 |
1008 | pxor xmm13, xmm1 |
1009 | pxor xmm14, xmm2 |
1010 | pxor xmm15, xmm3 |
1011 | pshuflw xmm12, xmm12, 0xB1 |
1012 | pshufhw xmm12, xmm12, 0xB1 |
1013 | pshuflw xmm13, xmm13, 0xB1 |
1014 | pshufhw xmm13, xmm13, 0xB1 |
1015 | pshuflw xmm14, xmm14, 0xB1 |
1016 | pshufhw xmm14, xmm14, 0xB1 |
1017 | pshuflw xmm15, xmm15, 0xB1 |
1018 | pshufhw xmm15, xmm15, 0xB1 |
1019 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1020 | paddd xmm8, xmm12 |
1021 | paddd xmm9, xmm13 |
1022 | paddd xmm10, xmm14 |
1023 | paddd xmm11, xmm15 |
1024 | pxor xmm4, xmm8 |
1025 | pxor xmm5, xmm9 |
1026 | pxor xmm6, xmm10 |
1027 | pxor xmm7, xmm11 |
1028 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1029 | movdqa xmm8, xmm4 |
1030 | psrld xmm8, 12 |
1031 | pslld xmm4, 20 |
1032 | por xmm4, xmm8 |
1033 | movdqa xmm8, xmm5 |
1034 | psrld xmm8, 12 |
1035 | pslld xmm5, 20 |
1036 | por xmm5, xmm8 |
1037 | movdqa xmm8, xmm6 |
1038 | psrld xmm8, 12 |
1039 | pslld xmm6, 20 |
1040 | por xmm6, xmm8 |
1041 | movdqa xmm8, xmm7 |
1042 | psrld xmm8, 12 |
1043 | pslld xmm7, 20 |
1044 | por xmm7, xmm8 |
1045 | paddd xmm0, xmmword ptr [rsp+0xD0] |
1046 | paddd xmm1, xmmword ptr [rsp+0xB0] |
1047 | paddd xmm2, xmmword ptr [rsp+0xA0] |
1048 | paddd xmm3, xmmword ptr [rsp+0x80] |
1049 | paddd xmm0, xmm4 |
1050 | paddd xmm1, xmm5 |
1051 | paddd xmm2, xmm6 |
1052 | paddd xmm3, xmm7 |
1053 | pxor xmm12, xmm0 |
1054 | pxor xmm13, xmm1 |
1055 | pxor xmm14, xmm2 |
1056 | pxor xmm15, xmm3 |
1057 | movdqa xmm8, xmm12 |
1058 | psrld xmm12, 8 |
1059 | pslld xmm8, 24 |
1060 | pxor xmm12, xmm8 |
1061 | movdqa xmm8, xmm13 |
1062 | psrld xmm13, 8 |
1063 | pslld xmm8, 24 |
1064 | pxor xmm13, xmm8 |
1065 | movdqa xmm8, xmm14 |
1066 | psrld xmm14, 8 |
1067 | pslld xmm8, 24 |
1068 | pxor xmm14, xmm8 |
1069 | movdqa xmm8, xmm15 |
1070 | psrld xmm15, 8 |
1071 | pslld xmm8, 24 |
1072 | pxor xmm15, xmm8 |
1073 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1074 | paddd xmm8, xmm12 |
1075 | paddd xmm9, xmm13 |
1076 | paddd xmm10, xmm14 |
1077 | paddd xmm11, xmm15 |
1078 | pxor xmm4, xmm8 |
1079 | pxor xmm5, xmm9 |
1080 | pxor xmm6, xmm10 |
1081 | pxor xmm7, xmm11 |
1082 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1083 | movdqa xmm8, xmm4 |
1084 | psrld xmm8, 7 |
1085 | pslld xmm4, 25 |
1086 | por xmm4, xmm8 |
1087 | movdqa xmm8, xmm5 |
1088 | psrld xmm8, 7 |
1089 | pslld xmm5, 25 |
1090 | por xmm5, xmm8 |
1091 | movdqa xmm8, xmm6 |
1092 | psrld xmm8, 7 |
1093 | pslld xmm6, 25 |
1094 | por xmm6, xmm8 |
1095 | movdqa xmm8, xmm7 |
1096 | psrld xmm8, 7 |
1097 | pslld xmm7, 25 |
1098 | por xmm7, xmm8 |
1099 | paddd xmm0, xmmword ptr [rsp+0x70] |
1100 | paddd xmm1, xmmword ptr [rsp+0x50] |
1101 | paddd xmm2, xmmword ptr [rsp] |
1102 | paddd xmm3, xmmword ptr [rsp+0x60] |
1103 | paddd xmm0, xmm5 |
1104 | paddd xmm1, xmm6 |
1105 | paddd xmm2, xmm7 |
1106 | paddd xmm3, xmm4 |
1107 | pxor xmm15, xmm0 |
1108 | pxor xmm12, xmm1 |
1109 | pxor xmm13, xmm2 |
1110 | pxor xmm14, xmm3 |
1111 | pshuflw xmm15, xmm15, 0xB1 |
1112 | pshufhw xmm15, xmm15, 0xB1 |
1113 | pshuflw xmm12, xmm12, 0xB1 |
1114 | pshufhw xmm12, xmm12, 0xB1 |
1115 | pshuflw xmm13, xmm13, 0xB1 |
1116 | pshufhw xmm13, xmm13, 0xB1 |
1117 | pshuflw xmm14, xmm14, 0xB1 |
1118 | pshufhw xmm14, xmm14, 0xB1 |
1119 | paddd xmm10, xmm15 |
1120 | paddd xmm11, xmm12 |
1121 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1122 | paddd xmm8, xmm13 |
1123 | paddd xmm9, xmm14 |
1124 | pxor xmm5, xmm10 |
1125 | pxor xmm6, xmm11 |
1126 | pxor xmm7, xmm8 |
1127 | pxor xmm4, xmm9 |
1128 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1129 | movdqa xmm8, xmm5 |
1130 | psrld xmm8, 12 |
1131 | pslld xmm5, 20 |
1132 | por xmm5, xmm8 |
1133 | movdqa xmm8, xmm6 |
1134 | psrld xmm8, 12 |
1135 | pslld xmm6, 20 |
1136 | por xmm6, xmm8 |
1137 | movdqa xmm8, xmm7 |
1138 | psrld xmm8, 12 |
1139 | pslld xmm7, 20 |
1140 | por xmm7, xmm8 |
1141 | movdqa xmm8, xmm4 |
1142 | psrld xmm8, 12 |
1143 | pslld xmm4, 20 |
1144 | por xmm4, xmm8 |
1145 | paddd xmm0, xmmword ptr [rsp+0x20] |
1146 | paddd xmm1, xmmword ptr [rsp+0x30] |
1147 | paddd xmm2, xmmword ptr [rsp+0x10] |
1148 | paddd xmm3, xmmword ptr [rsp+0x40] |
1149 | paddd xmm0, xmm5 |
1150 | paddd xmm1, xmm6 |
1151 | paddd xmm2, xmm7 |
1152 | paddd xmm3, xmm4 |
1153 | pxor xmm15, xmm0 |
1154 | pxor xmm12, xmm1 |
1155 | pxor xmm13, xmm2 |
1156 | pxor xmm14, xmm3 |
1157 | movdqa xmm8, xmm15 |
1158 | psrld xmm15, 8 |
1159 | pslld xmm8, 24 |
1160 | pxor xmm15, xmm8 |
1161 | movdqa xmm8, xmm12 |
1162 | psrld xmm12, 8 |
1163 | pslld xmm8, 24 |
1164 | pxor xmm12, xmm8 |
1165 | movdqa xmm8, xmm13 |
1166 | psrld xmm13, 8 |
1167 | pslld xmm8, 24 |
1168 | pxor xmm13, xmm8 |
1169 | movdqa xmm8, xmm14 |
1170 | psrld xmm14, 8 |
1171 | pslld xmm8, 24 |
1172 | pxor xmm14, xmm8 |
1173 | paddd xmm10, xmm15 |
1174 | paddd xmm11, xmm12 |
1175 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1176 | paddd xmm8, xmm13 |
1177 | paddd xmm9, xmm14 |
1178 | pxor xmm5, xmm10 |
1179 | pxor xmm6, xmm11 |
1180 | pxor xmm7, xmm8 |
1181 | pxor xmm4, xmm9 |
1182 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1183 | movdqa xmm8, xmm5 |
1184 | psrld xmm8, 7 |
1185 | pslld xmm5, 25 |
1186 | por xmm5, xmm8 |
1187 | movdqa xmm8, xmm6 |
1188 | psrld xmm8, 7 |
1189 | pslld xmm6, 25 |
1190 | por xmm6, xmm8 |
1191 | movdqa xmm8, xmm7 |
1192 | psrld xmm8, 7 |
1193 | pslld xmm7, 25 |
1194 | por xmm7, xmm8 |
1195 | movdqa xmm8, xmm4 |
1196 | psrld xmm8, 7 |
1197 | pslld xmm4, 25 |
1198 | por xmm4, xmm8 |
1199 | paddd xmm0, xmmword ptr [rsp+0x90] |
1200 | paddd xmm1, xmmword ptr [rsp+0xB0] |
1201 | paddd xmm2, xmmword ptr [rsp+0x80] |
1202 | paddd xmm3, xmmword ptr [rsp+0xF0] |
1203 | paddd xmm0, xmm4 |
1204 | paddd xmm1, xmm5 |
1205 | paddd xmm2, xmm6 |
1206 | paddd xmm3, xmm7 |
1207 | pxor xmm12, xmm0 |
1208 | pxor xmm13, xmm1 |
1209 | pxor xmm14, xmm2 |
1210 | pxor xmm15, xmm3 |
1211 | pshuflw xmm12, xmm12, 0xB1 |
1212 | pshufhw xmm12, xmm12, 0xB1 |
1213 | pshuflw xmm13, xmm13, 0xB1 |
1214 | pshufhw xmm13, xmm13, 0xB1 |
1215 | pshuflw xmm14, xmm14, 0xB1 |
1216 | pshufhw xmm14, xmm14, 0xB1 |
1217 | pshuflw xmm15, xmm15, 0xB1 |
1218 | pshufhw xmm15, xmm15, 0xB1 |
1219 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1220 | paddd xmm8, xmm12 |
1221 | paddd xmm9, xmm13 |
1222 | paddd xmm10, xmm14 |
1223 | paddd xmm11, xmm15 |
1224 | pxor xmm4, xmm8 |
1225 | pxor xmm5, xmm9 |
1226 | pxor xmm6, xmm10 |
1227 | pxor xmm7, xmm11 |
1228 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1229 | movdqa xmm8, xmm4 |
1230 | psrld xmm8, 12 |
1231 | pslld xmm4, 20 |
1232 | por xmm4, xmm8 |
1233 | movdqa xmm8, xmm5 |
1234 | psrld xmm8, 12 |
1235 | pslld xmm5, 20 |
1236 | por xmm5, xmm8 |
1237 | movdqa xmm8, xmm6 |
1238 | psrld xmm8, 12 |
1239 | pslld xmm6, 20 |
1240 | por xmm6, xmm8 |
1241 | movdqa xmm8, xmm7 |
1242 | psrld xmm8, 12 |
1243 | pslld xmm7, 20 |
1244 | por xmm7, xmm8 |
1245 | paddd xmm0, xmmword ptr [rsp+0xE0] |
1246 | paddd xmm1, xmmword ptr [rsp+0x50] |
1247 | paddd xmm2, xmmword ptr [rsp+0xC0] |
1248 | paddd xmm3, xmmword ptr [rsp+0x10] |
1249 | paddd xmm0, xmm4 |
1250 | paddd xmm1, xmm5 |
1251 | paddd xmm2, xmm6 |
1252 | paddd xmm3, xmm7 |
1253 | pxor xmm12, xmm0 |
1254 | pxor xmm13, xmm1 |
1255 | pxor xmm14, xmm2 |
1256 | pxor xmm15, xmm3 |
1257 | movdqa xmm8, xmm12 |
1258 | psrld xmm12, 8 |
1259 | pslld xmm8, 24 |
1260 | pxor xmm12, xmm8 |
1261 | movdqa xmm8, xmm13 |
1262 | psrld xmm13, 8 |
1263 | pslld xmm8, 24 |
1264 | pxor xmm13, xmm8 |
1265 | movdqa xmm8, xmm14 |
1266 | psrld xmm14, 8 |
1267 | pslld xmm8, 24 |
1268 | pxor xmm14, xmm8 |
1269 | movdqa xmm8, xmm15 |
1270 | psrld xmm15, 8 |
1271 | pslld xmm8, 24 |
1272 | pxor xmm15, xmm8 |
1273 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1274 | paddd xmm8, xmm12 |
1275 | paddd xmm9, xmm13 |
1276 | paddd xmm10, xmm14 |
1277 | paddd xmm11, xmm15 |
1278 | pxor xmm4, xmm8 |
1279 | pxor xmm5, xmm9 |
1280 | pxor xmm6, xmm10 |
1281 | pxor xmm7, xmm11 |
1282 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1283 | movdqa xmm8, xmm4 |
1284 | psrld xmm8, 7 |
1285 | pslld xmm4, 25 |
1286 | por xmm4, xmm8 |
1287 | movdqa xmm8, xmm5 |
1288 | psrld xmm8, 7 |
1289 | pslld xmm5, 25 |
1290 | por xmm5, xmm8 |
1291 | movdqa xmm8, xmm6 |
1292 | psrld xmm8, 7 |
1293 | pslld xmm6, 25 |
1294 | por xmm6, xmm8 |
1295 | movdqa xmm8, xmm7 |
1296 | psrld xmm8, 7 |
1297 | pslld xmm7, 25 |
1298 | por xmm7, xmm8 |
1299 | paddd xmm0, xmmword ptr [rsp+0xD0] |
1300 | paddd xmm1, xmmword ptr [rsp] |
1301 | paddd xmm2, xmmword ptr [rsp+0x20] |
1302 | paddd xmm3, xmmword ptr [rsp+0x40] |
1303 | paddd xmm0, xmm5 |
1304 | paddd xmm1, xmm6 |
1305 | paddd xmm2, xmm7 |
1306 | paddd xmm3, xmm4 |
1307 | pxor xmm15, xmm0 |
1308 | pxor xmm12, xmm1 |
1309 | pxor xmm13, xmm2 |
1310 | pxor xmm14, xmm3 |
1311 | pshuflw xmm15, xmm15, 0xB1 |
1312 | pshufhw xmm15, xmm15, 0xB1 |
1313 | pshuflw xmm12, xmm12, 0xB1 |
1314 | pshufhw xmm12, xmm12, 0xB1 |
1315 | pshuflw xmm13, xmm13, 0xB1 |
1316 | pshufhw xmm13, xmm13, 0xB1 |
1317 | pshuflw xmm14, xmm14, 0xB1 |
1318 | pshufhw xmm14, xmm14, 0xB1 |
1319 | paddd xmm10, xmm15 |
1320 | paddd xmm11, xmm12 |
1321 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1322 | paddd xmm8, xmm13 |
1323 | paddd xmm9, xmm14 |
1324 | pxor xmm5, xmm10 |
1325 | pxor xmm6, xmm11 |
1326 | pxor xmm7, xmm8 |
1327 | pxor xmm4, xmm9 |
1328 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1329 | movdqa xmm8, xmm5 |
1330 | psrld xmm8, 12 |
1331 | pslld xmm5, 20 |
1332 | por xmm5, xmm8 |
1333 | movdqa xmm8, xmm6 |
1334 | psrld xmm8, 12 |
1335 | pslld xmm6, 20 |
1336 | por xmm6, xmm8 |
1337 | movdqa xmm8, xmm7 |
1338 | psrld xmm8, 12 |
1339 | pslld xmm7, 20 |
1340 | por xmm7, xmm8 |
1341 | movdqa xmm8, xmm4 |
1342 | psrld xmm8, 12 |
1343 | pslld xmm4, 20 |
1344 | por xmm4, xmm8 |
1345 | paddd xmm0, xmmword ptr [rsp+0x30] |
1346 | paddd xmm1, xmmword ptr [rsp+0xA0] |
1347 | paddd xmm2, xmmword ptr [rsp+0x60] |
1348 | paddd xmm3, xmmword ptr [rsp+0x70] |
1349 | paddd xmm0, xmm5 |
1350 | paddd xmm1, xmm6 |
1351 | paddd xmm2, xmm7 |
1352 | paddd xmm3, xmm4 |
1353 | pxor xmm15, xmm0 |
1354 | pxor xmm12, xmm1 |
1355 | pxor xmm13, xmm2 |
1356 | pxor xmm14, xmm3 |
1357 | movdqa xmm8, xmm15 |
1358 | psrld xmm15, 8 |
1359 | pslld xmm8, 24 |
1360 | pxor xmm15, xmm8 |
1361 | movdqa xmm8, xmm12 |
1362 | psrld xmm12, 8 |
1363 | pslld xmm8, 24 |
1364 | pxor xmm12, xmm8 |
1365 | movdqa xmm8, xmm13 |
1366 | psrld xmm13, 8 |
1367 | pslld xmm8, 24 |
1368 | pxor xmm13, xmm8 |
1369 | movdqa xmm8, xmm14 |
1370 | psrld xmm14, 8 |
1371 | pslld xmm8, 24 |
1372 | pxor xmm14, xmm8 |
1373 | paddd xmm10, xmm15 |
1374 | paddd xmm11, xmm12 |
1375 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1376 | paddd xmm8, xmm13 |
1377 | paddd xmm9, xmm14 |
1378 | pxor xmm5, xmm10 |
1379 | pxor xmm6, xmm11 |
1380 | pxor xmm7, xmm8 |
1381 | pxor xmm4, xmm9 |
1382 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1383 | movdqa xmm8, xmm5 |
1384 | psrld xmm8, 7 |
1385 | pslld xmm5, 25 |
1386 | por xmm5, xmm8 |
1387 | movdqa xmm8, xmm6 |
1388 | psrld xmm8, 7 |
1389 | pslld xmm6, 25 |
1390 | por xmm6, xmm8 |
1391 | movdqa xmm8, xmm7 |
1392 | psrld xmm8, 7 |
1393 | pslld xmm7, 25 |
1394 | por xmm7, xmm8 |
1395 | movdqa xmm8, xmm4 |
1396 | psrld xmm8, 7 |
1397 | pslld xmm4, 25 |
1398 | por xmm4, xmm8 |
1399 | paddd xmm0, xmmword ptr [rsp+0xB0] |
1400 | paddd xmm1, xmmword ptr [rsp+0x50] |
1401 | paddd xmm2, xmmword ptr [rsp+0x10] |
1402 | paddd xmm3, xmmword ptr [rsp+0x80] |
1403 | paddd xmm0, xmm4 |
1404 | paddd xmm1, xmm5 |
1405 | paddd xmm2, xmm6 |
1406 | paddd xmm3, xmm7 |
1407 | pxor xmm12, xmm0 |
1408 | pxor xmm13, xmm1 |
1409 | pxor xmm14, xmm2 |
1410 | pxor xmm15, xmm3 |
1411 | pshuflw xmm12, xmm12, 0xB1 |
1412 | pshufhw xmm12, xmm12, 0xB1 |
1413 | pshuflw xmm13, xmm13, 0xB1 |
1414 | pshufhw xmm13, xmm13, 0xB1 |
1415 | pshuflw xmm14, xmm14, 0xB1 |
1416 | pshufhw xmm14, xmm14, 0xB1 |
1417 | pshuflw xmm15, xmm15, 0xB1 |
1418 | pshufhw xmm15, xmm15, 0xB1 |
1419 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1420 | paddd xmm8, xmm12 |
1421 | paddd xmm9, xmm13 |
1422 | paddd xmm10, xmm14 |
1423 | paddd xmm11, xmm15 |
1424 | pxor xmm4, xmm8 |
1425 | pxor xmm5, xmm9 |
1426 | pxor xmm6, xmm10 |
1427 | pxor xmm7, xmm11 |
1428 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1429 | movdqa xmm8, xmm4 |
1430 | psrld xmm8, 12 |
1431 | pslld xmm4, 20 |
1432 | por xmm4, xmm8 |
1433 | movdqa xmm8, xmm5 |
1434 | psrld xmm8, 12 |
1435 | pslld xmm5, 20 |
1436 | por xmm5, xmm8 |
1437 | movdqa xmm8, xmm6 |
1438 | psrld xmm8, 12 |
1439 | pslld xmm6, 20 |
1440 | por xmm6, xmm8 |
1441 | movdqa xmm8, xmm7 |
1442 | psrld xmm8, 12 |
1443 | pslld xmm7, 20 |
1444 | por xmm7, xmm8 |
1445 | paddd xmm0, xmmword ptr [rsp+0xF0] |
1446 | paddd xmm1, xmmword ptr [rsp] |
1447 | paddd xmm2, xmmword ptr [rsp+0x90] |
1448 | paddd xmm3, xmmword ptr [rsp+0x60] |
1449 | paddd xmm0, xmm4 |
1450 | paddd xmm1, xmm5 |
1451 | paddd xmm2, xmm6 |
1452 | paddd xmm3, xmm7 |
1453 | pxor xmm12, xmm0 |
1454 | pxor xmm13, xmm1 |
1455 | pxor xmm14, xmm2 |
1456 | pxor xmm15, xmm3 |
1457 | movdqa xmm8, xmm12 |
1458 | psrld xmm12, 8 |
1459 | pslld xmm8, 24 |
1460 | pxor xmm12, xmm8 |
1461 | movdqa xmm8, xmm13 |
1462 | psrld xmm13, 8 |
1463 | pslld xmm8, 24 |
1464 | pxor xmm13, xmm8 |
1465 | movdqa xmm8, xmm14 |
1466 | psrld xmm14, 8 |
1467 | pslld xmm8, 24 |
1468 | pxor xmm14, xmm8 |
1469 | movdqa xmm8, xmm15 |
1470 | psrld xmm15, 8 |
1471 | pslld xmm8, 24 |
1472 | pxor xmm15, xmm8 |
1473 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1474 | paddd xmm8, xmm12 |
1475 | paddd xmm9, xmm13 |
1476 | paddd xmm10, xmm14 |
1477 | paddd xmm11, xmm15 |
1478 | pxor xmm4, xmm8 |
1479 | pxor xmm5, xmm9 |
1480 | pxor xmm6, xmm10 |
1481 | pxor xmm7, xmm11 |
1482 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1483 | movdqa xmm8, xmm4 |
1484 | psrld xmm8, 7 |
1485 | pslld xmm4, 25 |
1486 | por xmm4, xmm8 |
1487 | movdqa xmm8, xmm5 |
1488 | psrld xmm8, 7 |
1489 | pslld xmm5, 25 |
1490 | por xmm5, xmm8 |
1491 | movdqa xmm8, xmm6 |
1492 | psrld xmm8, 7 |
1493 | pslld xmm6, 25 |
1494 | por xmm6, xmm8 |
1495 | movdqa xmm8, xmm7 |
1496 | psrld xmm8, 7 |
1497 | pslld xmm7, 25 |
1498 | por xmm7, xmm8 |
1499 | paddd xmm0, xmmword ptr [rsp+0xE0] |
1500 | paddd xmm1, xmmword ptr [rsp+0x20] |
1501 | paddd xmm2, xmmword ptr [rsp+0x30] |
1502 | paddd xmm3, xmmword ptr [rsp+0x70] |
1503 | paddd xmm0, xmm5 |
1504 | paddd xmm1, xmm6 |
1505 | paddd xmm2, xmm7 |
1506 | paddd xmm3, xmm4 |
1507 | pxor xmm15, xmm0 |
1508 | pxor xmm12, xmm1 |
1509 | pxor xmm13, xmm2 |
1510 | pxor xmm14, xmm3 |
1511 | pshuflw xmm15, xmm15, 0xB1 |
1512 | pshufhw xmm15, xmm15, 0xB1 |
1513 | pshuflw xmm12, xmm12, 0xB1 |
1514 | pshufhw xmm12, xmm12, 0xB1 |
1515 | pshuflw xmm13, xmm13, 0xB1 |
1516 | pshufhw xmm13, xmm13, 0xB1 |
1517 | pshuflw xmm14, xmm14, 0xB1 |
1518 | pshufhw xmm14, xmm14, 0xB1 |
1519 | paddd xmm10, xmm15 |
1520 | paddd xmm11, xmm12 |
1521 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1522 | paddd xmm8, xmm13 |
1523 | paddd xmm9, xmm14 |
1524 | pxor xmm5, xmm10 |
1525 | pxor xmm6, xmm11 |
1526 | pxor xmm7, xmm8 |
1527 | pxor xmm4, xmm9 |
1528 | movdqa xmmword ptr [rsp+0x100], xmm8 |
1529 | movdqa xmm8, xmm5 |
1530 | psrld xmm8, 12 |
1531 | pslld xmm5, 20 |
1532 | por xmm5, xmm8 |
1533 | movdqa xmm8, xmm6 |
1534 | psrld xmm8, 12 |
1535 | pslld xmm6, 20 |
1536 | por xmm6, xmm8 |
1537 | movdqa xmm8, xmm7 |
1538 | psrld xmm8, 12 |
1539 | pslld xmm7, 20 |
1540 | por xmm7, xmm8 |
1541 | movdqa xmm8, xmm4 |
1542 | psrld xmm8, 12 |
1543 | pslld xmm4, 20 |
1544 | por xmm4, xmm8 |
1545 | paddd xmm0, xmmword ptr [rsp+0xA0] |
1546 | paddd xmm1, xmmword ptr [rsp+0xC0] |
1547 | paddd xmm2, xmmword ptr [rsp+0x40] |
1548 | paddd xmm3, xmmword ptr [rsp+0xD0] |
1549 | paddd xmm0, xmm5 |
1550 | paddd xmm1, xmm6 |
1551 | paddd xmm2, xmm7 |
1552 | paddd xmm3, xmm4 |
1553 | pxor xmm15, xmm0 |
1554 | pxor xmm12, xmm1 |
1555 | pxor xmm13, xmm2 |
1556 | pxor xmm14, xmm3 |
1557 | movdqa xmm8, xmm15 |
1558 | psrld xmm15, 8 |
1559 | pslld xmm8, 24 |
1560 | pxor xmm15, xmm8 |
1561 | movdqa xmm8, xmm12 |
1562 | psrld xmm12, 8 |
1563 | pslld xmm8, 24 |
1564 | pxor xmm12, xmm8 |
1565 | movdqa xmm8, xmm13 |
1566 | psrld xmm13, 8 |
1567 | pslld xmm8, 24 |
1568 | pxor xmm13, xmm8 |
1569 | movdqa xmm8, xmm14 |
1570 | psrld xmm14, 8 |
1571 | pslld xmm8, 24 |
1572 | pxor xmm14, xmm8 |
1573 | paddd xmm10, xmm15 |
1574 | paddd xmm11, xmm12 |
1575 | movdqa xmm8, xmmword ptr [rsp+0x100] |
1576 | paddd xmm8, xmm13 |
1577 | paddd xmm9, xmm14 |
1578 | pxor xmm5, xmm10 |
1579 | pxor xmm6, xmm11 |
1580 | pxor xmm7, xmm8 |
1581 | pxor xmm4, xmm9 |
1582 | pxor xmm0, xmm8 |
1583 | pxor xmm1, xmm9 |
1584 | pxor xmm2, xmm10 |
1585 | pxor xmm3, xmm11 |
1586 | movdqa xmm8, xmm5 |
1587 | psrld xmm8, 7 |
1588 | pslld xmm5, 25 |
1589 | por xmm5, xmm8 |
1590 | movdqa xmm8, xmm6 |
1591 | psrld xmm8, 7 |
1592 | pslld xmm6, 25 |
1593 | por xmm6, xmm8 |
1594 | movdqa xmm8, xmm7 |
1595 | psrld xmm8, 7 |
1596 | pslld xmm7, 25 |
1597 | por xmm7, xmm8 |
1598 | movdqa xmm8, xmm4 |
1599 | psrld xmm8, 7 |
1600 | pslld xmm4, 25 |
1601 | por xmm4, xmm8 |
1602 | pxor xmm4, xmm12 |
1603 | pxor xmm5, xmm13 |
1604 | pxor xmm6, xmm14 |
1605 | pxor xmm7, xmm15 |
1606 | mov eax, r13d |
1607 | jne 9b |
1608 | movdqa xmm9, xmm0 |
1609 | punpckldq xmm0, xmm1 |
1610 | punpckhdq xmm9, xmm1 |
1611 | movdqa xmm11, xmm2 |
1612 | punpckldq xmm2, xmm3 |
1613 | punpckhdq xmm11, xmm3 |
1614 | movdqa xmm1, xmm0 |
1615 | punpcklqdq xmm0, xmm2 |
1616 | punpckhqdq xmm1, xmm2 |
1617 | movdqa xmm3, xmm9 |
1618 | punpcklqdq xmm9, xmm11 |
1619 | punpckhqdq xmm3, xmm11 |
1620 | movdqu xmmword ptr [rbx], xmm0 |
1621 | movdqu xmmword ptr [rbx+0x20], xmm1 |
1622 | movdqu xmmword ptr [rbx+0x40], xmm9 |
1623 | movdqu xmmword ptr [rbx+0x60], xmm3 |
1624 | movdqa xmm9, xmm4 |
1625 | punpckldq xmm4, xmm5 |
1626 | punpckhdq xmm9, xmm5 |
1627 | movdqa xmm11, xmm6 |
1628 | punpckldq xmm6, xmm7 |
1629 | punpckhdq xmm11, xmm7 |
1630 | movdqa xmm5, xmm4 |
1631 | punpcklqdq xmm4, xmm6 |
1632 | punpckhqdq xmm5, xmm6 |
1633 | movdqa xmm7, xmm9 |
1634 | punpcklqdq xmm9, xmm11 |
1635 | punpckhqdq xmm7, xmm11 |
1636 | movdqu xmmword ptr [rbx+0x10], xmm4 |
1637 | movdqu xmmword ptr [rbx+0x30], xmm5 |
1638 | movdqu xmmword ptr [rbx+0x50], xmm9 |
1639 | movdqu xmmword ptr [rbx+0x70], xmm7 |
1640 | movdqa xmm1, xmmword ptr [rsp+0x110] |
1641 | movdqa xmm0, xmm1 |
1642 | paddd xmm1, xmmword ptr [rsp+0x150] |
1643 | movdqa xmmword ptr [rsp+0x110], xmm1 |
1644 | pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] |
1645 | pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] |
1646 | pcmpgtd xmm0, xmm1 |
1647 | movdqa xmm1, xmmword ptr [rsp+0x120] |
1648 | psubd xmm1, xmm0 |
1649 | movdqa xmmword ptr [rsp+0x120], xmm1 |
1650 | add rbx, 128 |
1651 | add rdi, 32 |
1652 | sub rsi, 4 |
1653 | cmp rsi, 4 |
1654 | jnc 2b |
1655 | test rsi, rsi |
1656 | jnz 3f |
1657 | 4: |
1658 | mov rsp, rbp |
1659 | pop rbp |
1660 | pop rbx |
1661 | pop r12 |
1662 | pop r13 |
1663 | pop r14 |
1664 | pop r15 |
1665 | ret |
1666 | .p2align 5 |
1667 | 3: |
1668 | test esi, 0x2 |
1669 | je 3f |
1670 | movups xmm0, xmmword ptr [rcx] |
1671 | movups xmm1, xmmword ptr [rcx+0x10] |
1672 | movaps xmm8, xmm0 |
1673 | movaps xmm9, xmm1 |
1674 | movd xmm13, dword ptr [rsp+0x110] |
1675 | movd xmm14, dword ptr [rsp+0x120] |
1676 | punpckldq xmm13, xmm14 |
1677 | movaps xmmword ptr [rsp], xmm13 |
1678 | movd xmm14, dword ptr [rsp+0x114] |
1679 | movd xmm13, dword ptr [rsp+0x124] |
1680 | punpckldq xmm14, xmm13 |
1681 | movaps xmmword ptr [rsp+0x10], xmm14 |
1682 | mov r8, qword ptr [rdi] |
1683 | mov r9, qword ptr [rdi+0x8] |
1684 | movzx eax, byte ptr [rbp+0x40] |
1685 | or eax, r13d |
1686 | xor edx, edx |
1687 | 2: |
1688 | mov r14d, eax |
1689 | or eax, r12d |
1690 | add rdx, 64 |
1691 | cmp rdx, r15 |
1692 | cmovne eax, r14d |
1693 | movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
1694 | movaps xmm10, xmm2 |
1695 | movups xmm4, xmmword ptr [r8+rdx-0x40] |
1696 | movups xmm5, xmmword ptr [r8+rdx-0x30] |
1697 | movaps xmm3, xmm4 |
1698 | shufps xmm4, xmm5, 136 |
1699 | shufps xmm3, xmm5, 221 |
1700 | movaps xmm5, xmm3 |
1701 | movups xmm6, xmmword ptr [r8+rdx-0x20] |
1702 | movups xmm7, xmmword ptr [r8+rdx-0x10] |
1703 | movaps xmm3, xmm6 |
1704 | shufps xmm6, xmm7, 136 |
1705 | pshufd xmm6, xmm6, 0x93 |
1706 | shufps xmm3, xmm7, 221 |
1707 | pshufd xmm7, xmm3, 0x93 |
1708 | movups xmm12, xmmword ptr [r9+rdx-0x40] |
1709 | movups xmm13, xmmword ptr [r9+rdx-0x30] |
1710 | movaps xmm11, xmm12 |
1711 | shufps xmm12, xmm13, 136 |
1712 | shufps xmm11, xmm13, 221 |
1713 | movaps xmm13, xmm11 |
1714 | movups xmm14, xmmword ptr [r9+rdx-0x20] |
1715 | movups xmm15, xmmword ptr [r9+rdx-0x10] |
1716 | movaps xmm11, xmm14 |
1717 | shufps xmm14, xmm15, 136 |
1718 | pshufd xmm14, xmm14, 0x93 |
1719 | shufps xmm11, xmm15, 221 |
1720 | pshufd xmm15, xmm11, 0x93 |
1721 | shl rax, 0x20 |
1722 | or rax, 0x40 |
1723 | movq xmm3, rax |
1724 | movdqa xmmword ptr [rsp+0x20], xmm3 |
1725 | movaps xmm3, xmmword ptr [rsp] |
1726 | movaps xmm11, xmmword ptr [rsp+0x10] |
1727 | punpcklqdq xmm3, xmmword ptr [rsp+0x20] |
1728 | punpcklqdq xmm11, xmmword ptr [rsp+0x20] |
1729 | mov al, 7 |
1730 | 9: |
1731 | paddd xmm0, xmm4 |
1732 | paddd xmm8, xmm12 |
1733 | movaps xmmword ptr [rsp+0x20], xmm4 |
1734 | movaps xmmword ptr [rsp+0x30], xmm12 |
1735 | paddd xmm0, xmm1 |
1736 | paddd xmm8, xmm9 |
1737 | pxor xmm3, xmm0 |
1738 | pxor xmm11, xmm8 |
1739 | pshuflw xmm3, xmm3, 0xB1 |
1740 | pshufhw xmm3, xmm3, 0xB1 |
1741 | pshuflw xmm11, xmm11, 0xB1 |
1742 | pshufhw xmm11, xmm11, 0xB1 |
1743 | paddd xmm2, xmm3 |
1744 | paddd xmm10, xmm11 |
1745 | pxor xmm1, xmm2 |
1746 | pxor xmm9, xmm10 |
1747 | movdqa xmm4, xmm1 |
1748 | pslld xmm1, 20 |
1749 | psrld xmm4, 12 |
1750 | por xmm1, xmm4 |
1751 | movdqa xmm4, xmm9 |
1752 | pslld xmm9, 20 |
1753 | psrld xmm4, 12 |
1754 | por xmm9, xmm4 |
1755 | paddd xmm0, xmm5 |
1756 | paddd xmm8, xmm13 |
1757 | movaps xmmword ptr [rsp+0x40], xmm5 |
1758 | movaps xmmword ptr [rsp+0x50], xmm13 |
1759 | paddd xmm0, xmm1 |
1760 | paddd xmm8, xmm9 |
1761 | pxor xmm3, xmm0 |
1762 | pxor xmm11, xmm8 |
1763 | movdqa xmm13, xmm3 |
1764 | psrld xmm3, 8 |
1765 | pslld xmm13, 24 |
1766 | pxor xmm3, xmm13 |
1767 | movdqa xmm13, xmm11 |
1768 | psrld xmm11, 8 |
1769 | pslld xmm13, 24 |
1770 | pxor xmm11, xmm13 |
1771 | paddd xmm2, xmm3 |
1772 | paddd xmm10, xmm11 |
1773 | pxor xmm1, xmm2 |
1774 | pxor xmm9, xmm10 |
1775 | movdqa xmm4, xmm1 |
1776 | pslld xmm1, 25 |
1777 | psrld xmm4, 7 |
1778 | por xmm1, xmm4 |
1779 | movdqa xmm4, xmm9 |
1780 | pslld xmm9, 25 |
1781 | psrld xmm4, 7 |
1782 | por xmm9, xmm4 |
1783 | pshufd xmm0, xmm0, 0x93 |
1784 | pshufd xmm8, xmm8, 0x93 |
1785 | pshufd xmm3, xmm3, 0x4E |
1786 | pshufd xmm11, xmm11, 0x4E |
1787 | pshufd xmm2, xmm2, 0x39 |
1788 | pshufd xmm10, xmm10, 0x39 |
1789 | paddd xmm0, xmm6 |
1790 | paddd xmm8, xmm14 |
1791 | paddd xmm0, xmm1 |
1792 | paddd xmm8, xmm9 |
1793 | pxor xmm3, xmm0 |
1794 | pxor xmm11, xmm8 |
1795 | pshuflw xmm3, xmm3, 0xB1 |
1796 | pshufhw xmm3, xmm3, 0xB1 |
1797 | pshuflw xmm11, xmm11, 0xB1 |
1798 | pshufhw xmm11, xmm11, 0xB1 |
1799 | paddd xmm2, xmm3 |
1800 | paddd xmm10, xmm11 |
1801 | pxor xmm1, xmm2 |
1802 | pxor xmm9, xmm10 |
1803 | movdqa xmm4, xmm1 |
1804 | pslld xmm1, 20 |
1805 | psrld xmm4, 12 |
1806 | por xmm1, xmm4 |
1807 | movdqa xmm4, xmm9 |
1808 | pslld xmm9, 20 |
1809 | psrld xmm4, 12 |
1810 | por xmm9, xmm4 |
1811 | paddd xmm0, xmm7 |
1812 | paddd xmm8, xmm15 |
1813 | paddd xmm0, xmm1 |
1814 | paddd xmm8, xmm9 |
1815 | pxor xmm3, xmm0 |
1816 | pxor xmm11, xmm8 |
1817 | movdqa xmm13, xmm3 |
1818 | psrld xmm3, 8 |
1819 | pslld xmm13, 24 |
1820 | pxor xmm3, xmm13 |
1821 | movdqa xmm13, xmm11 |
1822 | psrld xmm11, 8 |
1823 | pslld xmm13, 24 |
1824 | pxor xmm11, xmm13 |
1825 | paddd xmm2, xmm3 |
1826 | paddd xmm10, xmm11 |
1827 | pxor xmm1, xmm2 |
1828 | pxor xmm9, xmm10 |
1829 | movdqa xmm4, xmm1 |
1830 | pslld xmm1, 25 |
1831 | psrld xmm4, 7 |
1832 | por xmm1, xmm4 |
1833 | movdqa xmm4, xmm9 |
1834 | pslld xmm9, 25 |
1835 | psrld xmm4, 7 |
1836 | por xmm9, xmm4 |
1837 | pshufd xmm0, xmm0, 0x39 |
1838 | pshufd xmm8, xmm8, 0x39 |
1839 | pshufd xmm3, xmm3, 0x4E |
1840 | pshufd xmm11, xmm11, 0x4E |
1841 | pshufd xmm2, xmm2, 0x93 |
1842 | pshufd xmm10, xmm10, 0x93 |
1843 | dec al |
1844 | je 9f |
1845 | movdqa xmm12, xmmword ptr [rsp+0x20] |
1846 | movdqa xmm5, xmmword ptr [rsp+0x40] |
1847 | pshufd xmm13, xmm12, 0x0F |
1848 | shufps xmm12, xmm5, 214 |
1849 | pshufd xmm4, xmm12, 0x39 |
1850 | movdqa xmm12, xmm6 |
1851 | shufps xmm12, xmm7, 250 |
1852 | pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip] |
1853 | pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip] |
1854 | por xmm13, xmm12 |
1855 | movdqa xmmword ptr [rsp+0x20], xmm13 |
1856 | movdqa xmm12, xmm7 |
1857 | punpcklqdq xmm12, xmm5 |
1858 | movdqa xmm13, xmm6 |
1859 | pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip] |
1860 | pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip] |
1861 | por xmm12, xmm13 |
1862 | pshufd xmm12, xmm12, 0x78 |
1863 | punpckhdq xmm5, xmm7 |
1864 | punpckldq xmm6, xmm5 |
1865 | pshufd xmm7, xmm6, 0x1E |
1866 | movdqa xmmword ptr [rsp+0x40], xmm12 |
1867 | movdqa xmm5, xmmword ptr [rsp+0x30] |
1868 | movdqa xmm13, xmmword ptr [rsp+0x50] |
1869 | pshufd xmm6, xmm5, 0x0F |
1870 | shufps xmm5, xmm13, 214 |
1871 | pshufd xmm12, xmm5, 0x39 |
1872 | movdqa xmm5, xmm14 |
1873 | shufps xmm5, xmm15, 250 |
1874 | pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip] |
1875 | pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip] |
1876 | por xmm6, xmm5 |
1877 | movdqa xmm5, xmm15 |
1878 | punpcklqdq xmm5, xmm13 |
1879 | movdqa xmmword ptr [rsp+0x30], xmm2 |
1880 | movdqa xmm2, xmm14 |
1881 | pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip] |
1882 | pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip] |
1883 | por xmm5, xmm2 |
1884 | movdqa xmm2, xmmword ptr [rsp+0x30] |
1885 | pshufd xmm5, xmm5, 0x78 |
1886 | punpckhdq xmm13, xmm15 |
1887 | punpckldq xmm14, xmm13 |
1888 | pshufd xmm15, xmm14, 0x1E |
1889 | movdqa xmm13, xmm6 |
1890 | movdqa xmm14, xmm5 |
1891 | movdqa xmm5, xmmword ptr [rsp+0x20] |
1892 | movdqa xmm6, xmmword ptr [rsp+0x40] |
1893 | jmp 9b |
1894 | 9: |
1895 | pxor xmm0, xmm2 |
1896 | pxor xmm1, xmm3 |
1897 | pxor xmm8, xmm10 |
1898 | pxor xmm9, xmm11 |
1899 | mov eax, r13d |
1900 | cmp rdx, r15 |
1901 | jne 2b |
1902 | movups xmmword ptr [rbx], xmm0 |
1903 | movups xmmword ptr [rbx+0x10], xmm1 |
1904 | movups xmmword ptr [rbx+0x20], xmm8 |
1905 | movups xmmword ptr [rbx+0x30], xmm9 |
1906 | mov eax, dword ptr [rsp+0x130] |
1907 | neg eax |
1908 | mov r10d, dword ptr [rsp+0x110+8*rax] |
1909 | mov r11d, dword ptr [rsp+0x120+8*rax] |
1910 | mov dword ptr [rsp+0x110], r10d |
1911 | mov dword ptr [rsp+0x120], r11d |
1912 | add rdi, 16 |
1913 | add rbx, 64 |
1914 | sub rsi, 2 |
1915 | 3: |
1916 | test esi, 0x1 |
1917 | je 4b |
1918 | movups xmm0, xmmword ptr [rcx] |
1919 | movups xmm1, xmmword ptr [rcx+0x10] |
1920 | movd xmm13, dword ptr [rsp+0x110] |
1921 | movd xmm14, dword ptr [rsp+0x120] |
1922 | punpckldq xmm13, xmm14 |
1923 | mov r8, qword ptr [rdi] |
1924 | movzx eax, byte ptr [rbp+0x40] |
1925 | or eax, r13d |
1926 | xor edx, edx |
1927 | 2: |
1928 | mov r14d, eax |
1929 | or eax, r12d |
1930 | add rdx, 64 |
1931 | cmp rdx, r15 |
1932 | cmovne eax, r14d |
1933 | movaps xmm2, xmmword ptr [BLAKE3_IV+rip] |
1934 | shl rax, 32 |
1935 | or rax, 64 |
1936 | movq xmm12, rax |
1937 | movdqa xmm3, xmm13 |
1938 | punpcklqdq xmm3, xmm12 |
1939 | movups xmm4, xmmword ptr [r8+rdx-0x40] |
1940 | movups xmm5, xmmword ptr [r8+rdx-0x30] |
1941 | movaps xmm8, xmm4 |
1942 | shufps xmm4, xmm5, 136 |
1943 | shufps xmm8, xmm5, 221 |
1944 | movaps xmm5, xmm8 |
1945 | movups xmm6, xmmword ptr [r8+rdx-0x20] |
1946 | movups xmm7, xmmword ptr [r8+rdx-0x10] |
1947 | movaps xmm8, xmm6 |
1948 | shufps xmm6, xmm7, 136 |
1949 | pshufd xmm6, xmm6, 0x93 |
1950 | shufps xmm8, xmm7, 221 |
1951 | pshufd xmm7, xmm8, 0x93 |
1952 | mov al, 7 |
1953 | 9: |
1954 | paddd xmm0, xmm4 |
1955 | paddd xmm0, xmm1 |
1956 | pxor xmm3, xmm0 |
1957 | pshuflw xmm3, xmm3, 0xB1 |
1958 | pshufhw xmm3, xmm3, 0xB1 |
1959 | paddd xmm2, xmm3 |
1960 | pxor xmm1, xmm2 |
1961 | movdqa xmm11, xmm1 |
1962 | pslld xmm1, 20 |
1963 | psrld xmm11, 12 |
1964 | por xmm1, xmm11 |
1965 | paddd xmm0, xmm5 |
1966 | paddd xmm0, xmm1 |
1967 | pxor xmm3, xmm0 |
1968 | movdqa xmm14, xmm3 |
1969 | psrld xmm3, 8 |
1970 | pslld xmm14, 24 |
1971 | pxor xmm3, xmm14 |
1972 | paddd xmm2, xmm3 |
1973 | pxor xmm1, xmm2 |
1974 | movdqa xmm11, xmm1 |
1975 | pslld xmm1, 25 |
1976 | psrld xmm11, 7 |
1977 | por xmm1, xmm11 |
1978 | pshufd xmm0, xmm0, 0x93 |
1979 | pshufd xmm3, xmm3, 0x4E |
1980 | pshufd xmm2, xmm2, 0x39 |
1981 | paddd xmm0, xmm6 |
1982 | paddd xmm0, xmm1 |
1983 | pxor xmm3, xmm0 |
1984 | pshuflw xmm3, xmm3, 0xB1 |
1985 | pshufhw xmm3, xmm3, 0xB1 |
1986 | paddd xmm2, xmm3 |
1987 | pxor xmm1, xmm2 |
1988 | movdqa xmm11, xmm1 |
1989 | pslld xmm1, 20 |
1990 | psrld xmm11, 12 |
1991 | por xmm1, xmm11 |
1992 | paddd xmm0, xmm7 |
1993 | paddd xmm0, xmm1 |
1994 | pxor xmm3, xmm0 |
1995 | movdqa xmm14, xmm3 |
1996 | psrld xmm3, 8 |
1997 | pslld xmm14, 24 |
1998 | pxor xmm3, xmm14 |
1999 | paddd xmm2, xmm3 |
2000 | pxor xmm1, xmm2 |
2001 | movdqa xmm11, xmm1 |
2002 | pslld xmm1, 25 |
2003 | psrld xmm11, 7 |
2004 | por xmm1, xmm11 |
2005 | pshufd xmm0, xmm0, 0x39 |
2006 | pshufd xmm3, xmm3, 0x4E |
2007 | pshufd xmm2, xmm2, 0x93 |
2008 | dec al |
2009 | jz 9f |
2010 | movdqa xmm8, xmm4 |
2011 | shufps xmm8, xmm5, 214 |
2012 | pshufd xmm9, xmm4, 0x0F |
2013 | pshufd xmm4, xmm8, 0x39 |
2014 | movdqa xmm8, xmm6 |
2015 | shufps xmm8, xmm7, 250 |
2016 | pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] |
2017 | pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] |
2018 | por xmm9, xmm8 |
2019 | movdqa xmm8, xmm7 |
2020 | punpcklqdq xmm8, xmm5 |
2021 | movdqa xmm10, xmm6 |
2022 | pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] |
2023 | pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] |
2024 | por xmm8, xmm10 |
2025 | pshufd xmm8, xmm8, 0x78 |
2026 | punpckhdq xmm5, xmm7 |
2027 | punpckldq xmm6, xmm5 |
2028 | pshufd xmm7, xmm6, 0x1E |
2029 | movdqa xmm5, xmm9 |
2030 | movdqa xmm6, xmm8 |
2031 | jmp 9b |
2032 | 9: |
2033 | pxor xmm0, xmm2 |
2034 | pxor xmm1, xmm3 |
2035 | mov eax, r13d |
2036 | cmp rdx, r15 |
2037 | jne 2b |
2038 | movups xmmword ptr [rbx], xmm0 |
2039 | movups xmmword ptr [rbx+0x10], xmm1 |
2040 | jmp 4b |
2041 | |
2042 | .p2align 6 |
/*
 * blake3_compress_in_place_sse2 / _blake3_compress_in_place_sse2
 *
 * Runs seven rounds of the compression loop over one 64-byte block and
 * writes the folded 32-byte result back over the chaining value.
 *
 * Inputs (System V AMD64 integer argument registers):
 *   rdi  32-byte chaining value; loaded on entry, overwritten on exit
 *   rsi  64-byte message block (read with unaligned loads)
 *   dl   byte placed, zero-extended, in the third dword of row d
 *   rcx  64-bit value placed in the first two dwords of row d
 *   r8b  byte placed, zero-extended, in the fourth dword of row d
 * NOTE(review): the C prototype is not visible in this file; presumably
 * dl is the block length, rcx the counter and r8b the flags - confirm
 * against the header.
 *
 * State rows while the loop runs:
 *   xmm0 = row a, xmm1 = row b, xmm2 = row c, xmm3 = row d
 *   xmm4..xmm7 = the sixteen message words in the current permutation
 *
 * Clobbers: rax, rdx, xmm0-xmm11, xmm14, flags.
 */
blake3_compress_in_place_sse2:
_blake3_compress_in_place_sse2:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]                 /* row a = bytes 0..15 at rdi  */
        movups  xmm1, xmmword ptr [rdi+0x10]            /* row b = bytes 16..31 at rdi */
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]       /* row c = 16 bytes at BLAKE3_IV */
        /*
         * Build the high qword of row d as dl | (r8b << 32).  Only the low
         * byte of each register is used; anything above it is discarded so
         * stale upper bits cannot leak into the state.  This is the same
         * packing sequence blake3_compress_xof_sse2 uses.
         */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4                           /* row d = [rcx lo, rcx hi, dl, r8b] */
        /*
         * Load the block and split it into even / odd indexed dwords:
         * shufps 136 (0x88) keeps dwords 0 and 2 of each source, 221 (0xDD)
         * keeps dwords 1 and 3.
         */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136                         /* xmm4 = words 0, 2, 4, 6 */
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8                              /* xmm5 = words 1, 3, 5, 7 */
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93                        /* even words 8..14, lanes rotated by one */
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93                        /* odd words 9..15, lanes rotated by one  */
        mov     al, 7                                   /* round counter */
9:
        /* ---- first half of the round: mix with xmm4 / xmm5 ---- */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1                              /* a += m + b */
        pxor    xmm3, xmm0                              /* d ^= a */
        pshuflw xmm3, xmm3, 0xB1                        /* swap 16-bit halves of each dword: */
        pshufhw xmm3, xmm3, 0xB1                        /*   d = d ror 16 */
        paddd   xmm2, xmm3                              /* c += d */
        pxor    xmm1, xmm2                              /* b ^= c */
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11                             /* b = b ror 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1                              /* a += m + b */
        pxor    xmm3, xmm0                              /* d ^= a */
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14                             /* d = d ror 8 */
        paddd   xmm2, xmm3                              /* c += d */
        pxor    xmm1, xmm2                              /* b ^= c */
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11                             /* b = b ror 7 */
        /* Rotate the lanes of rows a, d, c so the second half mixes different lane groups. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* ---- second half of the round: mix with xmm6 / xmm7 ---- */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1                        /* d = d ror 16 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11                             /* b = b ror 12 */
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14                             /* d = d ror 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11                             /* b = b ror 7 */
        /* Undo the lane rotation applied before the second half. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f                                      /* seven rounds done: skip the permutation */
        /*
         * Permute the message words held in xmm4..xmm7 for the next round.
         * The pand / por pairs merge two registers under complementary masks;
         * judging by the mask names they stand in for pblendw, which SSE2 lacks.
         */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Fold the upper state rows into the lower ones and store over the input. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi+0x10], xmm1
        ret
2153 | |
2154 | .p2align 6 |
/*
 * Assembler macros private to blake3_compress_xof_sse2.  They expand to
 * straight-line SSE2 code, emit nothing at their point of definition, and
 * are purged again after the function.
 *
 * Register roles assumed by every macro:
 *   xmm0 = row a, xmm1 = row b, xmm2 = row c, xmm3 = row d
 *   xmm4..xmm7 = message words, xmm8..xmm11 and xmm14 = scratch
 */

/* a += msg + b; d = (d ^ a) ror 16; c += d; b = (b ^ c) ror 12 */
.macro b3_xof_sse2_mix_16_12 msg
        paddd   xmm0, \msg
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
.endm

/* a += msg + b; d = (d ^ a) ror 8; c += d; b = (b ^ c) ror 7 */
.macro b3_xof_sse2_mix_8_7 msg
        paddd   xmm0, \msg
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
.endm

/* Rotate the lanes of rows a and c by the given pshufd selectors; swap the halves of row d. */
.macro b3_xof_sse2_turn_rows sel_a, sel_c
        pshufd  xmm0, xmm0, \sel_a
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, \sel_c
.endm

/*
 * Rearrange the sixteen message words in xmm4..xmm7 for the next round.
 * Each pand / por pair merges two registers under complementary masks
 * (judging by the mask names, a stand-in for pblendw, which SSE2 lacks).
 */
.macro b3_xof_sse2_next_schedule
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
.endm

/*
 * blake3_compress_xof_sse2 / _blake3_compress_xof_sse2
 *
 * Compresses one 64-byte block for seven rounds and stores 64 bytes of
 * output at r9.  The chaining value at rdi is only read, never written.
 *
 * Inputs (System V AMD64 integer argument registers):
 *   rdi  32-byte chaining value
 *   rsi  64-byte message block (unaligned loads)
 *   dl   byte zero-extended into the third dword of row d
 *   rcx  64-bit value forming the first two dwords of row d
 *   r8b  byte zero-extended into the fourth dword of row d
 *   r9   64-byte output buffer (unaligned stores)
 * NOTE(review): the C prototype is not visible here; presumably dl is
 * the block length, rcx the counter and r8b the flags - confirm.
 *
 * Output layout at r9:
 *   +0x00  a ^ c          +0x10  b ^ d
 *   +0x20  c ^ [rdi]      +0x30  d ^ [rdi+0x10]
 *
 * Clobbers: rax, rdx, xmm0-xmm11, xmm14, flags.
 */
blake3_compress_xof_sse2:
_blake3_compress_xof_sse2:
        _CET_ENDBR

        /* Rows a, b come from the chaining value; row c from the IV table. */
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]

        /* Row d = [rcx lo, rcx hi, dl, r8b]; both byte arguments are zero-extended. */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4

        /* First 32 bytes of the block: xmm4 = even dwords, xmm5 = odd dwords. */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8

        /* Last 32 bytes: same even / odd split, then each result has its lanes rotated by one. */
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93

        mov     al, 7                           /* rounds remaining */
9:
        b3_xof_sse2_mix_16_12 xmm4
        b3_xof_sse2_mix_8_7   xmm5
        b3_xof_sse2_turn_rows 0x93, 0x39        /* line up the lanes for the second half */
        b3_xof_sse2_mix_16_12 xmm6
        b3_xof_sse2_mix_8_7   xmm7
        b3_xof_sse2_turn_rows 0x39, 0x93        /* restore the original lane order */
        dec     al
        jz      9f                              /* no permutation after the last round */
        b3_xof_sse2_next_schedule
        jmp     9b
9:
        /* Reload the untouched chaining value for the upper half of the output. */
        movdqu  xmm4, xmmword ptr [rdi]
        movdqu  xmm5, xmmword ptr [rdi+0x10]
        pxor    xmm0, xmm2                      /* out[0x00] = a ^ c  */
        pxor    xmm1, xmm3                      /* out[0x10] = b ^ d  */
        pxor    xmm2, xmm4                      /* out[0x20] = c ^ cv bytes 0..15  */
        pxor    xmm3, xmm5                      /* out[0x30] = d ^ cv bytes 16..31 */
        movups  xmmword ptr [r9], xmm0
        movups  xmmword ptr [r9+0x10], xmm1
        movups  xmmword ptr [r9+0x20], xmm2
        movups  xmmword ptr [r9+0x30], xmm3
        ret

.purgem b3_xof_sse2_mix_16_12
.purgem b3_xof_sse2_mix_8_7
.purgem b3_xof_sse2_turn_rows
.purgem b3_xof_sse2_next_schedule
2273 | |
2274 | |
// ---------------------------------------------------------------------------
// Read-only constant tables shared by the routines in this file.
//
// Layout matters: the block starts on a 64-byte boundary and every table is
// exactly four .long values (16 bytes), so each label stays 16-byte aligned.
// The code references these tables through aligned memory operands
// (movaps / pand / pxor with an xmmword operand), so do not insert or resize
// entries without preserving that alignment.
// ---------------------------------------------------------------------------
// The constants go in .static_data when __APPLE__ is defined, .rodata otherwise.
2275 | #ifdef __APPLE__ |
2276 | .static_data |
2277 | #else |
2278 | .section .rodata |
2279 | #endif |
2280 | .p2align 6 |
// Four 32-bit constants in one vector; loaded as state row 2 by
// blake3_compress_xof_sse2.
2281 | BLAKE3_IV: |
2282 | .long 0x6A09E667, 0xBB67AE85 |
2283 | .long 0x3C6EF372, 0xA54FF53A |
// Per-lane offsets 0..3; in blake3_hash_many_sse2 they are masked with the
// broadcast of -r9d and added to the broadcast of r8d.
2284 | ADD0: |
2285 | .long 0, 1, 2, 3 |
// The value 4 in every lane; masked with the same broadcast of -r9d in
// blake3_hash_many_sse2.
2286 | ADD1: |
2287 | .long 4, 4, 4, 4 |
// BLAKE3_IV_0..3: each word of BLAKE3_IV replicated across all four lanes.
2288 | BLAKE3_IV_0: |
2289 | .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 |
2290 | BLAKE3_IV_1: |
2291 | .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 |
2292 | BLAKE3_IV_2: |
2293 | .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 |
2294 | BLAKE3_IV_3: |
2295 | .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A |
// The value 64 in every lane.
2296 | BLAKE3_BLOCK_LEN: |
2297 | .long 64, 64, 64, 64 |
// Sign bit of every lane. blake3_hash_many_sse2 XORs it into both operands
// before pcmpgtd, which turns that signed compare into an unsigned one.
2298 | CMP_MSB_MASK: |
2299 | .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 |
// Lane-select masks for the pand / pand / por blends in the message
// permutation. Each name carries the pblendw immediate it stands in for
// (one immediate bit per 16-bit word, so two bits per dword lane).
// 0x33 and 0xCC are complements of each other: dwords {0,2} versus {1,3}.
2300 | PBLENDW_0x33_MASK: |
2301 | .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 |
2302 | PBLENDW_0xCC_MASK: |
2303 | .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF |
// 0x3F and 0xC0 are complements of each other: dwords {0,1,2} versus {3}.
2304 | PBLENDW_0x3F_MASK: |
2305 | .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 |
2306 | PBLENDW_0xC0_MASK: |
2307 | .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF |
2308 | |
2309 | #endif |
2310 | |