; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

%ifidn __OUTPUT_FORMAT__, win64
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
%define _CET_ENDBR

%include "ring_core_generated/prefix_symbols_nasm.inc"
section .text code align=64

EXTERN  OPENSSL_ia32cap_P

chacha20_poly1305_constants:

;-----------------------------------------------------------------------
; Read-only constant tables.
; Several rows are stored twice back to back (32 bytes in total);
; presumably so the same label also serves 256-bit loads - confirm
; against the AVX2 code path, which is not visible here.
;-----------------------------------------------------------------------
section .rdata rdata align=8
ALIGN   64
$L$chacha20_consts:
        times 2 DB      'expand 32-byte k'
$L$rol8:
        ; pshufb mask: rotates every 32-bit lane left by 8 bits
        times 2 DB      3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
$L$rol16:
        ; pshufb mask: rotates every 32-bit lane left by 16 bits
        times 2 DB      2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
$L$avx2_init:
        DD      0,0,0,0
$L$sse_inc:
        ; added with paddd to step the block counter held in dword 0
        DD      1,0,0,0
$L$avx2_inc:
        DD      2,0,0,0,2,0,0,0
$L$clamp:
        ; first 16 bytes: mask and-ed into the Poly1305 multiplier;
        ; last 16 bytes: all ones
        DQ      0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
        DQ      0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF
ALIGN   16
$L$and_masks:
        ; 16 rows of 16 bytes; row k (k = 1..16) is k bytes of 0xff
        ; followed by 16-k bytes of 0x00.
%assign POLY1305_MASK_ONES 1
%rep 16
        times POLY1305_MASK_ONES        DB 0xff
        times (16-POLY1305_MASK_ONES)   DB 0x00
%assign POLY1305_MASK_ONES POLY1305_MASK_ONES+1
%endrep
%undef POLY1305_MASK_ONES
section .text

;-----------------------------------------------------------------------
; Operands naming the two 64-bit halves of the multiplier kept in the
; caller's frame at [rbp+160] (low) and [rbp+168] (high).
;-----------------------------------------------------------------------
%define POLY1305_R_LO   QWORD[((0+160+0))+rbp]
%define POLY1305_R_HI   QWORD[((8+160+0))+rbp]

;-----------------------------------------------------------------------
; POLY1305_MUL_REDUCE
;   acc = (acc * r) partially reduced, where
;     acc = r10 + r11*2^64 + r12*2^128   (r12 holds only a few bits)
;     r   = POLY1305_R_LO + POLY1305_R_HI*2^64
;   The product is folded by adding 5 * (product >> 130) to its low
;   130 bits, i.e. using 2^130 = 5 (mod 2^130 - 5).
; In/Out: r10, r11, r12
; Clobb:  rax, rdx, r9, r13, r14, r15, flags
;-----------------------------------------------------------------------
%macro POLY1305_MUL_REDUCE 0
        mov     rax,POLY1305_R_LO
        mov     r15,rax
        mul     r10                     ; rdx:rax = r_lo * acc0
        mov     r13,rax                 ; t0
        mov     r14,rdx                 ; t1 (so far)
        mov     rax,POLY1305_R_LO
        mul     r11                     ; rdx:rax = r_lo * acc1
        imul    r15,r12                 ; r15 = r_lo * acc2
        add     r14,rax
        adc     r15,rdx                 ; t2 (so far)
        mov     rax,POLY1305_R_HI
        mov     r9,rax
        mul     r10                     ; rdx:rax = r_hi * acc0
        add     r14,rax                 ; t1 complete
        adc     rdx,0
        mov     r10,rdx                 ; carry word destined for t2
        mov     rax,POLY1305_R_HI
        mul     r11                     ; rdx:rax = r_hi * acc1
        add     r15,rax
        adc     rdx,0
        imul    r9,r12                  ; r9 = r_hi * acc2
        add     r15,r10                 ; t2 complete
        adc     r9,rdx                  ; t3 complete
        ; product t = r13 + r14*2^64 + r15*2^128 + r9*2^192
        mov     r10,r13
        mov     r11,r14
        mov     r12,r15
        and     r12,3                   ; keep bits 128..129 only
        mov     r13,r15
        and     r13,-4                  ; r14:r13 = 4 * (t >> 130)
        mov     r14,r9
        shrd    r15,r9,2
        shr     r9,2                    ; r9:r15 = t >> 130
        add     r15,r13
        adc     r9,r14                  ; r9:r15 = 5 * (t >> 130)
        add     r10,r15
        adc     r11,r9
        adc     r12,0
%endmacro

;-----------------------------------------------------------------------
; poly_hash_ad_internal
;   Absorbs r8 bytes starting at rcx into a fresh accumulator.
; In:    rcx = input pointer
;        r8  = input length in bytes
;        rbp = frame base; multiplier is read from [rbp+160..175]
; Out:   r10, r11, r12 = accumulator (low, middle, top limb)
; Clobb: rax, rdx, r9, r13, r14, r15, flags; rcx and r8 are modified
;        on every path except the 13-byte one.
;-----------------------------------------------------------------------
ALIGN   64
poly_hash_ad_internal:
        xor     r10,r10                 ; accumulator = 0
        xor     r11,r11
        xor     r12,r12
        cmp     r8,13
        jne     NEAR $L$hash_ad_loop

$L$poly_fast_tls_ad:
        ; Exactly 13 bytes: two overlapping 8-byte loads build the single
        ; zero-padded block, so no byte loop is needed.
        mov     r10,QWORD[rcx]          ; bytes 0..7
        mov     r11,QWORD[5+rcx]        ; bytes 5..12
        shr     r11,24                  ; drop bytes 5..7 -> bytes 8..12
        mov     r12,1                   ; the 2^128 bit
        POLY1305_MUL_REDUCE
        ret

$L$hash_ad_loop:
        ; Full 16-byte blocks: acc += block + 2^128, then multiply.
        cmp     r8,16
        jb      NEAR $L$hash_ad_tail
        add     r10,QWORD[((0+0))+rcx]
        adc     r11,QWORD[((8+0))+rcx]
        adc     r12,1
        POLY1305_MUL_REDUCE

        lea     rcx,[16+rcx]
        sub     r8,16
        jmp     NEAR $L$hash_ad_loop

$L$hash_ad_tail:
        cmp     r8,0
        je      NEAR $L$hash_ad_done

        ; 1..15 bytes left: gather them, last byte first, into r14:r13 so
        ; the result is the little-endian block zero-extended to 16 bytes.
        xor     r13,r13
        xor     r14,r14
        xor     r15,r15
        add     rcx,r8                  ; rcx = one past the final byte
$L$hash_ad_tail_loop:
        shld    r14,r13,8
        shl     r13,8
        movzx   r15,BYTE[((-1))+rcx]
        xor     r13,r15
        dec     rcx
        dec     r8
        jne     NEAR $L$hash_ad_tail_loop

        add     r10,r13
        adc     r11,r14
        adc     r12,1
        POLY1305_MUL_REDUCE

$L$hash_ad_done:
        ret



global  chacha20_poly1305_open

ALIGN   64
chacha20_poly1305_open:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_chacha20_poly1305_open:
        ; Move the Win64 arguments (rcx, rdx, r8, r9, two stack slots)
        ; into rdi, rsi, rdx, rcx, r8, r9.
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]
        mov     r9,QWORD[48+rsp]

_CET_ENDBR
        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15

        push    r9

        sub     rsp,288 + 160 + 32

        ; rbp = 32-byte-aligned base of the scratch area just reserved
        lea     rbp,[32+rsp]
        and     rbp,-32

        ; Spill xmm registers into the first slots of the scratch area.
        movaps  XMMWORD[(0+0)+rbp],xmm6
        movaps  XMMWORD[(16+0)+rbp],xmm7
        movaps  XMMWORD[(32+0)+rbp],xmm8
        movaps  XMMWORD[(48+0)+rbp],xmm9
        movaps  XMMWORD[(64+0)+rbp],xmm10
        movaps  XMMWORD[(80+0)+rbp],xmm11
        movaps  XMMWORD[(96+0)+rbp],xmm12
        ; NOTE(review): this source line ends mid-statement; the operands
        ; of the following movaps begin on the next source line.
        movaps
XMMWORD[(112+0)+rbp],xmm13 274 movaps XMMWORD[(128+0)+rbp],xmm14 275 movaps XMMWORD[(144+0)+rbp],xmm15 276 277 mov rbx,rdx 278 mov QWORD[((0+160+32))+rbp],r8 279 mov QWORD[((8+160+32))+rbp],rbx 280 281 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 282 and eax,288 283 xor eax,288 284 jz NEAR chacha20_poly1305_open_avx2 285 286 cmp rbx,128 287 jbe NEAR $L$open_sse_128 288 289 movdqa xmm0,XMMWORD[$L$chacha20_consts] 290 movdqu xmm4,XMMWORD[r9] 291 movdqu xmm8,XMMWORD[16+r9] 292 movdqu xmm12,XMMWORD[32+r9] 293 294 movdqa xmm7,xmm12 295 296 movdqa XMMWORD[(160+48)+rbp],xmm4 297 movdqa XMMWORD[(160+64)+rbp],xmm8 298 movdqa XMMWORD[(160+96)+rbp],xmm12 299 mov r10,10 300$L$open_sse_init_rounds: 301 paddd xmm0,xmm4 302 pxor xmm12,xmm0 303 pshufb xmm12,XMMWORD[$L$rol16] 304 paddd xmm8,xmm12 305 pxor xmm4,xmm8 306 movdqa xmm3,xmm4 307 pslld xmm3,12 308 psrld xmm4,20 309 pxor xmm4,xmm3 310 paddd xmm0,xmm4 311 pxor xmm12,xmm0 312 pshufb xmm12,XMMWORD[$L$rol8] 313 paddd xmm8,xmm12 314 pxor xmm4,xmm8 315 movdqa xmm3,xmm4 316 pslld xmm3,7 317 psrld xmm4,25 318 pxor xmm4,xmm3 319DB 102,15,58,15,228,4 320DB 102,69,15,58,15,192,8 321DB 102,69,15,58,15,228,12 322 paddd xmm0,xmm4 323 pxor xmm12,xmm0 324 pshufb xmm12,XMMWORD[$L$rol16] 325 paddd xmm8,xmm12 326 pxor xmm4,xmm8 327 movdqa xmm3,xmm4 328 pslld xmm3,12 329 psrld xmm4,20 330 pxor xmm4,xmm3 331 paddd xmm0,xmm4 332 pxor xmm12,xmm0 333 pshufb xmm12,XMMWORD[$L$rol8] 334 paddd xmm8,xmm12 335 pxor xmm4,xmm8 336 movdqa xmm3,xmm4 337 pslld xmm3,7 338 psrld xmm4,25 339 pxor xmm4,xmm3 340DB 102,15,58,15,228,12 341DB 102,69,15,58,15,192,8 342DB 102,69,15,58,15,228,4 343 344 dec r10 345 jne NEAR $L$open_sse_init_rounds 346 347 paddd xmm0,XMMWORD[$L$chacha20_consts] 348 paddd xmm4,XMMWORD[((160+48))+rbp] 349 350 pand xmm0,XMMWORD[$L$clamp] 351 movdqa XMMWORD[(160+0)+rbp],xmm0 352 movdqa XMMWORD[(160+16)+rbp],xmm4 353 354 mov r8,r8 355 call poly_hash_ad_internal 356$L$open_sse_main_loop: 357 cmp rbx,16*16 358 jb NEAR $L$open_sse_tail 359 360 movdqa 
xmm0,XMMWORD[$L$chacha20_consts] 361 movdqa xmm4,XMMWORD[((160+48))+rbp] 362 movdqa xmm8,XMMWORD[((160+64))+rbp] 363 movdqa xmm1,xmm0 364 movdqa xmm5,xmm4 365 movdqa xmm9,xmm8 366 movdqa xmm2,xmm0 367 movdqa xmm6,xmm4 368 movdqa xmm10,xmm8 369 movdqa xmm3,xmm0 370 movdqa xmm7,xmm4 371 movdqa xmm11,xmm8 372 movdqa xmm15,XMMWORD[((160+96))+rbp] 373 paddd xmm15,XMMWORD[$L$sse_inc] 374 movdqa xmm14,xmm15 375 paddd xmm14,XMMWORD[$L$sse_inc] 376 movdqa xmm13,xmm14 377 paddd xmm13,XMMWORD[$L$sse_inc] 378 movdqa xmm12,xmm13 379 paddd xmm12,XMMWORD[$L$sse_inc] 380 movdqa XMMWORD[(160+96)+rbp],xmm12 381 movdqa XMMWORD[(160+112)+rbp],xmm13 382 movdqa XMMWORD[(160+128)+rbp],xmm14 383 movdqa XMMWORD[(160+144)+rbp],xmm15 384 385 386 387 mov rcx,4 388 mov r8,rsi 389$L$open_sse_main_loop_rounds: 390 movdqa XMMWORD[(160+80)+rbp],xmm8 391 movdqa xmm8,XMMWORD[$L$rol16] 392 paddd xmm3,xmm7 393 paddd xmm2,xmm6 394 paddd xmm1,xmm5 395 paddd xmm0,xmm4 396 pxor xmm15,xmm3 397 pxor xmm14,xmm2 398 pxor xmm13,xmm1 399 pxor xmm12,xmm0 400DB 102,69,15,56,0,248 401DB 102,69,15,56,0,240 402DB 102,69,15,56,0,232 403DB 102,69,15,56,0,224 404 movdqa xmm8,XMMWORD[((160+80))+rbp] 405 paddd xmm11,xmm15 406 paddd xmm10,xmm14 407 paddd xmm9,xmm13 408 paddd xmm8,xmm12 409 pxor xmm7,xmm11 410 add r10,QWORD[((0+0))+r8] 411 adc r11,QWORD[((8+0))+r8] 412 adc r12,1 413 414 lea r8,[16+r8] 415 pxor xmm6,xmm10 416 pxor xmm5,xmm9 417 pxor xmm4,xmm8 418 movdqa XMMWORD[(160+80)+rbp],xmm8 419 movdqa xmm8,xmm7 420 psrld xmm8,20 421 pslld xmm7,32-20 422 pxor xmm7,xmm8 423 movdqa xmm8,xmm6 424 psrld xmm8,20 425 pslld xmm6,32-20 426 pxor xmm6,xmm8 427 movdqa xmm8,xmm5 428 psrld xmm8,20 429 pslld xmm5,32-20 430 pxor xmm5,xmm8 431 movdqa xmm8,xmm4 432 psrld xmm8,20 433 pslld xmm4,32-20 434 pxor xmm4,xmm8 435 mov rax,QWORD[((0+160+0))+rbp] 436 mov r15,rax 437 mul r10 438 mov r13,rax 439 mov r14,rdx 440 mov rax,QWORD[((0+160+0))+rbp] 441 mul r11 442 imul r15,r12 443 add r14,rax 444 adc r15,rdx 445 movdqa 
xmm8,XMMWORD[$L$rol8] 446 paddd xmm3,xmm7 447 paddd xmm2,xmm6 448 paddd xmm1,xmm5 449 paddd xmm0,xmm4 450 pxor xmm15,xmm3 451 pxor xmm14,xmm2 452 pxor xmm13,xmm1 453 pxor xmm12,xmm0 454DB 102,69,15,56,0,248 455DB 102,69,15,56,0,240 456DB 102,69,15,56,0,232 457DB 102,69,15,56,0,224 458 movdqa xmm8,XMMWORD[((160+80))+rbp] 459 paddd xmm11,xmm15 460 paddd xmm10,xmm14 461 paddd xmm9,xmm13 462 paddd xmm8,xmm12 463 pxor xmm7,xmm11 464 pxor xmm6,xmm10 465 mov rax,QWORD[((8+160+0))+rbp] 466 mov r9,rax 467 mul r10 468 add r14,rax 469 adc rdx,0 470 mov r10,rdx 471 mov rax,QWORD[((8+160+0))+rbp] 472 mul r11 473 add r15,rax 474 adc rdx,0 475 pxor xmm5,xmm9 476 pxor xmm4,xmm8 477 movdqa XMMWORD[(160+80)+rbp],xmm8 478 movdqa xmm8,xmm7 479 psrld xmm8,25 480 pslld xmm7,32-25 481 pxor xmm7,xmm8 482 movdqa xmm8,xmm6 483 psrld xmm8,25 484 pslld xmm6,32-25 485 pxor xmm6,xmm8 486 movdqa xmm8,xmm5 487 psrld xmm8,25 488 pslld xmm5,32-25 489 pxor xmm5,xmm8 490 movdqa xmm8,xmm4 491 psrld xmm8,25 492 pslld xmm4,32-25 493 pxor xmm4,xmm8 494 movdqa xmm8,XMMWORD[((160+80))+rbp] 495 imul r9,r12 496 add r15,r10 497 adc r9,rdx 498DB 102,15,58,15,255,4 499DB 102,69,15,58,15,219,8 500DB 102,69,15,58,15,255,12 501DB 102,15,58,15,246,4 502DB 102,69,15,58,15,210,8 503DB 102,69,15,58,15,246,12 504DB 102,15,58,15,237,4 505DB 102,69,15,58,15,201,8 506DB 102,69,15,58,15,237,12 507DB 102,15,58,15,228,4 508DB 102,69,15,58,15,192,8 509DB 102,69,15,58,15,228,12 510 movdqa XMMWORD[(160+80)+rbp],xmm8 511 movdqa xmm8,XMMWORD[$L$rol16] 512 paddd xmm3,xmm7 513 paddd xmm2,xmm6 514 paddd xmm1,xmm5 515 paddd xmm0,xmm4 516 pxor xmm15,xmm3 517 pxor xmm14,xmm2 518 mov r10,r13 519 mov r11,r14 520 mov r12,r15 521 and r12,3 522 mov r13,r15 523 and r13,-4 524 mov r14,r9 525 shrd r15,r9,2 526 shr r9,2 527 add r15,r13 528 adc r9,r14 529 add r10,r15 530 adc r11,r9 531 adc r12,0 532 pxor xmm13,xmm1 533 pxor xmm12,xmm0 534DB 102,69,15,56,0,248 535DB 102,69,15,56,0,240 536DB 102,69,15,56,0,232 537DB 102,69,15,56,0,224 538 movdqa 
xmm8,XMMWORD[((160+80))+rbp] 539 paddd xmm11,xmm15 540 paddd xmm10,xmm14 541 paddd xmm9,xmm13 542 paddd xmm8,xmm12 543 pxor xmm7,xmm11 544 pxor xmm6,xmm10 545 pxor xmm5,xmm9 546 pxor xmm4,xmm8 547 movdqa XMMWORD[(160+80)+rbp],xmm8 548 movdqa xmm8,xmm7 549 psrld xmm8,20 550 pslld xmm7,32-20 551 pxor xmm7,xmm8 552 movdqa xmm8,xmm6 553 psrld xmm8,20 554 pslld xmm6,32-20 555 pxor xmm6,xmm8 556 movdqa xmm8,xmm5 557 psrld xmm8,20 558 pslld xmm5,32-20 559 pxor xmm5,xmm8 560 movdqa xmm8,xmm4 561 psrld xmm8,20 562 pslld xmm4,32-20 563 pxor xmm4,xmm8 564 movdqa xmm8,XMMWORD[$L$rol8] 565 paddd xmm3,xmm7 566 paddd xmm2,xmm6 567 paddd xmm1,xmm5 568 paddd xmm0,xmm4 569 pxor xmm15,xmm3 570 pxor xmm14,xmm2 571 pxor xmm13,xmm1 572 pxor xmm12,xmm0 573DB 102,69,15,56,0,248 574DB 102,69,15,56,0,240 575DB 102,69,15,56,0,232 576DB 102,69,15,56,0,224 577 movdqa xmm8,XMMWORD[((160+80))+rbp] 578 paddd xmm11,xmm15 579 paddd xmm10,xmm14 580 paddd xmm9,xmm13 581 paddd xmm8,xmm12 582 pxor xmm7,xmm11 583 pxor xmm6,xmm10 584 pxor xmm5,xmm9 585 pxor xmm4,xmm8 586 movdqa XMMWORD[(160+80)+rbp],xmm8 587 movdqa xmm8,xmm7 588 psrld xmm8,25 589 pslld xmm7,32-25 590 pxor xmm7,xmm8 591 movdqa xmm8,xmm6 592 psrld xmm8,25 593 pslld xmm6,32-25 594 pxor xmm6,xmm8 595 movdqa xmm8,xmm5 596 psrld xmm8,25 597 pslld xmm5,32-25 598 pxor xmm5,xmm8 599 movdqa xmm8,xmm4 600 psrld xmm8,25 601 pslld xmm4,32-25 602 pxor xmm4,xmm8 603 movdqa xmm8,XMMWORD[((160+80))+rbp] 604DB 102,15,58,15,255,12 605DB 102,69,15,58,15,219,8 606DB 102,69,15,58,15,255,4 607DB 102,15,58,15,246,12 608DB 102,69,15,58,15,210,8 609DB 102,69,15,58,15,246,4 610DB 102,15,58,15,237,12 611DB 102,69,15,58,15,201,8 612DB 102,69,15,58,15,237,4 613DB 102,15,58,15,228,12 614DB 102,69,15,58,15,192,8 615DB 102,69,15,58,15,228,4 616 617 dec rcx 618 jge NEAR $L$open_sse_main_loop_rounds 619 add r10,QWORD[((0+0))+r8] 620 adc r11,QWORD[((8+0))+r8] 621 adc r12,1 622 mov rax,QWORD[((0+160+0))+rbp] 623 mov r15,rax 624 mul r10 625 mov r13,rax 626 mov r14,rdx 627 
mov rax,QWORD[((0+160+0))+rbp] 628 mul r11 629 imul r15,r12 630 add r14,rax 631 adc r15,rdx 632 mov rax,QWORD[((8+160+0))+rbp] 633 mov r9,rax 634 mul r10 635 add r14,rax 636 adc rdx,0 637 mov r10,rdx 638 mov rax,QWORD[((8+160+0))+rbp] 639 mul r11 640 add r15,rax 641 adc rdx,0 642 imul r9,r12 643 add r15,r10 644 adc r9,rdx 645 mov r10,r13 646 mov r11,r14 647 mov r12,r15 648 and r12,3 649 mov r13,r15 650 and r13,-4 651 mov r14,r9 652 shrd r15,r9,2 653 shr r9,2 654 add r15,r13 655 adc r9,r14 656 add r10,r15 657 adc r11,r9 658 adc r12,0 659 660 lea r8,[16+r8] 661 cmp rcx,-6 662 jg NEAR $L$open_sse_main_loop_rounds 663 paddd xmm3,XMMWORD[$L$chacha20_consts] 664 paddd xmm7,XMMWORD[((160+48))+rbp] 665 paddd xmm11,XMMWORD[((160+64))+rbp] 666 paddd xmm15,XMMWORD[((160+144))+rbp] 667 paddd xmm2,XMMWORD[$L$chacha20_consts] 668 paddd xmm6,XMMWORD[((160+48))+rbp] 669 paddd xmm10,XMMWORD[((160+64))+rbp] 670 paddd xmm14,XMMWORD[((160+128))+rbp] 671 paddd xmm1,XMMWORD[$L$chacha20_consts] 672 paddd xmm5,XMMWORD[((160+48))+rbp] 673 paddd xmm9,XMMWORD[((160+64))+rbp] 674 paddd xmm13,XMMWORD[((160+112))+rbp] 675 paddd xmm0,XMMWORD[$L$chacha20_consts] 676 paddd xmm4,XMMWORD[((160+48))+rbp] 677 paddd xmm8,XMMWORD[((160+64))+rbp] 678 paddd xmm12,XMMWORD[((160+96))+rbp] 679 movdqa XMMWORD[(160+80)+rbp],xmm12 680 movdqu xmm12,XMMWORD[((0 + 0))+rsi] 681 pxor xmm12,xmm3 682 movdqu XMMWORD[(0 + 0)+rdi],xmm12 683 movdqu xmm12,XMMWORD[((16 + 0))+rsi] 684 pxor xmm12,xmm7 685 movdqu XMMWORD[(16 + 0)+rdi],xmm12 686 movdqu xmm12,XMMWORD[((32 + 0))+rsi] 687 pxor xmm12,xmm11 688 movdqu XMMWORD[(32 + 0)+rdi],xmm12 689 movdqu xmm12,XMMWORD[((48 + 0))+rsi] 690 pxor xmm12,xmm15 691 movdqu XMMWORD[(48 + 0)+rdi],xmm12 692 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 693 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 694 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 695 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 696 pxor xmm2,xmm3 697 pxor xmm6,xmm7 698 pxor xmm10,xmm11 699 pxor xmm15,xmm14 700 movdqu XMMWORD[(0 + 64)+rdi],xmm2 701 movdqu 
XMMWORD[(16 + 64)+rdi],xmm6 702 movdqu XMMWORD[(32 + 64)+rdi],xmm10 703 movdqu XMMWORD[(48 + 64)+rdi],xmm15 704 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 705 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 706 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 707 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 708 pxor xmm1,xmm3 709 pxor xmm5,xmm7 710 pxor xmm9,xmm11 711 pxor xmm15,xmm13 712 movdqu XMMWORD[(0 + 128)+rdi],xmm1 713 movdqu XMMWORD[(16 + 128)+rdi],xmm5 714 movdqu XMMWORD[(32 + 128)+rdi],xmm9 715 movdqu XMMWORD[(48 + 128)+rdi],xmm15 716 movdqu xmm3,XMMWORD[((0 + 192))+rsi] 717 movdqu xmm7,XMMWORD[((16 + 192))+rsi] 718 movdqu xmm11,XMMWORD[((32 + 192))+rsi] 719 movdqu xmm15,XMMWORD[((48 + 192))+rsi] 720 pxor xmm0,xmm3 721 pxor xmm4,xmm7 722 pxor xmm8,xmm11 723 pxor xmm15,XMMWORD[((160+80))+rbp] 724 movdqu XMMWORD[(0 + 192)+rdi],xmm0 725 movdqu XMMWORD[(16 + 192)+rdi],xmm4 726 movdqu XMMWORD[(32 + 192)+rdi],xmm8 727 movdqu XMMWORD[(48 + 192)+rdi],xmm15 728 729 lea rsi,[256+rsi] 730 lea rdi,[256+rdi] 731 sub rbx,16*16 732 jmp NEAR $L$open_sse_main_loop 733$L$open_sse_tail: 734 735 test rbx,rbx 736 jz NEAR $L$open_sse_finalize 737 cmp rbx,12*16 738 ja NEAR $L$open_sse_tail_256 739 cmp rbx,8*16 740 ja NEAR $L$open_sse_tail_192 741 cmp rbx,4*16 742 ja NEAR $L$open_sse_tail_128 743 movdqa xmm0,XMMWORD[$L$chacha20_consts] 744 movdqa xmm4,XMMWORD[((160+48))+rbp] 745 movdqa xmm8,XMMWORD[((160+64))+rbp] 746 movdqa xmm12,XMMWORD[((160+96))+rbp] 747 paddd xmm12,XMMWORD[$L$sse_inc] 748 movdqa XMMWORD[(160+96)+rbp],xmm12 749 750 xor r8,r8 751 mov rcx,rbx 752 cmp rcx,16 753 jb NEAR $L$open_sse_tail_64_rounds 754$L$open_sse_tail_64_rounds_and_x1hash: 755 add r10,QWORD[((0+0))+r8*1+rsi] 756 adc r11,QWORD[((8+0))+r8*1+rsi] 757 adc r12,1 758 mov rax,QWORD[((0+160+0))+rbp] 759 mov r15,rax 760 mul r10 761 mov r13,rax 762 mov r14,rdx 763 mov rax,QWORD[((0+160+0))+rbp] 764 mul r11 765 imul r15,r12 766 add r14,rax 767 adc r15,rdx 768 mov rax,QWORD[((8+160+0))+rbp] 769 mov r9,rax 770 mul r10 771 add r14,rax 772 
adc rdx,0 773 mov r10,rdx 774 mov rax,QWORD[((8+160+0))+rbp] 775 mul r11 776 add r15,rax 777 adc rdx,0 778 imul r9,r12 779 add r15,r10 780 adc r9,rdx 781 mov r10,r13 782 mov r11,r14 783 mov r12,r15 784 and r12,3 785 mov r13,r15 786 and r13,-4 787 mov r14,r9 788 shrd r15,r9,2 789 shr r9,2 790 add r15,r13 791 adc r9,r14 792 add r10,r15 793 adc r11,r9 794 adc r12,0 795 796 sub rcx,16 797$L$open_sse_tail_64_rounds: 798 add r8,16 799 paddd xmm0,xmm4 800 pxor xmm12,xmm0 801 pshufb xmm12,XMMWORD[$L$rol16] 802 paddd xmm8,xmm12 803 pxor xmm4,xmm8 804 movdqa xmm3,xmm4 805 pslld xmm3,12 806 psrld xmm4,20 807 pxor xmm4,xmm3 808 paddd xmm0,xmm4 809 pxor xmm12,xmm0 810 pshufb xmm12,XMMWORD[$L$rol8] 811 paddd xmm8,xmm12 812 pxor xmm4,xmm8 813 movdqa xmm3,xmm4 814 pslld xmm3,7 815 psrld xmm4,25 816 pxor xmm4,xmm3 817DB 102,15,58,15,228,4 818DB 102,69,15,58,15,192,8 819DB 102,69,15,58,15,228,12 820 paddd xmm0,xmm4 821 pxor xmm12,xmm0 822 pshufb xmm12,XMMWORD[$L$rol16] 823 paddd xmm8,xmm12 824 pxor xmm4,xmm8 825 movdqa xmm3,xmm4 826 pslld xmm3,12 827 psrld xmm4,20 828 pxor xmm4,xmm3 829 paddd xmm0,xmm4 830 pxor xmm12,xmm0 831 pshufb xmm12,XMMWORD[$L$rol8] 832 paddd xmm8,xmm12 833 pxor xmm4,xmm8 834 movdqa xmm3,xmm4 835 pslld xmm3,7 836 psrld xmm4,25 837 pxor xmm4,xmm3 838DB 102,15,58,15,228,12 839DB 102,69,15,58,15,192,8 840DB 102,69,15,58,15,228,4 841 842 cmp rcx,16 843 jae NEAR $L$open_sse_tail_64_rounds_and_x1hash 844 cmp r8,10*16 845 jne NEAR $L$open_sse_tail_64_rounds 846 paddd xmm0,XMMWORD[$L$chacha20_consts] 847 paddd xmm4,XMMWORD[((160+48))+rbp] 848 paddd xmm8,XMMWORD[((160+64))+rbp] 849 paddd xmm12,XMMWORD[((160+96))+rbp] 850 851 jmp NEAR $L$open_sse_tail_64_dec_loop 852 853$L$open_sse_tail_128: 854 movdqa xmm0,XMMWORD[$L$chacha20_consts] 855 movdqa xmm4,XMMWORD[((160+48))+rbp] 856 movdqa xmm8,XMMWORD[((160+64))+rbp] 857 movdqa xmm1,xmm0 858 movdqa xmm5,xmm4 859 movdqa xmm9,xmm8 860 movdqa xmm13,XMMWORD[((160+96))+rbp] 861 paddd xmm13,XMMWORD[$L$sse_inc] 862 movdqa 
xmm12,xmm13 863 paddd xmm12,XMMWORD[$L$sse_inc] 864 movdqa XMMWORD[(160+96)+rbp],xmm12 865 movdqa XMMWORD[(160+112)+rbp],xmm13 866 867 mov rcx,rbx 868 and rcx,-16 869 xor r8,r8 870$L$open_sse_tail_128_rounds_and_x1hash: 871 add r10,QWORD[((0+0))+r8*1+rsi] 872 adc r11,QWORD[((8+0))+r8*1+rsi] 873 adc r12,1 874 mov rax,QWORD[((0+160+0))+rbp] 875 mov r15,rax 876 mul r10 877 mov r13,rax 878 mov r14,rdx 879 mov rax,QWORD[((0+160+0))+rbp] 880 mul r11 881 imul r15,r12 882 add r14,rax 883 adc r15,rdx 884 mov rax,QWORD[((8+160+0))+rbp] 885 mov r9,rax 886 mul r10 887 add r14,rax 888 adc rdx,0 889 mov r10,rdx 890 mov rax,QWORD[((8+160+0))+rbp] 891 mul r11 892 add r15,rax 893 adc rdx,0 894 imul r9,r12 895 add r15,r10 896 adc r9,rdx 897 mov r10,r13 898 mov r11,r14 899 mov r12,r15 900 and r12,3 901 mov r13,r15 902 and r13,-4 903 mov r14,r9 904 shrd r15,r9,2 905 shr r9,2 906 add r15,r13 907 adc r9,r14 908 add r10,r15 909 adc r11,r9 910 adc r12,0 911 912$L$open_sse_tail_128_rounds: 913 add r8,16 914 paddd xmm0,xmm4 915 pxor xmm12,xmm0 916 pshufb xmm12,XMMWORD[$L$rol16] 917 paddd xmm8,xmm12 918 pxor xmm4,xmm8 919 movdqa xmm3,xmm4 920 pslld xmm3,12 921 psrld xmm4,20 922 pxor xmm4,xmm3 923 paddd xmm0,xmm4 924 pxor xmm12,xmm0 925 pshufb xmm12,XMMWORD[$L$rol8] 926 paddd xmm8,xmm12 927 pxor xmm4,xmm8 928 movdqa xmm3,xmm4 929 pslld xmm3,7 930 psrld xmm4,25 931 pxor xmm4,xmm3 932DB 102,15,58,15,228,4 933DB 102,69,15,58,15,192,8 934DB 102,69,15,58,15,228,12 935 paddd xmm1,xmm5 936 pxor xmm13,xmm1 937 pshufb xmm13,XMMWORD[$L$rol16] 938 paddd xmm9,xmm13 939 pxor xmm5,xmm9 940 movdqa xmm3,xmm5 941 pslld xmm3,12 942 psrld xmm5,20 943 pxor xmm5,xmm3 944 paddd xmm1,xmm5 945 pxor xmm13,xmm1 946 pshufb xmm13,XMMWORD[$L$rol8] 947 paddd xmm9,xmm13 948 pxor xmm5,xmm9 949 movdqa xmm3,xmm5 950 pslld xmm3,7 951 psrld xmm5,25 952 pxor xmm5,xmm3 953DB 102,15,58,15,237,4 954DB 102,69,15,58,15,201,8 955DB 102,69,15,58,15,237,12 956 paddd xmm0,xmm4 957 pxor xmm12,xmm0 958 pshufb xmm12,XMMWORD[$L$rol16] 959 
paddd xmm8,xmm12 960 pxor xmm4,xmm8 961 movdqa xmm3,xmm4 962 pslld xmm3,12 963 psrld xmm4,20 964 pxor xmm4,xmm3 965 paddd xmm0,xmm4 966 pxor xmm12,xmm0 967 pshufb xmm12,XMMWORD[$L$rol8] 968 paddd xmm8,xmm12 969 pxor xmm4,xmm8 970 movdqa xmm3,xmm4 971 pslld xmm3,7 972 psrld xmm4,25 973 pxor xmm4,xmm3 974DB 102,15,58,15,228,12 975DB 102,69,15,58,15,192,8 976DB 102,69,15,58,15,228,4 977 paddd xmm1,xmm5 978 pxor xmm13,xmm1 979 pshufb xmm13,XMMWORD[$L$rol16] 980 paddd xmm9,xmm13 981 pxor xmm5,xmm9 982 movdqa xmm3,xmm5 983 pslld xmm3,12 984 psrld xmm5,20 985 pxor xmm5,xmm3 986 paddd xmm1,xmm5 987 pxor xmm13,xmm1 988 pshufb xmm13,XMMWORD[$L$rol8] 989 paddd xmm9,xmm13 990 pxor xmm5,xmm9 991 movdqa xmm3,xmm5 992 pslld xmm3,7 993 psrld xmm5,25 994 pxor xmm5,xmm3 995DB 102,15,58,15,237,12 996DB 102,69,15,58,15,201,8 997DB 102,69,15,58,15,237,4 998 999 cmp r8,rcx 1000 jb NEAR $L$open_sse_tail_128_rounds_and_x1hash 1001 cmp r8,10*16 1002 jne NEAR $L$open_sse_tail_128_rounds 1003 paddd xmm1,XMMWORD[$L$chacha20_consts] 1004 paddd xmm5,XMMWORD[((160+48))+rbp] 1005 paddd xmm9,XMMWORD[((160+64))+rbp] 1006 paddd xmm13,XMMWORD[((160+112))+rbp] 1007 paddd xmm0,XMMWORD[$L$chacha20_consts] 1008 paddd xmm4,XMMWORD[((160+48))+rbp] 1009 paddd xmm8,XMMWORD[((160+64))+rbp] 1010 paddd xmm12,XMMWORD[((160+96))+rbp] 1011 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 1012 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 1013 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 1014 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 1015 pxor xmm1,xmm3 1016 pxor xmm5,xmm7 1017 pxor xmm9,xmm11 1018 pxor xmm15,xmm13 1019 movdqu XMMWORD[(0 + 0)+rdi],xmm1 1020 movdqu XMMWORD[(16 + 0)+rdi],xmm5 1021 movdqu XMMWORD[(32 + 0)+rdi],xmm9 1022 movdqu XMMWORD[(48 + 0)+rdi],xmm15 1023 1024 sub rbx,4*16 1025 lea rsi,[64+rsi] 1026 lea rdi,[64+rdi] 1027 jmp NEAR $L$open_sse_tail_64_dec_loop 1028 1029$L$open_sse_tail_192: 1030 movdqa xmm0,XMMWORD[$L$chacha20_consts] 1031 movdqa xmm4,XMMWORD[((160+48))+rbp] 1032 movdqa xmm8,XMMWORD[((160+64))+rbp] 1033 movdqa 
xmm1,xmm0 1034 movdqa xmm5,xmm4 1035 movdqa xmm9,xmm8 1036 movdqa xmm2,xmm0 1037 movdqa xmm6,xmm4 1038 movdqa xmm10,xmm8 1039 movdqa xmm14,XMMWORD[((160+96))+rbp] 1040 paddd xmm14,XMMWORD[$L$sse_inc] 1041 movdqa xmm13,xmm14 1042 paddd xmm13,XMMWORD[$L$sse_inc] 1043 movdqa xmm12,xmm13 1044 paddd xmm12,XMMWORD[$L$sse_inc] 1045 movdqa XMMWORD[(160+96)+rbp],xmm12 1046 movdqa XMMWORD[(160+112)+rbp],xmm13 1047 movdqa XMMWORD[(160+128)+rbp],xmm14 1048 1049 mov rcx,rbx 1050 mov r8,10*16 1051 cmp rcx,10*16 1052 cmovg rcx,r8 1053 and rcx,-16 1054 xor r8,r8 1055$L$open_sse_tail_192_rounds_and_x1hash: 1056 add r10,QWORD[((0+0))+r8*1+rsi] 1057 adc r11,QWORD[((8+0))+r8*1+rsi] 1058 adc r12,1 1059 mov rax,QWORD[((0+160+0))+rbp] 1060 mov r15,rax 1061 mul r10 1062 mov r13,rax 1063 mov r14,rdx 1064 mov rax,QWORD[((0+160+0))+rbp] 1065 mul r11 1066 imul r15,r12 1067 add r14,rax 1068 adc r15,rdx 1069 mov rax,QWORD[((8+160+0))+rbp] 1070 mov r9,rax 1071 mul r10 1072 add r14,rax 1073 adc rdx,0 1074 mov r10,rdx 1075 mov rax,QWORD[((8+160+0))+rbp] 1076 mul r11 1077 add r15,rax 1078 adc rdx,0 1079 imul r9,r12 1080 add r15,r10 1081 adc r9,rdx 1082 mov r10,r13 1083 mov r11,r14 1084 mov r12,r15 1085 and r12,3 1086 mov r13,r15 1087 and r13,-4 1088 mov r14,r9 1089 shrd r15,r9,2 1090 shr r9,2 1091 add r15,r13 1092 adc r9,r14 1093 add r10,r15 1094 adc r11,r9 1095 adc r12,0 1096 1097$L$open_sse_tail_192_rounds: 1098 add r8,16 1099 paddd xmm0,xmm4 1100 pxor xmm12,xmm0 1101 pshufb xmm12,XMMWORD[$L$rol16] 1102 paddd xmm8,xmm12 1103 pxor xmm4,xmm8 1104 movdqa xmm3,xmm4 1105 pslld xmm3,12 1106 psrld xmm4,20 1107 pxor xmm4,xmm3 1108 paddd xmm0,xmm4 1109 pxor xmm12,xmm0 1110 pshufb xmm12,XMMWORD[$L$rol8] 1111 paddd xmm8,xmm12 1112 pxor xmm4,xmm8 1113 movdqa xmm3,xmm4 1114 pslld xmm3,7 1115 psrld xmm4,25 1116 pxor xmm4,xmm3 1117DB 102,15,58,15,228,4 1118DB 102,69,15,58,15,192,8 1119DB 102,69,15,58,15,228,12 1120 paddd xmm1,xmm5 1121 pxor xmm13,xmm1 1122 pshufb xmm13,XMMWORD[$L$rol16] 1123 paddd xmm9,xmm13 
1124 pxor xmm5,xmm9 1125 movdqa xmm3,xmm5 1126 pslld xmm3,12 1127 psrld xmm5,20 1128 pxor xmm5,xmm3 1129 paddd xmm1,xmm5 1130 pxor xmm13,xmm1 1131 pshufb xmm13,XMMWORD[$L$rol8] 1132 paddd xmm9,xmm13 1133 pxor xmm5,xmm9 1134 movdqa xmm3,xmm5 1135 pslld xmm3,7 1136 psrld xmm5,25 1137 pxor xmm5,xmm3 1138DB 102,15,58,15,237,4 1139DB 102,69,15,58,15,201,8 1140DB 102,69,15,58,15,237,12 1141 paddd xmm2,xmm6 1142 pxor xmm14,xmm2 1143 pshufb xmm14,XMMWORD[$L$rol16] 1144 paddd xmm10,xmm14 1145 pxor xmm6,xmm10 1146 movdqa xmm3,xmm6 1147 pslld xmm3,12 1148 psrld xmm6,20 1149 pxor xmm6,xmm3 1150 paddd xmm2,xmm6 1151 pxor xmm14,xmm2 1152 pshufb xmm14,XMMWORD[$L$rol8] 1153 paddd xmm10,xmm14 1154 pxor xmm6,xmm10 1155 movdqa xmm3,xmm6 1156 pslld xmm3,7 1157 psrld xmm6,25 1158 pxor xmm6,xmm3 1159DB 102,15,58,15,246,4 1160DB 102,69,15,58,15,210,8 1161DB 102,69,15,58,15,246,12 1162 paddd xmm0,xmm4 1163 pxor xmm12,xmm0 1164 pshufb xmm12,XMMWORD[$L$rol16] 1165 paddd xmm8,xmm12 1166 pxor xmm4,xmm8 1167 movdqa xmm3,xmm4 1168 pslld xmm3,12 1169 psrld xmm4,20 1170 pxor xmm4,xmm3 1171 paddd xmm0,xmm4 1172 pxor xmm12,xmm0 1173 pshufb xmm12,XMMWORD[$L$rol8] 1174 paddd xmm8,xmm12 1175 pxor xmm4,xmm8 1176 movdqa xmm3,xmm4 1177 pslld xmm3,7 1178 psrld xmm4,25 1179 pxor xmm4,xmm3 1180DB 102,15,58,15,228,12 1181DB 102,69,15,58,15,192,8 1182DB 102,69,15,58,15,228,4 1183 paddd xmm1,xmm5 1184 pxor xmm13,xmm1 1185 pshufb xmm13,XMMWORD[$L$rol16] 1186 paddd xmm9,xmm13 1187 pxor xmm5,xmm9 1188 movdqa xmm3,xmm5 1189 pslld xmm3,12 1190 psrld xmm5,20 1191 pxor xmm5,xmm3 1192 paddd xmm1,xmm5 1193 pxor xmm13,xmm1 1194 pshufb xmm13,XMMWORD[$L$rol8] 1195 paddd xmm9,xmm13 1196 pxor xmm5,xmm9 1197 movdqa xmm3,xmm5 1198 pslld xmm3,7 1199 psrld xmm5,25 1200 pxor xmm5,xmm3 1201DB 102,15,58,15,237,12 1202DB 102,69,15,58,15,201,8 1203DB 102,69,15,58,15,237,4 1204 paddd xmm2,xmm6 1205 pxor xmm14,xmm2 1206 pshufb xmm14,XMMWORD[$L$rol16] 1207 paddd xmm10,xmm14 1208 pxor xmm6,xmm10 1209 movdqa xmm3,xmm6 1210 pslld xmm3,12 
1211 psrld xmm6,20 1212 pxor xmm6,xmm3 1213 paddd xmm2,xmm6 1214 pxor xmm14,xmm2 1215 pshufb xmm14,XMMWORD[$L$rol8] 1216 paddd xmm10,xmm14 1217 pxor xmm6,xmm10 1218 movdqa xmm3,xmm6 1219 pslld xmm3,7 1220 psrld xmm6,25 1221 pxor xmm6,xmm3 1222DB 102,15,58,15,246,12 1223DB 102,69,15,58,15,210,8 1224DB 102,69,15,58,15,246,4 1225 1226 cmp r8,rcx 1227 jb NEAR $L$open_sse_tail_192_rounds_and_x1hash 1228 cmp r8,10*16 1229 jne NEAR $L$open_sse_tail_192_rounds 1230 cmp rbx,11*16 1231 jb NEAR $L$open_sse_tail_192_finish 1232 add r10,QWORD[((0+160))+rsi] 1233 adc r11,QWORD[((8+160))+rsi] 1234 adc r12,1 1235 mov rax,QWORD[((0+160+0))+rbp] 1236 mov r15,rax 1237 mul r10 1238 mov r13,rax 1239 mov r14,rdx 1240 mov rax,QWORD[((0+160+0))+rbp] 1241 mul r11 1242 imul r15,r12 1243 add r14,rax 1244 adc r15,rdx 1245 mov rax,QWORD[((8+160+0))+rbp] 1246 mov r9,rax 1247 mul r10 1248 add r14,rax 1249 adc rdx,0 1250 mov r10,rdx 1251 mov rax,QWORD[((8+160+0))+rbp] 1252 mul r11 1253 add r15,rax 1254 adc rdx,0 1255 imul r9,r12 1256 add r15,r10 1257 adc r9,rdx 1258 mov r10,r13 1259 mov r11,r14 1260 mov r12,r15 1261 and r12,3 1262 mov r13,r15 1263 and r13,-4 1264 mov r14,r9 1265 shrd r15,r9,2 1266 shr r9,2 1267 add r15,r13 1268 adc r9,r14 1269 add r10,r15 1270 adc r11,r9 1271 adc r12,0 1272 1273 cmp rbx,12*16 1274 jb NEAR $L$open_sse_tail_192_finish 1275 add r10,QWORD[((0+176))+rsi] 1276 adc r11,QWORD[((8+176))+rsi] 1277 adc r12,1 1278 mov rax,QWORD[((0+160+0))+rbp] 1279 mov r15,rax 1280 mul r10 1281 mov r13,rax 1282 mov r14,rdx 1283 mov rax,QWORD[((0+160+0))+rbp] 1284 mul r11 1285 imul r15,r12 1286 add r14,rax 1287 adc r15,rdx 1288 mov rax,QWORD[((8+160+0))+rbp] 1289 mov r9,rax 1290 mul r10 1291 add r14,rax 1292 adc rdx,0 1293 mov r10,rdx 1294 mov rax,QWORD[((8+160+0))+rbp] 1295 mul r11 1296 add r15,rax 1297 adc rdx,0 1298 imul r9,r12 1299 add r15,r10 1300 adc r9,rdx 1301 mov r10,r13 1302 mov r11,r14 1303 mov r12,r15 1304 and r12,3 1305 mov r13,r15 1306 and r13,-4 1307 mov r14,r9 1308 shrd 
r15,r9,2 1309 shr r9,2 1310 add r15,r13 1311 adc r9,r14 1312 add r10,r15 1313 adc r11,r9 1314 adc r12,0 1315 1316$L$open_sse_tail_192_finish: 1317 paddd xmm2,XMMWORD[$L$chacha20_consts] 1318 paddd xmm6,XMMWORD[((160+48))+rbp] 1319 paddd xmm10,XMMWORD[((160+64))+rbp] 1320 paddd xmm14,XMMWORD[((160+128))+rbp] 1321 paddd xmm1,XMMWORD[$L$chacha20_consts] 1322 paddd xmm5,XMMWORD[((160+48))+rbp] 1323 paddd xmm9,XMMWORD[((160+64))+rbp] 1324 paddd xmm13,XMMWORD[((160+112))+rbp] 1325 paddd xmm0,XMMWORD[$L$chacha20_consts] 1326 paddd xmm4,XMMWORD[((160+48))+rbp] 1327 paddd xmm8,XMMWORD[((160+64))+rbp] 1328 paddd xmm12,XMMWORD[((160+96))+rbp] 1329 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 1330 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 1331 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 1332 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 1333 pxor xmm2,xmm3 1334 pxor xmm6,xmm7 1335 pxor xmm10,xmm11 1336 pxor xmm15,xmm14 1337 movdqu XMMWORD[(0 + 0)+rdi],xmm2 1338 movdqu XMMWORD[(16 + 0)+rdi],xmm6 1339 movdqu XMMWORD[(32 + 0)+rdi],xmm10 1340 movdqu XMMWORD[(48 + 0)+rdi],xmm15 1341 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 1342 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 1343 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 1344 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 1345 pxor xmm1,xmm3 1346 pxor xmm5,xmm7 1347 pxor xmm9,xmm11 1348 pxor xmm15,xmm13 1349 movdqu XMMWORD[(0 + 64)+rdi],xmm1 1350 movdqu XMMWORD[(16 + 64)+rdi],xmm5 1351 movdqu XMMWORD[(32 + 64)+rdi],xmm9 1352 movdqu XMMWORD[(48 + 64)+rdi],xmm15 1353 1354 sub rbx,8*16 1355 lea rsi,[128+rsi] 1356 lea rdi,[128+rdi] 1357 jmp NEAR $L$open_sse_tail_64_dec_loop 1358 1359$L$open_sse_tail_256: 1360 movdqa xmm0,XMMWORD[$L$chacha20_consts] 1361 movdqa xmm4,XMMWORD[((160+48))+rbp] 1362 movdqa xmm8,XMMWORD[((160+64))+rbp] 1363 movdqa xmm1,xmm0 1364 movdqa xmm5,xmm4 1365 movdqa xmm9,xmm8 1366 movdqa xmm2,xmm0 1367 movdqa xmm6,xmm4 1368 movdqa xmm10,xmm8 1369 movdqa xmm3,xmm0 1370 movdqa xmm7,xmm4 1371 movdqa xmm11,xmm8 1372 movdqa xmm15,XMMWORD[((160+96))+rbp] 1373 paddd 
xmm15,XMMWORD[$L$sse_inc] 1374 movdqa xmm14,xmm15 1375 paddd xmm14,XMMWORD[$L$sse_inc] 1376 movdqa xmm13,xmm14 1377 paddd xmm13,XMMWORD[$L$sse_inc] 1378 movdqa xmm12,xmm13 1379 paddd xmm12,XMMWORD[$L$sse_inc] 1380 movdqa XMMWORD[(160+96)+rbp],xmm12 1381 movdqa XMMWORD[(160+112)+rbp],xmm13 1382 movdqa XMMWORD[(160+128)+rbp],xmm14 1383 movdqa XMMWORD[(160+144)+rbp],xmm15 1384 1385 xor r8,r8 1386$L$open_sse_tail_256_rounds_and_x1hash: 1387 add r10,QWORD[((0+0))+r8*1+rsi] 1388 adc r11,QWORD[((8+0))+r8*1+rsi] 1389 adc r12,1 1390 movdqa XMMWORD[(160+80)+rbp],xmm11 1391 paddd xmm0,xmm4 1392 pxor xmm12,xmm0 1393 pshufb xmm12,XMMWORD[$L$rol16] 1394 paddd xmm8,xmm12 1395 pxor xmm4,xmm8 1396 movdqa xmm11,xmm4 1397 pslld xmm11,12 1398 psrld xmm4,20 1399 pxor xmm4,xmm11 1400 paddd xmm0,xmm4 1401 pxor xmm12,xmm0 1402 pshufb xmm12,XMMWORD[$L$rol8] 1403 paddd xmm8,xmm12 1404 pxor xmm4,xmm8 1405 movdqa xmm11,xmm4 1406 pslld xmm11,7 1407 psrld xmm4,25 1408 pxor xmm4,xmm11 1409DB 102,15,58,15,228,4 1410DB 102,69,15,58,15,192,8 1411DB 102,69,15,58,15,228,12 1412 paddd xmm1,xmm5 1413 pxor xmm13,xmm1 1414 pshufb xmm13,XMMWORD[$L$rol16] 1415 paddd xmm9,xmm13 1416 pxor xmm5,xmm9 1417 movdqa xmm11,xmm5 1418 pslld xmm11,12 1419 psrld xmm5,20 1420 pxor xmm5,xmm11 1421 paddd xmm1,xmm5 1422 pxor xmm13,xmm1 1423 pshufb xmm13,XMMWORD[$L$rol8] 1424 paddd xmm9,xmm13 1425 pxor xmm5,xmm9 1426 movdqa xmm11,xmm5 1427 pslld xmm11,7 1428 psrld xmm5,25 1429 pxor xmm5,xmm11 1430DB 102,15,58,15,237,4 1431DB 102,69,15,58,15,201,8 1432DB 102,69,15,58,15,237,12 1433 paddd xmm2,xmm6 1434 pxor xmm14,xmm2 1435 pshufb xmm14,XMMWORD[$L$rol16] 1436 paddd xmm10,xmm14 1437 pxor xmm6,xmm10 1438 movdqa xmm11,xmm6 1439 pslld xmm11,12 1440 psrld xmm6,20 1441 pxor xmm6,xmm11 1442 paddd xmm2,xmm6 1443 pxor xmm14,xmm2 1444 pshufb xmm14,XMMWORD[$L$rol8] 1445 paddd xmm10,xmm14 1446 pxor xmm6,xmm10 1447 movdqa xmm11,xmm6 1448 pslld xmm11,7 1449 psrld xmm6,25 1450 pxor xmm6,xmm11 1451DB 102,15,58,15,246,4 1452DB 
102,69,15,58,15,210,8 1453DB 102,69,15,58,15,246,12 1454 movdqa xmm11,XMMWORD[((160+80))+rbp] 1455 mov rax,QWORD[((0+160+0))+rbp] 1456 mov r15,rax 1457 mul r10 1458 mov r13,rax 1459 mov r14,rdx 1460 mov rax,QWORD[((0+160+0))+rbp] 1461 mul r11 1462 imul r15,r12 1463 add r14,rax 1464 adc r15,rdx 1465 movdqa XMMWORD[(160+80)+rbp],xmm9 1466 paddd xmm3,xmm7 1467 pxor xmm15,xmm3 1468 pshufb xmm15,XMMWORD[$L$rol16] 1469 paddd xmm11,xmm15 1470 pxor xmm7,xmm11 1471 movdqa xmm9,xmm7 1472 pslld xmm9,12 1473 psrld xmm7,20 1474 pxor xmm7,xmm9 1475 paddd xmm3,xmm7 1476 pxor xmm15,xmm3 1477 pshufb xmm15,XMMWORD[$L$rol8] 1478 paddd xmm11,xmm15 1479 pxor xmm7,xmm11 1480 movdqa xmm9,xmm7 1481 pslld xmm9,7 1482 psrld xmm7,25 1483 pxor xmm7,xmm9 1484DB 102,15,58,15,255,4 1485DB 102,69,15,58,15,219,8 1486DB 102,69,15,58,15,255,12 1487 movdqa xmm9,XMMWORD[((160+80))+rbp] 1488 mov rax,QWORD[((8+160+0))+rbp] 1489 mov r9,rax 1490 mul r10 1491 add r14,rax 1492 adc rdx,0 1493 mov r10,rdx 1494 mov rax,QWORD[((8+160+0))+rbp] 1495 mul r11 1496 add r15,rax 1497 adc rdx,0 1498 movdqa XMMWORD[(160+80)+rbp],xmm11 1499 paddd xmm0,xmm4 1500 pxor xmm12,xmm0 1501 pshufb xmm12,XMMWORD[$L$rol16] 1502 paddd xmm8,xmm12 1503 pxor xmm4,xmm8 1504 movdqa xmm11,xmm4 1505 pslld xmm11,12 1506 psrld xmm4,20 1507 pxor xmm4,xmm11 1508 paddd xmm0,xmm4 1509 pxor xmm12,xmm0 1510 pshufb xmm12,XMMWORD[$L$rol8] 1511 paddd xmm8,xmm12 1512 pxor xmm4,xmm8 1513 movdqa xmm11,xmm4 1514 pslld xmm11,7 1515 psrld xmm4,25 1516 pxor xmm4,xmm11 1517DB 102,15,58,15,228,12 1518DB 102,69,15,58,15,192,8 1519DB 102,69,15,58,15,228,4 1520 paddd xmm1,xmm5 1521 pxor xmm13,xmm1 1522 pshufb xmm13,XMMWORD[$L$rol16] 1523 paddd xmm9,xmm13 1524 pxor xmm5,xmm9 1525 movdqa xmm11,xmm5 1526 pslld xmm11,12 1527 psrld xmm5,20 1528 pxor xmm5,xmm11 1529 paddd xmm1,xmm5 1530 pxor xmm13,xmm1 1531 pshufb xmm13,XMMWORD[$L$rol8] 1532 paddd xmm9,xmm13 1533 pxor xmm5,xmm9 1534 movdqa xmm11,xmm5 1535 pslld xmm11,7 1536 psrld xmm5,25 1537 pxor xmm5,xmm11 1538DB 
102,15,58,15,237,12 1539DB 102,69,15,58,15,201,8 1540DB 102,69,15,58,15,237,4 1541 imul r9,r12 1542 add r15,r10 1543 adc r9,rdx 1544 paddd xmm2,xmm6 1545 pxor xmm14,xmm2 1546 pshufb xmm14,XMMWORD[$L$rol16] 1547 paddd xmm10,xmm14 1548 pxor xmm6,xmm10 1549 movdqa xmm11,xmm6 1550 pslld xmm11,12 1551 psrld xmm6,20 1552 pxor xmm6,xmm11 1553 paddd xmm2,xmm6 1554 pxor xmm14,xmm2 1555 pshufb xmm14,XMMWORD[$L$rol8] 1556 paddd xmm10,xmm14 1557 pxor xmm6,xmm10 1558 movdqa xmm11,xmm6 1559 pslld xmm11,7 1560 psrld xmm6,25 1561 pxor xmm6,xmm11 1562DB 102,15,58,15,246,12 1563DB 102,69,15,58,15,210,8 1564DB 102,69,15,58,15,246,4 1565 movdqa xmm11,XMMWORD[((160+80))+rbp] 1566 mov r10,r13 1567 mov r11,r14 1568 mov r12,r15 1569 and r12,3 1570 mov r13,r15 1571 and r13,-4 1572 mov r14,r9 1573 shrd r15,r9,2 1574 shr r9,2 1575 add r15,r13 1576 adc r9,r14 1577 add r10,r15 1578 adc r11,r9 1579 adc r12,0 1580 movdqa XMMWORD[(160+80)+rbp],xmm9 1581 paddd xmm3,xmm7 1582 pxor xmm15,xmm3 1583 pshufb xmm15,XMMWORD[$L$rol16] 1584 paddd xmm11,xmm15 1585 pxor xmm7,xmm11 1586 movdqa xmm9,xmm7 1587 pslld xmm9,12 1588 psrld xmm7,20 1589 pxor xmm7,xmm9 1590 paddd xmm3,xmm7 1591 pxor xmm15,xmm3 1592 pshufb xmm15,XMMWORD[$L$rol8] 1593 paddd xmm11,xmm15 1594 pxor xmm7,xmm11 1595 movdqa xmm9,xmm7 1596 pslld xmm9,7 1597 psrld xmm7,25 1598 pxor xmm7,xmm9 1599DB 102,15,58,15,255,12 1600DB 102,69,15,58,15,219,8 1601DB 102,69,15,58,15,255,4 1602 movdqa xmm9,XMMWORD[((160+80))+rbp] 1603 1604 add r8,16 1605 cmp r8,10*16 1606 jb NEAR $L$open_sse_tail_256_rounds_and_x1hash 1607 1608 mov rcx,rbx 1609 and rcx,-16 1610$L$open_sse_tail_256_hash: 1611 add r10,QWORD[((0+0))+r8*1+rsi] 1612 adc r11,QWORD[((8+0))+r8*1+rsi] 1613 adc r12,1 1614 mov rax,QWORD[((0+160+0))+rbp] 1615 mov r15,rax 1616 mul r10 1617 mov r13,rax 1618 mov r14,rdx 1619 mov rax,QWORD[((0+160+0))+rbp] 1620 mul r11 1621 imul r15,r12 1622 add r14,rax 1623 adc r15,rdx 1624 mov rax,QWORD[((8+160+0))+rbp] 1625 mov r9,rax 1626 mul r10 1627 add r14,rax 1628 adc 
rdx,0 1629 mov r10,rdx 1630 mov rax,QWORD[((8+160+0))+rbp] 1631 mul r11 1632 add r15,rax 1633 adc rdx,0 1634 imul r9,r12 1635 add r15,r10 1636 adc r9,rdx 1637 mov r10,r13 1638 mov r11,r14 1639 mov r12,r15 1640 and r12,3 1641 mov r13,r15 1642 and r13,-4 1643 mov r14,r9 1644 shrd r15,r9,2 1645 shr r9,2 1646 add r15,r13 1647 adc r9,r14 1648 add r10,r15 1649 adc r11,r9 1650 adc r12,0 1651 1652 add r8,16 1653 cmp r8,rcx 1654 jb NEAR $L$open_sse_tail_256_hash 1655 paddd xmm3,XMMWORD[$L$chacha20_consts] 1656 paddd xmm7,XMMWORD[((160+48))+rbp] 1657 paddd xmm11,XMMWORD[((160+64))+rbp] 1658 paddd xmm15,XMMWORD[((160+144))+rbp] 1659 paddd xmm2,XMMWORD[$L$chacha20_consts] 1660 paddd xmm6,XMMWORD[((160+48))+rbp] 1661 paddd xmm10,XMMWORD[((160+64))+rbp] 1662 paddd xmm14,XMMWORD[((160+128))+rbp] 1663 paddd xmm1,XMMWORD[$L$chacha20_consts] 1664 paddd xmm5,XMMWORD[((160+48))+rbp] 1665 paddd xmm9,XMMWORD[((160+64))+rbp] 1666 paddd xmm13,XMMWORD[((160+112))+rbp] 1667 paddd xmm0,XMMWORD[$L$chacha20_consts] 1668 paddd xmm4,XMMWORD[((160+48))+rbp] 1669 paddd xmm8,XMMWORD[((160+64))+rbp] 1670 paddd xmm12,XMMWORD[((160+96))+rbp] 1671 movdqa XMMWORD[(160+80)+rbp],xmm12 1672 movdqu xmm12,XMMWORD[((0 + 0))+rsi] 1673 pxor xmm12,xmm3 1674 movdqu XMMWORD[(0 + 0)+rdi],xmm12 1675 movdqu xmm12,XMMWORD[((16 + 0))+rsi] 1676 pxor xmm12,xmm7 1677 movdqu XMMWORD[(16 + 0)+rdi],xmm12 1678 movdqu xmm12,XMMWORD[((32 + 0))+rsi] 1679 pxor xmm12,xmm11 1680 movdqu XMMWORD[(32 + 0)+rdi],xmm12 1681 movdqu xmm12,XMMWORD[((48 + 0))+rsi] 1682 pxor xmm12,xmm15 1683 movdqu XMMWORD[(48 + 0)+rdi],xmm12 1684 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 1685 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 1686 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 1687 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 1688 pxor xmm2,xmm3 1689 pxor xmm6,xmm7 1690 pxor xmm10,xmm11 1691 pxor xmm15,xmm14 1692 movdqu XMMWORD[(0 + 64)+rdi],xmm2 1693 movdqu XMMWORD[(16 + 64)+rdi],xmm6 1694 movdqu XMMWORD[(32 + 64)+rdi],xmm10 1695 movdqu XMMWORD[(48 + 64)+rdi],xmm15 1696 
movdqu xmm3,XMMWORD[((0 + 128))+rsi] 1697 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 1698 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 1699 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 1700 pxor xmm1,xmm3 1701 pxor xmm5,xmm7 1702 pxor xmm9,xmm11 1703 pxor xmm15,xmm13 1704 movdqu XMMWORD[(0 + 128)+rdi],xmm1 1705 movdqu XMMWORD[(16 + 128)+rdi],xmm5 1706 movdqu XMMWORD[(32 + 128)+rdi],xmm9 1707 movdqu XMMWORD[(48 + 128)+rdi],xmm15 1708 1709 movdqa xmm12,XMMWORD[((160+80))+rbp] 1710 sub rbx,12*16 1711 lea rsi,[192+rsi] 1712 lea rdi,[192+rdi] 1713 1714 1715$L$open_sse_tail_64_dec_loop: 1716 cmp rbx,16 1717 jb NEAR $L$open_sse_tail_16_init 1718 sub rbx,16 1719 movdqu xmm3,XMMWORD[rsi] 1720 pxor xmm0,xmm3 1721 movdqu XMMWORD[rdi],xmm0 1722 lea rsi,[16+rsi] 1723 lea rdi,[16+rdi] 1724 movdqa xmm0,xmm4 1725 movdqa xmm4,xmm8 1726 movdqa xmm8,xmm12 1727 jmp NEAR $L$open_sse_tail_64_dec_loop 1728$L$open_sse_tail_16_init: 1729 movdqa xmm1,xmm0 1730 1731 1732$L$open_sse_tail_16: 1733 test rbx,rbx 1734 jz NEAR $L$open_sse_finalize 1735 1736 1737 1738 pxor xmm3,xmm3 1739 lea rsi,[((-1))+rbx*1+rsi] 1740 mov r8,rbx 1741$L$open_sse_tail_16_compose: 1742 pslldq xmm3,1 1743 pinsrb xmm3,BYTE[rsi],0 1744 sub rsi,1 1745 sub r8,1 1746 jnz NEAR $L$open_sse_tail_16_compose 1747 1748DB 102,73,15,126,221 1749 pextrq r14,xmm3,1 1750 1751 pxor xmm3,xmm1 1752 1753 1754$L$open_sse_tail_16_extract: 1755 pextrb XMMWORD[rdi],xmm3,0 1756 psrldq xmm3,1 1757 add rdi,1 1758 sub rbx,1 1759 jne NEAR $L$open_sse_tail_16_extract 1760 1761 add r10,r13 1762 adc r11,r14 1763 adc r12,1 1764 mov rax,QWORD[((0+160+0))+rbp] 1765 mov r15,rax 1766 mul r10 1767 mov r13,rax 1768 mov r14,rdx 1769 mov rax,QWORD[((0+160+0))+rbp] 1770 mul r11 1771 imul r15,r12 1772 add r14,rax 1773 adc r15,rdx 1774 mov rax,QWORD[((8+160+0))+rbp] 1775 mov r9,rax 1776 mul r10 1777 add r14,rax 1778 adc rdx,0 1779 mov r10,rdx 1780 mov rax,QWORD[((8+160+0))+rbp] 1781 mul r11 1782 add r15,rax 1783 adc rdx,0 1784 imul r9,r12 1785 add r15,r10 1786 adc r9,rdx 1787 
mov r10,r13 1788 mov r11,r14 1789 mov r12,r15 1790 and r12,3 1791 mov r13,r15 1792 and r13,-4 1793 mov r14,r9 1794 shrd r15,r9,2 1795 shr r9,2 1796 add r15,r13 1797 adc r9,r14 1798 add r10,r15 1799 adc r11,r9 1800 adc r12,0 1801 1802 1803$L$open_sse_finalize: 1804 add r10,QWORD[((0+160+32))+rbp] 1805 adc r11,QWORD[((8+160+32))+rbp] 1806 adc r12,1 1807 mov rax,QWORD[((0+160+0))+rbp] 1808 mov r15,rax 1809 mul r10 1810 mov r13,rax 1811 mov r14,rdx 1812 mov rax,QWORD[((0+160+0))+rbp] 1813 mul r11 1814 imul r15,r12 1815 add r14,rax 1816 adc r15,rdx 1817 mov rax,QWORD[((8+160+0))+rbp] 1818 mov r9,rax 1819 mul r10 1820 add r14,rax 1821 adc rdx,0 1822 mov r10,rdx 1823 mov rax,QWORD[((8+160+0))+rbp] 1824 mul r11 1825 add r15,rax 1826 adc rdx,0 1827 imul r9,r12 1828 add r15,r10 1829 adc r9,rdx 1830 mov r10,r13 1831 mov r11,r14 1832 mov r12,r15 1833 and r12,3 1834 mov r13,r15 1835 and r13,-4 1836 mov r14,r9 1837 shrd r15,r9,2 1838 shr r9,2 1839 add r15,r13 1840 adc r9,r14 1841 add r10,r15 1842 adc r11,r9 1843 adc r12,0 1844 1845 1846 mov r13,r10 1847 mov r14,r11 1848 mov r15,r12 1849 sub r10,-5 1850 sbb r11,-1 1851 sbb r12,3 1852 cmovc r10,r13 1853 cmovc r11,r14 1854 cmovc r12,r15 1855 1856 add r10,QWORD[((0+160+16))+rbp] 1857 adc r11,QWORD[((8+160+16))+rbp] 1858 1859 movaps xmm6,XMMWORD[((0+0))+rbp] 1860 movaps xmm7,XMMWORD[((16+0))+rbp] 1861 movaps xmm8,XMMWORD[((32+0))+rbp] 1862 movaps xmm9,XMMWORD[((48+0))+rbp] 1863 movaps xmm10,XMMWORD[((64+0))+rbp] 1864 movaps xmm11,XMMWORD[((80+0))+rbp] 1865 movaps xmm12,XMMWORD[((96+0))+rbp] 1866 movaps xmm13,XMMWORD[((112+0))+rbp] 1867 movaps xmm14,XMMWORD[((128+0))+rbp] 1868 movaps xmm15,XMMWORD[((144+0))+rbp] 1869 1870 1871 add rsp,288 + 160 + 32 1872 1873 1874 pop r9 1875 1876 mov QWORD[r9],r10 1877 mov QWORD[8+r9],r11 1878 pop r15 1879 1880 pop r14 1881 1882 pop r13 1883 1884 pop r12 1885 1886 pop rbx 1887 1888 pop rbp 1889 1890 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1891 mov rsi,QWORD[16+rsp] 1892 ret 1893 1894$L$open_sse_128: 
1895 1896 movdqu xmm0,XMMWORD[$L$chacha20_consts] 1897 movdqa xmm1,xmm0 1898 movdqa xmm2,xmm0 1899 movdqu xmm4,XMMWORD[r9] 1900 movdqa xmm5,xmm4 1901 movdqa xmm6,xmm4 1902 movdqu xmm8,XMMWORD[16+r9] 1903 movdqa xmm9,xmm8 1904 movdqa xmm10,xmm8 1905 movdqu xmm12,XMMWORD[32+r9] 1906 movdqa xmm13,xmm12 1907 paddd xmm13,XMMWORD[$L$sse_inc] 1908 movdqa xmm14,xmm13 1909 paddd xmm14,XMMWORD[$L$sse_inc] 1910 movdqa xmm7,xmm4 1911 movdqa xmm11,xmm8 1912 movdqa xmm15,xmm13 1913 mov r10,10 1914 1915$L$open_sse_128_rounds: 1916 paddd xmm0,xmm4 1917 pxor xmm12,xmm0 1918 pshufb xmm12,XMMWORD[$L$rol16] 1919 paddd xmm8,xmm12 1920 pxor xmm4,xmm8 1921 movdqa xmm3,xmm4 1922 pslld xmm3,12 1923 psrld xmm4,20 1924 pxor xmm4,xmm3 1925 paddd xmm0,xmm4 1926 pxor xmm12,xmm0 1927 pshufb xmm12,XMMWORD[$L$rol8] 1928 paddd xmm8,xmm12 1929 pxor xmm4,xmm8 1930 movdqa xmm3,xmm4 1931 pslld xmm3,7 1932 psrld xmm4,25 1933 pxor xmm4,xmm3 1934DB 102,15,58,15,228,4 1935DB 102,69,15,58,15,192,8 1936DB 102,69,15,58,15,228,12 1937 paddd xmm1,xmm5 1938 pxor xmm13,xmm1 1939 pshufb xmm13,XMMWORD[$L$rol16] 1940 paddd xmm9,xmm13 1941 pxor xmm5,xmm9 1942 movdqa xmm3,xmm5 1943 pslld xmm3,12 1944 psrld xmm5,20 1945 pxor xmm5,xmm3 1946 paddd xmm1,xmm5 1947 pxor xmm13,xmm1 1948 pshufb xmm13,XMMWORD[$L$rol8] 1949 paddd xmm9,xmm13 1950 pxor xmm5,xmm9 1951 movdqa xmm3,xmm5 1952 pslld xmm3,7 1953 psrld xmm5,25 1954 pxor xmm5,xmm3 1955DB 102,15,58,15,237,4 1956DB 102,69,15,58,15,201,8 1957DB 102,69,15,58,15,237,12 1958 paddd xmm2,xmm6 1959 pxor xmm14,xmm2 1960 pshufb xmm14,XMMWORD[$L$rol16] 1961 paddd xmm10,xmm14 1962 pxor xmm6,xmm10 1963 movdqa xmm3,xmm6 1964 pslld xmm3,12 1965 psrld xmm6,20 1966 pxor xmm6,xmm3 1967 paddd xmm2,xmm6 1968 pxor xmm14,xmm2 1969 pshufb xmm14,XMMWORD[$L$rol8] 1970 paddd xmm10,xmm14 1971 pxor xmm6,xmm10 1972 movdqa xmm3,xmm6 1973 pslld xmm3,7 1974 psrld xmm6,25 1975 pxor xmm6,xmm3 1976DB 102,15,58,15,246,4 1977DB 102,69,15,58,15,210,8 1978DB 102,69,15,58,15,246,12 1979 paddd xmm0,xmm4 1980 
pxor xmm12,xmm0 1981 pshufb xmm12,XMMWORD[$L$rol16] 1982 paddd xmm8,xmm12 1983 pxor xmm4,xmm8 1984 movdqa xmm3,xmm4 1985 pslld xmm3,12 1986 psrld xmm4,20 1987 pxor xmm4,xmm3 1988 paddd xmm0,xmm4 1989 pxor xmm12,xmm0 1990 pshufb xmm12,XMMWORD[$L$rol8] 1991 paddd xmm8,xmm12 1992 pxor xmm4,xmm8 1993 movdqa xmm3,xmm4 1994 pslld xmm3,7 1995 psrld xmm4,25 1996 pxor xmm4,xmm3 1997DB 102,15,58,15,228,12 1998DB 102,69,15,58,15,192,8 1999DB 102,69,15,58,15,228,4 2000 paddd xmm1,xmm5 2001 pxor xmm13,xmm1 2002 pshufb xmm13,XMMWORD[$L$rol16] 2003 paddd xmm9,xmm13 2004 pxor xmm5,xmm9 2005 movdqa xmm3,xmm5 2006 pslld xmm3,12 2007 psrld xmm5,20 2008 pxor xmm5,xmm3 2009 paddd xmm1,xmm5 2010 pxor xmm13,xmm1 2011 pshufb xmm13,XMMWORD[$L$rol8] 2012 paddd xmm9,xmm13 2013 pxor xmm5,xmm9 2014 movdqa xmm3,xmm5 2015 pslld xmm3,7 2016 psrld xmm5,25 2017 pxor xmm5,xmm3 2018DB 102,15,58,15,237,12 2019DB 102,69,15,58,15,201,8 2020DB 102,69,15,58,15,237,4 2021 paddd xmm2,xmm6 2022 pxor xmm14,xmm2 2023 pshufb xmm14,XMMWORD[$L$rol16] 2024 paddd xmm10,xmm14 2025 pxor xmm6,xmm10 2026 movdqa xmm3,xmm6 2027 pslld xmm3,12 2028 psrld xmm6,20 2029 pxor xmm6,xmm3 2030 paddd xmm2,xmm6 2031 pxor xmm14,xmm2 2032 pshufb xmm14,XMMWORD[$L$rol8] 2033 paddd xmm10,xmm14 2034 pxor xmm6,xmm10 2035 movdqa xmm3,xmm6 2036 pslld xmm3,7 2037 psrld xmm6,25 2038 pxor xmm6,xmm3 2039DB 102,15,58,15,246,12 2040DB 102,69,15,58,15,210,8 2041DB 102,69,15,58,15,246,4 2042 2043 dec r10 2044 jnz NEAR $L$open_sse_128_rounds 2045 paddd xmm0,XMMWORD[$L$chacha20_consts] 2046 paddd xmm1,XMMWORD[$L$chacha20_consts] 2047 paddd xmm2,XMMWORD[$L$chacha20_consts] 2048 paddd xmm4,xmm7 2049 paddd xmm5,xmm7 2050 paddd xmm6,xmm7 2051 paddd xmm9,xmm11 2052 paddd xmm10,xmm11 2053 paddd xmm13,xmm15 2054 paddd xmm15,XMMWORD[$L$sse_inc] 2055 paddd xmm14,xmm15 2056 2057 pand xmm0,XMMWORD[$L$clamp] 2058 movdqa XMMWORD[(160+0)+rbp],xmm0 2059 movdqa XMMWORD[(160+16)+rbp],xmm4 2060 2061 mov r8,r8 2062 call poly_hash_ad_internal 
2063$L$open_sse_128_xor_hash: 2064 cmp rbx,16 2065 jb NEAR $L$open_sse_tail_16 2066 sub rbx,16 2067 add r10,QWORD[((0+0))+rsi] 2068 adc r11,QWORD[((8+0))+rsi] 2069 adc r12,1 2070 2071 2072 movdqu xmm3,XMMWORD[rsi] 2073 pxor xmm1,xmm3 2074 movdqu XMMWORD[rdi],xmm1 2075 lea rsi,[16+rsi] 2076 lea rdi,[16+rdi] 2077 mov rax,QWORD[((0+160+0))+rbp] 2078 mov r15,rax 2079 mul r10 2080 mov r13,rax 2081 mov r14,rdx 2082 mov rax,QWORD[((0+160+0))+rbp] 2083 mul r11 2084 imul r15,r12 2085 add r14,rax 2086 adc r15,rdx 2087 mov rax,QWORD[((8+160+0))+rbp] 2088 mov r9,rax 2089 mul r10 2090 add r14,rax 2091 adc rdx,0 2092 mov r10,rdx 2093 mov rax,QWORD[((8+160+0))+rbp] 2094 mul r11 2095 add r15,rax 2096 adc rdx,0 2097 imul r9,r12 2098 add r15,r10 2099 adc r9,rdx 2100 mov r10,r13 2101 mov r11,r14 2102 mov r12,r15 2103 and r12,3 2104 mov r13,r15 2105 and r13,-4 2106 mov r14,r9 2107 shrd r15,r9,2 2108 shr r9,2 2109 add r15,r13 2110 adc r9,r14 2111 add r10,r15 2112 adc r11,r9 2113 adc r12,0 2114 2115 2116 movdqa xmm1,xmm5 2117 movdqa xmm5,xmm9 2118 movdqa xmm9,xmm13 2119 movdqa xmm13,xmm2 2120 movdqa xmm2,xmm6 2121 movdqa xmm6,xmm10 2122 movdqa xmm10,xmm14 2123 jmp NEAR $L$open_sse_128_xor_hash 2124$L$SEH_end_chacha20_poly1305_open: 2125 2126 2127 2128 2129 2130 2131 2132 2133global chacha20_poly1305_seal 2134 2135ALIGN 64 2136chacha20_poly1305_seal: 2137 mov QWORD[8+rsp],rdi ;WIN64 prologue 2138 mov QWORD[16+rsp],rsi 2139 mov rax,rsp 2140$L$SEH_begin_chacha20_poly1305_seal: 2141 mov rdi,rcx 2142 mov rsi,rdx 2143 mov rdx,r8 2144 mov rcx,r9 2145 mov r8,QWORD[40+rsp] 2146 mov r9,QWORD[48+rsp] 2147 2148 2149 2150_CET_ENDBR 2151 push rbp 2152 2153 push rbx 2154 2155 push r12 2156 2157 push r13 2158 2159 push r14 2160 2161 push r15 2162 2163 2164 2165 push r9 2166 2167 sub rsp,288 + 160 + 32 2168 2169 lea rbp,[32+rsp] 2170 and rbp,-32 2171 2172 movaps XMMWORD[(0+0)+rbp],xmm6 2173 movaps XMMWORD[(16+0)+rbp],xmm7 2174 movaps XMMWORD[(32+0)+rbp],xmm8 2175 movaps XMMWORD[(48+0)+rbp],xmm9 2176 
movaps XMMWORD[(64+0)+rbp],xmm10 2177 movaps XMMWORD[(80+0)+rbp],xmm11 2178 movaps XMMWORD[(96+0)+rbp],xmm12 2179 movaps XMMWORD[(112+0)+rbp],xmm13 2180 movaps XMMWORD[(128+0)+rbp],xmm14 2181 movaps XMMWORD[(144+0)+rbp],xmm15 2182 2183 mov rbx,QWORD[56+r9] 2184 add rbx,rdx 2185 mov QWORD[((0+160+32))+rbp],r8 2186 mov QWORD[((8+160+32))+rbp],rbx 2187 mov rbx,rdx 2188 2189 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 2190 and eax,288 2191 xor eax,288 2192 jz NEAR chacha20_poly1305_seal_avx2 2193 2194 cmp rbx,128 2195 jbe NEAR $L$seal_sse_128 2196 2197 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2198 movdqu xmm4,XMMWORD[r9] 2199 movdqu xmm8,XMMWORD[16+r9] 2200 movdqu xmm12,XMMWORD[32+r9] 2201 2202 movdqa xmm1,xmm0 2203 movdqa xmm2,xmm0 2204 movdqa xmm3,xmm0 2205 movdqa xmm5,xmm4 2206 movdqa xmm6,xmm4 2207 movdqa xmm7,xmm4 2208 movdqa xmm9,xmm8 2209 movdqa xmm10,xmm8 2210 movdqa xmm11,xmm8 2211 movdqa xmm15,xmm12 2212 paddd xmm12,XMMWORD[$L$sse_inc] 2213 movdqa xmm14,xmm12 2214 paddd xmm12,XMMWORD[$L$sse_inc] 2215 movdqa xmm13,xmm12 2216 paddd xmm12,XMMWORD[$L$sse_inc] 2217 2218 movdqa XMMWORD[(160+48)+rbp],xmm4 2219 movdqa XMMWORD[(160+64)+rbp],xmm8 2220 movdqa XMMWORD[(160+96)+rbp],xmm12 2221 movdqa XMMWORD[(160+112)+rbp],xmm13 2222 movdqa XMMWORD[(160+128)+rbp],xmm14 2223 movdqa XMMWORD[(160+144)+rbp],xmm15 2224 mov r10,10 2225$L$seal_sse_init_rounds: 2226 movdqa XMMWORD[(160+80)+rbp],xmm8 2227 movdqa xmm8,XMMWORD[$L$rol16] 2228 paddd xmm3,xmm7 2229 paddd xmm2,xmm6 2230 paddd xmm1,xmm5 2231 paddd xmm0,xmm4 2232 pxor xmm15,xmm3 2233 pxor xmm14,xmm2 2234 pxor xmm13,xmm1 2235 pxor xmm12,xmm0 2236DB 102,69,15,56,0,248 2237DB 102,69,15,56,0,240 2238DB 102,69,15,56,0,232 2239DB 102,69,15,56,0,224 2240 movdqa xmm8,XMMWORD[((160+80))+rbp] 2241 paddd xmm11,xmm15 2242 paddd xmm10,xmm14 2243 paddd xmm9,xmm13 2244 paddd xmm8,xmm12 2245 pxor xmm7,xmm11 2246 pxor xmm6,xmm10 2247 pxor xmm5,xmm9 2248 pxor xmm4,xmm8 2249 movdqa XMMWORD[(160+80)+rbp],xmm8 2250 movdqa xmm8,xmm7 2251 psrld 
xmm8,20 2252 pslld xmm7,32-20 2253 pxor xmm7,xmm8 2254 movdqa xmm8,xmm6 2255 psrld xmm8,20 2256 pslld xmm6,32-20 2257 pxor xmm6,xmm8 2258 movdqa xmm8,xmm5 2259 psrld xmm8,20 2260 pslld xmm5,32-20 2261 pxor xmm5,xmm8 2262 movdqa xmm8,xmm4 2263 psrld xmm8,20 2264 pslld xmm4,32-20 2265 pxor xmm4,xmm8 2266 movdqa xmm8,XMMWORD[$L$rol8] 2267 paddd xmm3,xmm7 2268 paddd xmm2,xmm6 2269 paddd xmm1,xmm5 2270 paddd xmm0,xmm4 2271 pxor xmm15,xmm3 2272 pxor xmm14,xmm2 2273 pxor xmm13,xmm1 2274 pxor xmm12,xmm0 2275DB 102,69,15,56,0,248 2276DB 102,69,15,56,0,240 2277DB 102,69,15,56,0,232 2278DB 102,69,15,56,0,224 2279 movdqa xmm8,XMMWORD[((160+80))+rbp] 2280 paddd xmm11,xmm15 2281 paddd xmm10,xmm14 2282 paddd xmm9,xmm13 2283 paddd xmm8,xmm12 2284 pxor xmm7,xmm11 2285 pxor xmm6,xmm10 2286 pxor xmm5,xmm9 2287 pxor xmm4,xmm8 2288 movdqa XMMWORD[(160+80)+rbp],xmm8 2289 movdqa xmm8,xmm7 2290 psrld xmm8,25 2291 pslld xmm7,32-25 2292 pxor xmm7,xmm8 2293 movdqa xmm8,xmm6 2294 psrld xmm8,25 2295 pslld xmm6,32-25 2296 pxor xmm6,xmm8 2297 movdqa xmm8,xmm5 2298 psrld xmm8,25 2299 pslld xmm5,32-25 2300 pxor xmm5,xmm8 2301 movdqa xmm8,xmm4 2302 psrld xmm8,25 2303 pslld xmm4,32-25 2304 pxor xmm4,xmm8 2305 movdqa xmm8,XMMWORD[((160+80))+rbp] 2306DB 102,15,58,15,255,4 2307DB 102,69,15,58,15,219,8 2308DB 102,69,15,58,15,255,12 2309DB 102,15,58,15,246,4 2310DB 102,69,15,58,15,210,8 2311DB 102,69,15,58,15,246,12 2312DB 102,15,58,15,237,4 2313DB 102,69,15,58,15,201,8 2314DB 102,69,15,58,15,237,12 2315DB 102,15,58,15,228,4 2316DB 102,69,15,58,15,192,8 2317DB 102,69,15,58,15,228,12 2318 movdqa XMMWORD[(160+80)+rbp],xmm8 2319 movdqa xmm8,XMMWORD[$L$rol16] 2320 paddd xmm3,xmm7 2321 paddd xmm2,xmm6 2322 paddd xmm1,xmm5 2323 paddd xmm0,xmm4 2324 pxor xmm15,xmm3 2325 pxor xmm14,xmm2 2326 pxor xmm13,xmm1 2327 pxor xmm12,xmm0 2328DB 102,69,15,56,0,248 2329DB 102,69,15,56,0,240 2330DB 102,69,15,56,0,232 2331DB 102,69,15,56,0,224 2332 movdqa xmm8,XMMWORD[((160+80))+rbp] 2333 paddd xmm11,xmm15 2334 paddd 
xmm10,xmm14 2335 paddd xmm9,xmm13 2336 paddd xmm8,xmm12 2337 pxor xmm7,xmm11 2338 pxor xmm6,xmm10 2339 pxor xmm5,xmm9 2340 pxor xmm4,xmm8 2341 movdqa XMMWORD[(160+80)+rbp],xmm8 2342 movdqa xmm8,xmm7 2343 psrld xmm8,20 2344 pslld xmm7,32-20 2345 pxor xmm7,xmm8 2346 movdqa xmm8,xmm6 2347 psrld xmm8,20 2348 pslld xmm6,32-20 2349 pxor xmm6,xmm8 2350 movdqa xmm8,xmm5 2351 psrld xmm8,20 2352 pslld xmm5,32-20 2353 pxor xmm5,xmm8 2354 movdqa xmm8,xmm4 2355 psrld xmm8,20 2356 pslld xmm4,32-20 2357 pxor xmm4,xmm8 2358 movdqa xmm8,XMMWORD[$L$rol8] 2359 paddd xmm3,xmm7 2360 paddd xmm2,xmm6 2361 paddd xmm1,xmm5 2362 paddd xmm0,xmm4 2363 pxor xmm15,xmm3 2364 pxor xmm14,xmm2 2365 pxor xmm13,xmm1 2366 pxor xmm12,xmm0 2367DB 102,69,15,56,0,248 2368DB 102,69,15,56,0,240 2369DB 102,69,15,56,0,232 2370DB 102,69,15,56,0,224 2371 movdqa xmm8,XMMWORD[((160+80))+rbp] 2372 paddd xmm11,xmm15 2373 paddd xmm10,xmm14 2374 paddd xmm9,xmm13 2375 paddd xmm8,xmm12 2376 pxor xmm7,xmm11 2377 pxor xmm6,xmm10 2378 pxor xmm5,xmm9 2379 pxor xmm4,xmm8 2380 movdqa XMMWORD[(160+80)+rbp],xmm8 2381 movdqa xmm8,xmm7 2382 psrld xmm8,25 2383 pslld xmm7,32-25 2384 pxor xmm7,xmm8 2385 movdqa xmm8,xmm6 2386 psrld xmm8,25 2387 pslld xmm6,32-25 2388 pxor xmm6,xmm8 2389 movdqa xmm8,xmm5 2390 psrld xmm8,25 2391 pslld xmm5,32-25 2392 pxor xmm5,xmm8 2393 movdqa xmm8,xmm4 2394 psrld xmm8,25 2395 pslld xmm4,32-25 2396 pxor xmm4,xmm8 2397 movdqa xmm8,XMMWORD[((160+80))+rbp] 2398DB 102,15,58,15,255,12 2399DB 102,69,15,58,15,219,8 2400DB 102,69,15,58,15,255,4 2401DB 102,15,58,15,246,12 2402DB 102,69,15,58,15,210,8 2403DB 102,69,15,58,15,246,4 2404DB 102,15,58,15,237,12 2405DB 102,69,15,58,15,201,8 2406DB 102,69,15,58,15,237,4 2407DB 102,15,58,15,228,12 2408DB 102,69,15,58,15,192,8 2409DB 102,69,15,58,15,228,4 2410 2411 dec r10 2412 jnz NEAR $L$seal_sse_init_rounds 2413 paddd xmm3,XMMWORD[$L$chacha20_consts] 2414 paddd xmm7,XMMWORD[((160+48))+rbp] 2415 paddd xmm11,XMMWORD[((160+64))+rbp] 2416 paddd 
xmm15,XMMWORD[((160+144))+rbp] 2417 paddd xmm2,XMMWORD[$L$chacha20_consts] 2418 paddd xmm6,XMMWORD[((160+48))+rbp] 2419 paddd xmm10,XMMWORD[((160+64))+rbp] 2420 paddd xmm14,XMMWORD[((160+128))+rbp] 2421 paddd xmm1,XMMWORD[$L$chacha20_consts] 2422 paddd xmm5,XMMWORD[((160+48))+rbp] 2423 paddd xmm9,XMMWORD[((160+64))+rbp] 2424 paddd xmm13,XMMWORD[((160+112))+rbp] 2425 paddd xmm0,XMMWORD[$L$chacha20_consts] 2426 paddd xmm4,XMMWORD[((160+48))+rbp] 2427 paddd xmm8,XMMWORD[((160+64))+rbp] 2428 paddd xmm12,XMMWORD[((160+96))+rbp] 2429 2430 2431 pand xmm3,XMMWORD[$L$clamp] 2432 movdqa XMMWORD[(160+0)+rbp],xmm3 2433 movdqa XMMWORD[(160+16)+rbp],xmm7 2434 2435 mov r8,r8 2436 call poly_hash_ad_internal 2437 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 2438 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 2439 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 2440 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 2441 pxor xmm2,xmm3 2442 pxor xmm6,xmm7 2443 pxor xmm10,xmm11 2444 pxor xmm15,xmm14 2445 movdqu XMMWORD[(0 + 0)+rdi],xmm2 2446 movdqu XMMWORD[(16 + 0)+rdi],xmm6 2447 movdqu XMMWORD[(32 + 0)+rdi],xmm10 2448 movdqu XMMWORD[(48 + 0)+rdi],xmm15 2449 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 2450 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 2451 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 2452 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 2453 pxor xmm1,xmm3 2454 pxor xmm5,xmm7 2455 pxor xmm9,xmm11 2456 pxor xmm15,xmm13 2457 movdqu XMMWORD[(0 + 64)+rdi],xmm1 2458 movdqu XMMWORD[(16 + 64)+rdi],xmm5 2459 movdqu XMMWORD[(32 + 64)+rdi],xmm9 2460 movdqu XMMWORD[(48 + 64)+rdi],xmm15 2461 2462 cmp rbx,12*16 2463 ja NEAR $L$seal_sse_main_init 2464 mov rcx,8*16 2465 sub rbx,8*16 2466 lea rsi,[128+rsi] 2467 jmp NEAR $L$seal_sse_128_tail_hash 2468$L$seal_sse_main_init: 2469 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 2470 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 2471 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 2472 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 2473 pxor xmm0,xmm3 2474 pxor xmm4,xmm7 2475 pxor xmm8,xmm11 2476 pxor xmm15,xmm12 2477 movdqu XMMWORD[(0 + 128)+rdi],xmm0 
2478 movdqu XMMWORD[(16 + 128)+rdi],xmm4 2479 movdqu XMMWORD[(32 + 128)+rdi],xmm8 2480 movdqu XMMWORD[(48 + 128)+rdi],xmm15 2481 2482 mov rcx,12*16 2483 sub rbx,12*16 2484 lea rsi,[192+rsi] 2485 mov rcx,2 2486 mov r8,8 2487 cmp rbx,4*16 2488 jbe NEAR $L$seal_sse_tail_64 2489 cmp rbx,8*16 2490 jbe NEAR $L$seal_sse_tail_128 2491 cmp rbx,12*16 2492 jbe NEAR $L$seal_sse_tail_192 2493 2494$L$seal_sse_main_loop: 2495 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2496 movdqa xmm4,XMMWORD[((160+48))+rbp] 2497 movdqa xmm8,XMMWORD[((160+64))+rbp] 2498 movdqa xmm1,xmm0 2499 movdqa xmm5,xmm4 2500 movdqa xmm9,xmm8 2501 movdqa xmm2,xmm0 2502 movdqa xmm6,xmm4 2503 movdqa xmm10,xmm8 2504 movdqa xmm3,xmm0 2505 movdqa xmm7,xmm4 2506 movdqa xmm11,xmm8 2507 movdqa xmm15,XMMWORD[((160+96))+rbp] 2508 paddd xmm15,XMMWORD[$L$sse_inc] 2509 movdqa xmm14,xmm15 2510 paddd xmm14,XMMWORD[$L$sse_inc] 2511 movdqa xmm13,xmm14 2512 paddd xmm13,XMMWORD[$L$sse_inc] 2513 movdqa xmm12,xmm13 2514 paddd xmm12,XMMWORD[$L$sse_inc] 2515 movdqa XMMWORD[(160+96)+rbp],xmm12 2516 movdqa XMMWORD[(160+112)+rbp],xmm13 2517 movdqa XMMWORD[(160+128)+rbp],xmm14 2518 movdqa XMMWORD[(160+144)+rbp],xmm15 2519 2520ALIGN 32 2521$L$seal_sse_main_rounds: 2522 movdqa XMMWORD[(160+80)+rbp],xmm8 2523 movdqa xmm8,XMMWORD[$L$rol16] 2524 paddd xmm3,xmm7 2525 paddd xmm2,xmm6 2526 paddd xmm1,xmm5 2527 paddd xmm0,xmm4 2528 pxor xmm15,xmm3 2529 pxor xmm14,xmm2 2530 pxor xmm13,xmm1 2531 pxor xmm12,xmm0 2532DB 102,69,15,56,0,248 2533DB 102,69,15,56,0,240 2534DB 102,69,15,56,0,232 2535DB 102,69,15,56,0,224 2536 movdqa xmm8,XMMWORD[((160+80))+rbp] 2537 paddd xmm11,xmm15 2538 paddd xmm10,xmm14 2539 paddd xmm9,xmm13 2540 paddd xmm8,xmm12 2541 pxor xmm7,xmm11 2542 add r10,QWORD[((0+0))+rdi] 2543 adc r11,QWORD[((8+0))+rdi] 2544 adc r12,1 2545 pxor xmm6,xmm10 2546 pxor xmm5,xmm9 2547 pxor xmm4,xmm8 2548 movdqa XMMWORD[(160+80)+rbp],xmm8 2549 movdqa xmm8,xmm7 2550 psrld xmm8,20 2551 pslld xmm7,32-20 2552 pxor xmm7,xmm8 2553 movdqa xmm8,xmm6 2554 
psrld xmm8,20 2555 pslld xmm6,32-20 2556 pxor xmm6,xmm8 2557 movdqa xmm8,xmm5 2558 psrld xmm8,20 2559 pslld xmm5,32-20 2560 pxor xmm5,xmm8 2561 movdqa xmm8,xmm4 2562 psrld xmm8,20 2563 pslld xmm4,32-20 2564 pxor xmm4,xmm8 2565 mov rax,QWORD[((0+160+0))+rbp] 2566 mov r15,rax 2567 mul r10 2568 mov r13,rax 2569 mov r14,rdx 2570 mov rax,QWORD[((0+160+0))+rbp] 2571 mul r11 2572 imul r15,r12 2573 add r14,rax 2574 adc r15,rdx 2575 movdqa xmm8,XMMWORD[$L$rol8] 2576 paddd xmm3,xmm7 2577 paddd xmm2,xmm6 2578 paddd xmm1,xmm5 2579 paddd xmm0,xmm4 2580 pxor xmm15,xmm3 2581 pxor xmm14,xmm2 2582 pxor xmm13,xmm1 2583 pxor xmm12,xmm0 2584DB 102,69,15,56,0,248 2585DB 102,69,15,56,0,240 2586DB 102,69,15,56,0,232 2587DB 102,69,15,56,0,224 2588 movdqa xmm8,XMMWORD[((160+80))+rbp] 2589 paddd xmm11,xmm15 2590 paddd xmm10,xmm14 2591 paddd xmm9,xmm13 2592 paddd xmm8,xmm12 2593 pxor xmm7,xmm11 2594 pxor xmm6,xmm10 2595 mov rax,QWORD[((8+160+0))+rbp] 2596 mov r9,rax 2597 mul r10 2598 add r14,rax 2599 adc rdx,0 2600 mov r10,rdx 2601 mov rax,QWORD[((8+160+0))+rbp] 2602 mul r11 2603 add r15,rax 2604 adc rdx,0 2605 pxor xmm5,xmm9 2606 pxor xmm4,xmm8 2607 movdqa XMMWORD[(160+80)+rbp],xmm8 2608 movdqa xmm8,xmm7 2609 psrld xmm8,25 2610 pslld xmm7,32-25 2611 pxor xmm7,xmm8 2612 movdqa xmm8,xmm6 2613 psrld xmm8,25 2614 pslld xmm6,32-25 2615 pxor xmm6,xmm8 2616 movdqa xmm8,xmm5 2617 psrld xmm8,25 2618 pslld xmm5,32-25 2619 pxor xmm5,xmm8 2620 movdqa xmm8,xmm4 2621 psrld xmm8,25 2622 pslld xmm4,32-25 2623 pxor xmm4,xmm8 2624 movdqa xmm8,XMMWORD[((160+80))+rbp] 2625 imul r9,r12 2626 add r15,r10 2627 adc r9,rdx 2628DB 102,15,58,15,255,4 2629DB 102,69,15,58,15,219,8 2630DB 102,69,15,58,15,255,12 2631DB 102,15,58,15,246,4 2632DB 102,69,15,58,15,210,8 2633DB 102,69,15,58,15,246,12 2634DB 102,15,58,15,237,4 2635DB 102,69,15,58,15,201,8 2636DB 102,69,15,58,15,237,12 2637DB 102,15,58,15,228,4 2638DB 102,69,15,58,15,192,8 2639DB 102,69,15,58,15,228,12 2640 movdqa XMMWORD[(160+80)+rbp],xmm8 2641 movdqa 
xmm8,XMMWORD[$L$rol16] 2642 paddd xmm3,xmm7 2643 paddd xmm2,xmm6 2644 paddd xmm1,xmm5 2645 paddd xmm0,xmm4 2646 pxor xmm15,xmm3 2647 pxor xmm14,xmm2 2648 mov r10,r13 2649 mov r11,r14 2650 mov r12,r15 2651 and r12,3 2652 mov r13,r15 2653 and r13,-4 2654 mov r14,r9 2655 shrd r15,r9,2 2656 shr r9,2 2657 add r15,r13 2658 adc r9,r14 2659 add r10,r15 2660 adc r11,r9 2661 adc r12,0 2662 pxor xmm13,xmm1 2663 pxor xmm12,xmm0 2664DB 102,69,15,56,0,248 2665DB 102,69,15,56,0,240 2666DB 102,69,15,56,0,232 2667DB 102,69,15,56,0,224 2668 movdqa xmm8,XMMWORD[((160+80))+rbp] 2669 paddd xmm11,xmm15 2670 paddd xmm10,xmm14 2671 paddd xmm9,xmm13 2672 paddd xmm8,xmm12 2673 pxor xmm7,xmm11 2674 pxor xmm6,xmm10 2675 pxor xmm5,xmm9 2676 pxor xmm4,xmm8 2677 movdqa XMMWORD[(160+80)+rbp],xmm8 2678 movdqa xmm8,xmm7 2679 psrld xmm8,20 2680 pslld xmm7,32-20 2681 pxor xmm7,xmm8 2682 movdqa xmm8,xmm6 2683 psrld xmm8,20 2684 pslld xmm6,32-20 2685 pxor xmm6,xmm8 2686 movdqa xmm8,xmm5 2687 psrld xmm8,20 2688 pslld xmm5,32-20 2689 pxor xmm5,xmm8 2690 movdqa xmm8,xmm4 2691 psrld xmm8,20 2692 pslld xmm4,32-20 2693 pxor xmm4,xmm8 2694 movdqa xmm8,XMMWORD[$L$rol8] 2695 paddd xmm3,xmm7 2696 paddd xmm2,xmm6 2697 paddd xmm1,xmm5 2698 paddd xmm0,xmm4 2699 pxor xmm15,xmm3 2700 pxor xmm14,xmm2 2701 pxor xmm13,xmm1 2702 pxor xmm12,xmm0 2703DB 102,69,15,56,0,248 2704DB 102,69,15,56,0,240 2705DB 102,69,15,56,0,232 2706DB 102,69,15,56,0,224 2707 movdqa xmm8,XMMWORD[((160+80))+rbp] 2708 paddd xmm11,xmm15 2709 paddd xmm10,xmm14 2710 paddd xmm9,xmm13 2711 paddd xmm8,xmm12 2712 pxor xmm7,xmm11 2713 pxor xmm6,xmm10 2714 pxor xmm5,xmm9 2715 pxor xmm4,xmm8 2716 movdqa XMMWORD[(160+80)+rbp],xmm8 2717 movdqa xmm8,xmm7 2718 psrld xmm8,25 2719 pslld xmm7,32-25 2720 pxor xmm7,xmm8 2721 movdqa xmm8,xmm6 2722 psrld xmm8,25 2723 pslld xmm6,32-25 2724 pxor xmm6,xmm8 2725 movdqa xmm8,xmm5 2726 psrld xmm8,25 2727 pslld xmm5,32-25 2728 pxor xmm5,xmm8 2729 movdqa xmm8,xmm4 2730 psrld xmm8,25 2731 pslld xmm4,32-25 2732 pxor xmm4,xmm8 
2733 movdqa xmm8,XMMWORD[((160+80))+rbp] 2734DB 102,15,58,15,255,12 2735DB 102,69,15,58,15,219,8 2736DB 102,69,15,58,15,255,4 2737DB 102,15,58,15,246,12 2738DB 102,69,15,58,15,210,8 2739DB 102,69,15,58,15,246,4 2740DB 102,15,58,15,237,12 2741DB 102,69,15,58,15,201,8 2742DB 102,69,15,58,15,237,4 2743DB 102,15,58,15,228,12 2744DB 102,69,15,58,15,192,8 2745DB 102,69,15,58,15,228,4 2746 2747 lea rdi,[16+rdi] 2748 dec r8 2749 jge NEAR $L$seal_sse_main_rounds 2750 add r10,QWORD[((0+0))+rdi] 2751 adc r11,QWORD[((8+0))+rdi] 2752 adc r12,1 2753 mov rax,QWORD[((0+160+0))+rbp] 2754 mov r15,rax 2755 mul r10 2756 mov r13,rax 2757 mov r14,rdx 2758 mov rax,QWORD[((0+160+0))+rbp] 2759 mul r11 2760 imul r15,r12 2761 add r14,rax 2762 adc r15,rdx 2763 mov rax,QWORD[((8+160+0))+rbp] 2764 mov r9,rax 2765 mul r10 2766 add r14,rax 2767 adc rdx,0 2768 mov r10,rdx 2769 mov rax,QWORD[((8+160+0))+rbp] 2770 mul r11 2771 add r15,rax 2772 adc rdx,0 2773 imul r9,r12 2774 add r15,r10 2775 adc r9,rdx 2776 mov r10,r13 2777 mov r11,r14 2778 mov r12,r15 2779 and r12,3 2780 mov r13,r15 2781 and r13,-4 2782 mov r14,r9 2783 shrd r15,r9,2 2784 shr r9,2 2785 add r15,r13 2786 adc r9,r14 2787 add r10,r15 2788 adc r11,r9 2789 adc r12,0 2790 2791 lea rdi,[16+rdi] 2792 dec rcx 2793 jg NEAR $L$seal_sse_main_rounds 2794 paddd xmm3,XMMWORD[$L$chacha20_consts] 2795 paddd xmm7,XMMWORD[((160+48))+rbp] 2796 paddd xmm11,XMMWORD[((160+64))+rbp] 2797 paddd xmm15,XMMWORD[((160+144))+rbp] 2798 paddd xmm2,XMMWORD[$L$chacha20_consts] 2799 paddd xmm6,XMMWORD[((160+48))+rbp] 2800 paddd xmm10,XMMWORD[((160+64))+rbp] 2801 paddd xmm14,XMMWORD[((160+128))+rbp] 2802 paddd xmm1,XMMWORD[$L$chacha20_consts] 2803 paddd xmm5,XMMWORD[((160+48))+rbp] 2804 paddd xmm9,XMMWORD[((160+64))+rbp] 2805 paddd xmm13,XMMWORD[((160+112))+rbp] 2806 paddd xmm0,XMMWORD[$L$chacha20_consts] 2807 paddd xmm4,XMMWORD[((160+48))+rbp] 2808 paddd xmm8,XMMWORD[((160+64))+rbp] 2809 paddd xmm12,XMMWORD[((160+96))+rbp] 2810 2811 movdqa XMMWORD[(160+80)+rbp],xmm14 
2812 movdqa XMMWORD[(160+80)+rbp],xmm14 2813 movdqu xmm14,XMMWORD[((0 + 0))+rsi] 2814 pxor xmm14,xmm3 2815 movdqu XMMWORD[(0 + 0)+rdi],xmm14 2816 movdqu xmm14,XMMWORD[((16 + 0))+rsi] 2817 pxor xmm14,xmm7 2818 movdqu XMMWORD[(16 + 0)+rdi],xmm14 2819 movdqu xmm14,XMMWORD[((32 + 0))+rsi] 2820 pxor xmm14,xmm11 2821 movdqu XMMWORD[(32 + 0)+rdi],xmm14 2822 movdqu xmm14,XMMWORD[((48 + 0))+rsi] 2823 pxor xmm14,xmm15 2824 movdqu XMMWORD[(48 + 0)+rdi],xmm14 2825 2826 movdqa xmm14,XMMWORD[((160+80))+rbp] 2827 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 2828 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 2829 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 2830 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 2831 pxor xmm2,xmm3 2832 pxor xmm6,xmm7 2833 pxor xmm10,xmm11 2834 pxor xmm15,xmm14 2835 movdqu XMMWORD[(0 + 64)+rdi],xmm2 2836 movdqu XMMWORD[(16 + 64)+rdi],xmm6 2837 movdqu XMMWORD[(32 + 64)+rdi],xmm10 2838 movdqu XMMWORD[(48 + 64)+rdi],xmm15 2839 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 2840 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 2841 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 2842 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 2843 pxor xmm1,xmm3 2844 pxor xmm5,xmm7 2845 pxor xmm9,xmm11 2846 pxor xmm15,xmm13 2847 movdqu XMMWORD[(0 + 128)+rdi],xmm1 2848 movdqu XMMWORD[(16 + 128)+rdi],xmm5 2849 movdqu XMMWORD[(32 + 128)+rdi],xmm9 2850 movdqu XMMWORD[(48 + 128)+rdi],xmm15 2851 2852 cmp rbx,16*16 2853 ja NEAR $L$seal_sse_main_loop_xor 2854 2855 mov rcx,12*16 2856 sub rbx,12*16 2857 lea rsi,[192+rsi] 2858 jmp NEAR $L$seal_sse_128_tail_hash 2859$L$seal_sse_main_loop_xor: 2860 movdqu xmm3,XMMWORD[((0 + 192))+rsi] 2861 movdqu xmm7,XMMWORD[((16 + 192))+rsi] 2862 movdqu xmm11,XMMWORD[((32 + 192))+rsi] 2863 movdqu xmm15,XMMWORD[((48 + 192))+rsi] 2864 pxor xmm0,xmm3 2865 pxor xmm4,xmm7 2866 pxor xmm8,xmm11 2867 pxor xmm15,xmm12 2868 movdqu XMMWORD[(0 + 192)+rdi],xmm0 2869 movdqu XMMWORD[(16 + 192)+rdi],xmm4 2870 movdqu XMMWORD[(32 + 192)+rdi],xmm8 2871 movdqu XMMWORD[(48 + 192)+rdi],xmm15 2872 2873 lea rsi,[256+rsi] 2874 sub rbx,16*16 
2875 mov rcx,6 2876 mov r8,4 2877 cmp rbx,12*16 2878 jg NEAR $L$seal_sse_main_loop 2879 mov rcx,rbx 2880 test rbx,rbx 2881 je NEAR $L$seal_sse_128_tail_hash 2882 mov rcx,6 2883 cmp rbx,8*16 2884 ja NEAR $L$seal_sse_tail_192 2885 cmp rbx,4*16 2886 ja NEAR $L$seal_sse_tail_128 2887 2888$L$seal_sse_tail_64: 2889 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2890 movdqa xmm4,XMMWORD[((160+48))+rbp] 2891 movdqa xmm8,XMMWORD[((160+64))+rbp] 2892 movdqa xmm12,XMMWORD[((160+96))+rbp] 2893 paddd xmm12,XMMWORD[$L$sse_inc] 2894 movdqa XMMWORD[(160+96)+rbp],xmm12 2895 2896$L$seal_sse_tail_64_rounds_and_x2hash: 2897 add r10,QWORD[((0+0))+rdi] 2898 adc r11,QWORD[((8+0))+rdi] 2899 adc r12,1 2900 mov rax,QWORD[((0+160+0))+rbp] 2901 mov r15,rax 2902 mul r10 2903 mov r13,rax 2904 mov r14,rdx 2905 mov rax,QWORD[((0+160+0))+rbp] 2906 mul r11 2907 imul r15,r12 2908 add r14,rax 2909 adc r15,rdx 2910 mov rax,QWORD[((8+160+0))+rbp] 2911 mov r9,rax 2912 mul r10 2913 add r14,rax 2914 adc rdx,0 2915 mov r10,rdx 2916 mov rax,QWORD[((8+160+0))+rbp] 2917 mul r11 2918 add r15,rax 2919 adc rdx,0 2920 imul r9,r12 2921 add r15,r10 2922 adc r9,rdx 2923 mov r10,r13 2924 mov r11,r14 2925 mov r12,r15 2926 and r12,3 2927 mov r13,r15 2928 and r13,-4 2929 mov r14,r9 2930 shrd r15,r9,2 2931 shr r9,2 2932 add r15,r13 2933 adc r9,r14 2934 add r10,r15 2935 adc r11,r9 2936 adc r12,0 2937 2938 lea rdi,[16+rdi] 2939$L$seal_sse_tail_64_rounds_and_x1hash: 2940 paddd xmm0,xmm4 2941 pxor xmm12,xmm0 2942 pshufb xmm12,XMMWORD[$L$rol16] 2943 paddd xmm8,xmm12 2944 pxor xmm4,xmm8 2945 movdqa xmm3,xmm4 2946 pslld xmm3,12 2947 psrld xmm4,20 2948 pxor xmm4,xmm3 2949 paddd xmm0,xmm4 2950 pxor xmm12,xmm0 2951 pshufb xmm12,XMMWORD[$L$rol8] 2952 paddd xmm8,xmm12 2953 pxor xmm4,xmm8 2954 movdqa xmm3,xmm4 2955 pslld xmm3,7 2956 psrld xmm4,25 2957 pxor xmm4,xmm3 2958DB 102,15,58,15,228,4 2959DB 102,69,15,58,15,192,8 2960DB 102,69,15,58,15,228,12 2961 paddd xmm0,xmm4 2962 pxor xmm12,xmm0 2963 pshufb xmm12,XMMWORD[$L$rol16] 2964 paddd 
xmm8,xmm12 2965 pxor xmm4,xmm8 2966 movdqa xmm3,xmm4 2967 pslld xmm3,12 2968 psrld xmm4,20 2969 pxor xmm4,xmm3 2970 paddd xmm0,xmm4 2971 pxor xmm12,xmm0 2972 pshufb xmm12,XMMWORD[$L$rol8] 2973 paddd xmm8,xmm12 2974 pxor xmm4,xmm8 2975 movdqa xmm3,xmm4 2976 pslld xmm3,7 2977 psrld xmm4,25 2978 pxor xmm4,xmm3 2979DB 102,15,58,15,228,12 2980DB 102,69,15,58,15,192,8 2981DB 102,69,15,58,15,228,4 2982 add r10,QWORD[((0+0))+rdi] 2983 adc r11,QWORD[((8+0))+rdi] 2984 adc r12,1 2985 mov rax,QWORD[((0+160+0))+rbp] 2986 mov r15,rax 2987 mul r10 2988 mov r13,rax 2989 mov r14,rdx 2990 mov rax,QWORD[((0+160+0))+rbp] 2991 mul r11 2992 imul r15,r12 2993 add r14,rax 2994 adc r15,rdx 2995 mov rax,QWORD[((8+160+0))+rbp] 2996 mov r9,rax 2997 mul r10 2998 add r14,rax 2999 adc rdx,0 3000 mov r10,rdx 3001 mov rax,QWORD[((8+160+0))+rbp] 3002 mul r11 3003 add r15,rax 3004 adc rdx,0 3005 imul r9,r12 3006 add r15,r10 3007 adc r9,rdx 3008 mov r10,r13 3009 mov r11,r14 3010 mov r12,r15 3011 and r12,3 3012 mov r13,r15 3013 and r13,-4 3014 mov r14,r9 3015 shrd r15,r9,2 3016 shr r9,2 3017 add r15,r13 3018 adc r9,r14 3019 add r10,r15 3020 adc r11,r9 3021 adc r12,0 3022 3023 lea rdi,[16+rdi] 3024 dec rcx 3025 jg NEAR $L$seal_sse_tail_64_rounds_and_x2hash 3026 dec r8 3027 jge NEAR $L$seal_sse_tail_64_rounds_and_x1hash 3028 paddd xmm0,XMMWORD[$L$chacha20_consts] 3029 paddd xmm4,XMMWORD[((160+48))+rbp] 3030 paddd xmm8,XMMWORD[((160+64))+rbp] 3031 paddd xmm12,XMMWORD[((160+96))+rbp] 3032 3033 jmp NEAR $L$seal_sse_128_tail_xor 3034 3035$L$seal_sse_tail_128: 3036 movdqa xmm0,XMMWORD[$L$chacha20_consts] 3037 movdqa xmm4,XMMWORD[((160+48))+rbp] 3038 movdqa xmm8,XMMWORD[((160+64))+rbp] 3039 movdqa xmm1,xmm0 3040 movdqa xmm5,xmm4 3041 movdqa xmm9,xmm8 3042 movdqa xmm13,XMMWORD[((160+96))+rbp] 3043 paddd xmm13,XMMWORD[$L$sse_inc] 3044 movdqa xmm12,xmm13 3045 paddd xmm12,XMMWORD[$L$sse_inc] 3046 movdqa XMMWORD[(160+96)+rbp],xmm12 3047 movdqa XMMWORD[(160+112)+rbp],xmm13 3048 
3049$L$seal_sse_tail_128_rounds_and_x2hash: 3050 add r10,QWORD[((0+0))+rdi] 3051 adc r11,QWORD[((8+0))+rdi] 3052 adc r12,1 3053 mov rax,QWORD[((0+160+0))+rbp] 3054 mov r15,rax 3055 mul r10 3056 mov r13,rax 3057 mov r14,rdx 3058 mov rax,QWORD[((0+160+0))+rbp] 3059 mul r11 3060 imul r15,r12 3061 add r14,rax 3062 adc r15,rdx 3063 mov rax,QWORD[((8+160+0))+rbp] 3064 mov r9,rax 3065 mul r10 3066 add r14,rax 3067 adc rdx,0 3068 mov r10,rdx 3069 mov rax,QWORD[((8+160+0))+rbp] 3070 mul r11 3071 add r15,rax 3072 adc rdx,0 3073 imul r9,r12 3074 add r15,r10 3075 adc r9,rdx 3076 mov r10,r13 3077 mov r11,r14 3078 mov r12,r15 3079 and r12,3 3080 mov r13,r15 3081 and r13,-4 3082 mov r14,r9 3083 shrd r15,r9,2 3084 shr r9,2 3085 add r15,r13 3086 adc r9,r14 3087 add r10,r15 3088 adc r11,r9 3089 adc r12,0 3090 3091 lea rdi,[16+rdi] 3092$L$seal_sse_tail_128_rounds_and_x1hash: 3093 paddd xmm0,xmm4 3094 pxor xmm12,xmm0 3095 pshufb xmm12,XMMWORD[$L$rol16] 3096 paddd xmm8,xmm12 3097 pxor xmm4,xmm8 3098 movdqa xmm3,xmm4 3099 pslld xmm3,12 3100 psrld xmm4,20 3101 pxor xmm4,xmm3 3102 paddd xmm0,xmm4 3103 pxor xmm12,xmm0 3104 pshufb xmm12,XMMWORD[$L$rol8] 3105 paddd xmm8,xmm12 3106 pxor xmm4,xmm8 3107 movdqa xmm3,xmm4 3108 pslld xmm3,7 3109 psrld xmm4,25 3110 pxor xmm4,xmm3 3111DB 102,15,58,15,228,4 3112DB 102,69,15,58,15,192,8 3113DB 102,69,15,58,15,228,12 3114 paddd xmm1,xmm5 3115 pxor xmm13,xmm1 3116 pshufb xmm13,XMMWORD[$L$rol16] 3117 paddd xmm9,xmm13 3118 pxor xmm5,xmm9 3119 movdqa xmm3,xmm5 3120 pslld xmm3,12 3121 psrld xmm5,20 3122 pxor xmm5,xmm3 3123 paddd xmm1,xmm5 3124 pxor xmm13,xmm1 3125 pshufb xmm13,XMMWORD[$L$rol8] 3126 paddd xmm9,xmm13 3127 pxor xmm5,xmm9 3128 movdqa xmm3,xmm5 3129 pslld xmm3,7 3130 psrld xmm5,25 3131 pxor xmm5,xmm3 3132DB 102,15,58,15,237,4 3133DB 102,69,15,58,15,201,8 3134DB 102,69,15,58,15,237,12 3135 add r10,QWORD[((0+0))+rdi] 3136 adc r11,QWORD[((8+0))+rdi] 3137 adc r12,1 3138 mov rax,QWORD[((0+160+0))+rbp] 3139 mov r15,rax 3140 mul r10 3141 mov r13,rax 
3142 mov r14,rdx 3143 mov rax,QWORD[((0+160+0))+rbp] 3144 mul r11 3145 imul r15,r12 3146 add r14,rax 3147 adc r15,rdx 3148 mov rax,QWORD[((8+160+0))+rbp] 3149 mov r9,rax 3150 mul r10 3151 add r14,rax 3152 adc rdx,0 3153 mov r10,rdx 3154 mov rax,QWORD[((8+160+0))+rbp] 3155 mul r11 3156 add r15,rax 3157 adc rdx,0 3158 imul r9,r12 3159 add r15,r10 3160 adc r9,rdx 3161 mov r10,r13 3162 mov r11,r14 3163 mov r12,r15 3164 and r12,3 3165 mov r13,r15 3166 and r13,-4 3167 mov r14,r9 3168 shrd r15,r9,2 3169 shr r9,2 3170 add r15,r13 3171 adc r9,r14 3172 add r10,r15 3173 adc r11,r9 3174 adc r12,0 3175 paddd xmm0,xmm4 3176 pxor xmm12,xmm0 3177 pshufb xmm12,XMMWORD[$L$rol16] 3178 paddd xmm8,xmm12 3179 pxor xmm4,xmm8 3180 movdqa xmm3,xmm4 3181 pslld xmm3,12 3182 psrld xmm4,20 3183 pxor xmm4,xmm3 3184 paddd xmm0,xmm4 3185 pxor xmm12,xmm0 3186 pshufb xmm12,XMMWORD[$L$rol8] 3187 paddd xmm8,xmm12 3188 pxor xmm4,xmm8 3189 movdqa xmm3,xmm4 3190 pslld xmm3,7 3191 psrld xmm4,25 3192 pxor xmm4,xmm3 3193DB 102,15,58,15,228,12 3194DB 102,69,15,58,15,192,8 3195DB 102,69,15,58,15,228,4 3196 paddd xmm1,xmm5 3197 pxor xmm13,xmm1 3198 pshufb xmm13,XMMWORD[$L$rol16] 3199 paddd xmm9,xmm13 3200 pxor xmm5,xmm9 3201 movdqa xmm3,xmm5 3202 pslld xmm3,12 3203 psrld xmm5,20 3204 pxor xmm5,xmm3 3205 paddd xmm1,xmm5 3206 pxor xmm13,xmm1 3207 pshufb xmm13,XMMWORD[$L$rol8] 3208 paddd xmm9,xmm13 3209 pxor xmm5,xmm9 3210 movdqa xmm3,xmm5 3211 pslld xmm3,7 3212 psrld xmm5,25 3213 pxor xmm5,xmm3 3214DB 102,15,58,15,237,12 3215DB 102,69,15,58,15,201,8 3216DB 102,69,15,58,15,237,4 3217 3218 lea rdi,[16+rdi] 3219 dec rcx 3220 jg NEAR $L$seal_sse_tail_128_rounds_and_x2hash 3221 dec r8 3222 jge NEAR $L$seal_sse_tail_128_rounds_and_x1hash 3223 paddd xmm1,XMMWORD[$L$chacha20_consts] 3224 paddd xmm5,XMMWORD[((160+48))+rbp] 3225 paddd xmm9,XMMWORD[((160+64))+rbp] 3226 paddd xmm13,XMMWORD[((160+112))+rbp] 3227 paddd xmm0,XMMWORD[$L$chacha20_consts] 3228 paddd xmm4,XMMWORD[((160+48))+rbp] 3229 paddd 
xmm8,XMMWORD[((160+64))+rbp] 3230 paddd xmm12,XMMWORD[((160+96))+rbp] 3231 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 3232 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 3233 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 3234 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 3235 pxor xmm1,xmm3 3236 pxor xmm5,xmm7 3237 pxor xmm9,xmm11 3238 pxor xmm15,xmm13 3239 movdqu XMMWORD[(0 + 0)+rdi],xmm1 3240 movdqu XMMWORD[(16 + 0)+rdi],xmm5 3241 movdqu XMMWORD[(32 + 0)+rdi],xmm9 3242 movdqu XMMWORD[(48 + 0)+rdi],xmm15 3243 3244 mov rcx,4*16 3245 sub rbx,4*16 3246 lea rsi,[64+rsi] 3247 jmp NEAR $L$seal_sse_128_tail_hash 3248 3249$L$seal_sse_tail_192: 3250 movdqa xmm0,XMMWORD[$L$chacha20_consts] 3251 movdqa xmm4,XMMWORD[((160+48))+rbp] 3252 movdqa xmm8,XMMWORD[((160+64))+rbp] 3253 movdqa xmm1,xmm0 3254 movdqa xmm5,xmm4 3255 movdqa xmm9,xmm8 3256 movdqa xmm2,xmm0 3257 movdqa xmm6,xmm4 3258 movdqa xmm10,xmm8 3259 movdqa xmm14,XMMWORD[((160+96))+rbp] 3260 paddd xmm14,XMMWORD[$L$sse_inc] 3261 movdqa xmm13,xmm14 3262 paddd xmm13,XMMWORD[$L$sse_inc] 3263 movdqa xmm12,xmm13 3264 paddd xmm12,XMMWORD[$L$sse_inc] 3265 movdqa XMMWORD[(160+96)+rbp],xmm12 3266 movdqa XMMWORD[(160+112)+rbp],xmm13 3267 movdqa XMMWORD[(160+128)+rbp],xmm14 3268 3269$L$seal_sse_tail_192_rounds_and_x2hash: 3270 add r10,QWORD[((0+0))+rdi] 3271 adc r11,QWORD[((8+0))+rdi] 3272 adc r12,1 3273 mov rax,QWORD[((0+160+0))+rbp] 3274 mov r15,rax 3275 mul r10 3276 mov r13,rax 3277 mov r14,rdx 3278 mov rax,QWORD[((0+160+0))+rbp] 3279 mul r11 3280 imul r15,r12 3281 add r14,rax 3282 adc r15,rdx 3283 mov rax,QWORD[((8+160+0))+rbp] 3284 mov r9,rax 3285 mul r10 3286 add r14,rax 3287 adc rdx,0 3288 mov r10,rdx 3289 mov rax,QWORD[((8+160+0))+rbp] 3290 mul r11 3291 add r15,rax 3292 adc rdx,0 3293 imul r9,r12 3294 add r15,r10 3295 adc r9,rdx 3296 mov r10,r13 3297 mov r11,r14 3298 mov r12,r15 3299 and r12,3 3300 mov r13,r15 3301 and r13,-4 3302 mov r14,r9 3303 shrd r15,r9,2 3304 shr r9,2 3305 add r15,r13 3306 adc r9,r14 3307 add r10,r15 3308 adc r11,r9 3309 adc r12,0 3310 
3311 lea rdi,[16+rdi] 3312$L$seal_sse_tail_192_rounds_and_x1hash: 3313 paddd xmm0,xmm4 3314 pxor xmm12,xmm0 3315 pshufb xmm12,XMMWORD[$L$rol16] 3316 paddd xmm8,xmm12 3317 pxor xmm4,xmm8 3318 movdqa xmm3,xmm4 3319 pslld xmm3,12 3320 psrld xmm4,20 3321 pxor xmm4,xmm3 3322 paddd xmm0,xmm4 3323 pxor xmm12,xmm0 3324 pshufb xmm12,XMMWORD[$L$rol8] 3325 paddd xmm8,xmm12 3326 pxor xmm4,xmm8 3327 movdqa xmm3,xmm4 3328 pslld xmm3,7 3329 psrld xmm4,25 3330 pxor xmm4,xmm3 3331DB 102,15,58,15,228,4 3332DB 102,69,15,58,15,192,8 3333DB 102,69,15,58,15,228,12 3334 paddd xmm1,xmm5 3335 pxor xmm13,xmm1 3336 pshufb xmm13,XMMWORD[$L$rol16] 3337 paddd xmm9,xmm13 3338 pxor xmm5,xmm9 3339 movdqa xmm3,xmm5 3340 pslld xmm3,12 3341 psrld xmm5,20 3342 pxor xmm5,xmm3 3343 paddd xmm1,xmm5 3344 pxor xmm13,xmm1 3345 pshufb xmm13,XMMWORD[$L$rol8] 3346 paddd xmm9,xmm13 3347 pxor xmm5,xmm9 3348 movdqa xmm3,xmm5 3349 pslld xmm3,7 3350 psrld xmm5,25 3351 pxor xmm5,xmm3 3352DB 102,15,58,15,237,4 3353DB 102,69,15,58,15,201,8 3354DB 102,69,15,58,15,237,12 3355 paddd xmm2,xmm6 3356 pxor xmm14,xmm2 3357 pshufb xmm14,XMMWORD[$L$rol16] 3358 paddd xmm10,xmm14 3359 pxor xmm6,xmm10 3360 movdqa xmm3,xmm6 3361 pslld xmm3,12 3362 psrld xmm6,20 3363 pxor xmm6,xmm3 3364 paddd xmm2,xmm6 3365 pxor xmm14,xmm2 3366 pshufb xmm14,XMMWORD[$L$rol8] 3367 paddd xmm10,xmm14 3368 pxor xmm6,xmm10 3369 movdqa xmm3,xmm6 3370 pslld xmm3,7 3371 psrld xmm6,25 3372 pxor xmm6,xmm3 3373DB 102,15,58,15,246,4 3374DB 102,69,15,58,15,210,8 3375DB 102,69,15,58,15,246,12 3376 add r10,QWORD[((0+0))+rdi] 3377 adc r11,QWORD[((8+0))+rdi] 3378 adc r12,1 3379 mov rax,QWORD[((0+160+0))+rbp] 3380 mov r15,rax 3381 mul r10 3382 mov r13,rax 3383 mov r14,rdx 3384 mov rax,QWORD[((0+160+0))+rbp] 3385 mul r11 3386 imul r15,r12 3387 add r14,rax 3388 adc r15,rdx 3389 mov rax,QWORD[((8+160+0))+rbp] 3390 mov r9,rax 3391 mul r10 3392 add r14,rax 3393 adc rdx,0 3394 mov r10,rdx 3395 mov rax,QWORD[((8+160+0))+rbp] 3396 mul r11 3397 add r15,rax 3398 adc rdx,0 3399 
imul r9,r12 3400 add r15,r10 3401 adc r9,rdx 3402 mov r10,r13 3403 mov r11,r14 3404 mov r12,r15 3405 and r12,3 3406 mov r13,r15 3407 and r13,-4 3408 mov r14,r9 3409 shrd r15,r9,2 3410 shr r9,2 3411 add r15,r13 3412 adc r9,r14 3413 add r10,r15 3414 adc r11,r9 3415 adc r12,0 3416 paddd xmm0,xmm4 3417 pxor xmm12,xmm0 3418 pshufb xmm12,XMMWORD[$L$rol16] 3419 paddd xmm8,xmm12 3420 pxor xmm4,xmm8 3421 movdqa xmm3,xmm4 3422 pslld xmm3,12 3423 psrld xmm4,20 3424 pxor xmm4,xmm3 3425 paddd xmm0,xmm4 3426 pxor xmm12,xmm0 3427 pshufb xmm12,XMMWORD[$L$rol8] 3428 paddd xmm8,xmm12 3429 pxor xmm4,xmm8 3430 movdqa xmm3,xmm4 3431 pslld xmm3,7 3432 psrld xmm4,25 3433 pxor xmm4,xmm3 3434DB 102,15,58,15,228,12 3435DB 102,69,15,58,15,192,8 3436DB 102,69,15,58,15,228,4 3437 paddd xmm1,xmm5 3438 pxor xmm13,xmm1 3439 pshufb xmm13,XMMWORD[$L$rol16] 3440 paddd xmm9,xmm13 3441 pxor xmm5,xmm9 3442 movdqa xmm3,xmm5 3443 pslld xmm3,12 3444 psrld xmm5,20 3445 pxor xmm5,xmm3 3446 paddd xmm1,xmm5 3447 pxor xmm13,xmm1 3448 pshufb xmm13,XMMWORD[$L$rol8] 3449 paddd xmm9,xmm13 3450 pxor xmm5,xmm9 3451 movdqa xmm3,xmm5 3452 pslld xmm3,7 3453 psrld xmm5,25 3454 pxor xmm5,xmm3 3455DB 102,15,58,15,237,12 3456DB 102,69,15,58,15,201,8 3457DB 102,69,15,58,15,237,4 3458 paddd xmm2,xmm6 3459 pxor xmm14,xmm2 3460 pshufb xmm14,XMMWORD[$L$rol16] 3461 paddd xmm10,xmm14 3462 pxor xmm6,xmm10 3463 movdqa xmm3,xmm6 3464 pslld xmm3,12 3465 psrld xmm6,20 3466 pxor xmm6,xmm3 3467 paddd xmm2,xmm6 3468 pxor xmm14,xmm2 3469 pshufb xmm14,XMMWORD[$L$rol8] 3470 paddd xmm10,xmm14 3471 pxor xmm6,xmm10 3472 movdqa xmm3,xmm6 3473 pslld xmm3,7 3474 psrld xmm6,25 3475 pxor xmm6,xmm3 3476DB 102,15,58,15,246,12 3477DB 102,69,15,58,15,210,8 3478DB 102,69,15,58,15,246,4 3479 3480 lea rdi,[16+rdi] 3481 dec rcx 3482 jg NEAR $L$seal_sse_tail_192_rounds_and_x2hash 3483 dec r8 3484 jge NEAR $L$seal_sse_tail_192_rounds_and_x1hash 3485 paddd xmm2,XMMWORD[$L$chacha20_consts] 3486 paddd xmm6,XMMWORD[((160+48))+rbp] 3487 paddd 
xmm10,XMMWORD[((160+64))+rbp] 3488 paddd xmm14,XMMWORD[((160+128))+rbp] 3489 paddd xmm1,XMMWORD[$L$chacha20_consts] 3490 paddd xmm5,XMMWORD[((160+48))+rbp] 3491 paddd xmm9,XMMWORD[((160+64))+rbp] 3492 paddd xmm13,XMMWORD[((160+112))+rbp] 3493 paddd xmm0,XMMWORD[$L$chacha20_consts] 3494 paddd xmm4,XMMWORD[((160+48))+rbp] 3495 paddd xmm8,XMMWORD[((160+64))+rbp] 3496 paddd xmm12,XMMWORD[((160+96))+rbp] 3497 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 3498 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 3499 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 3500 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 3501 pxor xmm2,xmm3 3502 pxor xmm6,xmm7 3503 pxor xmm10,xmm11 3504 pxor xmm15,xmm14 3505 movdqu XMMWORD[(0 + 0)+rdi],xmm2 3506 movdqu XMMWORD[(16 + 0)+rdi],xmm6 3507 movdqu XMMWORD[(32 + 0)+rdi],xmm10 3508 movdqu XMMWORD[(48 + 0)+rdi],xmm15 3509 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 3510 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 3511 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 3512 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 3513 pxor xmm1,xmm3 3514 pxor xmm5,xmm7 3515 pxor xmm9,xmm11 3516 pxor xmm15,xmm13 3517 movdqu XMMWORD[(0 + 64)+rdi],xmm1 3518 movdqu XMMWORD[(16 + 64)+rdi],xmm5 3519 movdqu XMMWORD[(32 + 64)+rdi],xmm9 3520 movdqu XMMWORD[(48 + 64)+rdi],xmm15 3521 3522 mov rcx,8*16 3523 sub rbx,8*16 3524 lea rsi,[128+rsi] 3525 3526$L$seal_sse_128_tail_hash: 3527 cmp rcx,16 3528 jb NEAR $L$seal_sse_128_tail_xor 3529 add r10,QWORD[((0+0))+rdi] 3530 adc r11,QWORD[((8+0))+rdi] 3531 adc r12,1 3532 mov rax,QWORD[((0+160+0))+rbp] 3533 mov r15,rax 3534 mul r10 3535 mov r13,rax 3536 mov r14,rdx 3537 mov rax,QWORD[((0+160+0))+rbp] 3538 mul r11 3539 imul r15,r12 3540 add r14,rax 3541 adc r15,rdx 3542 mov rax,QWORD[((8+160+0))+rbp] 3543 mov r9,rax 3544 mul r10 3545 add r14,rax 3546 adc rdx,0 3547 mov r10,rdx 3548 mov rax,QWORD[((8+160+0))+rbp] 3549 mul r11 3550 add r15,rax 3551 adc rdx,0 3552 imul r9,r12 3553 add r15,r10 3554 adc r9,rdx 3555 mov r10,r13 3556 mov r11,r14 3557 mov r12,r15 3558 and r12,3 3559 mov r13,r15 3560 and r13,-4 
3561 mov r14,r9 3562 shrd r15,r9,2 3563 shr r9,2 3564 add r15,r13 3565 adc r9,r14 3566 add r10,r15 3567 adc r11,r9 3568 adc r12,0 3569 3570 sub rcx,16 3571 lea rdi,[16+rdi] 3572 jmp NEAR $L$seal_sse_128_tail_hash 3573 3574$L$seal_sse_128_tail_xor: 3575 cmp rbx,16 3576 jb NEAR $L$seal_sse_tail_16 3577 sub rbx,16 3578 3579 movdqu xmm3,XMMWORD[rsi] 3580 pxor xmm0,xmm3 3581 movdqu XMMWORD[rdi],xmm0 3582 3583 add r10,QWORD[rdi] 3584 adc r11,QWORD[8+rdi] 3585 adc r12,1 3586 lea rsi,[16+rsi] 3587 lea rdi,[16+rdi] 3588 mov rax,QWORD[((0+160+0))+rbp] 3589 mov r15,rax 3590 mul r10 3591 mov r13,rax 3592 mov r14,rdx 3593 mov rax,QWORD[((0+160+0))+rbp] 3594 mul r11 3595 imul r15,r12 3596 add r14,rax 3597 adc r15,rdx 3598 mov rax,QWORD[((8+160+0))+rbp] 3599 mov r9,rax 3600 mul r10 3601 add r14,rax 3602 adc rdx,0 3603 mov r10,rdx 3604 mov rax,QWORD[((8+160+0))+rbp] 3605 mul r11 3606 add r15,rax 3607 adc rdx,0 3608 imul r9,r12 3609 add r15,r10 3610 adc r9,rdx 3611 mov r10,r13 3612 mov r11,r14 3613 mov r12,r15 3614 and r12,3 3615 mov r13,r15 3616 and r13,-4 3617 mov r14,r9 3618 shrd r15,r9,2 3619 shr r9,2 3620 add r15,r13 3621 adc r9,r14 3622 add r10,r15 3623 adc r11,r9 3624 adc r12,0 3625 3626 3627 movdqa xmm0,xmm4 3628 movdqa xmm4,xmm8 3629 movdqa xmm8,xmm12 3630 movdqa xmm12,xmm1 3631 movdqa xmm1,xmm5 3632 movdqa xmm5,xmm9 3633 movdqa xmm9,xmm13 3634 jmp NEAR $L$seal_sse_128_tail_xor 3635 3636$L$seal_sse_tail_16: 3637 test rbx,rbx 3638 jz NEAR $L$process_blocks_of_extra_in 3639 3640 mov r8,rbx 3641 mov rcx,rbx 3642 lea rsi,[((-1))+rbx*1+rsi] 3643 pxor xmm15,xmm15 3644$L$seal_sse_tail_16_compose: 3645 pslldq xmm15,1 3646 pinsrb xmm15,BYTE[rsi],0 3647 lea rsi,[((-1))+rsi] 3648 dec rcx 3649 jne NEAR $L$seal_sse_tail_16_compose 3650 3651 3652 pxor xmm15,xmm0 3653 3654 3655 mov rcx,rbx 3656 movdqu xmm0,xmm15 3657$L$seal_sse_tail_16_extract: 3658 pextrb XMMWORD[rdi],xmm0,0 3659 psrldq xmm0,1 3660 add rdi,1 3661 sub rcx,1 3662 jnz NEAR $L$seal_sse_tail_16_extract 3663 3664 3665 3666 
3667 3668 3669 3670 3671 mov r9,QWORD[((288 + 160 + 32))+rsp] 3672 mov r14,QWORD[56+r9] 3673 mov r13,QWORD[48+r9] 3674 test r14,r14 3675 jz NEAR $L$process_partial_block 3676 3677 mov r15,16 3678 sub r15,rbx 3679 cmp r14,r15 3680 3681 jge NEAR $L$load_extra_in 3682 mov r15,r14 3683 3684$L$load_extra_in: 3685 3686 3687 lea rsi,[((-1))+r15*1+r13] 3688 3689 3690 add r13,r15 3691 sub r14,r15 3692 mov QWORD[48+r9],r13 3693 mov QWORD[56+r9],r14 3694 3695 3696 3697 add r8,r15 3698 3699 3700 pxor xmm11,xmm11 3701$L$load_extra_load_loop: 3702 pslldq xmm11,1 3703 pinsrb xmm11,BYTE[rsi],0 3704 lea rsi,[((-1))+rsi] 3705 sub r15,1 3706 jnz NEAR $L$load_extra_load_loop 3707 3708 3709 3710 3711 mov r15,rbx 3712 3713$L$load_extra_shift_loop: 3714 pslldq xmm11,1 3715 sub r15,1 3716 jnz NEAR $L$load_extra_shift_loop 3717 3718 3719 3720 3721 lea r15,[$L$and_masks] 3722 shl rbx,4 3723 pand xmm15,XMMWORD[((-16))+rbx*1+r15] 3724 3725 3726 por xmm15,xmm11 3727 3728 3729 3730DB 102,77,15,126,253 3731 pextrq r14,xmm15,1 3732 add r10,r13 3733 adc r11,r14 3734 adc r12,1 3735 mov rax,QWORD[((0+160+0))+rbp] 3736 mov r15,rax 3737 mul r10 3738 mov r13,rax 3739 mov r14,rdx 3740 mov rax,QWORD[((0+160+0))+rbp] 3741 mul r11 3742 imul r15,r12 3743 add r14,rax 3744 adc r15,rdx 3745 mov rax,QWORD[((8+160+0))+rbp] 3746 mov r9,rax 3747 mul r10 3748 add r14,rax 3749 adc rdx,0 3750 mov r10,rdx 3751 mov rax,QWORD[((8+160+0))+rbp] 3752 mul r11 3753 add r15,rax 3754 adc rdx,0 3755 imul r9,r12 3756 add r15,r10 3757 adc r9,rdx 3758 mov r10,r13 3759 mov r11,r14 3760 mov r12,r15 3761 and r12,3 3762 mov r13,r15 3763 and r13,-4 3764 mov r14,r9 3765 shrd r15,r9,2 3766 shr r9,2 3767 add r15,r13 3768 adc r9,r14 3769 add r10,r15 3770 adc r11,r9 3771 adc r12,0 3772 3773 3774$L$process_blocks_of_extra_in: 3775 3776 mov r9,QWORD[((288+32+160 ))+rsp] 3777 mov rsi,QWORD[48+r9] 3778 mov r8,QWORD[56+r9] 3779 mov rcx,r8 3780 shr r8,4 3781 3782$L$process_extra_hash_loop: 3783 jz NEAR process_extra_in_trailer 3784 add 
r10,QWORD[((0+0))+rsi] 3785 adc r11,QWORD[((8+0))+rsi] 3786 adc r12,1 3787 mov rax,QWORD[((0+160+0))+rbp] 3788 mov r15,rax 3789 mul r10 3790 mov r13,rax 3791 mov r14,rdx 3792 mov rax,QWORD[((0+160+0))+rbp] 3793 mul r11 3794 imul r15,r12 3795 add r14,rax 3796 adc r15,rdx 3797 mov rax,QWORD[((8+160+0))+rbp] 3798 mov r9,rax 3799 mul r10 3800 add r14,rax 3801 adc rdx,0 3802 mov r10,rdx 3803 mov rax,QWORD[((8+160+0))+rbp] 3804 mul r11 3805 add r15,rax 3806 adc rdx,0 3807 imul r9,r12 3808 add r15,r10 3809 adc r9,rdx 3810 mov r10,r13 3811 mov r11,r14 3812 mov r12,r15 3813 and r12,3 3814 mov r13,r15 3815 and r13,-4 3816 mov r14,r9 3817 shrd r15,r9,2 3818 shr r9,2 3819 add r15,r13 3820 adc r9,r14 3821 add r10,r15 3822 adc r11,r9 3823 adc r12,0 3824 3825 lea rsi,[16+rsi] 3826 sub r8,1 3827 jmp NEAR $L$process_extra_hash_loop 3828process_extra_in_trailer: 3829 and rcx,15 3830 mov rbx,rcx 3831 jz NEAR $L$do_length_block 3832 lea rsi,[((-1))+rcx*1+rsi] 3833 3834$L$process_extra_in_trailer_load: 3835 pslldq xmm15,1 3836 pinsrb xmm15,BYTE[rsi],0 3837 lea rsi,[((-1))+rsi] 3838 sub rcx,1 3839 jnz NEAR $L$process_extra_in_trailer_load 3840 3841$L$process_partial_block: 3842 3843 lea r15,[$L$and_masks] 3844 shl rbx,4 3845 pand xmm15,XMMWORD[((-16))+rbx*1+r15] 3846DB 102,77,15,126,253 3847 pextrq r14,xmm15,1 3848 add r10,r13 3849 adc r11,r14 3850 adc r12,1 3851 mov rax,QWORD[((0+160+0))+rbp] 3852 mov r15,rax 3853 mul r10 3854 mov r13,rax 3855 mov r14,rdx 3856 mov rax,QWORD[((0+160+0))+rbp] 3857 mul r11 3858 imul r15,r12 3859 add r14,rax 3860 adc r15,rdx 3861 mov rax,QWORD[((8+160+0))+rbp] 3862 mov r9,rax 3863 mul r10 3864 add r14,rax 3865 adc rdx,0 3866 mov r10,rdx 3867 mov rax,QWORD[((8+160+0))+rbp] 3868 mul r11 3869 add r15,rax 3870 adc rdx,0 3871 imul r9,r12 3872 add r15,r10 3873 adc r9,rdx 3874 mov r10,r13 3875 mov r11,r14 3876 mov r12,r15 3877 and r12,3 3878 mov r13,r15 3879 and r13,-4 3880 mov r14,r9 3881 shrd r15,r9,2 3882 shr r9,2 3883 add r15,r13 3884 adc r9,r14 3885 add 
r10,r15 3886 adc r11,r9 3887 adc r12,0 3888 3889 3890$L$do_length_block: 3891 add r10,QWORD[((0+160+32))+rbp] 3892 adc r11,QWORD[((8+160+32))+rbp] 3893 adc r12,1 3894 mov rax,QWORD[((0+160+0))+rbp] 3895 mov r15,rax 3896 mul r10 3897 mov r13,rax 3898 mov r14,rdx 3899 mov rax,QWORD[((0+160+0))+rbp] 3900 mul r11 3901 imul r15,r12 3902 add r14,rax 3903 adc r15,rdx 3904 mov rax,QWORD[((8+160+0))+rbp] 3905 mov r9,rax 3906 mul r10 3907 add r14,rax 3908 adc rdx,0 3909 mov r10,rdx 3910 mov rax,QWORD[((8+160+0))+rbp] 3911 mul r11 3912 add r15,rax 3913 adc rdx,0 3914 imul r9,r12 3915 add r15,r10 3916 adc r9,rdx 3917 mov r10,r13 3918 mov r11,r14 3919 mov r12,r15 3920 and r12,3 3921 mov r13,r15 3922 and r13,-4 3923 mov r14,r9 3924 shrd r15,r9,2 3925 shr r9,2 3926 add r15,r13 3927 adc r9,r14 3928 add r10,r15 3929 adc r11,r9 3930 adc r12,0 3931 3932 3933 mov r13,r10 3934 mov r14,r11 3935 mov r15,r12 3936 sub r10,-5 3937 sbb r11,-1 3938 sbb r12,3 3939 cmovc r10,r13 3940 cmovc r11,r14 3941 cmovc r12,r15 3942 3943 add r10,QWORD[((0+160+16))+rbp] 3944 adc r11,QWORD[((8+160+16))+rbp] 3945 3946 movaps xmm6,XMMWORD[((0+0))+rbp] 3947 movaps xmm7,XMMWORD[((16+0))+rbp] 3948 movaps xmm8,XMMWORD[((32+0))+rbp] 3949 movaps xmm9,XMMWORD[((48+0))+rbp] 3950 movaps xmm10,XMMWORD[((64+0))+rbp] 3951 movaps xmm11,XMMWORD[((80+0))+rbp] 3952 movaps xmm12,XMMWORD[((96+0))+rbp] 3953 movaps xmm13,XMMWORD[((112+0))+rbp] 3954 movaps xmm14,XMMWORD[((128+0))+rbp] 3955 movaps xmm15,XMMWORD[((144+0))+rbp] 3956 3957 3958 add rsp,288 + 160 + 32 3959 3960 3961 pop r9 3962 3963 mov QWORD[r9],r10 3964 mov QWORD[8+r9],r11 3965 pop r15 3966 3967 pop r14 3968 3969 pop r13 3970 3971 pop r12 3972 3973 pop rbx 3974 3975 pop rbp 3976 3977 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3978 mov rsi,QWORD[16+rsp] 3979 ret 3980 3981$L$seal_sse_128: 3982 3983 movdqu xmm0,XMMWORD[$L$chacha20_consts] 3984 movdqa xmm1,xmm0 3985 movdqa xmm2,xmm0 3986 movdqu xmm4,XMMWORD[r9] 3987 movdqa xmm5,xmm4 3988 movdqa xmm6,xmm4 3989 movdqu 
xmm8,XMMWORD[16+r9] 3990 movdqa xmm9,xmm8 3991 movdqa xmm10,xmm8 3992 movdqu xmm14,XMMWORD[32+r9] 3993 movdqa xmm12,xmm14 3994 paddd xmm12,XMMWORD[$L$sse_inc] 3995 movdqa xmm13,xmm12 3996 paddd xmm13,XMMWORD[$L$sse_inc] 3997 movdqa xmm7,xmm4 3998 movdqa xmm11,xmm8 3999 movdqa xmm15,xmm12 4000 mov r10,10 4001 4002$L$seal_sse_128_rounds: 4003 paddd xmm0,xmm4 4004 pxor xmm12,xmm0 4005 pshufb xmm12,XMMWORD[$L$rol16] 4006 paddd xmm8,xmm12 4007 pxor xmm4,xmm8 4008 movdqa xmm3,xmm4 4009 pslld xmm3,12 4010 psrld xmm4,20 4011 pxor xmm4,xmm3 4012 paddd xmm0,xmm4 4013 pxor xmm12,xmm0 4014 pshufb xmm12,XMMWORD[$L$rol8] 4015 paddd xmm8,xmm12 4016 pxor xmm4,xmm8 4017 movdqa xmm3,xmm4 4018 pslld xmm3,7 4019 psrld xmm4,25 4020 pxor xmm4,xmm3 4021DB 102,15,58,15,228,4 4022DB 102,69,15,58,15,192,8 4023DB 102,69,15,58,15,228,12 4024 paddd xmm1,xmm5 4025 pxor xmm13,xmm1 4026 pshufb xmm13,XMMWORD[$L$rol16] 4027 paddd xmm9,xmm13 4028 pxor xmm5,xmm9 4029 movdqa xmm3,xmm5 4030 pslld xmm3,12 4031 psrld xmm5,20 4032 pxor xmm5,xmm3 4033 paddd xmm1,xmm5 4034 pxor xmm13,xmm1 4035 pshufb xmm13,XMMWORD[$L$rol8] 4036 paddd xmm9,xmm13 4037 pxor xmm5,xmm9 4038 movdqa xmm3,xmm5 4039 pslld xmm3,7 4040 psrld xmm5,25 4041 pxor xmm5,xmm3 4042DB 102,15,58,15,237,4 4043DB 102,69,15,58,15,201,8 4044DB 102,69,15,58,15,237,12 4045 paddd xmm2,xmm6 4046 pxor xmm14,xmm2 4047 pshufb xmm14,XMMWORD[$L$rol16] 4048 paddd xmm10,xmm14 4049 pxor xmm6,xmm10 4050 movdqa xmm3,xmm6 4051 pslld xmm3,12 4052 psrld xmm6,20 4053 pxor xmm6,xmm3 4054 paddd xmm2,xmm6 4055 pxor xmm14,xmm2 4056 pshufb xmm14,XMMWORD[$L$rol8] 4057 paddd xmm10,xmm14 4058 pxor xmm6,xmm10 4059 movdqa xmm3,xmm6 4060 pslld xmm3,7 4061 psrld xmm6,25 4062 pxor xmm6,xmm3 4063DB 102,15,58,15,246,4 4064DB 102,69,15,58,15,210,8 4065DB 102,69,15,58,15,246,12 4066 paddd xmm0,xmm4 4067 pxor xmm12,xmm0 4068 pshufb xmm12,XMMWORD[$L$rol16] 4069 paddd xmm8,xmm12 4070 pxor xmm4,xmm8 4071 movdqa xmm3,xmm4 4072 pslld xmm3,12 4073 psrld xmm4,20 4074 pxor xmm4,xmm3 4075 
paddd xmm0,xmm4 4076 pxor xmm12,xmm0 4077 pshufb xmm12,XMMWORD[$L$rol8] 4078 paddd xmm8,xmm12 4079 pxor xmm4,xmm8 4080 movdqa xmm3,xmm4 4081 pslld xmm3,7 4082 psrld xmm4,25 4083 pxor xmm4,xmm3 4084DB 102,15,58,15,228,12 4085DB 102,69,15,58,15,192,8 4086DB 102,69,15,58,15,228,4 4087 paddd xmm1,xmm5 4088 pxor xmm13,xmm1 4089 pshufb xmm13,XMMWORD[$L$rol16] 4090 paddd xmm9,xmm13 4091 pxor xmm5,xmm9 4092 movdqa xmm3,xmm5 4093 pslld xmm3,12 4094 psrld xmm5,20 4095 pxor xmm5,xmm3 4096 paddd xmm1,xmm5 4097 pxor xmm13,xmm1 4098 pshufb xmm13,XMMWORD[$L$rol8] 4099 paddd xmm9,xmm13 4100 pxor xmm5,xmm9 4101 movdqa xmm3,xmm5 4102 pslld xmm3,7 4103 psrld xmm5,25 4104 pxor xmm5,xmm3 4105DB 102,15,58,15,237,12 4106DB 102,69,15,58,15,201,8 4107DB 102,69,15,58,15,237,4 4108 paddd xmm2,xmm6 4109 pxor xmm14,xmm2 4110 pshufb xmm14,XMMWORD[$L$rol16] 4111 paddd xmm10,xmm14 4112 pxor xmm6,xmm10 4113 movdqa xmm3,xmm6 4114 pslld xmm3,12 4115 psrld xmm6,20 4116 pxor xmm6,xmm3 4117 paddd xmm2,xmm6 4118 pxor xmm14,xmm2 4119 pshufb xmm14,XMMWORD[$L$rol8] 4120 paddd xmm10,xmm14 4121 pxor xmm6,xmm10 4122 movdqa xmm3,xmm6 4123 pslld xmm3,7 4124 psrld xmm6,25 4125 pxor xmm6,xmm3 4126DB 102,15,58,15,246,12 4127DB 102,69,15,58,15,210,8 4128DB 102,69,15,58,15,246,4 4129 4130 dec r10 4131 jnz NEAR $L$seal_sse_128_rounds 4132 paddd xmm0,XMMWORD[$L$chacha20_consts] 4133 paddd xmm1,XMMWORD[$L$chacha20_consts] 4134 paddd xmm2,XMMWORD[$L$chacha20_consts] 4135 paddd xmm4,xmm7 4136 paddd xmm5,xmm7 4137 paddd xmm6,xmm7 4138 paddd xmm8,xmm11 4139 paddd xmm9,xmm11 4140 paddd xmm12,xmm15 4141 paddd xmm15,XMMWORD[$L$sse_inc] 4142 paddd xmm13,xmm15 4143 4144 pand xmm2,XMMWORD[$L$clamp] 4145 movdqa XMMWORD[(160+0)+rbp],xmm2 4146 movdqa XMMWORD[(160+16)+rbp],xmm6 4147 4148 mov r8,r8 4149 call poly_hash_ad_internal 4150 jmp NEAR $L$seal_sse_128_tail_xor 4151$L$SEH_end_chacha20_poly1305_seal: 4152 4153 4154 4155 4156ALIGN 64 4157chacha20_poly1305_open_avx2: 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 
4170 vzeroupper 4171 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4172 vbroadcasti128 ymm4,XMMWORD[r9] 4173 vbroadcasti128 ymm8,XMMWORD[16+r9] 4174 vbroadcasti128 ymm12,XMMWORD[32+r9] 4175 vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] 4176 cmp rbx,6*32 4177 jbe NEAR $L$open_avx2_192 4178 cmp rbx,10*32 4179 jbe NEAR $L$open_avx2_320 4180 4181 vmovdqa YMMWORD[(160+64)+rbp],ymm4 4182 vmovdqa YMMWORD[(160+96)+rbp],ymm8 4183 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4184 mov r10,10 4185$L$open_avx2_init_rounds: 4186 vpaddd ymm0,ymm0,ymm4 4187 vpxor ymm12,ymm12,ymm0 4188 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4189 vpaddd ymm8,ymm8,ymm12 4190 vpxor ymm4,ymm4,ymm8 4191 vpsrld ymm3,ymm4,20 4192 vpslld ymm4,ymm4,12 4193 vpxor ymm4,ymm4,ymm3 4194 vpaddd ymm0,ymm0,ymm4 4195 vpxor ymm12,ymm12,ymm0 4196 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4197 vpaddd ymm8,ymm8,ymm12 4198 vpxor ymm4,ymm4,ymm8 4199 vpslld ymm3,ymm4,7 4200 vpsrld ymm4,ymm4,25 4201 vpxor ymm4,ymm4,ymm3 4202 vpalignr ymm12,ymm12,ymm12,12 4203 vpalignr ymm8,ymm8,ymm8,8 4204 vpalignr ymm4,ymm4,ymm4,4 4205 vpaddd ymm0,ymm0,ymm4 4206 vpxor ymm12,ymm12,ymm0 4207 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4208 vpaddd ymm8,ymm8,ymm12 4209 vpxor ymm4,ymm4,ymm8 4210 vpsrld ymm3,ymm4,20 4211 vpslld ymm4,ymm4,12 4212 vpxor ymm4,ymm4,ymm3 4213 vpaddd ymm0,ymm0,ymm4 4214 vpxor ymm12,ymm12,ymm0 4215 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4216 vpaddd ymm8,ymm8,ymm12 4217 vpxor ymm4,ymm4,ymm8 4218 vpslld ymm3,ymm4,7 4219 vpsrld ymm4,ymm4,25 4220 vpxor ymm4,ymm4,ymm3 4221 vpalignr ymm12,ymm12,ymm12,4 4222 vpalignr ymm8,ymm8,ymm8,8 4223 vpalignr ymm4,ymm4,ymm4,12 4224 4225 dec r10 4226 jne NEAR $L$open_avx2_init_rounds 4227 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4228 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4229 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4230 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4231 4232 vperm2i128 ymm3,ymm4,ymm0,0x02 4233 4234 vpand ymm3,ymm3,YMMWORD[$L$clamp] 4235 vmovdqa YMMWORD[(160+0)+rbp],ymm3 4236 4237 vperm2i128 
ymm0,ymm4,ymm0,0x13 4238 vperm2i128 ymm4,ymm12,ymm8,0x13 4239 4240 mov r8,r8 4241 call poly_hash_ad_internal 4242 4243 xor rcx,rcx 4244$L$open_avx2_init_hash: 4245 add r10,QWORD[((0+0))+rcx*1+rsi] 4246 adc r11,QWORD[((8+0))+rcx*1+rsi] 4247 adc r12,1 4248 mov rax,QWORD[((0+160+0))+rbp] 4249 mov r15,rax 4250 mul r10 4251 mov r13,rax 4252 mov r14,rdx 4253 mov rax,QWORD[((0+160+0))+rbp] 4254 mul r11 4255 imul r15,r12 4256 add r14,rax 4257 adc r15,rdx 4258 mov rax,QWORD[((8+160+0))+rbp] 4259 mov r9,rax 4260 mul r10 4261 add r14,rax 4262 adc rdx,0 4263 mov r10,rdx 4264 mov rax,QWORD[((8+160+0))+rbp] 4265 mul r11 4266 add r15,rax 4267 adc rdx,0 4268 imul r9,r12 4269 add r15,r10 4270 adc r9,rdx 4271 mov r10,r13 4272 mov r11,r14 4273 mov r12,r15 4274 and r12,3 4275 mov r13,r15 4276 and r13,-4 4277 mov r14,r9 4278 shrd r15,r9,2 4279 shr r9,2 4280 add r15,r13 4281 adc r9,r14 4282 add r10,r15 4283 adc r11,r9 4284 adc r12,0 4285 4286 add rcx,16 4287 cmp rcx,2*32 4288 jne NEAR $L$open_avx2_init_hash 4289 4290 vpxor ymm0,ymm0,YMMWORD[rsi] 4291 vpxor ymm4,ymm4,YMMWORD[32+rsi] 4292 4293 vmovdqu YMMWORD[rdi],ymm0 4294 vmovdqu YMMWORD[32+rdi],ymm4 4295 lea rsi,[64+rsi] 4296 lea rdi,[64+rdi] 4297 sub rbx,2*32 4298$L$open_avx2_main_loop: 4299 4300 cmp rbx,16*32 4301 jb NEAR $L$open_avx2_main_loop_done 4302 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4303 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4304 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4305 vmovdqa ymm1,ymm0 4306 vmovdqa ymm5,ymm4 4307 vmovdqa ymm9,ymm8 4308 vmovdqa ymm2,ymm0 4309 vmovdqa ymm6,ymm4 4310 vmovdqa ymm10,ymm8 4311 vmovdqa ymm3,ymm0 4312 vmovdqa ymm7,ymm4 4313 vmovdqa ymm11,ymm8 4314 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4315 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 4316 vpaddd ymm14,ymm12,ymm15 4317 vpaddd ymm13,ymm12,ymm14 4318 vpaddd ymm12,ymm12,ymm13 4319 vmovdqa YMMWORD[(160+256)+rbp],ymm15 4320 vmovdqa YMMWORD[(160+224)+rbp],ymm14 4321 vmovdqa YMMWORD[(160+192)+rbp],ymm13 4322 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4323 4324 
xor rcx,rcx 4325$L$open_avx2_main_loop_rounds: 4326 add r10,QWORD[((0+0))+rcx*1+rsi] 4327 adc r11,QWORD[((8+0))+rcx*1+rsi] 4328 adc r12,1 4329 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4330 vmovdqa ymm8,YMMWORD[$L$rol16] 4331 vpaddd ymm3,ymm3,ymm7 4332 vpaddd ymm2,ymm2,ymm6 4333 vpaddd ymm1,ymm1,ymm5 4334 vpaddd ymm0,ymm0,ymm4 4335 vpxor ymm15,ymm15,ymm3 4336 vpxor ymm14,ymm14,ymm2 4337 vpxor ymm13,ymm13,ymm1 4338 vpxor ymm12,ymm12,ymm0 4339 mov rdx,QWORD[((0+160+0))+rbp] 4340 mov r15,rdx 4341 mulx r14,r13,r10 4342 mulx rdx,rax,r11 4343 imul r15,r12 4344 add r14,rax 4345 adc r15,rdx 4346 vpshufb ymm15,ymm15,ymm8 4347 vpshufb ymm14,ymm14,ymm8 4348 vpshufb ymm13,ymm13,ymm8 4349 vpshufb ymm12,ymm12,ymm8 4350 vpaddd ymm11,ymm11,ymm15 4351 vpaddd ymm10,ymm10,ymm14 4352 vpaddd ymm9,ymm9,ymm13 4353 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4354 vpxor ymm7,ymm7,ymm11 4355 mov rdx,QWORD[((8+160+0))+rbp] 4356 mulx rax,r10,r10 4357 add r14,r10 4358 mulx r9,r11,r11 4359 adc r15,r11 4360 adc r9,0 4361 imul rdx,r12 4362 vpxor ymm6,ymm6,ymm10 4363 vpxor ymm5,ymm5,ymm9 4364 vpxor ymm4,ymm4,ymm8 4365 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4366 vpsrld ymm8,ymm7,20 4367 vpslld ymm7,ymm7,32-20 4368 vpxor ymm7,ymm7,ymm8 4369 vpsrld ymm8,ymm6,20 4370 vpslld ymm6,ymm6,32-20 4371 vpxor ymm6,ymm6,ymm8 4372 vpsrld ymm8,ymm5,20 4373 vpslld ymm5,ymm5,32-20 4374 add r15,rax 4375 adc r9,rdx 4376 vpxor ymm5,ymm5,ymm8 4377 vpsrld ymm8,ymm4,20 4378 vpslld ymm4,ymm4,32-20 4379 vpxor ymm4,ymm4,ymm8 4380 vmovdqa ymm8,YMMWORD[$L$rol8] 4381 vpaddd ymm3,ymm3,ymm7 4382 vpaddd ymm2,ymm2,ymm6 4383 vpaddd ymm1,ymm1,ymm5 4384 vpaddd ymm0,ymm0,ymm4 4385 vpxor ymm15,ymm15,ymm3 4386 mov r10,r13 4387 mov r11,r14 4388 mov r12,r15 4389 and r12,3 4390 mov r13,r15 4391 and r13,-4 4392 mov r14,r9 4393 shrd r15,r9,2 4394 shr r9,2 4395 add r15,r13 4396 adc r9,r14 4397 add r10,r15 4398 adc r11,r9 4399 adc r12,0 4400 vpxor ymm14,ymm14,ymm2 4401 vpxor ymm13,ymm13,ymm1 4402 vpxor ymm12,ymm12,ymm0 4403 vpshufb ymm15,ymm15,ymm8 4404 
vpshufb ymm14,ymm14,ymm8 4405 vpshufb ymm13,ymm13,ymm8 4406 vpshufb ymm12,ymm12,ymm8 4407 vpaddd ymm11,ymm11,ymm15 4408 vpaddd ymm10,ymm10,ymm14 4409 add r10,QWORD[((0+16))+rcx*1+rsi] 4410 adc r11,QWORD[((8+16))+rcx*1+rsi] 4411 adc r12,1 4412 vpaddd ymm9,ymm9,ymm13 4413 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4414 vpxor ymm7,ymm7,ymm11 4415 vpxor ymm6,ymm6,ymm10 4416 vpxor ymm5,ymm5,ymm9 4417 vpxor ymm4,ymm4,ymm8 4418 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4419 vpsrld ymm8,ymm7,25 4420 mov rdx,QWORD[((0+160+0))+rbp] 4421 mov r15,rdx 4422 mulx r14,r13,r10 4423 mulx rdx,rax,r11 4424 imul r15,r12 4425 add r14,rax 4426 adc r15,rdx 4427 vpslld ymm7,ymm7,32-25 4428 vpxor ymm7,ymm7,ymm8 4429 vpsrld ymm8,ymm6,25 4430 vpslld ymm6,ymm6,32-25 4431 vpxor ymm6,ymm6,ymm8 4432 vpsrld ymm8,ymm5,25 4433 vpslld ymm5,ymm5,32-25 4434 vpxor ymm5,ymm5,ymm8 4435 vpsrld ymm8,ymm4,25 4436 vpslld ymm4,ymm4,32-25 4437 vpxor ymm4,ymm4,ymm8 4438 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 4439 vpalignr ymm7,ymm7,ymm7,4 4440 vpalignr ymm11,ymm11,ymm11,8 4441 vpalignr ymm15,ymm15,ymm15,12 4442 vpalignr ymm6,ymm6,ymm6,4 4443 vpalignr ymm10,ymm10,ymm10,8 4444 vpalignr ymm14,ymm14,ymm14,12 4445 mov rdx,QWORD[((8+160+0))+rbp] 4446 mulx rax,r10,r10 4447 add r14,r10 4448 mulx r9,r11,r11 4449 adc r15,r11 4450 adc r9,0 4451 imul rdx,r12 4452 vpalignr ymm5,ymm5,ymm5,4 4453 vpalignr ymm9,ymm9,ymm9,8 4454 vpalignr ymm13,ymm13,ymm13,12 4455 vpalignr ymm4,ymm4,ymm4,4 4456 vpalignr ymm8,ymm8,ymm8,8 4457 vpalignr ymm12,ymm12,ymm12,12 4458 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4459 vmovdqa ymm8,YMMWORD[$L$rol16] 4460 vpaddd ymm3,ymm3,ymm7 4461 vpaddd ymm2,ymm2,ymm6 4462 vpaddd ymm1,ymm1,ymm5 4463 vpaddd ymm0,ymm0,ymm4 4464 vpxor ymm15,ymm15,ymm3 4465 vpxor ymm14,ymm14,ymm2 4466 vpxor ymm13,ymm13,ymm1 4467 vpxor ymm12,ymm12,ymm0 4468 vpshufb ymm15,ymm15,ymm8 4469 vpshufb ymm14,ymm14,ymm8 4470 add r15,rax 4471 adc r9,rdx 4472 vpshufb ymm13,ymm13,ymm8 4473 vpshufb ymm12,ymm12,ymm8 4474 vpaddd ymm11,ymm11,ymm15 4475 vpaddd 
ymm10,ymm10,ymm14 4476 vpaddd ymm9,ymm9,ymm13 4477 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4478 vpxor ymm7,ymm7,ymm11 4479 vpxor ymm6,ymm6,ymm10 4480 vpxor ymm5,ymm5,ymm9 4481 mov r10,r13 4482 mov r11,r14 4483 mov r12,r15 4484 and r12,3 4485 mov r13,r15 4486 and r13,-4 4487 mov r14,r9 4488 shrd r15,r9,2 4489 shr r9,2 4490 add r15,r13 4491 adc r9,r14 4492 add r10,r15 4493 adc r11,r9 4494 adc r12,0 4495 vpxor ymm4,ymm4,ymm8 4496 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4497 vpsrld ymm8,ymm7,20 4498 vpslld ymm7,ymm7,32-20 4499 vpxor ymm7,ymm7,ymm8 4500 vpsrld ymm8,ymm6,20 4501 vpslld ymm6,ymm6,32-20 4502 vpxor ymm6,ymm6,ymm8 4503 add r10,QWORD[((0+32))+rcx*1+rsi] 4504 adc r11,QWORD[((8+32))+rcx*1+rsi] 4505 adc r12,1 4506 4507 lea rcx,[48+rcx] 4508 vpsrld ymm8,ymm5,20 4509 vpslld ymm5,ymm5,32-20 4510 vpxor ymm5,ymm5,ymm8 4511 vpsrld ymm8,ymm4,20 4512 vpslld ymm4,ymm4,32-20 4513 vpxor ymm4,ymm4,ymm8 4514 vmovdqa ymm8,YMMWORD[$L$rol8] 4515 vpaddd ymm3,ymm3,ymm7 4516 vpaddd ymm2,ymm2,ymm6 4517 vpaddd ymm1,ymm1,ymm5 4518 vpaddd ymm0,ymm0,ymm4 4519 vpxor ymm15,ymm15,ymm3 4520 vpxor ymm14,ymm14,ymm2 4521 vpxor ymm13,ymm13,ymm1 4522 vpxor ymm12,ymm12,ymm0 4523 vpshufb ymm15,ymm15,ymm8 4524 vpshufb ymm14,ymm14,ymm8 4525 vpshufb ymm13,ymm13,ymm8 4526 mov rdx,QWORD[((0+160+0))+rbp] 4527 mov r15,rdx 4528 mulx r14,r13,r10 4529 mulx rdx,rax,r11 4530 imul r15,r12 4531 add r14,rax 4532 adc r15,rdx 4533 vpshufb ymm12,ymm12,ymm8 4534 vpaddd ymm11,ymm11,ymm15 4535 vpaddd ymm10,ymm10,ymm14 4536 vpaddd ymm9,ymm9,ymm13 4537 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4538 vpxor ymm7,ymm7,ymm11 4539 vpxor ymm6,ymm6,ymm10 4540 vpxor ymm5,ymm5,ymm9 4541 mov rdx,QWORD[((8+160+0))+rbp] 4542 mulx rax,r10,r10 4543 add r14,r10 4544 mulx r9,r11,r11 4545 adc r15,r11 4546 adc r9,0 4547 imul rdx,r12 4548 vpxor ymm4,ymm4,ymm8 4549 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4550 vpsrld ymm8,ymm7,25 4551 vpslld ymm7,ymm7,32-25 4552 vpxor ymm7,ymm7,ymm8 4553 vpsrld ymm8,ymm6,25 4554 vpslld ymm6,ymm6,32-25 4555 
vpxor ymm6,ymm6,ymm8 4556 add r15,rax 4557 adc r9,rdx 4558 vpsrld ymm8,ymm5,25 4559 vpslld ymm5,ymm5,32-25 4560 vpxor ymm5,ymm5,ymm8 4561 vpsrld ymm8,ymm4,25 4562 vpslld ymm4,ymm4,32-25 4563 vpxor ymm4,ymm4,ymm8 4564 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 4565 vpalignr ymm7,ymm7,ymm7,12 4566 vpalignr ymm11,ymm11,ymm11,8 4567 vpalignr ymm15,ymm15,ymm15,4 4568 vpalignr ymm6,ymm6,ymm6,12 4569 vpalignr ymm10,ymm10,ymm10,8 4570 vpalignr ymm14,ymm14,ymm14,4 4571 vpalignr ymm5,ymm5,ymm5,12 4572 vpalignr ymm9,ymm9,ymm9,8 4573 vpalignr ymm13,ymm13,ymm13,4 4574 vpalignr ymm4,ymm4,ymm4,12 4575 vpalignr ymm8,ymm8,ymm8,8 4576 mov r10,r13 4577 mov r11,r14 4578 mov r12,r15 4579 and r12,3 4580 mov r13,r15 4581 and r13,-4 4582 mov r14,r9 4583 shrd r15,r9,2 4584 shr r9,2 4585 add r15,r13 4586 adc r9,r14 4587 add r10,r15 4588 adc r11,r9 4589 adc r12,0 4590 vpalignr ymm12,ymm12,ymm12,4 4591 4592 cmp rcx,10*6*8 4593 jne NEAR $L$open_avx2_main_loop_rounds 4594 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 4595 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 4596 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 4597 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 4598 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 4599 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 4600 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 4601 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 4602 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 4603 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 4604 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 4605 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 4606 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4607 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4608 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4609 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4610 4611 vmovdqa YMMWORD[(160+128)+rbp],ymm0 4612 add r10,QWORD[((0+480))+rsi] 4613 adc r11,QWORD[((8+480))+rsi] 4614 adc r12,1 4615 vperm2i128 ymm0,ymm7,ymm3,0x02 4616 vperm2i128 ymm7,ymm7,ymm3,0x13 4617 vperm2i128 ymm3,ymm15,ymm11,0x02 4618 vperm2i128 ymm11,ymm15,ymm11,0x13 4619 
vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 4620 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 4621 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 4622 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 4623 vmovdqu YMMWORD[(0+0)+rdi],ymm0 4624 vmovdqu YMMWORD[(32+0)+rdi],ymm3 4625 vmovdqu YMMWORD[(64+0)+rdi],ymm7 4626 vmovdqu YMMWORD[(96+0)+rdi],ymm11 4627 4628 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 4629 mov rax,QWORD[((0+160+0))+rbp] 4630 mov r15,rax 4631 mul r10 4632 mov r13,rax 4633 mov r14,rdx 4634 mov rax,QWORD[((0+160+0))+rbp] 4635 mul r11 4636 imul r15,r12 4637 add r14,rax 4638 adc r15,rdx 4639 mov rax,QWORD[((8+160+0))+rbp] 4640 mov r9,rax 4641 mul r10 4642 add r14,rax 4643 adc rdx,0 4644 mov r10,rdx 4645 mov rax,QWORD[((8+160+0))+rbp] 4646 mul r11 4647 add r15,rax 4648 adc rdx,0 4649 imul r9,r12 4650 add r15,r10 4651 adc r9,rdx 4652 mov r10,r13 4653 mov r11,r14 4654 mov r12,r15 4655 and r12,3 4656 mov r13,r15 4657 and r13,-4 4658 mov r14,r9 4659 shrd r15,r9,2 4660 shr r9,2 4661 add r15,r13 4662 adc r9,r14 4663 add r10,r15 4664 adc r11,r9 4665 adc r12,0 4666 vperm2i128 ymm3,ymm6,ymm2,0x02 4667 vperm2i128 ymm6,ymm6,ymm2,0x13 4668 vperm2i128 ymm2,ymm14,ymm10,0x02 4669 vperm2i128 ymm10,ymm14,ymm10,0x13 4670 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 4671 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 4672 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 4673 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 4674 vmovdqu YMMWORD[(0+128)+rdi],ymm3 4675 vmovdqu YMMWORD[(32+128)+rdi],ymm2 4676 vmovdqu YMMWORD[(64+128)+rdi],ymm6 4677 vmovdqu YMMWORD[(96+128)+rdi],ymm10 4678 add r10,QWORD[((0+480+16))+rsi] 4679 adc r11,QWORD[((8+480+16))+rsi] 4680 adc r12,1 4681 vperm2i128 ymm3,ymm5,ymm1,0x02 4682 vperm2i128 ymm5,ymm5,ymm1,0x13 4683 vperm2i128 ymm1,ymm13,ymm9,0x02 4684 vperm2i128 ymm9,ymm13,ymm9,0x13 4685 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 4686 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 4687 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 4688 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 4689 vmovdqu YMMWORD[(0+256)+rdi],ymm3 4690 vmovdqu 
YMMWORD[(32+256)+rdi],ymm1 4691 vmovdqu YMMWORD[(64+256)+rdi],ymm5 4692 vmovdqu YMMWORD[(96+256)+rdi],ymm9 4693 mov rax,QWORD[((0+160+0))+rbp] 4694 mov r15,rax 4695 mul r10 4696 mov r13,rax 4697 mov r14,rdx 4698 mov rax,QWORD[((0+160+0))+rbp] 4699 mul r11 4700 imul r15,r12 4701 add r14,rax 4702 adc r15,rdx 4703 mov rax,QWORD[((8+160+0))+rbp] 4704 mov r9,rax 4705 mul r10 4706 add r14,rax 4707 adc rdx,0 4708 mov r10,rdx 4709 mov rax,QWORD[((8+160+0))+rbp] 4710 mul r11 4711 add r15,rax 4712 adc rdx,0 4713 imul r9,r12 4714 add r15,r10 4715 adc r9,rdx 4716 mov r10,r13 4717 mov r11,r14 4718 mov r12,r15 4719 and r12,3 4720 mov r13,r15 4721 and r13,-4 4722 mov r14,r9 4723 shrd r15,r9,2 4724 shr r9,2 4725 add r15,r13 4726 adc r9,r14 4727 add r10,r15 4728 adc r11,r9 4729 adc r12,0 4730 vperm2i128 ymm3,ymm4,ymm0,0x02 4731 vperm2i128 ymm4,ymm4,ymm0,0x13 4732 vperm2i128 ymm0,ymm12,ymm8,0x02 4733 vperm2i128 ymm8,ymm12,ymm8,0x13 4734 vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] 4735 vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] 4736 vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] 4737 vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] 4738 vmovdqu YMMWORD[(0+384)+rdi],ymm3 4739 vmovdqu YMMWORD[(32+384)+rdi],ymm0 4740 vmovdqu YMMWORD[(64+384)+rdi],ymm4 4741 vmovdqu YMMWORD[(96+384)+rdi],ymm8 4742 4743 lea rsi,[512+rsi] 4744 lea rdi,[512+rdi] 4745 sub rbx,16*32 4746 jmp NEAR $L$open_avx2_main_loop 4747$L$open_avx2_main_loop_done: 4748 test rbx,rbx 4749 vzeroupper 4750 je NEAR $L$open_sse_finalize 4751 4752 cmp rbx,12*32 4753 ja NEAR $L$open_avx2_tail_512 4754 cmp rbx,8*32 4755 ja NEAR $L$open_avx2_tail_384 4756 cmp rbx,4*32 4757 ja NEAR $L$open_avx2_tail_256 4758 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4759 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4760 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4761 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4762 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4763 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4764 4765 xor r8,r8 4766 mov rcx,rbx 4767 and rcx,-16 4768 test rcx,rcx 4769 je NEAR 
$L$open_avx2_tail_128_rounds 4770$L$open_avx2_tail_128_rounds_and_x1hash: 4771 add r10,QWORD[((0+0))+r8*1+rsi] 4772 adc r11,QWORD[((8+0))+r8*1+rsi] 4773 adc r12,1 4774 mov rax,QWORD[((0+160+0))+rbp] 4775 mov r15,rax 4776 mul r10 4777 mov r13,rax 4778 mov r14,rdx 4779 mov rax,QWORD[((0+160+0))+rbp] 4780 mul r11 4781 imul r15,r12 4782 add r14,rax 4783 adc r15,rdx 4784 mov rax,QWORD[((8+160+0))+rbp] 4785 mov r9,rax 4786 mul r10 4787 add r14,rax 4788 adc rdx,0 4789 mov r10,rdx 4790 mov rax,QWORD[((8+160+0))+rbp] 4791 mul r11 4792 add r15,rax 4793 adc rdx,0 4794 imul r9,r12 4795 add r15,r10 4796 adc r9,rdx 4797 mov r10,r13 4798 mov r11,r14 4799 mov r12,r15 4800 and r12,3 4801 mov r13,r15 4802 and r13,-4 4803 mov r14,r9 4804 shrd r15,r9,2 4805 shr r9,2 4806 add r15,r13 4807 adc r9,r14 4808 add r10,r15 4809 adc r11,r9 4810 adc r12,0 4811 4812$L$open_avx2_tail_128_rounds: 4813 add r8,16 4814 vpaddd ymm0,ymm0,ymm4 4815 vpxor ymm12,ymm12,ymm0 4816 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4817 vpaddd ymm8,ymm8,ymm12 4818 vpxor ymm4,ymm4,ymm8 4819 vpsrld ymm3,ymm4,20 4820 vpslld ymm4,ymm4,12 4821 vpxor ymm4,ymm4,ymm3 4822 vpaddd ymm0,ymm0,ymm4 4823 vpxor ymm12,ymm12,ymm0 4824 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4825 vpaddd ymm8,ymm8,ymm12 4826 vpxor ymm4,ymm4,ymm8 4827 vpslld ymm3,ymm4,7 4828 vpsrld ymm4,ymm4,25 4829 vpxor ymm4,ymm4,ymm3 4830 vpalignr ymm12,ymm12,ymm12,12 4831 vpalignr ymm8,ymm8,ymm8,8 4832 vpalignr ymm4,ymm4,ymm4,4 4833 vpaddd ymm0,ymm0,ymm4 4834 vpxor ymm12,ymm12,ymm0 4835 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4836 vpaddd ymm8,ymm8,ymm12 4837 vpxor ymm4,ymm4,ymm8 4838 vpsrld ymm3,ymm4,20 4839 vpslld ymm4,ymm4,12 4840 vpxor ymm4,ymm4,ymm3 4841 vpaddd ymm0,ymm0,ymm4 4842 vpxor ymm12,ymm12,ymm0 4843 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4844 vpaddd ymm8,ymm8,ymm12 4845 vpxor ymm4,ymm4,ymm8 4846 vpslld ymm3,ymm4,7 4847 vpsrld ymm4,ymm4,25 4848 vpxor ymm4,ymm4,ymm3 4849 vpalignr ymm12,ymm12,ymm12,4 4850 vpalignr ymm8,ymm8,ymm8,8 4851 vpalignr ymm4,ymm4,ymm4,12 4852 
4853 cmp r8,rcx 4854 jb NEAR $L$open_avx2_tail_128_rounds_and_x1hash 4855 cmp r8,160 4856 jne NEAR $L$open_avx2_tail_128_rounds 4857 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4858 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4859 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4860 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4861 vperm2i128 ymm3,ymm4,ymm0,0x13 4862 vperm2i128 ymm0,ymm4,ymm0,0x02 4863 vperm2i128 ymm4,ymm12,ymm8,0x02 4864 vperm2i128 ymm12,ymm12,ymm8,0x13 4865 vmovdqa ymm8,ymm3 4866 4867 jmp NEAR $L$open_avx2_tail_128_xor 4868 4869$L$open_avx2_tail_256: 4870 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4871 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4872 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4873 vmovdqa ymm1,ymm0 4874 vmovdqa ymm5,ymm4 4875 vmovdqa ymm9,ymm8 4876 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4877 vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] 4878 vpaddd ymm12,ymm12,ymm13 4879 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4880 vmovdqa YMMWORD[(160+192)+rbp],ymm13 4881 4882 mov QWORD[((160+128))+rbp],rbx 4883 mov rcx,rbx 4884 sub rcx,4*32 4885 shr rcx,4 4886 mov r8,10 4887 cmp rcx,10 4888 cmovg rcx,r8 4889 mov rbx,rsi 4890 xor r8,r8 4891$L$open_avx2_tail_256_rounds_and_x1hash: 4892 add r10,QWORD[((0+0))+rbx] 4893 adc r11,QWORD[((8+0))+rbx] 4894 adc r12,1 4895 mov rdx,QWORD[((0+160+0))+rbp] 4896 mov r15,rdx 4897 mulx r14,r13,r10 4898 mulx rdx,rax,r11 4899 imul r15,r12 4900 add r14,rax 4901 adc r15,rdx 4902 mov rdx,QWORD[((8+160+0))+rbp] 4903 mulx rax,r10,r10 4904 add r14,r10 4905 mulx r9,r11,r11 4906 adc r15,r11 4907 adc r9,0 4908 imul rdx,r12 4909 add r15,rax 4910 adc r9,rdx 4911 mov r10,r13 4912 mov r11,r14 4913 mov r12,r15 4914 and r12,3 4915 mov r13,r15 4916 and r13,-4 4917 mov r14,r9 4918 shrd r15,r9,2 4919 shr r9,2 4920 add r15,r13 4921 adc r9,r14 4922 add r10,r15 4923 adc r11,r9 4924 adc r12,0 4925 4926 lea rbx,[16+rbx] 4927$L$open_avx2_tail_256_rounds: 4928 vpaddd ymm0,ymm0,ymm4 4929 vpxor ymm12,ymm12,ymm0 4930 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4931 vpaddd 
ymm8,ymm8,ymm12 4932 vpxor ymm4,ymm4,ymm8 4933 vpsrld ymm3,ymm4,20 4934 vpslld ymm4,ymm4,12 4935 vpxor ymm4,ymm4,ymm3 4936 vpaddd ymm0,ymm0,ymm4 4937 vpxor ymm12,ymm12,ymm0 4938 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4939 vpaddd ymm8,ymm8,ymm12 4940 vpxor ymm4,ymm4,ymm8 4941 vpslld ymm3,ymm4,7 4942 vpsrld ymm4,ymm4,25 4943 vpxor ymm4,ymm4,ymm3 4944 vpalignr ymm12,ymm12,ymm12,12 4945 vpalignr ymm8,ymm8,ymm8,8 4946 vpalignr ymm4,ymm4,ymm4,4 4947 vpaddd ymm1,ymm1,ymm5 4948 vpxor ymm13,ymm13,ymm1 4949 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 4950 vpaddd ymm9,ymm9,ymm13 4951 vpxor ymm5,ymm5,ymm9 4952 vpsrld ymm3,ymm5,20 4953 vpslld ymm5,ymm5,12 4954 vpxor ymm5,ymm5,ymm3 4955 vpaddd ymm1,ymm1,ymm5 4956 vpxor ymm13,ymm13,ymm1 4957 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 4958 vpaddd ymm9,ymm9,ymm13 4959 vpxor ymm5,ymm5,ymm9 4960 vpslld ymm3,ymm5,7 4961 vpsrld ymm5,ymm5,25 4962 vpxor ymm5,ymm5,ymm3 4963 vpalignr ymm13,ymm13,ymm13,12 4964 vpalignr ymm9,ymm9,ymm9,8 4965 vpalignr ymm5,ymm5,ymm5,4 4966 4967 inc r8 4968 vpaddd ymm0,ymm0,ymm4 4969 vpxor ymm12,ymm12,ymm0 4970 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4971 vpaddd ymm8,ymm8,ymm12 4972 vpxor ymm4,ymm4,ymm8 4973 vpsrld ymm3,ymm4,20 4974 vpslld ymm4,ymm4,12 4975 vpxor ymm4,ymm4,ymm3 4976 vpaddd ymm0,ymm0,ymm4 4977 vpxor ymm12,ymm12,ymm0 4978 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4979 vpaddd ymm8,ymm8,ymm12 4980 vpxor ymm4,ymm4,ymm8 4981 vpslld ymm3,ymm4,7 4982 vpsrld ymm4,ymm4,25 4983 vpxor ymm4,ymm4,ymm3 4984 vpalignr ymm12,ymm12,ymm12,4 4985 vpalignr ymm8,ymm8,ymm8,8 4986 vpalignr ymm4,ymm4,ymm4,12 4987 vpaddd ymm1,ymm1,ymm5 4988 vpxor ymm13,ymm13,ymm1 4989 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 4990 vpaddd ymm9,ymm9,ymm13 4991 vpxor ymm5,ymm5,ymm9 4992 vpsrld ymm3,ymm5,20 4993 vpslld ymm5,ymm5,12 4994 vpxor ymm5,ymm5,ymm3 4995 vpaddd ymm1,ymm1,ymm5 4996 vpxor ymm13,ymm13,ymm1 4997 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 4998 vpaddd ymm9,ymm9,ymm13 4999 vpxor ymm5,ymm5,ymm9 5000 vpslld ymm3,ymm5,7 5001 vpsrld ymm5,ymm5,25 5002 vpxor 
ymm5,ymm5,ymm3 5003 vpalignr ymm13,ymm13,ymm13,4 5004 vpalignr ymm9,ymm9,ymm9,8 5005 vpalignr ymm5,ymm5,ymm5,12 5006 vpaddd ymm2,ymm2,ymm6 5007 vpxor ymm14,ymm14,ymm2 5008 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5009 vpaddd ymm10,ymm10,ymm14 5010 vpxor ymm6,ymm6,ymm10 5011 vpsrld ymm3,ymm6,20 5012 vpslld ymm6,ymm6,12 5013 vpxor ymm6,ymm6,ymm3 5014 vpaddd ymm2,ymm2,ymm6 5015 vpxor ymm14,ymm14,ymm2 5016 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5017 vpaddd ymm10,ymm10,ymm14 5018 vpxor ymm6,ymm6,ymm10 5019 vpslld ymm3,ymm6,7 5020 vpsrld ymm6,ymm6,25 5021 vpxor ymm6,ymm6,ymm3 5022 vpalignr ymm14,ymm14,ymm14,4 5023 vpalignr ymm10,ymm10,ymm10,8 5024 vpalignr ymm6,ymm6,ymm6,12 5025 5026 cmp r8,rcx 5027 jb NEAR $L$open_avx2_tail_256_rounds_and_x1hash 5028 cmp r8,10 5029 jne NEAR $L$open_avx2_tail_256_rounds 5030 mov r8,rbx 5031 sub rbx,rsi 5032 mov rcx,rbx 5033 mov rbx,QWORD[((160+128))+rbp] 5034$L$open_avx2_tail_256_hash: 5035 add rcx,16 5036 cmp rcx,rbx 5037 jg NEAR $L$open_avx2_tail_256_done 5038 add r10,QWORD[((0+0))+r8] 5039 adc r11,QWORD[((8+0))+r8] 5040 adc r12,1 5041 mov rdx,QWORD[((0+160+0))+rbp] 5042 mov r15,rdx 5043 mulx r14,r13,r10 5044 mulx rdx,rax,r11 5045 imul r15,r12 5046 add r14,rax 5047 adc r15,rdx 5048 mov rdx,QWORD[((8+160+0))+rbp] 5049 mulx rax,r10,r10 5050 add r14,r10 5051 mulx r9,r11,r11 5052 adc r15,r11 5053 adc r9,0 5054 imul rdx,r12 5055 add r15,rax 5056 adc r9,rdx 5057 mov r10,r13 5058 mov r11,r14 5059 mov r12,r15 5060 and r12,3 5061 mov r13,r15 5062 and r13,-4 5063 mov r14,r9 5064 shrd r15,r9,2 5065 shr r9,2 5066 add r15,r13 5067 adc r9,r14 5068 add r10,r15 5069 adc r11,r9 5070 adc r12,0 5071 5072 lea r8,[16+r8] 5073 jmp NEAR $L$open_avx2_tail_256_hash 5074$L$open_avx2_tail_256_done: 5075 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5076 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5077 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5078 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5079 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5080 vpaddd 
ymm4,ymm4,YMMWORD[((160+64))+rbp] 5081 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5082 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5083 vperm2i128 ymm3,ymm5,ymm1,0x02 5084 vperm2i128 ymm5,ymm5,ymm1,0x13 5085 vperm2i128 ymm1,ymm13,ymm9,0x02 5086 vperm2i128 ymm9,ymm13,ymm9,0x13 5087 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 5088 vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] 5089 vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] 5090 vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] 5091 vmovdqu YMMWORD[(0+0)+rdi],ymm3 5092 vmovdqu YMMWORD[(32+0)+rdi],ymm1 5093 vmovdqu YMMWORD[(64+0)+rdi],ymm5 5094 vmovdqu YMMWORD[(96+0)+rdi],ymm9 5095 vperm2i128 ymm3,ymm4,ymm0,0x13 5096 vperm2i128 ymm0,ymm4,ymm0,0x02 5097 vperm2i128 ymm4,ymm12,ymm8,0x02 5098 vperm2i128 ymm12,ymm12,ymm8,0x13 5099 vmovdqa ymm8,ymm3 5100 5101 lea rsi,[128+rsi] 5102 lea rdi,[128+rdi] 5103 sub rbx,4*32 5104 jmp NEAR $L$open_avx2_tail_128_xor 5105 5106$L$open_avx2_tail_384: 5107 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 5108 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 5109 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 5110 vmovdqa ymm1,ymm0 5111 vmovdqa ymm5,ymm4 5112 vmovdqa ymm9,ymm8 5113 vmovdqa ymm2,ymm0 5114 vmovdqa ymm6,ymm4 5115 vmovdqa ymm10,ymm8 5116 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 5117 vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] 5118 vpaddd ymm13,ymm12,ymm14 5119 vpaddd ymm12,ymm12,ymm13 5120 vmovdqa YMMWORD[(160+160)+rbp],ymm12 5121 vmovdqa YMMWORD[(160+192)+rbp],ymm13 5122 vmovdqa YMMWORD[(160+224)+rbp],ymm14 5123 5124 mov QWORD[((160+128))+rbp],rbx 5125 mov rcx,rbx 5126 sub rcx,8*32 5127 shr rcx,4 5128 add rcx,6 5129 mov r8,10 5130 cmp rcx,10 5131 cmovg rcx,r8 5132 mov rbx,rsi 5133 xor r8,r8 5134$L$open_avx2_tail_384_rounds_and_x2hash: 5135 add r10,QWORD[((0+0))+rbx] 5136 adc r11,QWORD[((8+0))+rbx] 5137 adc r12,1 5138 mov rdx,QWORD[((0+160+0))+rbp] 5139 mov r15,rdx 5140 mulx r14,r13,r10 5141 mulx rdx,rax,r11 5142 imul r15,r12 5143 add r14,rax 5144 adc r15,rdx 5145 mov rdx,QWORD[((8+160+0))+rbp] 5146 mulx rax,r10,r10 5147 add r14,r10 5148 mulx 
r9,r11,r11 5149 adc r15,r11 5150 adc r9,0 5151 imul rdx,r12 5152 add r15,rax 5153 adc r9,rdx 5154 mov r10,r13 5155 mov r11,r14 5156 mov r12,r15 5157 and r12,3 5158 mov r13,r15 5159 and r13,-4 5160 mov r14,r9 5161 shrd r15,r9,2 5162 shr r9,2 5163 add r15,r13 5164 adc r9,r14 5165 add r10,r15 5166 adc r11,r9 5167 adc r12,0 5168 5169 lea rbx,[16+rbx] 5170$L$open_avx2_tail_384_rounds_and_x1hash: 5171 vpaddd ymm2,ymm2,ymm6 5172 vpxor ymm14,ymm14,ymm2 5173 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5174 vpaddd ymm10,ymm10,ymm14 5175 vpxor ymm6,ymm6,ymm10 5176 vpsrld ymm3,ymm6,20 5177 vpslld ymm6,ymm6,12 5178 vpxor ymm6,ymm6,ymm3 5179 vpaddd ymm2,ymm2,ymm6 5180 vpxor ymm14,ymm14,ymm2 5181 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5182 vpaddd ymm10,ymm10,ymm14 5183 vpxor ymm6,ymm6,ymm10 5184 vpslld ymm3,ymm6,7 5185 vpsrld ymm6,ymm6,25 5186 vpxor ymm6,ymm6,ymm3 5187 vpalignr ymm14,ymm14,ymm14,12 5188 vpalignr ymm10,ymm10,ymm10,8 5189 vpalignr ymm6,ymm6,ymm6,4 5190 vpaddd ymm1,ymm1,ymm5 5191 vpxor ymm13,ymm13,ymm1 5192 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5193 vpaddd ymm9,ymm9,ymm13 5194 vpxor ymm5,ymm5,ymm9 5195 vpsrld ymm3,ymm5,20 5196 vpslld ymm5,ymm5,12 5197 vpxor ymm5,ymm5,ymm3 5198 vpaddd ymm1,ymm1,ymm5 5199 vpxor ymm13,ymm13,ymm1 5200 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5201 vpaddd ymm9,ymm9,ymm13 5202 vpxor ymm5,ymm5,ymm9 5203 vpslld ymm3,ymm5,7 5204 vpsrld ymm5,ymm5,25 5205 vpxor ymm5,ymm5,ymm3 5206 vpalignr ymm13,ymm13,ymm13,12 5207 vpalignr ymm9,ymm9,ymm9,8 5208 vpalignr ymm5,ymm5,ymm5,4 5209 vpaddd ymm0,ymm0,ymm4 5210 vpxor ymm12,ymm12,ymm0 5211 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5212 vpaddd ymm8,ymm8,ymm12 5213 vpxor ymm4,ymm4,ymm8 5214 vpsrld ymm3,ymm4,20 5215 vpslld ymm4,ymm4,12 5216 vpxor ymm4,ymm4,ymm3 5217 vpaddd ymm0,ymm0,ymm4 5218 vpxor ymm12,ymm12,ymm0 5219 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5220 vpaddd ymm8,ymm8,ymm12 5221 vpxor ymm4,ymm4,ymm8 5222 vpslld ymm3,ymm4,7 5223 vpsrld ymm4,ymm4,25 5224 vpxor ymm4,ymm4,ymm3 5225 vpalignr ymm12,ymm12,ymm12,12 5226 
vpalignr ymm8,ymm8,ymm8,8 5227 vpalignr ymm4,ymm4,ymm4,4 5228 add r10,QWORD[((0+0))+rbx] 5229 adc r11,QWORD[((8+0))+rbx] 5230 adc r12,1 5231 mov rax,QWORD[((0+160+0))+rbp] 5232 mov r15,rax 5233 mul r10 5234 mov r13,rax 5235 mov r14,rdx 5236 mov rax,QWORD[((0+160+0))+rbp] 5237 mul r11 5238 imul r15,r12 5239 add r14,rax 5240 adc r15,rdx 5241 mov rax,QWORD[((8+160+0))+rbp] 5242 mov r9,rax 5243 mul r10 5244 add r14,rax 5245 adc rdx,0 5246 mov r10,rdx 5247 mov rax,QWORD[((8+160+0))+rbp] 5248 mul r11 5249 add r15,rax 5250 adc rdx,0 5251 imul r9,r12 5252 add r15,r10 5253 adc r9,rdx 5254 mov r10,r13 5255 mov r11,r14 5256 mov r12,r15 5257 and r12,3 5258 mov r13,r15 5259 and r13,-4 5260 mov r14,r9 5261 shrd r15,r9,2 5262 shr r9,2 5263 add r15,r13 5264 adc r9,r14 5265 add r10,r15 5266 adc r11,r9 5267 adc r12,0 5268 5269 lea rbx,[16+rbx] 5270 inc r8 5271 vpaddd ymm2,ymm2,ymm6 5272 vpxor ymm14,ymm14,ymm2 5273 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5274 vpaddd ymm10,ymm10,ymm14 5275 vpxor ymm6,ymm6,ymm10 5276 vpsrld ymm3,ymm6,20 5277 vpslld ymm6,ymm6,12 5278 vpxor ymm6,ymm6,ymm3 5279 vpaddd ymm2,ymm2,ymm6 5280 vpxor ymm14,ymm14,ymm2 5281 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5282 vpaddd ymm10,ymm10,ymm14 5283 vpxor ymm6,ymm6,ymm10 5284 vpslld ymm3,ymm6,7 5285 vpsrld ymm6,ymm6,25 5286 vpxor ymm6,ymm6,ymm3 5287 vpalignr ymm14,ymm14,ymm14,4 5288 vpalignr ymm10,ymm10,ymm10,8 5289 vpalignr ymm6,ymm6,ymm6,12 5290 vpaddd ymm1,ymm1,ymm5 5291 vpxor ymm13,ymm13,ymm1 5292 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5293 vpaddd ymm9,ymm9,ymm13 5294 vpxor ymm5,ymm5,ymm9 5295 vpsrld ymm3,ymm5,20 5296 vpslld ymm5,ymm5,12 5297 vpxor ymm5,ymm5,ymm3 5298 vpaddd ymm1,ymm1,ymm5 5299 vpxor ymm13,ymm13,ymm1 5300 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5301 vpaddd ymm9,ymm9,ymm13 5302 vpxor ymm5,ymm5,ymm9 5303 vpslld ymm3,ymm5,7 5304 vpsrld ymm5,ymm5,25 5305 vpxor ymm5,ymm5,ymm3 5306 vpalignr ymm13,ymm13,ymm13,4 5307 vpalignr ymm9,ymm9,ymm9,8 5308 vpalignr ymm5,ymm5,ymm5,12 5309 vpaddd ymm0,ymm0,ymm4 5310 vpxor 
ymm12,ymm12,ymm0 5311 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5312 vpaddd ymm8,ymm8,ymm12 5313 vpxor ymm4,ymm4,ymm8 5314 vpsrld ymm3,ymm4,20 5315 vpslld ymm4,ymm4,12 5316 vpxor ymm4,ymm4,ymm3 5317 vpaddd ymm0,ymm0,ymm4 5318 vpxor ymm12,ymm12,ymm0 5319 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5320 vpaddd ymm8,ymm8,ymm12 5321 vpxor ymm4,ymm4,ymm8 5322 vpslld ymm3,ymm4,7 5323 vpsrld ymm4,ymm4,25 5324 vpxor ymm4,ymm4,ymm3 5325 vpalignr ymm12,ymm12,ymm12,4 5326 vpalignr ymm8,ymm8,ymm8,8 5327 vpalignr ymm4,ymm4,ymm4,12 5328 5329 cmp r8,rcx 5330 jb NEAR $L$open_avx2_tail_384_rounds_and_x2hash 5331 cmp r8,10 5332 jne NEAR $L$open_avx2_tail_384_rounds_and_x1hash 5333 mov r8,rbx 5334 sub rbx,rsi 5335 mov rcx,rbx 5336 mov rbx,QWORD[((160+128))+rbp] 5337$L$open_avx2_384_tail_hash: 5338 add rcx,16 5339 cmp rcx,rbx 5340 jg NEAR $L$open_avx2_384_tail_done 5341 add r10,QWORD[((0+0))+r8] 5342 adc r11,QWORD[((8+0))+r8] 5343 adc r12,1 5344 mov rdx,QWORD[((0+160+0))+rbp] 5345 mov r15,rdx 5346 mulx r14,r13,r10 5347 mulx rdx,rax,r11 5348 imul r15,r12 5349 add r14,rax 5350 adc r15,rdx 5351 mov rdx,QWORD[((8+160+0))+rbp] 5352 mulx rax,r10,r10 5353 add r14,r10 5354 mulx r9,r11,r11 5355 adc r15,r11 5356 adc r9,0 5357 imul rdx,r12 5358 add r15,rax 5359 adc r9,rdx 5360 mov r10,r13 5361 mov r11,r14 5362 mov r12,r15 5363 and r12,3 5364 mov r13,r15 5365 and r13,-4 5366 mov r14,r9 5367 shrd r15,r9,2 5368 shr r9,2 5369 add r15,r13 5370 adc r9,r14 5371 add r10,r15 5372 adc r11,r9 5373 adc r12,0 5374 5375 lea r8,[16+r8] 5376 jmp NEAR $L$open_avx2_384_tail_hash 5377$L$open_avx2_384_tail_done: 5378 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 5379 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 5380 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 5381 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 5382 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5383 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5384 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5385 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5386 vpaddd 
ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5387 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5388 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5389 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5390 vperm2i128 ymm3,ymm6,ymm2,0x02 5391 vperm2i128 ymm6,ymm6,ymm2,0x13 5392 vperm2i128 ymm2,ymm14,ymm10,0x02 5393 vperm2i128 ymm10,ymm14,ymm10,0x13 5394 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 5395 vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] 5396 vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] 5397 vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] 5398 vmovdqu YMMWORD[(0+0)+rdi],ymm3 5399 vmovdqu YMMWORD[(32+0)+rdi],ymm2 5400 vmovdqu YMMWORD[(64+0)+rdi],ymm6 5401 vmovdqu YMMWORD[(96+0)+rdi],ymm10 5402 vperm2i128 ymm3,ymm5,ymm1,0x02 5403 vperm2i128 ymm5,ymm5,ymm1,0x13 5404 vperm2i128 ymm1,ymm13,ymm9,0x02 5405 vperm2i128 ymm9,ymm13,ymm9,0x13 5406 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 5407 vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] 5408 vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] 5409 vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] 5410 vmovdqu YMMWORD[(0+128)+rdi],ymm3 5411 vmovdqu YMMWORD[(32+128)+rdi],ymm1 5412 vmovdqu YMMWORD[(64+128)+rdi],ymm5 5413 vmovdqu YMMWORD[(96+128)+rdi],ymm9 5414 vperm2i128 ymm3,ymm4,ymm0,0x13 5415 vperm2i128 ymm0,ymm4,ymm0,0x02 5416 vperm2i128 ymm4,ymm12,ymm8,0x02 5417 vperm2i128 ymm12,ymm12,ymm8,0x13 5418 vmovdqa ymm8,ymm3 5419 5420 lea rsi,[256+rsi] 5421 lea rdi,[256+rdi] 5422 sub rbx,8*32 5423 jmp NEAR $L$open_avx2_tail_128_xor 5424 5425$L$open_avx2_tail_512: 5426 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 5427 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 5428 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 5429 vmovdqa ymm1,ymm0 5430 vmovdqa ymm5,ymm4 5431 vmovdqa ymm9,ymm8 5432 vmovdqa ymm2,ymm0 5433 vmovdqa ymm6,ymm4 5434 vmovdqa ymm10,ymm8 5435 vmovdqa ymm3,ymm0 5436 vmovdqa ymm7,ymm4 5437 vmovdqa ymm11,ymm8 5438 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 5439 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 5440 vpaddd ymm14,ymm12,ymm15 5441 vpaddd ymm13,ymm12,ymm14 5442 vpaddd ymm12,ymm12,ymm13 5443 vmovdqa 
YMMWORD[(160+256)+rbp],ymm15 5444 vmovdqa YMMWORD[(160+224)+rbp],ymm14 5445 vmovdqa YMMWORD[(160+192)+rbp],ymm13 5446 vmovdqa YMMWORD[(160+160)+rbp],ymm12 5447 5448 xor rcx,rcx 5449 mov r8,rsi 5450$L$open_avx2_tail_512_rounds_and_x2hash: 5451 add r10,QWORD[((0+0))+r8] 5452 adc r11,QWORD[((8+0))+r8] 5453 adc r12,1 5454 mov rax,QWORD[((0+160+0))+rbp] 5455 mov r15,rax 5456 mul r10 5457 mov r13,rax 5458 mov r14,rdx 5459 mov rax,QWORD[((0+160+0))+rbp] 5460 mul r11 5461 imul r15,r12 5462 add r14,rax 5463 adc r15,rdx 5464 mov rax,QWORD[((8+160+0))+rbp] 5465 mov r9,rax 5466 mul r10 5467 add r14,rax 5468 adc rdx,0 5469 mov r10,rdx 5470 mov rax,QWORD[((8+160+0))+rbp] 5471 mul r11 5472 add r15,rax 5473 adc rdx,0 5474 imul r9,r12 5475 add r15,r10 5476 adc r9,rdx 5477 mov r10,r13 5478 mov r11,r14 5479 mov r12,r15 5480 and r12,3 5481 mov r13,r15 5482 and r13,-4 5483 mov r14,r9 5484 shrd r15,r9,2 5485 shr r9,2 5486 add r15,r13 5487 adc r9,r14 5488 add r10,r15 5489 adc r11,r9 5490 adc r12,0 5491 5492 lea r8,[16+r8] 5493$L$open_avx2_tail_512_rounds_and_x1hash: 5494 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5495 vmovdqa ymm8,YMMWORD[$L$rol16] 5496 vpaddd ymm3,ymm3,ymm7 5497 vpaddd ymm2,ymm2,ymm6 5498 vpaddd ymm1,ymm1,ymm5 5499 vpaddd ymm0,ymm0,ymm4 5500 vpxor ymm15,ymm15,ymm3 5501 vpxor ymm14,ymm14,ymm2 5502 vpxor ymm13,ymm13,ymm1 5503 vpxor ymm12,ymm12,ymm0 5504 vpshufb ymm15,ymm15,ymm8 5505 vpshufb ymm14,ymm14,ymm8 5506 vpshufb ymm13,ymm13,ymm8 5507 vpshufb ymm12,ymm12,ymm8 5508 vpaddd ymm11,ymm11,ymm15 5509 vpaddd ymm10,ymm10,ymm14 5510 vpaddd ymm9,ymm9,ymm13 5511 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5512 vpxor ymm7,ymm7,ymm11 5513 vpxor ymm6,ymm6,ymm10 5514 vpxor ymm5,ymm5,ymm9 5515 vpxor ymm4,ymm4,ymm8 5516 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5517 vpsrld ymm8,ymm7,20 5518 vpslld ymm7,ymm7,32-20 5519 vpxor ymm7,ymm7,ymm8 5520 vpsrld ymm8,ymm6,20 5521 vpslld ymm6,ymm6,32-20 5522 vpxor ymm6,ymm6,ymm8 5523 vpsrld ymm8,ymm5,20 5524 vpslld ymm5,ymm5,32-20 5525 vpxor ymm5,ymm5,ymm8 
5526 vpsrld ymm8,ymm4,20 5527 vpslld ymm4,ymm4,32-20 5528 vpxor ymm4,ymm4,ymm8 5529 vmovdqa ymm8,YMMWORD[$L$rol8] 5530 vpaddd ymm3,ymm3,ymm7 5531 add r10,QWORD[((0+0))+r8] 5532 adc r11,QWORD[((8+0))+r8] 5533 adc r12,1 5534 mov rdx,QWORD[((0+160+0))+rbp] 5535 mov r15,rdx 5536 mulx r14,r13,r10 5537 mulx rdx,rax,r11 5538 imul r15,r12 5539 add r14,rax 5540 adc r15,rdx 5541 mov rdx,QWORD[((8+160+0))+rbp] 5542 mulx rax,r10,r10 5543 add r14,r10 5544 mulx r9,r11,r11 5545 adc r15,r11 5546 adc r9,0 5547 imul rdx,r12 5548 add r15,rax 5549 adc r9,rdx 5550 mov r10,r13 5551 mov r11,r14 5552 mov r12,r15 5553 and r12,3 5554 mov r13,r15 5555 and r13,-4 5556 mov r14,r9 5557 shrd r15,r9,2 5558 shr r9,2 5559 add r15,r13 5560 adc r9,r14 5561 add r10,r15 5562 adc r11,r9 5563 adc r12,0 5564 vpaddd ymm2,ymm2,ymm6 5565 vpaddd ymm1,ymm1,ymm5 5566 vpaddd ymm0,ymm0,ymm4 5567 vpxor ymm15,ymm15,ymm3 5568 vpxor ymm14,ymm14,ymm2 5569 vpxor ymm13,ymm13,ymm1 5570 vpxor ymm12,ymm12,ymm0 5571 vpshufb ymm15,ymm15,ymm8 5572 vpshufb ymm14,ymm14,ymm8 5573 vpshufb ymm13,ymm13,ymm8 5574 vpshufb ymm12,ymm12,ymm8 5575 vpaddd ymm11,ymm11,ymm15 5576 vpaddd ymm10,ymm10,ymm14 5577 vpaddd ymm9,ymm9,ymm13 5578 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5579 vpxor ymm7,ymm7,ymm11 5580 vpxor ymm6,ymm6,ymm10 5581 vpxor ymm5,ymm5,ymm9 5582 vpxor ymm4,ymm4,ymm8 5583 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5584 vpsrld ymm8,ymm7,25 5585 vpslld ymm7,ymm7,32-25 5586 vpxor ymm7,ymm7,ymm8 5587 vpsrld ymm8,ymm6,25 5588 vpslld ymm6,ymm6,32-25 5589 vpxor ymm6,ymm6,ymm8 5590 vpsrld ymm8,ymm5,25 5591 vpslld ymm5,ymm5,32-25 5592 vpxor ymm5,ymm5,ymm8 5593 vpsrld ymm8,ymm4,25 5594 vpslld ymm4,ymm4,32-25 5595 vpxor ymm4,ymm4,ymm8 5596 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 5597 vpalignr ymm7,ymm7,ymm7,4 5598 vpalignr ymm11,ymm11,ymm11,8 5599 vpalignr ymm15,ymm15,ymm15,12 5600 vpalignr ymm6,ymm6,ymm6,4 5601 vpalignr ymm10,ymm10,ymm10,8 5602 vpalignr ymm14,ymm14,ymm14,12 5603 vpalignr ymm5,ymm5,ymm5,4 5604 vpalignr ymm9,ymm9,ymm9,8 5605 
vpalignr ymm13,ymm13,ymm13,12 5606 vpalignr ymm4,ymm4,ymm4,4 5607 vpalignr ymm8,ymm8,ymm8,8 5608 vpalignr ymm12,ymm12,ymm12,12 5609 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5610 vmovdqa ymm8,YMMWORD[$L$rol16] 5611 vpaddd ymm3,ymm3,ymm7 5612 add r10,QWORD[((0+16))+r8] 5613 adc r11,QWORD[((8+16))+r8] 5614 adc r12,1 5615 mov rdx,QWORD[((0+160+0))+rbp] 5616 mov r15,rdx 5617 mulx r14,r13,r10 5618 mulx rdx,rax,r11 5619 imul r15,r12 5620 add r14,rax 5621 adc r15,rdx 5622 mov rdx,QWORD[((8+160+0))+rbp] 5623 mulx rax,r10,r10 5624 add r14,r10 5625 mulx r9,r11,r11 5626 adc r15,r11 5627 adc r9,0 5628 imul rdx,r12 5629 add r15,rax 5630 adc r9,rdx 5631 mov r10,r13 5632 mov r11,r14 5633 mov r12,r15 5634 and r12,3 5635 mov r13,r15 5636 and r13,-4 5637 mov r14,r9 5638 shrd r15,r9,2 5639 shr r9,2 5640 add r15,r13 5641 adc r9,r14 5642 add r10,r15 5643 adc r11,r9 5644 adc r12,0 5645 5646 lea r8,[32+r8] 5647 vpaddd ymm2,ymm2,ymm6 5648 vpaddd ymm1,ymm1,ymm5 5649 vpaddd ymm0,ymm0,ymm4 5650 vpxor ymm15,ymm15,ymm3 5651 vpxor ymm14,ymm14,ymm2 5652 vpxor ymm13,ymm13,ymm1 5653 vpxor ymm12,ymm12,ymm0 5654 vpshufb ymm15,ymm15,ymm8 5655 vpshufb ymm14,ymm14,ymm8 5656 vpshufb ymm13,ymm13,ymm8 5657 vpshufb ymm12,ymm12,ymm8 5658 vpaddd ymm11,ymm11,ymm15 5659 vpaddd ymm10,ymm10,ymm14 5660 vpaddd ymm9,ymm9,ymm13 5661 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5662 vpxor ymm7,ymm7,ymm11 5663 vpxor ymm6,ymm6,ymm10 5664 vpxor ymm5,ymm5,ymm9 5665 vpxor ymm4,ymm4,ymm8 5666 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5667 vpsrld ymm8,ymm7,20 5668 vpslld ymm7,ymm7,32-20 5669 vpxor ymm7,ymm7,ymm8 5670 vpsrld ymm8,ymm6,20 5671 vpslld ymm6,ymm6,32-20 5672 vpxor ymm6,ymm6,ymm8 5673 vpsrld ymm8,ymm5,20 5674 vpslld ymm5,ymm5,32-20 5675 vpxor ymm5,ymm5,ymm8 5676 vpsrld ymm8,ymm4,20 5677 vpslld ymm4,ymm4,32-20 5678 vpxor ymm4,ymm4,ymm8 5679 vmovdqa ymm8,YMMWORD[$L$rol8] 5680 vpaddd ymm3,ymm3,ymm7 5681 vpaddd ymm2,ymm2,ymm6 5682 vpaddd ymm1,ymm1,ymm5 5683 vpaddd ymm0,ymm0,ymm4 5684 vpxor ymm15,ymm15,ymm3 5685 vpxor 
ymm14,ymm14,ymm2  ; operands of the vpxor whose mnemonic ends the preceding line
5686 vpxor ymm13,ymm13,ymm1
5687 vpxor ymm12,ymm12,ymm0
; ymm8 still holds the $L$rol8 byte-shuffle mask loaded at 5679, so each
; vpshufb below rotates every 32-bit lane of its register left by 8 bits.
5688 vpshufb ymm15,ymm15,ymm8
5689 vpshufb ymm14,ymm14,ymm8
5690 vpshufb ymm13,ymm13,ymm8
5691 vpshufb ymm12,ymm12,ymm8
5692 vpaddd ymm11,ymm11,ymm15
5693 vpaddd ymm10,ymm10,ymm14
5694 vpaddd ymm9,ymm9,ymm13
; ymm8 doubles as a scratch register, so the vector it normally carries is
; kept in the frame slot [rbp+160+128] and updated through memory.
5695 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
5696 vpxor ymm7,ymm7,ymm11
5697 vpxor ymm6,ymm6,ymm10
5698 vpxor ymm5,ymm5,ymm9
5699 vpxor ymm4,ymm4,ymm8
5700 vmovdqa YMMWORD[(160+128)+rbp],ymm8  ; park the updated vector; ymm8 is a temporary again
; Rotate every 32-bit lane of ymm7, ymm6, ymm5, ymm4 left by 7:
; (x >> 25) ^ (x << 7), with ymm8 holding the right-shifted half.
5701 vpsrld ymm8,ymm7,25
5702 vpslld ymm7,ymm7,32-25
5703 vpxor ymm7,ymm7,ymm8
5704 vpsrld ymm8,ymm6,25
5705 vpslld ymm6,ymm6,32-25
5706 vpxor ymm6,ymm6,ymm8
5707 vpsrld ymm8,ymm5,25
5708 vpslld ymm5,ymm5,32-25
5709 vpxor ymm5,ymm5,ymm8
5710 vpsrld ymm8,ymm4,25
5711 vpslld ymm4,ymm4,32-25
5712 vpxor ymm4,ymm4,ymm8
5713 vmovdqa ymm8,YMMWORD[((160+128))+rbp]  ; reload the parked vector into ymm8
; vpalignr of a register with itself rotates each 128-bit lane by the given
; number of bytes (12, 8 and 4 here), one rotation amount per register group.
5714 vpalignr ymm7,ymm7,ymm7,12
5715 vpalignr ymm11,ymm11,ymm11,8
5716 vpalignr ymm15,ymm15,ymm15,4
5717 vpalignr ymm6,ymm6,ymm6,12
5718 vpalignr ymm10,ymm10,ymm10,8
5719 vpalignr ymm14,ymm14,ymm14,4
5720 vpalignr ymm5,ymm5,ymm5,12
5721 vpalignr ymm9,ymm9,ymm9,8
5722 vpalignr ymm13,ymm13,ymm13,4
5723 vpalignr ymm4,ymm4,ymm4,12
5724 vpalignr ymm8,ymm8,ymm8,8
5725 vpalignr ymm12,ymm12,ymm12,4
5726
; Loop control for the rounds loop (both target labels lie before this chunk):
; while rcx < 4 re-enter at the x2hash label, then at the x1hash label until
; rcx reaches 10.
5727 inc rcx
5728 cmp rcx,4
5729 jl NEAR $L$open_avx2_tail_512_rounds_and_x2hash
5730 cmp rcx,10
5731 jne NEAR $L$open_avx2_tail_512_rounds_and_x1hash
; rcx = (rbx - 384) rounded down to a multiple of 16: the byte count the
; loop below still has to absorb, 16 bytes per iteration.
5732 mov rcx,rbx
5733 sub rcx,12*32
5734 and rcx,-16
5735$L$open_avx2_tail_512_hash:
; Hash loop: absorbs 16 bytes at [r8] into the accumulator r12:r11:r10 per
; iteration and exits to $L$open_avx2_tail_512_done once rcx is zero.
5736 test rcx,rcx
5737 je NEAR $L$open_avx2_tail_512_done
; accumulator += 16 input bytes; the extra 1 is added into the third limb,
; i.e. at bit 128.
5738 add r10,QWORD[((0+0))+r8]
5739 adc r11,QWORD[((8+0))+r8]
5740 adc r12,1
; Multiply the accumulator by the two qwords at [rbp+160] and [rbp+168]
; (mulx takes its implicit multiplicand from rdx). The product is gathered
; in r9:r15:r14:r13.
5741 mov rdx,QWORD[((0+160+0))+rbp]
5742 mov r15,rdx
5743 mulx r14,r13,r10
5744 mulx rdx,rax,r11
5745 imul r15,r12
5746 add r14,rax
5747 adc r15,rdx
5748 mov rdx,QWORD[((8+160+0))+rbp]
5749 mulx rax,r10,r10
5750 add r14,r10
5751 mulx r9,r11,r11
5752 adc r15,r11
5753 adc r9,0
5754 imul rdx,r12
5755 add r15,rax
5756 adc r9,rdx
; Reduction: keep the low 130 bits in r12:r11:r10 (r12 &= 3) and add back
; 5 * (product >> 130), formed as (hi & -4) + (hi >> 2).
5757 mov r10,r13
5758 mov r11,r14
5759 mov r12,r15
5760 and r12,3
5761 mov r13,r15
5762 and r13,-4
5763 mov r14,r9
5764 shrd r15,r9,2
5765 shr r9,2
5766 add r15,r13
5767 adc r9,r14
5768 add r10,r15
5769 adc r11,r9
5770 adc r12,0
5771
5772 lea r8,[16+r8]  ; advance the hash input pointer
5773 sub rcx,2*8  ; 16 fewer bytes left to hash
5774 jmp NEAR $L$open_avx2_tail_512_hash
5775$L$open_avx2_tail_512_done:
; Add to each of the four working register sets the constants row, the
; vectors stored at [rbp+160+64] and [rbp+160+96], and one per-set vector
; from [rbp+160+160 .. 160+256]. The stores that filled these frame slots
; are outside this chunk; presumably they hold the initial state rows.
5776 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts]
5777 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp]
5778 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp]
5779 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp]
5780 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
5781 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
5782 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
5783 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
5784 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5785 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
5786 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
5787 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
5788 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5789 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
5790 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
5791 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
5792
5793 vmovdqa YMMWORD[(160+128)+rbp],ymm0  ; spill ymm0 so it can be used as a temporary
; vperm2i128 with 0x02 pairs the low 128-bit halves of its two sources and
; with 0x13 pairs the high halves. The regrouped vectors are XORed with
; 128 bytes from [rsi] and written to [rdi].
5794 vperm2i128 ymm0,ymm7,ymm3,0x02
5795 vperm2i128 ymm7,ymm7,ymm3,0x13
5796 vperm2i128 ymm3,ymm15,ymm11,0x02
5797 vperm2i128 ymm11,ymm15,ymm11,0x13
5798 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi]
5799 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi]
5800 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi]
5801 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi]
5802 vmovdqu YMMWORD[(0+0)+rdi],ymm0
5803 vmovdqu YMMWORD[(32+0)+rdi],ymm3
5804 vmovdqu YMMWORD[(64+0)+rdi],ymm7
5805 vmovdqu YMMWORD[(96+0)+rdi],ymm11
5806
5807 vmovdqa ymm0,YMMWORD[((160+128))+rbp]  ; restore the spilled ymm0
; Second 128 bytes (offsets 128..255), with ymm3 as the temporary.
5808 vperm2i128 ymm3,ymm6,ymm2,0x02
5809 vperm2i128 ymm6,ymm6,ymm2,0x13
5810 vperm2i128 ymm2,ymm14,ymm10,0x02
5811 vperm2i128 ymm10,ymm14,ymm10,0x13
5812 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
5813 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi]
5814 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi]
5815 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi]
5816 vmovdqu YMMWORD[(0+128)+rdi],ymm3
5817 vmovdqu YMMWORD[(32+128)+rdi],ymm2
5818 vmovdqu YMMWORD[(64+128)+rdi],ymm6
5819 vmovdqu YMMWORD[(96+128)+rdi],ymm10
; Third 128 bytes (offsets 256..383).
5820 vperm2i128 ymm3,ymm5,ymm1,0x02
5821 vperm2i128 ymm5,ymm5,ymm1,0x13
5822 vperm2i128 ymm1,ymm13,ymm9,0x02
5823 vperm2i128 ymm9,ymm13,ymm9,0x13
5824 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi]
5825 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi]
5826 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi]
5827 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi]
5828 vmovdqu YMMWORD[(0+256)+rdi],ymm3
5829 vmovdqu YMMWORD[(32+256)+rdi],ymm1
5830 vmovdqu YMMWORD[(64+256)+rdi],ymm5
5831 vmovdqu YMMWORD[(96+256)+rdi],ymm9
; The last register set is not XORed here. It is regrouped into ymm0, ymm4,
; ymm8, ymm12 (via ymm3), the order in which the loop below consumes them.
5832 vperm2i128 ymm3,ymm4,ymm0,0x13
5833 vperm2i128 ymm0,ymm4,ymm0,0x02
5834 vperm2i128 ymm4,ymm12,ymm8,0x02
5835 vperm2i128 ymm12,ymm12,ymm8,0x13
5836 vmovdqa ymm8,ymm3
5837
; 384 bytes have been processed: advance both pointers and shrink rbx.
5838 lea rsi,[384+rsi]
5839 lea rdi,[384+rdi]
5840 sub rbx,12*32
5841$L$open_avx2_tail_128_xor:
; While at least 32 bytes remain in rbx: XOR 32 bytes from [rsi] with ymm0,
; store them at [rdi], then shift the queue ymm4 -> ymm0, ymm8 -> ymm4,
; ymm12 -> ymm8.
5842 cmp rbx,32
5843 jb NEAR $L$open_avx2_tail_32_xor
5844 sub rbx,32
5845 vpxor ymm0,ymm0,YMMWORD[rsi]
5846 vmovdqu YMMWORD[rdi],ymm0
5847 lea rsi,[32+rsi]
5848 lea rdi,[32+rdi]
5849 vmovdqa ymm0,ymm4
5850 vmovdqa ymm4,ymm8
5851 vmovdqa ymm8,ymm12
5852 jmp NEAR $L$open_avx2_tail_128_xor
5853$L$open_avx2_tail_32_xor:
; Fewer than 32 bytes remain. xmm1 receives the low half of ymm0 before the
; branch; vmovdqa leaves the flags alone, so jb still tests the cmp.
5854 cmp rbx,16
5855 vmovdqa xmm1,xmm0
5856 jb NEAR $L$open_avx2_exit
5857 sub rbx,16
5858
; At least 16 bytes: XOR and store one 16-byte block, then move the high
; half of ymm0 into xmm1 (0x11 copies the high lane into both lanes).
5859 vpxor xmm1,xmm0,XMMWORD[rsi]
5860 vmovdqu XMMWORD[rdi],xmm1
5861 lea rsi,[16+rsi]
5862 lea rdi,[16+rdi]
5863 vperm2i128 ymm0,ymm0,ymm0,0x11
5864 vmovdqa xmm1,xmm0
5865$L$open_avx2_exit:
; Clear the upper YMM halves and continue at $L$open_sse_tail_16, which is
; defined outside this chunk.
5866 vzeroupper
5867 jmp NEAR $L$open_sse_tail_16
5868
5869$L$open_avx2_192:
; Two-register-set path. Set 1 (ymm1, ymm5, ymm9, ymm13) is a copy of set 0
; (ymm0, ymm4, ymm8, ymm12) except that ymm13 = ymm12 + $L$avx2_inc (2 in
; the first dword of each 128-bit lane). ymm2, ymm6, ymm10, ymm11 and ymm15
; keep copies of the starting vectors; they are added back after the loop.
5870 vmovdqa ymm1,ymm0
5871 vmovdqa ymm2,ymm0
5872 vmovdqa ymm5,ymm4
5873 vmovdqa ymm6,ymm4
5874 vmovdqa ymm9,ymm8
5875 vmovdqa ymm10,ymm8
5876 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc]
5877 vmovdqa ymm11,ymm12
5878 vmovdqa ymm15,ymm13
5879 mov r10,10  ; loop counter: 10 iterations of the rounds loop
5880$L$open_avx2_192_rounds:
; First step on set 0: add, xor, rotate left 16 (via the $L$rol16 shuffle),
; add, xor; the rotate-left-12 of ymm4 starts on the last line here.
5881 vpaddd ymm0,ymm0,ymm4
5882 vpxor ymm12,ymm12,ymm0
5883 vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
5884 vpaddd ymm8,ymm8,ymm12
5885 vpxor ymm4,ymm4,ymm8
5886 vpsrld ymm3,ymm4,20
; Interior of the $L$open_avx2_192_rounds loop body. ymm3 is the scratch
; register for every rotate: (x >> 20) ^ (x << 12) is a rotate left by 12,
; (x << 7) ^ (x >> 25) a rotate left by 7. Rotates by 16 and 8 use vpshufb
; with the $L$rol16 / $L$rol8 masks.
; Set 0 (ymm0, ymm4, ymm8, ymm12): finish the rotate by 12 begun on the
; preceding line, then the second half of the step.
5887 vpslld ymm4,ymm4,12
5888 vpxor ymm4,ymm4,ymm3
5889 vpaddd ymm0,ymm0,ymm4
5890 vpxor ymm12,ymm12,ymm0
5891 vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
5892 vpaddd ymm8,ymm8,ymm12
5893 vpxor ymm4,ymm4,ymm8
5894 vpslld ymm3,ymm4,7
5895 vpsrld ymm4,ymm4,25
5896 vpxor ymm4,ymm4,ymm3
; Rotate the 128-bit lanes of ymm12, ymm8, ymm4 by 12, 8 and 4 bytes.
5897 vpalignr ymm12,ymm12,ymm12,12
5898 vpalignr ymm8,ymm8,ymm8,8
5899 vpalignr ymm4,ymm4,ymm4,4
; Same sequence on set 1 (ymm1, ymm5, ymm9, ymm13).
5900 vpaddd ymm1,ymm1,ymm5
5901 vpxor ymm13,ymm13,ymm1
5902 vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
5903 vpaddd ymm9,ymm9,ymm13
5904 vpxor ymm5,ymm5,ymm9
5905 vpsrld ymm3,ymm5,20
5906 vpslld ymm5,ymm5,12
5907 vpxor ymm5,ymm5,ymm3
5908 vpaddd ymm1,ymm1,ymm5
5909 vpxor ymm13,ymm13,ymm1
5910 vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
5911 vpaddd ymm9,ymm9,ymm13
5912 vpxor ymm5,ymm5,ymm9
5913 vpslld ymm3,ymm5,7
5914 vpsrld ymm5,ymm5,25
5915 vpxor ymm5,ymm5,ymm3
5916 vpalignr ymm13,ymm13,ymm13,12
5917 vpalignr ymm9,ymm9,ymm9,8
5918 vpalignr ymm5,ymm5,ymm5,4
; Second step on set 0. It ends with the lane rotations reversed
; (4, 8, 12 bytes), which undoes the 12/8/4 rotation applied above.
5919 vpaddd ymm0,ymm0,ymm4
5920 vpxor ymm12,ymm12,ymm0
5921 vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
5922 vpaddd ymm8,ymm8,ymm12
5923 vpxor ymm4,ymm4,ymm8
5924 vpsrld ymm3,ymm4,20
5925 vpslld ymm4,ymm4,12
5926 vpxor ymm4,ymm4,ymm3
5927 vpaddd ymm0,ymm0,ymm4
5928 vpxor ymm12,ymm12,ymm0
5929 vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
5930 vpaddd ymm8,ymm8,ymm12
5931 vpxor ymm4,ymm4,ymm8
5932 vpslld ymm3,ymm4,7
5933 vpsrld ymm4,ymm4,25
5934 vpxor ymm4,ymm4,ymm3
5935 vpalignr ymm12,ymm12,ymm12,4
5936 vpalignr ymm8,ymm8,ymm8,8
5937 vpalignr ymm4,ymm4,ymm4,12
; Second step on set 1.
5938 vpaddd ymm1,ymm1,ymm5
5939 vpxor ymm13,ymm13,ymm1
5940 vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
5941 vpaddd ymm9,ymm9,ymm13
5942 vpxor ymm5,ymm5,ymm9
5943 vpsrld ymm3,ymm5,20
5944 vpslld ymm5,ymm5,12
5945 vpxor ymm5,ymm5,ymm3
5946 vpaddd ymm1,ymm1,ymm5
5947 vpxor ymm13,ymm13,ymm1
5948 vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
5949 vpaddd ymm9,ymm9,ymm13
5950 vpxor ymm5,ymm5,ymm9
5951 vpslld ymm3,ymm5,7
5952 vpsrld ymm5,ymm5,25
5953 vpxor ymm5,ymm5,ymm3
5954 vpalignr ymm13,ymm13,ymm13,4
5955 vpalignr ymm9,ymm9,ymm9,8
5956
vpalignr ymm5,ymm5,ymm5,12  ; last lane rotation of the $L$open_avx2_192_rounds loop body
5957
; r10 was set to 10 before the loop (5879); repeat until it reaches zero.
5958 dec r10
5959 jne NEAR $L$open_avx2_192_rounds
; Add back the starting vectors copied before the loop (ymm2, ymm6, ymm10
; are shared by both sets; ymm11 and ymm15 are the per-set copies).
5960 vpaddd ymm0,ymm0,ymm2
5961 vpaddd ymm1,ymm1,ymm2
5962 vpaddd ymm4,ymm4,ymm6
5963 vpaddd ymm5,ymm5,ymm6
5964 vpaddd ymm8,ymm8,ymm10
5965 vpaddd ymm9,ymm9,ymm10
5966 vpaddd ymm12,ymm12,ymm11
5967 vpaddd ymm13,ymm13,ymm15
; ymm3 = low 128-bit halves of ymm0 and ymm4. It is masked with $L$clamp
; (whose upper 128 bits are all ones, so only the lower half is altered)
; and stored at [rbp+160], the slot the hash code reads its multiplier from.
5968 vperm2i128 ymm3,ymm4,ymm0,0x02
5969
5970 vpand ymm3,ymm3,YMMWORD[$L$clamp]
5971 vmovdqa YMMWORD[(160+0)+rbp],ymm3
5972
; Regroup the remaining halves into ymm0, ymm4, ymm8, ymm12, ymm1, ymm5,
; the order in which the loop below consumes them.
5973 vperm2i128 ymm0,ymm4,ymm0,0x13
5974 vperm2i128 ymm4,ymm12,ymm8,0x13
5975 vperm2i128 ymm8,ymm5,ymm1,0x02
5976 vperm2i128 ymm12,ymm13,ymm9,0x02
5977 vperm2i128 ymm1,ymm5,ymm1,0x13
5978 vperm2i128 ymm5,ymm13,ymm9,0x13
5979$L$open_avx2_short:
; Shared by the 192 and 320 paths. poly_hash_ad_internal (defined near the
; top of the file) clears r10:r11:r12 and hashes r8 bytes starting at rcx.
5980 mov r8,r8  ; register moved onto itself; r8 is unchanged
5981 call poly_hash_ad_internal
5982$L$open_avx2_short_hash_and_xor_loop:
; Per iteration, while rbx >= 32: absorb the 32 input bytes at [rsi] into
; the accumulator as two 16-byte blocks, then XOR them with ymm0 and store
; the result at [rdi].
5983 cmp rbx,32
5984 jb NEAR $L$open_avx2_short_tail_32
5985 sub rbx,32
; First block: accumulator += bytes 0..15, plus 1 at bit 128.
5986 add r10,QWORD[((0+0))+rsi]
5987 adc r11,QWORD[((8+0))+rsi]
5988 adc r12,1
; Multiply by the qwords at [rbp+160] and [rbp+168] using mul (operand in
; rax, result in rdx:rax). The product is gathered in r9:r15:r14:r13.
5989 mov rax,QWORD[((0+160+0))+rbp]
5990 mov r15,rax
5991 mul r10
5992 mov r13,rax
5993 mov r14,rdx
5994 mov rax,QWORD[((0+160+0))+rbp]
5995 mul r11
5996 imul r15,r12
5997 add r14,rax
5998 adc r15,rdx
5999 mov rax,QWORD[((8+160+0))+rbp]
6000 mov r9,rax
6001 mul r10
6002 add r14,rax
6003 adc rdx,0
6004 mov r10,rdx
6005 mov rax,QWORD[((8+160+0))+rbp]
6006 mul r11
6007 add r15,rax
6008 adc rdx,0
6009 imul r9,r12
6010 add r15,r10
6011 adc r9,rdx
; Reduction: keep the low 130 bits and add 5 * (product >> 130), formed as
; (hi & -4) + (hi >> 2).
6012 mov r10,r13
6013 mov r11,r14
6014 mov r12,r15
6015 and r12,3
6016 mov r13,r15
6017 and r13,-4
6018 mov r14,r9
6019 shrd r15,r9,2
6020 shr r9,2
6021 add r15,r13
6022 adc r9,r14
6023 add r10,r15
6024 adc r11,r9
6025 adc r12,0
; Second block: bytes 16..31, same multiply and reduction.
6026 add r10,QWORD[((0+16))+rsi]
6027 adc r11,QWORD[((8+16))+rsi]
6028 adc r12,1
6029 mov rax,QWORD[((0+160+0))+rbp]
6030 mov r15,rax
6031 mul r10
6032 mov r13,rax
6033 mov r14,rdx
6034 mov rax,QWORD[((0+160+0))+rbp]
6035 mul r11
6036 imul r15,r12
6037 add r14,rax
6038 adc r15,rdx
6039 mov rax,QWORD[((8+160+0))+rbp]
6040 mov r9,rax
6041 mul r10
6042 add r14,rax
6043 adc rdx,0
6044 mov r10,rdx
6045 mov rax,QWORD[((8+160+0))+rbp]
6046 mul r11
6047 add r15,rax
6048 adc rdx,0
6049 imul r9,r12
6050 add r15,r10
6051 adc r9,rdx
6052 mov r10,r13
6053 mov r11,r14
6054 mov r12,r15
6055 and r12,3
6056 mov r13,r15
6057 and r13,-4
6058 mov r14,r9
6059 shrd r15,r9,2
6060 shr r9,2
6061 add r15,r13
6062 adc r9,r14
6063 add r10,r15
6064 adc r11,r9
6065 adc r12,0
6066
6067
; XOR the 32 bytes just hashed with ymm0, store them, advance both pointers.
6068 vpxor ymm0,ymm0,YMMWORD[rsi]
6069 vmovdqu YMMWORD[rdi],ymm0
6070 lea rsi,[32+rsi]
6071 lea rdi,[32+rdi]
6072
; Shift the register queue by one so the next 32 bytes of XOR material
; arrive in ymm0: ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <- ymm9
; <- ymm13 <- ymm2 <- ymm6.
6073 vmovdqa ymm0,ymm4
6074 vmovdqa ymm4,ymm8
6075 vmovdqa ymm8,ymm12
6076 vmovdqa ymm12,ymm1
6077 vmovdqa ymm1,ymm5
6078 vmovdqa ymm5,ymm9
6079 vmovdqa ymm9,ymm13
6080 vmovdqa ymm13,ymm2
6081 vmovdqa ymm2,ymm6
6082 jmp NEAR $L$open_avx2_short_hash_and_xor_loop
6083$L$open_avx2_short_tail_32:
; Fewer than 32 bytes remain. xmm1 receives the low half of ymm0 before the
; branch; vmovdqa leaves the flags alone, so jb still tests the cmp.
6084 cmp rbx,16
6085 vmovdqa xmm1,xmm0
6086 jb NEAR $L$open_avx2_short_tail_32_exit
6087 sub rbx,16
; At least 16 bytes: absorb one more 16-byte block from [rsi].
6088 add r10,QWORD[((0+0))+rsi]
6089 adc r11,QWORD[((8+0))+rsi]
6090 adc r12,1
6091 mov rax,QWORD[((0+160+0))+rbp]
6092 mov r15,rax
6093 mul r10
6094 mov r13,rax
6095 mov r14,rdx
6096 mov rax,QWORD[((0+160+0))+rbp]
6097 mul r11
6098 imul r15,r12
6099 add r14,rax
6100 adc r15,rdx
6101 mov rax,QWORD[((8+160+0))+rbp]
6102 mov r9,rax
6103 mul r10
6104 add r14,rax
6105 adc rdx,0
6106 mov r10,rdx
6107 mov rax,QWORD[((8+160+0))+rbp]
6108 mul r11
6109 add r15,rax
6110 adc rdx,0
6111 imul r9,r12
6112 add r15,r10
6113 adc r9,rdx
6114 mov r10,r13
6115 mov r11,r14
6116 mov r12,r15
6117 and r12,3
6118 mov r13,r15
6119 and r13,-4
6120 mov r14,r9
6121 shrd r15,r9,2
6122 shr r9,2
6123 add r15,r13
6124 adc r9,r14
6125 add r10,r15
6126 adc r11,r9
6127 adc r12,0
6128
; XOR and store that block using the low half of ymm0, then hand the high
; half of ymm0 on in xmm1.
6129 vpxor xmm3,xmm0,XMMWORD[rsi]
6130 vmovdqu XMMWORD[rdi],xmm3
6131 lea rsi,[16+rsi]
6132 lea rdi,[16+rdi]
6133 vextracti128 xmm1,ymm0,1
6134$L$open_avx2_short_tail_32_exit:
; Clear the upper YMM halves and continue at $L$open_sse_tail_16, which is
; defined outside this chunk.
6135 vzeroupper
6136 jmp NEAR $L$open_sse_tail_16
6137
6138$L$open_avx2_320:
; Three-register-set path: start by copying set 0 (ymm0, ymm4, ymm8) into
; sets 1 and 2. The setup continues on the following lines.
6139 vmovdqa ymm1,ymm0
6140 vmovdqa ymm2,ymm0
6141 vmovdqa ymm5,ymm4
6142 vmovdqa ymm6,ymm4
6143 vmovdqa ymm9,ymm8
; Remainder of the $L$open_avx2_320 setup. ymm13 and ymm14 are ymm12 with
; $L$avx2_inc (2 in the first dword of each 128-bit lane) added once and
; twice. ymm7 and ymm11 keep copies of ymm4 and ymm8, and the three
; ymm12..ymm14 starting values are saved in the frame; all are added back
; after the loop.
6144 vmovdqa ymm10,ymm8
6145 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc]
6146 vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc]
6147 vmovdqa ymm7,ymm4
6148 vmovdqa ymm11,ymm8
6149 vmovdqa YMMWORD[(160+160)+rbp],ymm12
6150 vmovdqa YMMWORD[(160+192)+rbp],ymm13
6151 vmovdqa YMMWORD[(160+224)+rbp],ymm14
6152 mov r10,10  ; loop counter: 10 iterations of the rounds loop
6153$L$open_avx2_320_rounds:
; Each iteration runs two steps over three register sets. ymm3 is the
; scratch register for every rotate: (x >> 20) ^ (x << 12) is a rotate left
; by 12, (x << 7) ^ (x >> 25) a rotate left by 7. Rotates by 16 and 8 use
; vpshufb with the $L$rol16 / $L$rol8 masks.
; First step, set 0 (ymm0, ymm4, ymm8, ymm12).
6154 vpaddd ymm0,ymm0,ymm4
6155 vpxor ymm12,ymm12,ymm0
6156 vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
6157 vpaddd ymm8,ymm8,ymm12
6158 vpxor ymm4,ymm4,ymm8
6159 vpsrld ymm3,ymm4,20
6160 vpslld ymm4,ymm4,12
6161 vpxor ymm4,ymm4,ymm3
6162 vpaddd ymm0,ymm0,ymm4
6163 vpxor ymm12,ymm12,ymm0
6164 vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
6165 vpaddd ymm8,ymm8,ymm12
6166 vpxor ymm4,ymm4,ymm8
6167 vpslld ymm3,ymm4,7
6168 vpsrld ymm4,ymm4,25
6169 vpxor ymm4,ymm4,ymm3
; Rotate the 128-bit lanes of ymm12, ymm8, ymm4 by 12, 8 and 4 bytes.
6170 vpalignr ymm12,ymm12,ymm12,12
6171 vpalignr ymm8,ymm8,ymm8,8
6172 vpalignr ymm4,ymm4,ymm4,4
; First step, set 1 (ymm1, ymm5, ymm9, ymm13).
6173 vpaddd ymm1,ymm1,ymm5
6174 vpxor ymm13,ymm13,ymm1
6175 vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
6176 vpaddd ymm9,ymm9,ymm13
6177 vpxor ymm5,ymm5,ymm9
6178 vpsrld ymm3,ymm5,20
6179 vpslld ymm5,ymm5,12
6180 vpxor ymm5,ymm5,ymm3
6181 vpaddd ymm1,ymm1,ymm5
6182 vpxor ymm13,ymm13,ymm1
6183 vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
6184 vpaddd ymm9,ymm9,ymm13
6185 vpxor ymm5,ymm5,ymm9
6186 vpslld ymm3,ymm5,7
6187 vpsrld ymm5,ymm5,25
6188 vpxor ymm5,ymm5,ymm3
6189 vpalignr ymm13,ymm13,ymm13,12
6190 vpalignr ymm9,ymm9,ymm9,8
6191 vpalignr ymm5,ymm5,ymm5,4
; First step, set 2 (ymm2, ymm6, ymm10, ymm14).
6192 vpaddd ymm2,ymm2,ymm6
6193 vpxor ymm14,ymm14,ymm2
6194 vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
6195 vpaddd ymm10,ymm10,ymm14
6196 vpxor ymm6,ymm6,ymm10
6197 vpsrld ymm3,ymm6,20
6198 vpslld ymm6,ymm6,12
6199 vpxor ymm6,ymm6,ymm3
6200 vpaddd ymm2,ymm2,ymm6
6201 vpxor ymm14,ymm14,ymm2
6202 vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
6203 vpaddd ymm10,ymm10,ymm14
6204 vpxor ymm6,ymm6,ymm10
6205 vpslld ymm3,ymm6,7
6206 vpsrld ymm6,ymm6,25
6207 vpxor ymm6,ymm6,ymm3
6208 vpalignr ymm14,ymm14,ymm14,12
6209 vpalignr ymm10,ymm10,ymm10,8
6210 vpalignr ymm6,ymm6,ymm6,4
; Second step, set 0. It ends with the lane rotations reversed
; (4, 8, 12 bytes), which undoes the 12/8/4 rotation applied above.
6211 vpaddd ymm0,ymm0,ymm4
6212 vpxor ymm12,ymm12,ymm0
6213 vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
6214 vpaddd ymm8,ymm8,ymm12
6215 vpxor ymm4,ymm4,ymm8
6216 vpsrld ymm3,ymm4,20
6217 vpslld ymm4,ymm4,12
6218 vpxor ymm4,ymm4,ymm3
6219 vpaddd ymm0,ymm0,ymm4
6220 vpxor ymm12,ymm12,ymm0
6221 vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
6222 vpaddd ymm8,ymm8,ymm12
6223 vpxor ymm4,ymm4,ymm8
6224 vpslld ymm3,ymm4,7
6225 vpsrld ymm4,ymm4,25
6226 vpxor ymm4,ymm4,ymm3
6227 vpalignr ymm12,ymm12,ymm12,4
6228 vpalignr ymm8,ymm8,ymm8,8
6229 vpalignr ymm4,ymm4,ymm4,12
; Second step, set 1.
6230 vpaddd ymm1,ymm1,ymm5
6231 vpxor ymm13,ymm13,ymm1
6232 vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
6233 vpaddd ymm9,ymm9,ymm13
6234 vpxor ymm5,ymm5,ymm9
6235 vpsrld ymm3,ymm5,20
6236 vpslld ymm5,ymm5,12
6237 vpxor ymm5,ymm5,ymm3
6238 vpaddd ymm1,ymm1,ymm5
6239 vpxor ymm13,ymm13,ymm1
6240 vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
6241 vpaddd ymm9,ymm9,ymm13
6242 vpxor ymm5,ymm5,ymm9
6243 vpslld ymm3,ymm5,7
6244 vpsrld ymm5,ymm5,25
6245 vpxor ymm5,ymm5,ymm3
6246 vpalignr ymm13,ymm13,ymm13,4
6247 vpalignr ymm9,ymm9,ymm9,8
6248 vpalignr ymm5,ymm5,ymm5,12
; Second step, set 2.
6249 vpaddd ymm2,ymm2,ymm6
6250 vpxor ymm14,ymm14,ymm2
6251 vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
6252 vpaddd ymm10,ymm10,ymm14
6253 vpxor ymm6,ymm6,ymm10
6254 vpsrld ymm3,ymm6,20
6255 vpslld ymm6,ymm6,12
6256 vpxor ymm6,ymm6,ymm3
6257 vpaddd ymm2,ymm2,ymm6
6258 vpxor ymm14,ymm14,ymm2
6259 vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
6260 vpaddd ymm10,ymm10,ymm14
6261 vpxor ymm6,ymm6,ymm10
6262 vpslld ymm3,ymm6,7
6263 vpsrld ymm6,ymm6,25
6264 vpxor ymm6,ymm6,ymm3
6265 vpalignr ymm14,ymm14,ymm14,4
6266 vpalignr ymm10,ymm10,ymm10,8
6267 vpalignr ymm6,ymm6,ymm6,12
6268
6269 dec r10
6270 jne NEAR $L$open_avx2_320_rounds
; After the loop: add back the starting values. The constants row comes
; from $L$chacha20_consts; ymm7 and ymm11 hold the copies made before the
; loop. The remaining additions follow on the next lines.
6271 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
6272 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
6273 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
6274 vpaddd ymm4,ymm4,ymm7
6275 vpaddd ymm5,ymm5,ymm7
6276 vpaddd ymm6,ymm6,ymm7
6277 vpaddd ymm8,ymm8,ymm11
6278 vpaddd ymm9,ymm9,ymm11
; End of the $L$open_avx2_320 path: finish adding back the starting values
; (ymm11 is the in-register copy; the three ymm12..ymm14 starting values
; were saved in the frame before the rounds loop).
6279 vpaddd ymm10,ymm10,ymm11
6280 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
6281 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
6282 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
; ymm3 = low 128-bit halves of ymm0 and ymm4. It is masked with $L$clamp
; (whose upper 128 bits are all ones, so only the lower half is altered)
; and stored at [rbp+160], the slot the hash code reads its multiplier from.
6283 vperm2i128 ymm3,ymm4,ymm0,0x02
6284
6285 vpand ymm3,ymm3,YMMWORD[$L$clamp]
6286 vmovdqa YMMWORD[(160+0)+rbp],ymm3
6287
; Regroup the remaining halves (0x02 pairs low halves, 0x13 pairs high
; halves) into the register queue consumed at $L$open_avx2_short:
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6.
6288 vperm2i128 ymm0,ymm4,ymm0,0x13
6289 vperm2i128 ymm4,ymm12,ymm8,0x13
6290 vperm2i128 ymm8,ymm5,ymm1,0x02
6291 vperm2i128 ymm12,ymm13,ymm9,0x02
6292 vperm2i128 ymm1,ymm5,ymm1,0x13
6293 vperm2i128 ymm5,ymm13,ymm9,0x13
6294 vperm2i128 ymm9,ymm6,ymm2,0x02
6295 vperm2i128 ymm13,ymm14,ymm10,0x02
6296 vperm2i128 ymm2,ymm6,ymm2,0x13
6297 vperm2i128 ymm6,ymm14,ymm10,0x13
6298 jmp NEAR $L$open_avx2_short
6299
6300
6301
6302
6303
6304ALIGN 64
6305chacha20_poly1305_seal_avx2:
; Only the opening of this procedure is visible here; it continues well
; past this span. As far as these lines show, it expects r9 to point at
; 48 bytes that are broadcast into ymm4, ymm8 and ymm12, rbx to hold a byte
; count that selects the code path, and rbp to address a frame with 32-byte
; aligned slots at rbp+160 and above (vmovdqa is used on them).
; NOTE(review): the caller and the frame setup are outside this chunk -
; confirm these expectations there.
6306
6307
6308
6309
6310
6311
6312
6313
6314
6315
6316
6317
6318 vzeroupper
6319 vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
6320 vbroadcasti128 ymm4,XMMWORD[r9]
6321 vbroadcasti128 ymm8,XMMWORD[16+r9]
6322 vbroadcasti128 ymm12,XMMWORD[32+r9]
6323 vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init]
; Dispatch on rbx (unsigned): at most 192 bytes and at most 320 bytes each
; have their own path; larger values fall through.
6324 cmp rbx,6*32
6325 jbe NEAR $L$seal_avx2_192
6326 cmp rbx,10*32
6327 jbe NEAR $L$seal_avx2_320
; Large path: replicate ymm0, ymm4, ymm8 into four register sets and save
; ymm4 and ymm8 in the frame at [rbp+160+64] and [rbp+160+96].
6328 vmovdqa ymm1,ymm0
6329 vmovdqa ymm2,ymm0
6330 vmovdqa ymm3,ymm0
6331 vmovdqa ymm5,ymm4
6332 vmovdqa ymm6,ymm4
6333 vmovdqa ymm7,ymm4
6334 vmovdqa YMMWORD[(160+64)+rbp],ymm4
6335 vmovdqa ymm9,ymm8
6336 vmovdqa ymm10,ymm8
6337 vmovdqa ymm11,ymm8
6338 vmovdqa YMMWORD[(160+96)+rbp],ymm8
; ymm15, ymm14, ymm13, ymm12 receive successive values, each $L$avx2_inc
; above the previous one, and all four are saved in the frame.
6339 vmovdqa ymm15,ymm12
6340 vpaddd ymm14,ymm15,YMMWORD[$L$avx2_inc]
6341 vpaddd ymm13,ymm14,YMMWORD[$L$avx2_inc]
6342 vpaddd ymm12,ymm13,YMMWORD[$L$avx2_inc]
6343 vmovdqa YMMWORD[(160+160)+rbp],ymm12
6344 vmovdqa YMMWORD[(160+192)+rbp],ymm13
6345 vmovdqa YMMWORD[(160+224)+rbp],ymm14
6346 vmovdqa YMMWORD[(160+256)+rbp],ymm15
6347 mov r10,10  ; loop counter for $L$seal_avx2_init_rounds
6348$L$seal_avx2_init_rounds:
; ymm8 is borrowed for the $L$rol16 shuffle mask, so its vector is parked
; in the frame slot [rbp+160+128] first.
6349 vmovdqa YMMWORD[(160+128)+rbp],ymm8
6350 vmovdqa ymm8,YMMWORD[$L$rol16]
6351 vpaddd ymm3,ymm3,ymm7
6352 vpaddd ymm2,ymm2,ymm6
6353 vpaddd ymm1,ymm1,ymm5
6354 vpaddd
ymm0,ymm0,ymm4 6355 vpxor ymm15,ymm15,ymm3 6356 vpxor ymm14,ymm14,ymm2 6357 vpxor ymm13,ymm13,ymm1 6358 vpxor ymm12,ymm12,ymm0 6359 vpshufb ymm15,ymm15,ymm8 6360 vpshufb ymm14,ymm14,ymm8 6361 vpshufb ymm13,ymm13,ymm8 6362 vpshufb ymm12,ymm12,ymm8 6363 vpaddd ymm11,ymm11,ymm15 6364 vpaddd ymm10,ymm10,ymm14 6365 vpaddd ymm9,ymm9,ymm13 6366 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6367 vpxor ymm7,ymm7,ymm11 6368 vpxor ymm6,ymm6,ymm10 6369 vpxor ymm5,ymm5,ymm9 6370 vpxor ymm4,ymm4,ymm8 6371 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6372 vpsrld ymm8,ymm7,20 6373 vpslld ymm7,ymm7,32-20 6374 vpxor ymm7,ymm7,ymm8 6375 vpsrld ymm8,ymm6,20 6376 vpslld ymm6,ymm6,32-20 6377 vpxor ymm6,ymm6,ymm8 6378 vpsrld ymm8,ymm5,20 6379 vpslld ymm5,ymm5,32-20 6380 vpxor ymm5,ymm5,ymm8 6381 vpsrld ymm8,ymm4,20 6382 vpslld ymm4,ymm4,32-20 6383 vpxor ymm4,ymm4,ymm8 6384 vmovdqa ymm8,YMMWORD[$L$rol8] 6385 vpaddd ymm3,ymm3,ymm7 6386 vpaddd ymm2,ymm2,ymm6 6387 vpaddd ymm1,ymm1,ymm5 6388 vpaddd ymm0,ymm0,ymm4 6389 vpxor ymm15,ymm15,ymm3 6390 vpxor ymm14,ymm14,ymm2 6391 vpxor ymm13,ymm13,ymm1 6392 vpxor ymm12,ymm12,ymm0 6393 vpshufb ymm15,ymm15,ymm8 6394 vpshufb ymm14,ymm14,ymm8 6395 vpshufb ymm13,ymm13,ymm8 6396 vpshufb ymm12,ymm12,ymm8 6397 vpaddd ymm11,ymm11,ymm15 6398 vpaddd ymm10,ymm10,ymm14 6399 vpaddd ymm9,ymm9,ymm13 6400 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6401 vpxor ymm7,ymm7,ymm11 6402 vpxor ymm6,ymm6,ymm10 6403 vpxor ymm5,ymm5,ymm9 6404 vpxor ymm4,ymm4,ymm8 6405 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6406 vpsrld ymm8,ymm7,25 6407 vpslld ymm7,ymm7,32-25 6408 vpxor ymm7,ymm7,ymm8 6409 vpsrld ymm8,ymm6,25 6410 vpslld ymm6,ymm6,32-25 6411 vpxor ymm6,ymm6,ymm8 6412 vpsrld ymm8,ymm5,25 6413 vpslld ymm5,ymm5,32-25 6414 vpxor ymm5,ymm5,ymm8 6415 vpsrld ymm8,ymm4,25 6416 vpslld ymm4,ymm4,32-25 6417 vpxor ymm4,ymm4,ymm8 6418 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6419 vpalignr ymm7,ymm7,ymm7,4 6420 vpalignr ymm11,ymm11,ymm11,8 6421 vpalignr ymm15,ymm15,ymm15,12 6422 vpalignr ymm6,ymm6,ymm6,4 6423 
vpalignr ymm10,ymm10,ymm10,8 6424 vpalignr ymm14,ymm14,ymm14,12 6425 vpalignr ymm5,ymm5,ymm5,4 6426 vpalignr ymm9,ymm9,ymm9,8 6427 vpalignr ymm13,ymm13,ymm13,12 6428 vpalignr ymm4,ymm4,ymm4,4 6429 vpalignr ymm8,ymm8,ymm8,8 6430 vpalignr ymm12,ymm12,ymm12,12 6431 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6432 vmovdqa ymm8,YMMWORD[$L$rol16] 6433 vpaddd ymm3,ymm3,ymm7 6434 vpaddd ymm2,ymm2,ymm6 6435 vpaddd ymm1,ymm1,ymm5 6436 vpaddd ymm0,ymm0,ymm4 6437 vpxor ymm15,ymm15,ymm3 6438 vpxor ymm14,ymm14,ymm2 6439 vpxor ymm13,ymm13,ymm1 6440 vpxor ymm12,ymm12,ymm0 6441 vpshufb ymm15,ymm15,ymm8 6442 vpshufb ymm14,ymm14,ymm8 6443 vpshufb ymm13,ymm13,ymm8 6444 vpshufb ymm12,ymm12,ymm8 6445 vpaddd ymm11,ymm11,ymm15 6446 vpaddd ymm10,ymm10,ymm14 6447 vpaddd ymm9,ymm9,ymm13 6448 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6449 vpxor ymm7,ymm7,ymm11 6450 vpxor ymm6,ymm6,ymm10 6451 vpxor ymm5,ymm5,ymm9 6452 vpxor ymm4,ymm4,ymm8 6453 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6454 vpsrld ymm8,ymm7,20 6455 vpslld ymm7,ymm7,32-20 6456 vpxor ymm7,ymm7,ymm8 6457 vpsrld ymm8,ymm6,20 6458 vpslld ymm6,ymm6,32-20 6459 vpxor ymm6,ymm6,ymm8 6460 vpsrld ymm8,ymm5,20 6461 vpslld ymm5,ymm5,32-20 6462 vpxor ymm5,ymm5,ymm8 6463 vpsrld ymm8,ymm4,20 6464 vpslld ymm4,ymm4,32-20 6465 vpxor ymm4,ymm4,ymm8 6466 vmovdqa ymm8,YMMWORD[$L$rol8] 6467 vpaddd ymm3,ymm3,ymm7 6468 vpaddd ymm2,ymm2,ymm6 6469 vpaddd ymm1,ymm1,ymm5 6470 vpaddd ymm0,ymm0,ymm4 6471 vpxor ymm15,ymm15,ymm3 6472 vpxor ymm14,ymm14,ymm2 6473 vpxor ymm13,ymm13,ymm1 6474 vpxor ymm12,ymm12,ymm0 6475 vpshufb ymm15,ymm15,ymm8 6476 vpshufb ymm14,ymm14,ymm8 6477 vpshufb ymm13,ymm13,ymm8 6478 vpshufb ymm12,ymm12,ymm8 6479 vpaddd ymm11,ymm11,ymm15 6480 vpaddd ymm10,ymm10,ymm14 6481 vpaddd ymm9,ymm9,ymm13 6482 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6483 vpxor ymm7,ymm7,ymm11 6484 vpxor ymm6,ymm6,ymm10 6485 vpxor ymm5,ymm5,ymm9 6486 vpxor ymm4,ymm4,ymm8 6487 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6488 vpsrld ymm8,ymm7,25 6489 vpslld ymm7,ymm7,32-25 6490 vpxor 
ymm7,ymm7,ymm8 6491 vpsrld ymm8,ymm6,25 6492 vpslld ymm6,ymm6,32-25 6493 vpxor ymm6,ymm6,ymm8 6494 vpsrld ymm8,ymm5,25 6495 vpslld ymm5,ymm5,32-25 6496 vpxor ymm5,ymm5,ymm8 6497 vpsrld ymm8,ymm4,25 6498 vpslld ymm4,ymm4,32-25 6499 vpxor ymm4,ymm4,ymm8 6500 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6501 vpalignr ymm7,ymm7,ymm7,12 6502 vpalignr ymm11,ymm11,ymm11,8 6503 vpalignr ymm15,ymm15,ymm15,4 6504 vpalignr ymm6,ymm6,ymm6,12 6505 vpalignr ymm10,ymm10,ymm10,8 6506 vpalignr ymm14,ymm14,ymm14,4 6507 vpalignr ymm5,ymm5,ymm5,12 6508 vpalignr ymm9,ymm9,ymm9,8 6509 vpalignr ymm13,ymm13,ymm13,4 6510 vpalignr ymm4,ymm4,ymm4,12 6511 vpalignr ymm8,ymm8,ymm8,8 6512 vpalignr ymm12,ymm12,ymm12,4 6513 6514 dec r10 6515 jnz NEAR $L$seal_avx2_init_rounds 6516 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 6517 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 6518 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 6519 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 6520 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 6521 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 6522 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 6523 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 6524 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 6525 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 6526 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 6527 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 6528 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 6529 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 6530 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 6531 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 6532 6533 vperm2i128 ymm11,ymm15,ymm11,0x13 6534 vperm2i128 ymm15,ymm7,ymm3,0x02 6535 vperm2i128 ymm3,ymm7,ymm3,0x13 6536 vpand ymm15,ymm15,YMMWORD[$L$clamp] 6537 vmovdqa YMMWORD[(160+0)+rbp],ymm15 6538 mov r8,r8 6539 call poly_hash_ad_internal 6540 6541 vpxor ymm3,ymm3,YMMWORD[rsi] 6542 vpxor ymm11,ymm11,YMMWORD[32+rsi] 6543 vmovdqu YMMWORD[rdi],ymm3 6544 vmovdqu YMMWORD[32+rdi],ymm11 6545 vperm2i128 ymm15,ymm6,ymm2,0x02 6546 vperm2i128 ymm6,ymm6,ymm2,0x13 6547 vperm2i128 
ymm2,ymm14,ymm10,0x02 6548 vperm2i128 ymm10,ymm14,ymm10,0x13 6549 vpxor ymm15,ymm15,YMMWORD[((0+64))+rsi] 6550 vpxor ymm2,ymm2,YMMWORD[((32+64))+rsi] 6551 vpxor ymm6,ymm6,YMMWORD[((64+64))+rsi] 6552 vpxor ymm10,ymm10,YMMWORD[((96+64))+rsi] 6553 vmovdqu YMMWORD[(0+64)+rdi],ymm15 6554 vmovdqu YMMWORD[(32+64)+rdi],ymm2 6555 vmovdqu YMMWORD[(64+64)+rdi],ymm6 6556 vmovdqu YMMWORD[(96+64)+rdi],ymm10 6557 vperm2i128 ymm15,ymm5,ymm1,0x02 6558 vperm2i128 ymm5,ymm5,ymm1,0x13 6559 vperm2i128 ymm1,ymm13,ymm9,0x02 6560 vperm2i128 ymm9,ymm13,ymm9,0x13 6561 vpxor ymm15,ymm15,YMMWORD[((0+192))+rsi] 6562 vpxor ymm1,ymm1,YMMWORD[((32+192))+rsi] 6563 vpxor ymm5,ymm5,YMMWORD[((64+192))+rsi] 6564 vpxor ymm9,ymm9,YMMWORD[((96+192))+rsi] 6565 vmovdqu YMMWORD[(0+192)+rdi],ymm15 6566 vmovdqu YMMWORD[(32+192)+rdi],ymm1 6567 vmovdqu YMMWORD[(64+192)+rdi],ymm5 6568 vmovdqu YMMWORD[(96+192)+rdi],ymm9 6569 vperm2i128 ymm15,ymm4,ymm0,0x13 6570 vperm2i128 ymm0,ymm4,ymm0,0x02 6571 vperm2i128 ymm4,ymm12,ymm8,0x02 6572 vperm2i128 ymm12,ymm12,ymm8,0x13 6573 vmovdqa ymm8,ymm15 6574 6575 lea rsi,[320+rsi] 6576 sub rbx,10*32 6577 mov rcx,10*32 6578 cmp rbx,4*32 6579 jbe NEAR $L$seal_avx2_short_hash_remainder 6580 vpxor ymm0,ymm0,YMMWORD[rsi] 6581 vpxor ymm4,ymm4,YMMWORD[32+rsi] 6582 vpxor ymm8,ymm8,YMMWORD[64+rsi] 6583 vpxor ymm12,ymm12,YMMWORD[96+rsi] 6584 vmovdqu YMMWORD[320+rdi],ymm0 6585 vmovdqu YMMWORD[352+rdi],ymm4 6586 vmovdqu YMMWORD[384+rdi],ymm8 6587 vmovdqu YMMWORD[416+rdi],ymm12 6588 lea rsi,[128+rsi] 6589 sub rbx,4*32 6590 mov rcx,8 6591 mov r8,2 6592 cmp rbx,4*32 6593 jbe NEAR $L$seal_avx2_tail_128 6594 cmp rbx,8*32 6595 jbe NEAR $L$seal_avx2_tail_256 6596 cmp rbx,12*32 6597 jbe NEAR $L$seal_avx2_tail_384 6598 cmp rbx,16*32 6599 jbe NEAR $L$seal_avx2_tail_512 6600 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6601 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 6602 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 6603 vmovdqa ymm1,ymm0 6604 vmovdqa ymm5,ymm4 6605 vmovdqa ymm9,ymm8 6606 vmovdqa ymm2,ymm0 6607 vmovdqa 
ymm6,ymm4 6608 vmovdqa ymm10,ymm8 6609 vmovdqa ymm3,ymm0 6610 vmovdqa ymm7,ymm4 6611 vmovdqa ymm11,ymm8 6612 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 6613 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 6614 vpaddd ymm14,ymm12,ymm15 6615 vpaddd ymm13,ymm12,ymm14 6616 vpaddd ymm12,ymm12,ymm13 6617 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6618 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6619 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6620 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6621 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6622 vmovdqa ymm8,YMMWORD[$L$rol16] 6623 vpaddd ymm3,ymm3,ymm7 6624 vpaddd ymm2,ymm2,ymm6 6625 vpaddd ymm1,ymm1,ymm5 6626 vpaddd ymm0,ymm0,ymm4 6627 vpxor ymm15,ymm15,ymm3 6628 vpxor ymm14,ymm14,ymm2 6629 vpxor ymm13,ymm13,ymm1 6630 vpxor ymm12,ymm12,ymm0 6631 vpshufb ymm15,ymm15,ymm8 6632 vpshufb ymm14,ymm14,ymm8 6633 vpshufb ymm13,ymm13,ymm8 6634 vpshufb ymm12,ymm12,ymm8 6635 vpaddd ymm11,ymm11,ymm15 6636 vpaddd ymm10,ymm10,ymm14 6637 vpaddd ymm9,ymm9,ymm13 6638 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6639 vpxor ymm7,ymm7,ymm11 6640 vpxor ymm6,ymm6,ymm10 6641 vpxor ymm5,ymm5,ymm9 6642 vpxor ymm4,ymm4,ymm8 6643 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6644 vpsrld ymm8,ymm7,20 6645 vpslld ymm7,ymm7,32-20 6646 vpxor ymm7,ymm7,ymm8 6647 vpsrld ymm8,ymm6,20 6648 vpslld ymm6,ymm6,32-20 6649 vpxor ymm6,ymm6,ymm8 6650 vpsrld ymm8,ymm5,20 6651 vpslld ymm5,ymm5,32-20 6652 vpxor ymm5,ymm5,ymm8 6653 vpsrld ymm8,ymm4,20 6654 vpslld ymm4,ymm4,32-20 6655 vpxor ymm4,ymm4,ymm8 6656 vmovdqa ymm8,YMMWORD[$L$rol8] 6657 vpaddd ymm3,ymm3,ymm7 6658 vpaddd ymm2,ymm2,ymm6 6659 vpaddd ymm1,ymm1,ymm5 6660 vpaddd ymm0,ymm0,ymm4 6661 vpxor ymm15,ymm15,ymm3 6662 vpxor ymm14,ymm14,ymm2 6663 vpxor ymm13,ymm13,ymm1 6664 vpxor ymm12,ymm12,ymm0 6665 vpshufb ymm15,ymm15,ymm8 6666 vpshufb ymm14,ymm14,ymm8 6667 vpshufb ymm13,ymm13,ymm8 6668 vpshufb ymm12,ymm12,ymm8 6669 vpaddd ymm11,ymm11,ymm15 6670 vpaddd ymm10,ymm10,ymm14 6671 vpaddd ymm9,ymm9,ymm13 6672 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6673 vpxor 
ymm7,ymm7,ymm11 6674 vpxor ymm6,ymm6,ymm10 6675 vpxor ymm5,ymm5,ymm9 6676 vpxor ymm4,ymm4,ymm8 6677 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6678 vpsrld ymm8,ymm7,25 6679 vpslld ymm7,ymm7,32-25 6680 vpxor ymm7,ymm7,ymm8 6681 vpsrld ymm8,ymm6,25 6682 vpslld ymm6,ymm6,32-25 6683 vpxor ymm6,ymm6,ymm8 6684 vpsrld ymm8,ymm5,25 6685 vpslld ymm5,ymm5,32-25 6686 vpxor ymm5,ymm5,ymm8 6687 vpsrld ymm8,ymm4,25 6688 vpslld ymm4,ymm4,32-25 6689 vpxor ymm4,ymm4,ymm8 6690 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6691 vpalignr ymm7,ymm7,ymm7,4 6692 vpalignr ymm11,ymm11,ymm11,8 6693 vpalignr ymm15,ymm15,ymm15,12 6694 vpalignr ymm6,ymm6,ymm6,4 6695 vpalignr ymm10,ymm10,ymm10,8 6696 vpalignr ymm14,ymm14,ymm14,12 6697 vpalignr ymm5,ymm5,ymm5,4 6698 vpalignr ymm9,ymm9,ymm9,8 6699 vpalignr ymm13,ymm13,ymm13,12 6700 vpalignr ymm4,ymm4,ymm4,4 6701 vpalignr ymm8,ymm8,ymm8,8 6702 vpalignr ymm12,ymm12,ymm12,12 6703 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6704 vmovdqa ymm8,YMMWORD[$L$rol16] 6705 vpaddd ymm3,ymm3,ymm7 6706 vpaddd ymm2,ymm2,ymm6 6707 vpaddd ymm1,ymm1,ymm5 6708 vpaddd ymm0,ymm0,ymm4 6709 vpxor ymm15,ymm15,ymm3 6710 vpxor ymm14,ymm14,ymm2 6711 vpxor ymm13,ymm13,ymm1 6712 vpxor ymm12,ymm12,ymm0 6713 vpshufb ymm15,ymm15,ymm8 6714 vpshufb ymm14,ymm14,ymm8 6715 vpshufb ymm13,ymm13,ymm8 6716 vpshufb ymm12,ymm12,ymm8 6717 vpaddd ymm11,ymm11,ymm15 6718 vpaddd ymm10,ymm10,ymm14 6719 vpaddd ymm9,ymm9,ymm13 6720 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6721 vpxor ymm7,ymm7,ymm11 6722 vpxor ymm6,ymm6,ymm10 6723 vpxor ymm5,ymm5,ymm9 6724 vpxor ymm4,ymm4,ymm8 6725 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6726 vpsrld ymm8,ymm7,20 6727 vpslld ymm7,ymm7,32-20 6728 vpxor ymm7,ymm7,ymm8 6729 vpsrld ymm8,ymm6,20 6730 vpslld ymm6,ymm6,32-20 6731 vpxor ymm6,ymm6,ymm8 6732 vpsrld ymm8,ymm5,20 6733 vpslld ymm5,ymm5,32-20 6734 vpxor ymm5,ymm5,ymm8 6735 vpsrld ymm8,ymm4,20 6736 vpslld ymm4,ymm4,32-20 6737 vpxor ymm4,ymm4,ymm8 6738 vmovdqa ymm8,YMMWORD[$L$rol8] 6739 vpaddd ymm3,ymm3,ymm7 6740 vpaddd ymm2,ymm2,ymm6 6741 
vpaddd ymm1,ymm1,ymm5 6742 vpaddd ymm0,ymm0,ymm4 6743 vpxor ymm15,ymm15,ymm3 6744 vpxor ymm14,ymm14,ymm2 6745 vpxor ymm13,ymm13,ymm1 6746 vpxor ymm12,ymm12,ymm0 6747 vpshufb ymm15,ymm15,ymm8 6748 vpshufb ymm14,ymm14,ymm8 6749 vpshufb ymm13,ymm13,ymm8 6750 vpshufb ymm12,ymm12,ymm8 6751 vpaddd ymm11,ymm11,ymm15 6752 vpaddd ymm10,ymm10,ymm14 6753 vpaddd ymm9,ymm9,ymm13 6754 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6755 vpxor ymm7,ymm7,ymm11 6756 vpxor ymm6,ymm6,ymm10 6757 vpxor ymm5,ymm5,ymm9 6758 vpxor ymm4,ymm4,ymm8 6759 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6760 vpsrld ymm8,ymm7,25 6761 vpslld ymm7,ymm7,32-25 6762 vpxor ymm7,ymm7,ymm8 6763 vpsrld ymm8,ymm6,25 6764 vpslld ymm6,ymm6,32-25 6765 vpxor ymm6,ymm6,ymm8 6766 vpsrld ymm8,ymm5,25 6767 vpslld ymm5,ymm5,32-25 6768 vpxor ymm5,ymm5,ymm8 6769 vpsrld ymm8,ymm4,25 6770 vpslld ymm4,ymm4,32-25 6771 vpxor ymm4,ymm4,ymm8 6772 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6773 vpalignr ymm7,ymm7,ymm7,12 6774 vpalignr ymm11,ymm11,ymm11,8 6775 vpalignr ymm15,ymm15,ymm15,4 6776 vpalignr ymm6,ymm6,ymm6,12 6777 vpalignr ymm10,ymm10,ymm10,8 6778 vpalignr ymm14,ymm14,ymm14,4 6779 vpalignr ymm5,ymm5,ymm5,12 6780 vpalignr ymm9,ymm9,ymm9,8 6781 vpalignr ymm13,ymm13,ymm13,4 6782 vpalignr ymm4,ymm4,ymm4,12 6783 vpalignr ymm8,ymm8,ymm8,8 6784 vpalignr ymm12,ymm12,ymm12,4 6785 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6786 vmovdqa ymm8,YMMWORD[$L$rol16] 6787 vpaddd ymm3,ymm3,ymm7 6788 vpaddd ymm2,ymm2,ymm6 6789 vpaddd ymm1,ymm1,ymm5 6790 vpaddd ymm0,ymm0,ymm4 6791 vpxor ymm15,ymm15,ymm3 6792 vpxor ymm14,ymm14,ymm2 6793 vpxor ymm13,ymm13,ymm1 6794 vpxor ymm12,ymm12,ymm0 6795 vpshufb ymm15,ymm15,ymm8 6796 vpshufb ymm14,ymm14,ymm8 6797 vpshufb ymm13,ymm13,ymm8 6798 vpshufb ymm12,ymm12,ymm8 6799 vpaddd ymm11,ymm11,ymm15 6800 vpaddd ymm10,ymm10,ymm14 6801 vpaddd ymm9,ymm9,ymm13 6802 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6803 vpxor ymm7,ymm7,ymm11 6804 vpxor ymm6,ymm6,ymm10 6805 vpxor ymm5,ymm5,ymm9 6806 vpxor ymm4,ymm4,ymm8 6807 vmovdqa 
YMMWORD[(160+128)+rbp],ymm8 6808 vpsrld ymm8,ymm7,20 6809 vpslld ymm7,ymm7,32-20 6810 vpxor ymm7,ymm7,ymm8 6811 vpsrld ymm8,ymm6,20 6812 vpslld ymm6,ymm6,32-20 6813 vpxor ymm6,ymm6,ymm8 6814 vpsrld ymm8,ymm5,20 6815 vpslld ymm5,ymm5,32-20 6816 vpxor ymm5,ymm5,ymm8 6817 vpsrld ymm8,ymm4,20 6818 vpslld ymm4,ymm4,32-20 6819 vpxor ymm4,ymm4,ymm8 6820 vmovdqa ymm8,YMMWORD[$L$rol8] 6821 vpaddd ymm3,ymm3,ymm7 6822 vpaddd ymm2,ymm2,ymm6 6823 vpaddd ymm1,ymm1,ymm5 6824 vpaddd ymm0,ymm0,ymm4 6825 vpxor ymm15,ymm15,ymm3 6826 6827 sub rdi,16 6828 mov rcx,9 6829 jmp NEAR $L$seal_avx2_main_loop_rounds_entry 6830ALIGN 32 6831$L$seal_avx2_main_loop: 6832 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6833 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 6834 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 6835 vmovdqa ymm1,ymm0 6836 vmovdqa ymm5,ymm4 6837 vmovdqa ymm9,ymm8 6838 vmovdqa ymm2,ymm0 6839 vmovdqa ymm6,ymm4 6840 vmovdqa ymm10,ymm8 6841 vmovdqa ymm3,ymm0 6842 vmovdqa ymm7,ymm4 6843 vmovdqa ymm11,ymm8 6844 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 6845 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 6846 vpaddd ymm14,ymm12,ymm15 6847 vpaddd ymm13,ymm12,ymm14 6848 vpaddd ymm12,ymm12,ymm13 6849 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6850 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6851 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6852 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6853 6854 mov rcx,10 6855ALIGN 32 6856$L$seal_avx2_main_loop_rounds: 6857 add r10,QWORD[((0+0))+rdi] 6858 adc r11,QWORD[((8+0))+rdi] 6859 adc r12,1 6860 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6861 vmovdqa ymm8,YMMWORD[$L$rol16] 6862 vpaddd ymm3,ymm3,ymm7 6863 vpaddd ymm2,ymm2,ymm6 6864 vpaddd ymm1,ymm1,ymm5 6865 vpaddd ymm0,ymm0,ymm4 6866 vpxor ymm15,ymm15,ymm3 6867 vpxor ymm14,ymm14,ymm2 6868 vpxor ymm13,ymm13,ymm1 6869 vpxor ymm12,ymm12,ymm0 6870 mov rdx,QWORD[((0+160+0))+rbp] 6871 mov r15,rdx 6872 mulx r14,r13,r10 6873 mulx rdx,rax,r11 6874 imul r15,r12 6875 add r14,rax 6876 adc r15,rdx 6877 vpshufb ymm15,ymm15,ymm8 6878 vpshufb ymm14,ymm14,ymm8 6879 vpshufb 
ymm13,ymm13,ymm8 6880 vpshufb ymm12,ymm12,ymm8 6881 vpaddd ymm11,ymm11,ymm15 6882 vpaddd ymm10,ymm10,ymm14 6883 vpaddd ymm9,ymm9,ymm13 6884 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6885 vpxor ymm7,ymm7,ymm11 6886 mov rdx,QWORD[((8+160+0))+rbp] 6887 mulx rax,r10,r10 6888 add r14,r10 6889 mulx r9,r11,r11 6890 adc r15,r11 6891 adc r9,0 6892 imul rdx,r12 6893 vpxor ymm6,ymm6,ymm10 6894 vpxor ymm5,ymm5,ymm9 6895 vpxor ymm4,ymm4,ymm8 6896 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6897 vpsrld ymm8,ymm7,20 6898 vpslld ymm7,ymm7,32-20 6899 vpxor ymm7,ymm7,ymm8 6900 vpsrld ymm8,ymm6,20 6901 vpslld ymm6,ymm6,32-20 6902 vpxor ymm6,ymm6,ymm8 6903 vpsrld ymm8,ymm5,20 6904 vpslld ymm5,ymm5,32-20 6905 add r15,rax 6906 adc r9,rdx 6907 vpxor ymm5,ymm5,ymm8 6908 vpsrld ymm8,ymm4,20 6909 vpslld ymm4,ymm4,32-20 6910 vpxor ymm4,ymm4,ymm8 6911 vmovdqa ymm8,YMMWORD[$L$rol8] 6912 vpaddd ymm3,ymm3,ymm7 6913 vpaddd ymm2,ymm2,ymm6 6914 vpaddd ymm1,ymm1,ymm5 6915 vpaddd ymm0,ymm0,ymm4 6916 vpxor ymm15,ymm15,ymm3 6917 mov r10,r13 6918 mov r11,r14 6919 mov r12,r15 6920 and r12,3 6921 mov r13,r15 6922 and r13,-4 6923 mov r14,r9 6924 shrd r15,r9,2 6925 shr r9,2 6926 add r15,r13 6927 adc r9,r14 6928 add r10,r15 6929 adc r11,r9 6930 adc r12,0 6931 6932$L$seal_avx2_main_loop_rounds_entry: 6933 vpxor ymm14,ymm14,ymm2 6934 vpxor ymm13,ymm13,ymm1 6935 vpxor ymm12,ymm12,ymm0 6936 vpshufb ymm15,ymm15,ymm8 6937 vpshufb ymm14,ymm14,ymm8 6938 vpshufb ymm13,ymm13,ymm8 6939 vpshufb ymm12,ymm12,ymm8 6940 vpaddd ymm11,ymm11,ymm15 6941 vpaddd ymm10,ymm10,ymm14 6942 add r10,QWORD[((0+16))+rdi] 6943 adc r11,QWORD[((8+16))+rdi] 6944 adc r12,1 6945 vpaddd ymm9,ymm9,ymm13 6946 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6947 vpxor ymm7,ymm7,ymm11 6948 vpxor ymm6,ymm6,ymm10 6949 vpxor ymm5,ymm5,ymm9 6950 vpxor ymm4,ymm4,ymm8 6951 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6952 vpsrld ymm8,ymm7,25 6953 mov rdx,QWORD[((0+160+0))+rbp] 6954 mov r15,rdx 6955 mulx r14,r13,r10 6956 mulx rdx,rax,r11 6957 imul r15,r12 6958 add r14,rax 6959 
adc r15,rdx 6960 vpslld ymm7,ymm7,32-25 6961 vpxor ymm7,ymm7,ymm8 6962 vpsrld ymm8,ymm6,25 6963 vpslld ymm6,ymm6,32-25 6964 vpxor ymm6,ymm6,ymm8 6965 vpsrld ymm8,ymm5,25 6966 vpslld ymm5,ymm5,32-25 6967 vpxor ymm5,ymm5,ymm8 6968 vpsrld ymm8,ymm4,25 6969 vpslld ymm4,ymm4,32-25 6970 vpxor ymm4,ymm4,ymm8 6971 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6972 vpalignr ymm7,ymm7,ymm7,4 6973 vpalignr ymm11,ymm11,ymm11,8 6974 vpalignr ymm15,ymm15,ymm15,12 6975 vpalignr ymm6,ymm6,ymm6,4 6976 vpalignr ymm10,ymm10,ymm10,8 6977 vpalignr ymm14,ymm14,ymm14,12 6978 mov rdx,QWORD[((8+160+0))+rbp] 6979 mulx rax,r10,r10 6980 add r14,r10 6981 mulx r9,r11,r11 6982 adc r15,r11 6983 adc r9,0 6984 imul rdx,r12 6985 vpalignr ymm5,ymm5,ymm5,4 6986 vpalignr ymm9,ymm9,ymm9,8 6987 vpalignr ymm13,ymm13,ymm13,12 6988 vpalignr ymm4,ymm4,ymm4,4 6989 vpalignr ymm8,ymm8,ymm8,8 6990 vpalignr ymm12,ymm12,ymm12,12 6991 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6992 vmovdqa ymm8,YMMWORD[$L$rol16] 6993 vpaddd ymm3,ymm3,ymm7 6994 vpaddd ymm2,ymm2,ymm6 6995 vpaddd ymm1,ymm1,ymm5 6996 vpaddd ymm0,ymm0,ymm4 6997 vpxor ymm15,ymm15,ymm3 6998 vpxor ymm14,ymm14,ymm2 6999 vpxor ymm13,ymm13,ymm1 7000 vpxor ymm12,ymm12,ymm0 7001 vpshufb ymm15,ymm15,ymm8 7002 vpshufb ymm14,ymm14,ymm8 7003 add r15,rax 7004 adc r9,rdx 7005 vpshufb ymm13,ymm13,ymm8 7006 vpshufb ymm12,ymm12,ymm8 7007 vpaddd ymm11,ymm11,ymm15 7008 vpaddd ymm10,ymm10,ymm14 7009 vpaddd ymm9,ymm9,ymm13 7010 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 7011 vpxor ymm7,ymm7,ymm11 7012 vpxor ymm6,ymm6,ymm10 7013 vpxor ymm5,ymm5,ymm9 7014 mov r10,r13 7015 mov r11,r14 7016 mov r12,r15 7017 and r12,3 7018 mov r13,r15 7019 and r13,-4 7020 mov r14,r9 7021 shrd r15,r9,2 7022 shr r9,2 7023 add r15,r13 7024 adc r9,r14 7025 add r10,r15 7026 adc r11,r9 7027 adc r12,0 7028 vpxor ymm4,ymm4,ymm8 7029 vmovdqa YMMWORD[(160+128)+rbp],ymm8 7030 vpsrld ymm8,ymm7,20 7031 vpslld ymm7,ymm7,32-20 7032 vpxor ymm7,ymm7,ymm8 7033 vpsrld ymm8,ymm6,20 7034 vpslld ymm6,ymm6,32-20 7035 vpxor 
ymm6,ymm6,ymm8 7036 add r10,QWORD[((0+32))+rdi] 7037 adc r11,QWORD[((8+32))+rdi] 7038 adc r12,1 7039 7040 lea rdi,[48+rdi] 7041 vpsrld ymm8,ymm5,20 7042 vpslld ymm5,ymm5,32-20 7043 vpxor ymm5,ymm5,ymm8 7044 vpsrld ymm8,ymm4,20 7045 vpslld ymm4,ymm4,32-20 7046 vpxor ymm4,ymm4,ymm8 7047 vmovdqa ymm8,YMMWORD[$L$rol8] 7048 vpaddd ymm3,ymm3,ymm7 7049 vpaddd ymm2,ymm2,ymm6 7050 vpaddd ymm1,ymm1,ymm5 7051 vpaddd ymm0,ymm0,ymm4 7052 vpxor ymm15,ymm15,ymm3 7053 vpxor ymm14,ymm14,ymm2 7054 vpxor ymm13,ymm13,ymm1 7055 vpxor ymm12,ymm12,ymm0 7056 vpshufb ymm15,ymm15,ymm8 7057 vpshufb ymm14,ymm14,ymm8 7058 vpshufb ymm13,ymm13,ymm8 7059 mov rdx,QWORD[((0+160+0))+rbp] 7060 mov r15,rdx 7061 mulx r14,r13,r10 7062 mulx rdx,rax,r11 7063 imul r15,r12 7064 add r14,rax 7065 adc r15,rdx 7066 vpshufb ymm12,ymm12,ymm8 7067 vpaddd ymm11,ymm11,ymm15 7068 vpaddd ymm10,ymm10,ymm14 7069 vpaddd ymm9,ymm9,ymm13 7070 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 7071 vpxor ymm7,ymm7,ymm11 7072 vpxor ymm6,ymm6,ymm10 7073 vpxor ymm5,ymm5,ymm9 7074 mov rdx,QWORD[((8+160+0))+rbp] 7075 mulx rax,r10,r10 7076 add r14,r10 7077 mulx r9,r11,r11 7078 adc r15,r11 7079 adc r9,0 7080 imul rdx,r12 7081 vpxor ymm4,ymm4,ymm8 7082 vmovdqa YMMWORD[(160+128)+rbp],ymm8 7083 vpsrld ymm8,ymm7,25 7084 vpslld ymm7,ymm7,32-25 7085 vpxor ymm7,ymm7,ymm8 7086 vpsrld ymm8,ymm6,25 7087 vpslld ymm6,ymm6,32-25 7088 vpxor ymm6,ymm6,ymm8 7089 add r15,rax 7090 adc r9,rdx 7091 vpsrld ymm8,ymm5,25 7092 vpslld ymm5,ymm5,32-25 7093 vpxor ymm5,ymm5,ymm8 7094 vpsrld ymm8,ymm4,25 7095 vpslld ymm4,ymm4,32-25 7096 vpxor ymm4,ymm4,ymm8 7097 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 7098 vpalignr ymm7,ymm7,ymm7,12 7099 vpalignr ymm11,ymm11,ymm11,8 7100 vpalignr ymm15,ymm15,ymm15,4 7101 vpalignr ymm6,ymm6,ymm6,12 7102 vpalignr ymm10,ymm10,ymm10,8 7103 vpalignr ymm14,ymm14,ymm14,4 7104 vpalignr ymm5,ymm5,ymm5,12 7105 vpalignr ymm9,ymm9,ymm9,8 7106 vpalignr ymm13,ymm13,ymm13,4 7107 vpalignr ymm4,ymm4,ymm4,12 7108 vpalignr ymm8,ymm8,ymm8,8 7109 mov 
r10,r13 7110 mov r11,r14 7111 mov r12,r15 7112 and r12,3 7113 mov r13,r15 7114 and r13,-4 7115 mov r14,r9 7116 shrd r15,r9,2 7117 shr r9,2 7118 add r15,r13 7119 adc r9,r14 7120 add r10,r15 7121 adc r11,r9 7122 adc r12,0 7123 vpalignr ymm12,ymm12,ymm12,4 7124 7125 dec rcx 7126 jne NEAR $L$seal_avx2_main_loop_rounds 7127 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 7128 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 7129 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 7130 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 7131 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 7132 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 7133 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 7134 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 7135 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 7136 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 7137 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 7138 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 7139 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7140 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7141 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7142 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7143 7144 vmovdqa YMMWORD[(160+128)+rbp],ymm0 7145 add r10,QWORD[((0+0))+rdi] 7146 adc r11,QWORD[((8+0))+rdi] 7147 adc r12,1 7148 mov rdx,QWORD[((0+160+0))+rbp] 7149 mov r15,rdx 7150 mulx r14,r13,r10 7151 mulx rdx,rax,r11 7152 imul r15,r12 7153 add r14,rax 7154 adc r15,rdx 7155 mov rdx,QWORD[((8+160+0))+rbp] 7156 mulx rax,r10,r10 7157 add r14,r10 7158 mulx r9,r11,r11 7159 adc r15,r11 7160 adc r9,0 7161 imul rdx,r12 7162 add r15,rax 7163 adc r9,rdx 7164 mov r10,r13 7165 mov r11,r14 7166 mov r12,r15 7167 and r12,3 7168 mov r13,r15 7169 and r13,-4 7170 mov r14,r9 7171 shrd r15,r9,2 7172 shr r9,2 7173 add r15,r13 7174 adc r9,r14 7175 add r10,r15 7176 adc r11,r9 7177 adc r12,0 7178 add r10,QWORD[((0+16))+rdi] 7179 adc r11,QWORD[((8+16))+rdi] 7180 adc r12,1 7181 mov rdx,QWORD[((0+160+0))+rbp] 7182 mov r15,rdx 7183 mulx r14,r13,r10 7184 mulx rdx,rax,r11 7185 imul r15,r12 7186 add r14,rax 7187 adc r15,rdx 7188 
mov rdx,QWORD[((8+160+0))+rbp] 7189 mulx rax,r10,r10 7190 add r14,r10 7191 mulx r9,r11,r11 7192 adc r15,r11 7193 adc r9,0 7194 imul rdx,r12 7195 add r15,rax 7196 adc r9,rdx 7197 mov r10,r13 7198 mov r11,r14 7199 mov r12,r15 7200 and r12,3 7201 mov r13,r15 7202 and r13,-4 7203 mov r14,r9 7204 shrd r15,r9,2 7205 shr r9,2 7206 add r15,r13 7207 adc r9,r14 7208 add r10,r15 7209 adc r11,r9 7210 adc r12,0 7211 7212 lea rdi,[32+rdi] 7213 vperm2i128 ymm0,ymm7,ymm3,0x02 7214 vperm2i128 ymm7,ymm7,ymm3,0x13 7215 vperm2i128 ymm3,ymm15,ymm11,0x02 7216 vperm2i128 ymm11,ymm15,ymm11,0x13 7217 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 7218 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 7219 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 7220 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 7221 vmovdqu YMMWORD[(0+0)+rdi],ymm0 7222 vmovdqu YMMWORD[(32+0)+rdi],ymm3 7223 vmovdqu YMMWORD[(64+0)+rdi],ymm7 7224 vmovdqu YMMWORD[(96+0)+rdi],ymm11 7225 7226 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 7227 vperm2i128 ymm3,ymm6,ymm2,0x02 7228 vperm2i128 ymm6,ymm6,ymm2,0x13 7229 vperm2i128 ymm2,ymm14,ymm10,0x02 7230 vperm2i128 ymm10,ymm14,ymm10,0x13 7231 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 7232 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 7233 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 7234 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 7235 vmovdqu YMMWORD[(0+128)+rdi],ymm3 7236 vmovdqu YMMWORD[(32+128)+rdi],ymm2 7237 vmovdqu YMMWORD[(64+128)+rdi],ymm6 7238 vmovdqu YMMWORD[(96+128)+rdi],ymm10 7239 vperm2i128 ymm3,ymm5,ymm1,0x02 7240 vperm2i128 ymm5,ymm5,ymm1,0x13 7241 vperm2i128 ymm1,ymm13,ymm9,0x02 7242 vperm2i128 ymm9,ymm13,ymm9,0x13 7243 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 7244 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 7245 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 7246 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 7247 vmovdqu YMMWORD[(0+256)+rdi],ymm3 7248 vmovdqu YMMWORD[(32+256)+rdi],ymm1 7249 vmovdqu YMMWORD[(64+256)+rdi],ymm5 7250 vmovdqu YMMWORD[(96+256)+rdi],ymm9 7251 vperm2i128 ymm3,ymm4,ymm0,0x02 7252 vperm2i128 ymm4,ymm4,ymm0,0x13 
7253 vperm2i128 ymm0,ymm12,ymm8,0x02 7254 vperm2i128 ymm8,ymm12,ymm8,0x13 7255 vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] 7256 vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] 7257 vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] 7258 vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] 7259 vmovdqu YMMWORD[(0+384)+rdi],ymm3 7260 vmovdqu YMMWORD[(32+384)+rdi],ymm0 7261 vmovdqu YMMWORD[(64+384)+rdi],ymm4 7262 vmovdqu YMMWORD[(96+384)+rdi],ymm8 7263 7264 lea rsi,[512+rsi] 7265 sub rbx,16*32 7266 cmp rbx,16*32 7267 jg NEAR $L$seal_avx2_main_loop 7268 7269 add r10,QWORD[((0+0))+rdi] 7270 adc r11,QWORD[((8+0))+rdi] 7271 adc r12,1 7272 mov rdx,QWORD[((0+160+0))+rbp] 7273 mov r15,rdx 7274 mulx r14,r13,r10 7275 mulx rdx,rax,r11 7276 imul r15,r12 7277 add r14,rax 7278 adc r15,rdx 7279 mov rdx,QWORD[((8+160+0))+rbp] 7280 mulx rax,r10,r10 7281 add r14,r10 7282 mulx r9,r11,r11 7283 adc r15,r11 7284 adc r9,0 7285 imul rdx,r12 7286 add r15,rax 7287 adc r9,rdx 7288 mov r10,r13 7289 mov r11,r14 7290 mov r12,r15 7291 and r12,3 7292 mov r13,r15 7293 and r13,-4 7294 mov r14,r9 7295 shrd r15,r9,2 7296 shr r9,2 7297 add r15,r13 7298 adc r9,r14 7299 add r10,r15 7300 adc r11,r9 7301 adc r12,0 7302 add r10,QWORD[((0+16))+rdi] 7303 adc r11,QWORD[((8+16))+rdi] 7304 adc r12,1 7305 mov rdx,QWORD[((0+160+0))+rbp] 7306 mov r15,rdx 7307 mulx r14,r13,r10 7308 mulx rdx,rax,r11 7309 imul r15,r12 7310 add r14,rax 7311 adc r15,rdx 7312 mov rdx,QWORD[((8+160+0))+rbp] 7313 mulx rax,r10,r10 7314 add r14,r10 7315 mulx r9,r11,r11 7316 adc r15,r11 7317 adc r9,0 7318 imul rdx,r12 7319 add r15,rax 7320 adc r9,rdx 7321 mov r10,r13 7322 mov r11,r14 7323 mov r12,r15 7324 and r12,3 7325 mov r13,r15 7326 and r13,-4 7327 mov r14,r9 7328 shrd r15,r9,2 7329 shr r9,2 7330 add r15,r13 7331 adc r9,r14 7332 add r10,r15 7333 adc r11,r9 7334 adc r12,0 7335 7336 lea rdi,[32+rdi] 7337 mov rcx,10 7338 xor r8,r8 7339 7340 cmp rbx,12*32 7341 ja NEAR $L$seal_avx2_tail_512 7342 cmp rbx,8*32 7343 ja NEAR $L$seal_avx2_tail_384 7344 cmp rbx,4*32 7345 ja NEAR 
$L$seal_avx2_tail_256 7346 7347$L$seal_avx2_tail_128: 7348 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7349 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7350 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7351 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7352 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7353 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7354 7355$L$seal_avx2_tail_128_rounds_and_3xhash: 7356 add r10,QWORD[((0+0))+rdi] 7357 adc r11,QWORD[((8+0))+rdi] 7358 adc r12,1 7359 mov rdx,QWORD[((0+160+0))+rbp] 7360 mov r15,rdx 7361 mulx r14,r13,r10 7362 mulx rdx,rax,r11 7363 imul r15,r12 7364 add r14,rax 7365 adc r15,rdx 7366 mov rdx,QWORD[((8+160+0))+rbp] 7367 mulx rax,r10,r10 7368 add r14,r10 7369 mulx r9,r11,r11 7370 adc r15,r11 7371 adc r9,0 7372 imul rdx,r12 7373 add r15,rax 7374 adc r9,rdx 7375 mov r10,r13 7376 mov r11,r14 7377 mov r12,r15 7378 and r12,3 7379 mov r13,r15 7380 and r13,-4 7381 mov r14,r9 7382 shrd r15,r9,2 7383 shr r9,2 7384 add r15,r13 7385 adc r9,r14 7386 add r10,r15 7387 adc r11,r9 7388 adc r12,0 7389 7390 lea rdi,[16+rdi] 7391$L$seal_avx2_tail_128_rounds_and_2xhash: 7392 vpaddd ymm0,ymm0,ymm4 7393 vpxor ymm12,ymm12,ymm0 7394 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7395 vpaddd ymm8,ymm8,ymm12 7396 vpxor ymm4,ymm4,ymm8 7397 vpsrld ymm3,ymm4,20 7398 vpslld ymm4,ymm4,12 7399 vpxor ymm4,ymm4,ymm3 7400 vpaddd ymm0,ymm0,ymm4 7401 vpxor ymm12,ymm12,ymm0 7402 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7403 vpaddd ymm8,ymm8,ymm12 7404 vpxor ymm4,ymm4,ymm8 7405 vpslld ymm3,ymm4,7 7406 vpsrld ymm4,ymm4,25 7407 vpxor ymm4,ymm4,ymm3 7408 vpalignr ymm12,ymm12,ymm12,12 7409 vpalignr ymm8,ymm8,ymm8,8 7410 vpalignr ymm4,ymm4,ymm4,4 7411 add r10,QWORD[((0+0))+rdi] 7412 adc r11,QWORD[((8+0))+rdi] 7413 adc r12,1 7414 mov rdx,QWORD[((0+160+0))+rbp] 7415 mov r15,rdx 7416 mulx r14,r13,r10 7417 mulx rdx,rax,r11 7418 imul r15,r12 7419 add r14,rax 7420 adc r15,rdx 7421 mov rdx,QWORD[((8+160+0))+rbp] 7422 mulx rax,r10,r10 7423 add r14,r10 7424 mulx r9,r11,r11 7425 adc r15,r11 7426 adc r9,0 7427 imul rdx,r12 
7428 add r15,rax 7429 adc r9,rdx 7430 mov r10,r13 7431 mov r11,r14 7432 mov r12,r15 7433 and r12,3 7434 mov r13,r15 7435 and r13,-4 7436 mov r14,r9 7437 shrd r15,r9,2 7438 shr r9,2 7439 add r15,r13 7440 adc r9,r14 7441 add r10,r15 7442 adc r11,r9 7443 adc r12,0 7444 vpaddd ymm0,ymm0,ymm4 7445 vpxor ymm12,ymm12,ymm0 7446 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7447 vpaddd ymm8,ymm8,ymm12 7448 vpxor ymm4,ymm4,ymm8 7449 vpsrld ymm3,ymm4,20 7450 vpslld ymm4,ymm4,12 7451 vpxor ymm4,ymm4,ymm3 7452 vpaddd ymm0,ymm0,ymm4 7453 vpxor ymm12,ymm12,ymm0 7454 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7455 vpaddd ymm8,ymm8,ymm12 7456 vpxor ymm4,ymm4,ymm8 7457 vpslld ymm3,ymm4,7 7458 vpsrld ymm4,ymm4,25 7459 vpxor ymm4,ymm4,ymm3 7460 vpalignr ymm12,ymm12,ymm12,4 7461 vpalignr ymm8,ymm8,ymm8,8 7462 vpalignr ymm4,ymm4,ymm4,12 7463 add r10,QWORD[((0+16))+rdi] 7464 adc r11,QWORD[((8+16))+rdi] 7465 adc r12,1 7466 mov rdx,QWORD[((0+160+0))+rbp] 7467 mov r15,rdx 7468 mulx r14,r13,r10 7469 mulx rdx,rax,r11 7470 imul r15,r12 7471 add r14,rax 7472 adc r15,rdx 7473 mov rdx,QWORD[((8+160+0))+rbp] 7474 mulx rax,r10,r10 7475 add r14,r10 7476 mulx r9,r11,r11 7477 adc r15,r11 7478 adc r9,0 7479 imul rdx,r12 7480 add r15,rax 7481 adc r9,rdx 7482 mov r10,r13 7483 mov r11,r14 7484 mov r12,r15 7485 and r12,3 7486 mov r13,r15 7487 and r13,-4 7488 mov r14,r9 7489 shrd r15,r9,2 7490 shr r9,2 7491 add r15,r13 7492 adc r9,r14 7493 add r10,r15 7494 adc r11,r9 7495 adc r12,0 7496 7497 lea rdi,[32+rdi] 7498 dec rcx 7499 jg NEAR $L$seal_avx2_tail_128_rounds_and_3xhash 7500 dec r8 7501 jge NEAR $L$seal_avx2_tail_128_rounds_and_2xhash 7502 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7503 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7504 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7505 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7506 vperm2i128 ymm3,ymm4,ymm0,0x13 7507 vperm2i128 ymm0,ymm4,ymm0,0x02 7508 vperm2i128 ymm4,ymm12,ymm8,0x02 7509 vperm2i128 ymm12,ymm12,ymm8,0x13 7510 vmovdqa ymm8,ymm3 7511 7512 jmp NEAR 
$L$seal_avx2_short_loop 7513 7514$L$seal_avx2_tail_256: 7515 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7516 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7517 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7518 vmovdqa ymm1,ymm0 7519 vmovdqa ymm5,ymm4 7520 vmovdqa ymm9,ymm8 7521 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7522 vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] 7523 vpaddd ymm12,ymm12,ymm13 7524 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7525 vmovdqa YMMWORD[(160+192)+rbp],ymm13 7526 7527$L$seal_avx2_tail_256_rounds_and_3xhash: 7528 add r10,QWORD[((0+0))+rdi] 7529 adc r11,QWORD[((8+0))+rdi] 7530 adc r12,1 7531 mov rax,QWORD[((0+160+0))+rbp] 7532 mov r15,rax 7533 mul r10 7534 mov r13,rax 7535 mov r14,rdx 7536 mov rax,QWORD[((0+160+0))+rbp] 7537 mul r11 7538 imul r15,r12 7539 add r14,rax 7540 adc r15,rdx 7541 mov rax,QWORD[((8+160+0))+rbp] 7542 mov r9,rax 7543 mul r10 7544 add r14,rax 7545 adc rdx,0 7546 mov r10,rdx 7547 mov rax,QWORD[((8+160+0))+rbp] 7548 mul r11 7549 add r15,rax 7550 adc rdx,0 7551 imul r9,r12 7552 add r15,r10 7553 adc r9,rdx 7554 mov r10,r13 7555 mov r11,r14 7556 mov r12,r15 7557 and r12,3 7558 mov r13,r15 7559 and r13,-4 7560 mov r14,r9 7561 shrd r15,r9,2 7562 shr r9,2 7563 add r15,r13 7564 adc r9,r14 7565 add r10,r15 7566 adc r11,r9 7567 adc r12,0 7568 7569 lea rdi,[16+rdi] 7570$L$seal_avx2_tail_256_rounds_and_2xhash: 7571 vpaddd ymm0,ymm0,ymm4 7572 vpxor ymm12,ymm12,ymm0 7573 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7574 vpaddd ymm8,ymm8,ymm12 7575 vpxor ymm4,ymm4,ymm8 7576 vpsrld ymm3,ymm4,20 7577 vpslld ymm4,ymm4,12 7578 vpxor ymm4,ymm4,ymm3 7579 vpaddd ymm0,ymm0,ymm4 7580 vpxor ymm12,ymm12,ymm0 7581 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7582 vpaddd ymm8,ymm8,ymm12 7583 vpxor ymm4,ymm4,ymm8 7584 vpslld ymm3,ymm4,7 7585 vpsrld ymm4,ymm4,25 7586 vpxor ymm4,ymm4,ymm3 7587 vpalignr ymm12,ymm12,ymm12,12 7588 vpalignr ymm8,ymm8,ymm8,8 7589 vpalignr ymm4,ymm4,ymm4,4 7590 vpaddd ymm1,ymm1,ymm5 7591 vpxor ymm13,ymm13,ymm1 7592 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7593 vpaddd 
ymm9,ymm9,ymm13 7594 vpxor ymm5,ymm5,ymm9 7595 vpsrld ymm3,ymm5,20 7596 vpslld ymm5,ymm5,12 7597 vpxor ymm5,ymm5,ymm3 7598 vpaddd ymm1,ymm1,ymm5 7599 vpxor ymm13,ymm13,ymm1 7600 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7601 vpaddd ymm9,ymm9,ymm13 7602 vpxor ymm5,ymm5,ymm9 7603 vpslld ymm3,ymm5,7 7604 vpsrld ymm5,ymm5,25 7605 vpxor ymm5,ymm5,ymm3 7606 vpalignr ymm13,ymm13,ymm13,12 7607 vpalignr ymm9,ymm9,ymm9,8 7608 vpalignr ymm5,ymm5,ymm5,4 7609 add r10,QWORD[((0+0))+rdi] 7610 adc r11,QWORD[((8+0))+rdi] 7611 adc r12,1 7612 mov rax,QWORD[((0+160+0))+rbp] 7613 mov r15,rax 7614 mul r10 7615 mov r13,rax 7616 mov r14,rdx 7617 mov rax,QWORD[((0+160+0))+rbp] 7618 mul r11 7619 imul r15,r12 7620 add r14,rax 7621 adc r15,rdx 7622 mov rax,QWORD[((8+160+0))+rbp] 7623 mov r9,rax 7624 mul r10 7625 add r14,rax 7626 adc rdx,0 7627 mov r10,rdx 7628 mov rax,QWORD[((8+160+0))+rbp] 7629 mul r11 7630 add r15,rax 7631 adc rdx,0 7632 imul r9,r12 7633 add r15,r10 7634 adc r9,rdx 7635 mov r10,r13 7636 mov r11,r14 7637 mov r12,r15 7638 and r12,3 7639 mov r13,r15 7640 and r13,-4 7641 mov r14,r9 7642 shrd r15,r9,2 7643 shr r9,2 7644 add r15,r13 7645 adc r9,r14 7646 add r10,r15 7647 adc r11,r9 7648 adc r12,0 7649 vpaddd ymm0,ymm0,ymm4 7650 vpxor ymm12,ymm12,ymm0 7651 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7652 vpaddd ymm8,ymm8,ymm12 7653 vpxor ymm4,ymm4,ymm8 7654 vpsrld ymm3,ymm4,20 7655 vpslld ymm4,ymm4,12 7656 vpxor ymm4,ymm4,ymm3 7657 vpaddd ymm0,ymm0,ymm4 7658 vpxor ymm12,ymm12,ymm0 7659 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7660 vpaddd ymm8,ymm8,ymm12 7661 vpxor ymm4,ymm4,ymm8 7662 vpslld ymm3,ymm4,7 7663 vpsrld ymm4,ymm4,25 7664 vpxor ymm4,ymm4,ymm3 7665 vpalignr ymm12,ymm12,ymm12,4 7666 vpalignr ymm8,ymm8,ymm8,8 7667 vpalignr ymm4,ymm4,ymm4,12 7668 vpaddd ymm1,ymm1,ymm5 7669 vpxor ymm13,ymm13,ymm1 7670 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7671 vpaddd ymm9,ymm9,ymm13 7672 vpxor ymm5,ymm5,ymm9 7673 vpsrld ymm3,ymm5,20 7674 vpslld ymm5,ymm5,12 7675 vpxor ymm5,ymm5,ymm3 7676 vpaddd 
ymm1,ymm1,ymm5 7677 vpxor ymm13,ymm13,ymm1 7678 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7679 vpaddd ymm9,ymm9,ymm13 7680 vpxor ymm5,ymm5,ymm9 7681 vpslld ymm3,ymm5,7 7682 vpsrld ymm5,ymm5,25 7683 vpxor ymm5,ymm5,ymm3 7684 vpalignr ymm13,ymm13,ymm13,4 7685 vpalignr ymm9,ymm9,ymm9,8 7686 vpalignr ymm5,ymm5,ymm5,12 7687 add r10,QWORD[((0+16))+rdi] 7688 adc r11,QWORD[((8+16))+rdi] 7689 adc r12,1 7690 mov rax,QWORD[((0+160+0))+rbp] 7691 mov r15,rax 7692 mul r10 7693 mov r13,rax 7694 mov r14,rdx 7695 mov rax,QWORD[((0+160+0))+rbp] 7696 mul r11 7697 imul r15,r12 7698 add r14,rax 7699 adc r15,rdx 7700 mov rax,QWORD[((8+160+0))+rbp] 7701 mov r9,rax 7702 mul r10 7703 add r14,rax 7704 adc rdx,0 7705 mov r10,rdx 7706 mov rax,QWORD[((8+160+0))+rbp] 7707 mul r11 7708 add r15,rax 7709 adc rdx,0 7710 imul r9,r12 7711 add r15,r10 7712 adc r9,rdx 7713 mov r10,r13 7714 mov r11,r14 7715 mov r12,r15 7716 and r12,3 7717 mov r13,r15 7718 and r13,-4 7719 mov r14,r9 7720 shrd r15,r9,2 7721 shr r9,2 7722 add r15,r13 7723 adc r9,r14 7724 add r10,r15 7725 adc r11,r9 7726 adc r12,0 7727 7728 lea rdi,[32+rdi] 7729 dec rcx 7730 jg NEAR $L$seal_avx2_tail_256_rounds_and_3xhash 7731 dec r8 7732 jge NEAR $L$seal_avx2_tail_256_rounds_and_2xhash 7733 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 7734 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 7735 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 7736 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 7737 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7738 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7739 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7740 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7741 vperm2i128 ymm3,ymm5,ymm1,0x02 7742 vperm2i128 ymm5,ymm5,ymm1,0x13 7743 vperm2i128 ymm1,ymm13,ymm9,0x02 7744 vperm2i128 ymm9,ymm13,ymm9,0x13 7745 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 7746 vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] 7747 vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] 7748 vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] 7749 vmovdqu YMMWORD[(0+0)+rdi],ymm3 7750 vmovdqu YMMWORD[(32+0)+rdi],ymm1 
7751 vmovdqu YMMWORD[(64+0)+rdi],ymm5 7752 vmovdqu YMMWORD[(96+0)+rdi],ymm9 7753 vperm2i128 ymm3,ymm4,ymm0,0x13 7754 vperm2i128 ymm0,ymm4,ymm0,0x02 7755 vperm2i128 ymm4,ymm12,ymm8,0x02 7756 vperm2i128 ymm12,ymm12,ymm8,0x13 7757 vmovdqa ymm8,ymm3 7758 7759 mov rcx,4*32 7760 lea rsi,[128+rsi] 7761 sub rbx,4*32 7762 jmp NEAR $L$seal_avx2_short_hash_remainder 7763 7764$L$seal_avx2_tail_384: 7765 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7766 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7767 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7768 vmovdqa ymm1,ymm0 7769 vmovdqa ymm5,ymm4 7770 vmovdqa ymm9,ymm8 7771 vmovdqa ymm2,ymm0 7772 vmovdqa ymm6,ymm4 7773 vmovdqa ymm10,ymm8 7774 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7775 vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] 7776 vpaddd ymm13,ymm12,ymm14 7777 vpaddd ymm12,ymm12,ymm13 7778 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7779 vmovdqa YMMWORD[(160+192)+rbp],ymm13 7780 vmovdqa YMMWORD[(160+224)+rbp],ymm14 7781 7782$L$seal_avx2_tail_384_rounds_and_3xhash: 7783 add r10,QWORD[((0+0))+rdi] 7784 adc r11,QWORD[((8+0))+rdi] 7785 adc r12,1 7786 mov rax,QWORD[((0+160+0))+rbp] 7787 mov r15,rax 7788 mul r10 7789 mov r13,rax 7790 mov r14,rdx 7791 mov rax,QWORD[((0+160+0))+rbp] 7792 mul r11 7793 imul r15,r12 7794 add r14,rax 7795 adc r15,rdx 7796 mov rax,QWORD[((8+160+0))+rbp] 7797 mov r9,rax 7798 mul r10 7799 add r14,rax 7800 adc rdx,0 7801 mov r10,rdx 7802 mov rax,QWORD[((8+160+0))+rbp] 7803 mul r11 7804 add r15,rax 7805 adc rdx,0 7806 imul r9,r12 7807 add r15,r10 7808 adc r9,rdx 7809 mov r10,r13 7810 mov r11,r14 7811 mov r12,r15 7812 and r12,3 7813 mov r13,r15 7814 and r13,-4 7815 mov r14,r9 7816 shrd r15,r9,2 7817 shr r9,2 7818 add r15,r13 7819 adc r9,r14 7820 add r10,r15 7821 adc r11,r9 7822 adc r12,0 7823 7824 lea rdi,[16+rdi] 7825$L$seal_avx2_tail_384_rounds_and_2xhash: 7826 vpaddd ymm0,ymm0,ymm4 7827 vpxor ymm12,ymm12,ymm0 7828 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7829 vpaddd ymm8,ymm8,ymm12 7830 vpxor ymm4,ymm4,ymm8 7831 vpsrld ymm3,ymm4,20 7832 vpslld 
ymm4,ymm4,12 7833 vpxor ymm4,ymm4,ymm3 7834 vpaddd ymm0,ymm0,ymm4 7835 vpxor ymm12,ymm12,ymm0 7836 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7837 vpaddd ymm8,ymm8,ymm12 7838 vpxor ymm4,ymm4,ymm8 7839 vpslld ymm3,ymm4,7 7840 vpsrld ymm4,ymm4,25 7841 vpxor ymm4,ymm4,ymm3 7842 vpalignr ymm12,ymm12,ymm12,12 7843 vpalignr ymm8,ymm8,ymm8,8 7844 vpalignr ymm4,ymm4,ymm4,4 7845 vpaddd ymm1,ymm1,ymm5 7846 vpxor ymm13,ymm13,ymm1 7847 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7848 vpaddd ymm9,ymm9,ymm13 7849 vpxor ymm5,ymm5,ymm9 7850 vpsrld ymm3,ymm5,20 7851 vpslld ymm5,ymm5,12 7852 vpxor ymm5,ymm5,ymm3 7853 vpaddd ymm1,ymm1,ymm5 7854 vpxor ymm13,ymm13,ymm1 7855 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7856 vpaddd ymm9,ymm9,ymm13 7857 vpxor ymm5,ymm5,ymm9 7858 vpslld ymm3,ymm5,7 7859 vpsrld ymm5,ymm5,25 7860 vpxor ymm5,ymm5,ymm3 7861 vpalignr ymm13,ymm13,ymm13,12 7862 vpalignr ymm9,ymm9,ymm9,8 7863 vpalignr ymm5,ymm5,ymm5,4 7864 add r10,QWORD[((0+0))+rdi] 7865 adc r11,QWORD[((8+0))+rdi] 7866 adc r12,1 7867 mov rax,QWORD[((0+160+0))+rbp] 7868 mov r15,rax 7869 mul r10 7870 mov r13,rax 7871 mov r14,rdx 7872 mov rax,QWORD[((0+160+0))+rbp] 7873 mul r11 7874 imul r15,r12 7875 add r14,rax 7876 adc r15,rdx 7877 mov rax,QWORD[((8+160+0))+rbp] 7878 mov r9,rax 7879 mul r10 7880 add r14,rax 7881 adc rdx,0 7882 mov r10,rdx 7883 mov rax,QWORD[((8+160+0))+rbp] 7884 mul r11 7885 add r15,rax 7886 adc rdx,0 7887 imul r9,r12 7888 add r15,r10 7889 adc r9,rdx 7890 mov r10,r13 7891 mov r11,r14 7892 mov r12,r15 7893 and r12,3 7894 mov r13,r15 7895 and r13,-4 7896 mov r14,r9 7897 shrd r15,r9,2 7898 shr r9,2 7899 add r15,r13 7900 adc r9,r14 7901 add r10,r15 7902 adc r11,r9 7903 adc r12,0 7904 vpaddd ymm2,ymm2,ymm6 7905 vpxor ymm14,ymm14,ymm2 7906 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 7907 vpaddd ymm10,ymm10,ymm14 7908 vpxor ymm6,ymm6,ymm10 7909 vpsrld ymm3,ymm6,20 7910 vpslld ymm6,ymm6,12 7911 vpxor ymm6,ymm6,ymm3 7912 vpaddd ymm2,ymm2,ymm6 7913 vpxor ymm14,ymm14,ymm2 7914 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 7915 
vpaddd ymm10,ymm10,ymm14 7916 vpxor ymm6,ymm6,ymm10 7917 vpslld ymm3,ymm6,7 7918 vpsrld ymm6,ymm6,25 7919 vpxor ymm6,ymm6,ymm3 7920 vpalignr ymm14,ymm14,ymm14,12 7921 vpalignr ymm10,ymm10,ymm10,8 7922 vpalignr ymm6,ymm6,ymm6,4 7923 vpaddd ymm0,ymm0,ymm4 7924 vpxor ymm12,ymm12,ymm0 7925 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7926 vpaddd ymm8,ymm8,ymm12 7927 vpxor ymm4,ymm4,ymm8 7928 vpsrld ymm3,ymm4,20 7929 vpslld ymm4,ymm4,12 7930 vpxor ymm4,ymm4,ymm3 7931 vpaddd ymm0,ymm0,ymm4 7932 vpxor ymm12,ymm12,ymm0 7933 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7934 vpaddd ymm8,ymm8,ymm12 7935 vpxor ymm4,ymm4,ymm8 7936 vpslld ymm3,ymm4,7 7937 vpsrld ymm4,ymm4,25 7938 vpxor ymm4,ymm4,ymm3 7939 vpalignr ymm12,ymm12,ymm12,4 7940 vpalignr ymm8,ymm8,ymm8,8 7941 vpalignr ymm4,ymm4,ymm4,12 7942 add r10,QWORD[((0+16))+rdi] 7943 adc r11,QWORD[((8+16))+rdi] 7944 adc r12,1 7945 mov rax,QWORD[((0+160+0))+rbp] 7946 mov r15,rax 7947 mul r10 7948 mov r13,rax 7949 mov r14,rdx 7950 mov rax,QWORD[((0+160+0))+rbp] 7951 mul r11 7952 imul r15,r12 7953 add r14,rax 7954 adc r15,rdx 7955 mov rax,QWORD[((8+160+0))+rbp] 7956 mov r9,rax 7957 mul r10 7958 add r14,rax 7959 adc rdx,0 7960 mov r10,rdx 7961 mov rax,QWORD[((8+160+0))+rbp] 7962 mul r11 7963 add r15,rax 7964 adc rdx,0 7965 imul r9,r12 7966 add r15,r10 7967 adc r9,rdx 7968 mov r10,r13 7969 mov r11,r14 7970 mov r12,r15 7971 and r12,3 7972 mov r13,r15 7973 and r13,-4 7974 mov r14,r9 7975 shrd r15,r9,2 7976 shr r9,2 7977 add r15,r13 7978 adc r9,r14 7979 add r10,r15 7980 adc r11,r9 7981 adc r12,0 7982 vpaddd ymm1,ymm1,ymm5 7983 vpxor ymm13,ymm13,ymm1 7984 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7985 vpaddd ymm9,ymm9,ymm13 7986 vpxor ymm5,ymm5,ymm9 7987 vpsrld ymm3,ymm5,20 7988 vpslld ymm5,ymm5,12 7989 vpxor ymm5,ymm5,ymm3 7990 vpaddd ymm1,ymm1,ymm5 7991 vpxor ymm13,ymm13,ymm1 7992 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7993 vpaddd ymm9,ymm9,ymm13 7994 vpxor ymm5,ymm5,ymm9 7995 vpslld ymm3,ymm5,7 7996 vpsrld ymm5,ymm5,25 7997 vpxor ymm5,ymm5,ymm3 7998 
vpalignr ymm13,ymm13,ymm13,4 7999 vpalignr ymm9,ymm9,ymm9,8 8000 vpalignr ymm5,ymm5,ymm5,12 8001 vpaddd ymm2,ymm2,ymm6 8002 vpxor ymm14,ymm14,ymm2 8003 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 8004 vpaddd ymm10,ymm10,ymm14 8005 vpxor ymm6,ymm6,ymm10 8006 vpsrld ymm3,ymm6,20 8007 vpslld ymm6,ymm6,12 8008 vpxor ymm6,ymm6,ymm3 8009 vpaddd ymm2,ymm2,ymm6 8010 vpxor ymm14,ymm14,ymm2 8011 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8012 vpaddd ymm10,ymm10,ymm14 8013 vpxor ymm6,ymm6,ymm10 8014 vpslld ymm3,ymm6,7 8015 vpsrld ymm6,ymm6,25 8016 vpxor ymm6,ymm6,ymm3 8017 vpalignr ymm14,ymm14,ymm14,4 8018 vpalignr ymm10,ymm10,ymm10,8 8019 vpalignr ymm6,ymm6,ymm6,12 8020 8021 lea rdi,[32+rdi] 8022 dec rcx 8023 jg NEAR $L$seal_avx2_tail_384_rounds_and_3xhash 8024 dec r8 8025 jge NEAR $L$seal_avx2_tail_384_rounds_and_2xhash 8026 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8027 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 8028 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 8029 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8030 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8031 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 8032 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 8033 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8034 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8035 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 8036 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 8037 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8038 vperm2i128 ymm3,ymm6,ymm2,0x02 8039 vperm2i128 ymm6,ymm6,ymm2,0x13 8040 vperm2i128 ymm2,ymm14,ymm10,0x02 8041 vperm2i128 ymm10,ymm14,ymm10,0x13 8042 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 8043 vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] 8044 vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] 8045 vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] 8046 vmovdqu YMMWORD[(0+0)+rdi],ymm3 8047 vmovdqu YMMWORD[(32+0)+rdi],ymm2 8048 vmovdqu YMMWORD[(64+0)+rdi],ymm6 8049 vmovdqu YMMWORD[(96+0)+rdi],ymm10 8050 vperm2i128 ymm3,ymm5,ymm1,0x02 8051 vperm2i128 ymm5,ymm5,ymm1,0x13 8052 vperm2i128 ymm1,ymm13,ymm9,0x02 8053 vperm2i128 
ymm9,ymm13,ymm9,0x13 8054 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 8055 vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] 8056 vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] 8057 vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] 8058 vmovdqu YMMWORD[(0+128)+rdi],ymm3 8059 vmovdqu YMMWORD[(32+128)+rdi],ymm1 8060 vmovdqu YMMWORD[(64+128)+rdi],ymm5 8061 vmovdqu YMMWORD[(96+128)+rdi],ymm9 8062 vperm2i128 ymm3,ymm4,ymm0,0x13 8063 vperm2i128 ymm0,ymm4,ymm0,0x02 8064 vperm2i128 ymm4,ymm12,ymm8,0x02 8065 vperm2i128 ymm12,ymm12,ymm8,0x13 8066 vmovdqa ymm8,ymm3 8067 8068 mov rcx,8*32 8069 lea rsi,[256+rsi] 8070 sub rbx,8*32 8071 jmp NEAR $L$seal_avx2_short_hash_remainder 8072 8073$L$seal_avx2_tail_512: 8074 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 8075 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 8076 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 8077 vmovdqa ymm1,ymm0 8078 vmovdqa ymm5,ymm4 8079 vmovdqa ymm9,ymm8 8080 vmovdqa ymm2,ymm0 8081 vmovdqa ymm6,ymm4 8082 vmovdqa ymm10,ymm8 8083 vmovdqa ymm3,ymm0 8084 vmovdqa ymm7,ymm4 8085 vmovdqa ymm11,ymm8 8086 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 8087 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 8088 vpaddd ymm14,ymm12,ymm15 8089 vpaddd ymm13,ymm12,ymm14 8090 vpaddd ymm12,ymm12,ymm13 8091 vmovdqa YMMWORD[(160+256)+rbp],ymm15 8092 vmovdqa YMMWORD[(160+224)+rbp],ymm14 8093 vmovdqa YMMWORD[(160+192)+rbp],ymm13 8094 vmovdqa YMMWORD[(160+160)+rbp],ymm12 8095 8096$L$seal_avx2_tail_512_rounds_and_3xhash: 8097 add r10,QWORD[((0+0))+rdi] 8098 adc r11,QWORD[((8+0))+rdi] 8099 adc r12,1 8100 mov rdx,QWORD[((0+160+0))+rbp] 8101 mov r15,rdx 8102 mulx r14,r13,r10 8103 mulx rdx,rax,r11 8104 imul r15,r12 8105 add r14,rax 8106 adc r15,rdx 8107 mov rdx,QWORD[((8+160+0))+rbp] 8108 mulx rax,r10,r10 8109 add r14,r10 8110 mulx r9,r11,r11 8111 adc r15,r11 8112 adc r9,0 8113 imul rdx,r12 8114 add r15,rax 8115 adc r9,rdx 8116 mov r10,r13 8117 mov r11,r14 8118 mov r12,r15 8119 and r12,3 8120 mov r13,r15 8121 and r13,-4 8122 mov r14,r9 8123 shrd r15,r9,2 8124 shr r9,2 8125 add r15,r13 8126 adc r9,r14 
; ---------------------------------------------------------------------------
; seal AVX2, 512-byte tail: vector rounds interleaved with hashing.
;
; NOTE(review): this text is whitespace-collapsed; the bare decimal numbers
; between instructions look like line numbers of the generated listing, not
; operands -- confirm before feeding this to an assembler.
;
; The first three instructions finish the hash step begun just above by adding
; the folded high part (r15:r9) into the accumulator r10:r11:r12; rdi is then
; advanced by 16.
;
; $L$seal_avx2_tail_512_rounds_and_2xhash:
;   Vector side: four 256-bit states are held in (ymm0,ymm4,ymm8,ymm12),
;   (ymm1,ymm5,ymm9,ymm13), (ymm2,ymm6,ymm10,ymm14), (ymm3,ymm7,ymm11,ymm15).
;   ymm8 is spilled to [160+128+rbp] so the register can double as the
;   $L$rol16 / $L$rol8 shuffle mask and as the shift temporary. Every state
;   gets add/xor/rotate steps with left-rotations of 16 and 8 (vpshufb) and
;   12 and 7 (vpsrld/vpslld/vpxor pairs); vpalignr by 4/8/12 bytes then rotates
;   three of the rows before the second half of the round pair.
;   Scalar side: 16 bytes at [rdi] are added into r10:r11 (r12 receives 1 plus
;   the carry), the sum is multiplied with mulx by the qwords at [160+rbp] and
;   [168+rbp], and the `and 3` / `and -4` / `shrd ..,2` sequence folds
;   everything above the low two bits of the third limb back into the low
;   limbs (presumably the Poly1305 reduction mod 2^130-5 -- confirm against
;   the Perl generator).
; ---------------------------------------------------------------------------
8127 add r10,r15 8128 adc r11,r9 8129 adc r12,0 8130 8131 lea rdi,[16+rdi] 8132$L$seal_avx2_tail_512_rounds_and_2xhash: 8133 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8134 vmovdqa ymm8,YMMWORD[$L$rol16] 8135 vpaddd ymm3,ymm3,ymm7 8136 vpaddd ymm2,ymm2,ymm6 8137 vpaddd ymm1,ymm1,ymm5 8138 vpaddd ymm0,ymm0,ymm4 8139 vpxor ymm15,ymm15,ymm3 8140 vpxor ymm14,ymm14,ymm2 8141 vpxor ymm13,ymm13,ymm1 8142 vpxor ymm12,ymm12,ymm0 8143 vpshufb ymm15,ymm15,ymm8 8144 vpshufb ymm14,ymm14,ymm8 8145 vpshufb ymm13,ymm13,ymm8 8146 vpshufb ymm12,ymm12,ymm8 8147 vpaddd ymm11,ymm11,ymm15 8148 vpaddd ymm10,ymm10,ymm14 8149 vpaddd ymm9,ymm9,ymm13 8150 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8151 vpxor ymm7,ymm7,ymm11 8152 vpxor ymm6,ymm6,ymm10 8153 add r10,QWORD[((0+0))+rdi] 8154 adc r11,QWORD[((8+0))+rdi] 8155 adc r12,1 8156 vpxor ymm5,ymm5,ymm9 8157 vpxor ymm4,ymm4,ymm8 8158 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8159 vpsrld ymm8,ymm7,20 8160 vpslld ymm7,ymm7,32-20 8161 vpxor ymm7,ymm7,ymm8 8162 vpsrld ymm8,ymm6,20 8163 vpslld ymm6,ymm6,32-20 8164 vpxor ymm6,ymm6,ymm8 8165 vpsrld ymm8,ymm5,20 8166 vpslld ymm5,ymm5,32-20 8167 vpxor ymm5,ymm5,ymm8 8168 vpsrld ymm8,ymm4,20 8169 vpslld ymm4,ymm4,32-20 8170 vpxor ymm4,ymm4,ymm8 8171 vmovdqa ymm8,YMMWORD[$L$rol8] 8172 vpaddd ymm3,ymm3,ymm7 8173 vpaddd ymm2,ymm2,ymm6 8174 vpaddd ymm1,ymm1,ymm5 8175 vpaddd ymm0,ymm0,ymm4 8176 mov rdx,QWORD[((0+160+0))+rbp] 8177 mov r15,rdx 8178 mulx r14,r13,r10 8179 mulx rdx,rax,r11 8180 imul r15,r12 8181 add r14,rax 8182 adc r15,rdx 8183 vpxor ymm15,ymm15,ymm3 8184 vpxor ymm14,ymm14,ymm2 8185 vpxor ymm13,ymm13,ymm1 8186 vpxor ymm12,ymm12,ymm0 8187 vpshufb ymm15,ymm15,ymm8 8188 vpshufb ymm14,ymm14,ymm8 8189 vpshufb ymm13,ymm13,ymm8 8190 vpshufb ymm12,ymm12,ymm8 8191 vpaddd ymm11,ymm11,ymm15 8192 vpaddd ymm10,ymm10,ymm14 8193 vpaddd ymm9,ymm9,ymm13 8194 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8195 vpxor ymm7,ymm7,ymm11 8196 vpxor ymm6,ymm6,ymm10 8197 vpxor ymm5,ymm5,ymm9 8198 vpxor ymm4,ymm4,ymm8 8199 vmovdqa 
YMMWORD[(160+128)+rbp],ymm8 8200 vpsrld ymm8,ymm7,25 8201 vpslld ymm7,ymm7,32-25 8202 vpxor ymm7,ymm7,ymm8 8203 mov rdx,QWORD[((8+160+0))+rbp] 8204 mulx rax,r10,r10 8205 add r14,r10 8206 mulx r9,r11,r11 8207 adc r15,r11 8208 adc r9,0 8209 imul rdx,r12 8210 vpsrld ymm8,ymm6,25 8211 vpslld ymm6,ymm6,32-25 8212 vpxor ymm6,ymm6,ymm8 8213 vpsrld ymm8,ymm5,25 8214 vpslld ymm5,ymm5,32-25 8215 vpxor ymm5,ymm5,ymm8 8216 vpsrld ymm8,ymm4,25 8217 vpslld ymm4,ymm4,32-25 8218 vpxor ymm4,ymm4,ymm8 8219 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 8220 vpalignr ymm7,ymm7,ymm7,4 8221 vpalignr ymm11,ymm11,ymm11,8 8222 vpalignr ymm15,ymm15,ymm15,12 8223 vpalignr ymm6,ymm6,ymm6,4 8224 vpalignr ymm10,ymm10,ymm10,8 8225 vpalignr ymm14,ymm14,ymm14,12 8226 vpalignr ymm5,ymm5,ymm5,4 8227 vpalignr ymm9,ymm9,ymm9,8 8228 vpalignr ymm13,ymm13,ymm13,12 8229 vpalignr ymm4,ymm4,ymm4,4 8230 add r15,rax 8231 adc r9,rdx 8232 vpalignr ymm8,ymm8,ymm8,8 8233 vpalignr ymm12,ymm12,ymm12,12 8234 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8235 vmovdqa ymm8,YMMWORD[$L$rol16] 8236 vpaddd ymm3,ymm3,ymm7 8237 vpaddd ymm2,ymm2,ymm6 8238 vpaddd ymm1,ymm1,ymm5 8239 vpaddd ymm0,ymm0,ymm4 8240 vpxor ymm15,ymm15,ymm3 8241 vpxor ymm14,ymm14,ymm2 8242 vpxor ymm13,ymm13,ymm1 8243 vpxor ymm12,ymm12,ymm0 8244 vpshufb ymm15,ymm15,ymm8 8245 vpshufb ymm14,ymm14,ymm8 8246 vpshufb ymm13,ymm13,ymm8 8247 vpshufb ymm12,ymm12,ymm8 8248 vpaddd ymm11,ymm11,ymm15 8249 vpaddd ymm10,ymm10,ymm14 8250 vpaddd ymm9,ymm9,ymm13 8251 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8252 mov r10,r13 8253 mov r11,r14 8254 mov r12,r15 8255 and r12,3 8256 mov r13,r15 8257 and r13,-4 8258 mov r14,r9 8259 shrd r15,r9,2 8260 shr r9,2 8261 add r15,r13 8262 adc r9,r14 8263 add r10,r15 8264 adc r11,r9 8265 adc r12,0 8266 vpxor ymm7,ymm7,ymm11 8267 vpxor ymm6,ymm6,ymm10 8268 vpxor ymm5,ymm5,ymm9 8269 vpxor ymm4,ymm4,ymm8 8270 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8271 vpsrld ymm8,ymm7,20 8272 vpslld ymm7,ymm7,32-20 8273 vpxor ymm7,ymm7,ymm8 8274 vpsrld ymm8,ymm6,20 8275 
; Second half of the round pair continues: the 16 bytes at [16+rdi] start being
; absorbed into r10:r11:r12 the same way (the multiply is completed and folded
; after this range), and the closing vpalignr by 12/8/4 bytes undoes the
; earlier 4/8/12 row rotation.
vpslld ymm6,ymm6,32-20 8276 vpxor ymm6,ymm6,ymm8 8277 vpsrld ymm8,ymm5,20 8278 vpslld ymm5,ymm5,32-20 8279 vpxor ymm5,ymm5,ymm8 8280 vpsrld ymm8,ymm4,20 8281 vpslld ymm4,ymm4,32-20 8282 vpxor ymm4,ymm4,ymm8 8283 vmovdqa ymm8,YMMWORD[$L$rol8] 8284 vpaddd ymm3,ymm3,ymm7 8285 vpaddd ymm2,ymm2,ymm6 8286 add r10,QWORD[((0+16))+rdi] 8287 adc r11,QWORD[((8+16))+rdi] 8288 adc r12,1 8289 vpaddd ymm1,ymm1,ymm5 8290 vpaddd ymm0,ymm0,ymm4 8291 vpxor ymm15,ymm15,ymm3 8292 vpxor ymm14,ymm14,ymm2 8293 vpxor ymm13,ymm13,ymm1 8294 vpxor ymm12,ymm12,ymm0 8295 vpshufb ymm15,ymm15,ymm8 8296 vpshufb ymm14,ymm14,ymm8 8297 vpshufb ymm13,ymm13,ymm8 8298 vpshufb ymm12,ymm12,ymm8 8299 vpaddd ymm11,ymm11,ymm15 8300 vpaddd ymm10,ymm10,ymm14 8301 vpaddd ymm9,ymm9,ymm13 8302 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8303 vpxor ymm7,ymm7,ymm11 8304 vpxor ymm6,ymm6,ymm10 8305 vpxor ymm5,ymm5,ymm9 8306 vpxor ymm4,ymm4,ymm8 8307 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8308 vpsrld ymm8,ymm7,25 8309 mov rdx,QWORD[((0+160+0))+rbp] 8310 mov r15,rdx 8311 mulx r14,r13,r10 8312 mulx rdx,rax,r11 8313 imul r15,r12 8314 add r14,rax 8315 adc r15,rdx 8316 vpslld ymm7,ymm7,32-25 8317 vpxor ymm7,ymm7,ymm8 8318 vpsrld ymm8,ymm6,25 8319 vpslld ymm6,ymm6,32-25 8320 vpxor ymm6,ymm6,ymm8 8321 vpsrld ymm8,ymm5,25 8322 vpslld ymm5,ymm5,32-25 8323 vpxor ymm5,ymm5,ymm8 8324 vpsrld ymm8,ymm4,25 8325 vpslld ymm4,ymm4,32-25 8326 vpxor ymm4,ymm4,ymm8 8327 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 8328 vpalignr ymm7,ymm7,ymm7,12 8329 vpalignr ymm11,ymm11,ymm11,8 8330 vpalignr ymm15,ymm15,ymm15,4 8331 vpalignr ymm6,ymm6,ymm6,12 8332 vpalignr ymm10,ymm10,ymm10,8 8333 vpalignr ymm14,ymm14,ymm14,4 8334 vpalignr ymm5,ymm5,ymm5,12 8335 vpalignr ymm9,ymm9,ymm9,8 8336 mov rdx,QWORD[((8+160+0))+rbp] 8337 mulx rax,r10,r10 8338 add r14,r10 8339 mulx r9,r11,r11 8340 adc r15,r11 8341 adc r9,0 8342 imul rdx,r12 8343 vpalignr ymm13,ymm13,ymm13,4 8344 vpalignr ymm4,ymm4,ymm4,12 8345 vpalignr ymm8,ymm8,ymm8,8 8346 vpalignr ymm12,ymm12,ymm12,4 8347 
; ---------------------------------------------------------------------------
; End of the 512-byte tail loop, then the 320-byte and 192-byte short paths.
;
; Loop tail of $L$seal_avx2_tail_512_*: finish the multiply (add r15,rax /
; adc r9,rdx), fold and accumulate into r10:r11:r12, advance rdi by 32, then
;     dec rcx / jg   -> $L$seal_avx2_tail_512_rounds_and_3xhash
;     dec r8  / jge  -> $L$seal_avx2_tail_512_rounds_and_2xhash
; Both branches use signed conditions. NOTE(review): the initial values of rcx
; and r8 are set before this range and are not visible here.
;
; After the loop every state gets $L$chacha20_consts, the rows saved at
; [160+64+rbp] and [160+96+rbp], and its own fourth row saved at
; [160+256 / 224 / 192 / 160 + rbp] added back in. vperm2i128 with 0x02 / 0x13
; regroups the 128-bit halves; 3 x 128 bytes read from rsi are XORed and
; written to rdi. The fourth state is left in ymm0, ymm4, ymm8, ymm12, and
; control goes to $L$seal_avx2_short_hash_remainder with rcx = 12*32,
; rsi += 384 and rbx -= 12*32.
;
; $L$seal_avx2_320:
;   Copies ymm0/ymm4/ymm8 into two more states, derives their fourth rows as
;   ymm12 + $L$avx2_inc and + 2*$L$avx2_inc, keeps ymm4/ymm8 in ymm7/ymm11 and
;   the three fourth rows at [160+160 / 192 / 224 + rbp]. The round pair runs
;   10 times (r10 is the counter). The saved inputs are then added back, the
;   low 128-bit halves of ymm0 and ymm4 are combined, masked with $L$clamp and
;   stored at [160+rbp] (the location the hashing code reads its multiplier
;   from), and the rest of the output is regrouped, in order, into
;   ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6 before jumping
;   to $L$seal_avx2_short.
;
; $L$seal_avx2_192:
;   Same scheme with two states; ymm2/ymm6/ymm10 and ymm11/ymm15 hold the
;   unmodified input rows. Its 10-iteration round loop begins at the end of
;   this range and continues below.
; ---------------------------------------------------------------------------
8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 add r15,rax 8364 adc r9,rdx 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 mov r10,r13 8386 mov r11,r14 8387 mov r12,r15 8388 and r12,3 8389 mov r13,r15 8390 and r13,-4 8391 mov r14,r9 8392 shrd r15,r9,2 8393 shr r9,2 8394 add r15,r13 8395 adc r9,r14 8396 add r10,r15 8397 adc r11,r9 8398 adc r12,0 8399 8400 lea rdi,[32+rdi] 8401 dec rcx 8402 jg NEAR $L$seal_avx2_tail_512_rounds_and_3xhash 8403 dec r8 8404 jge NEAR $L$seal_avx2_tail_512_rounds_and_2xhash 8405 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 8406 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 8407 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 8408 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 8409 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8410 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 8411 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 8412 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8413 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8414 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 8415 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 8416 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8417 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8418 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 8419 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 8420 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8421 8422 vmovdqa YMMWORD[(160+128)+rbp],ymm0 8423 vperm2i128 ymm0,ymm7,ymm3,0x02 8424 vperm2i128 ymm7,ymm7,ymm3,0x13 8425 vperm2i128 ymm3,ymm15,ymm11,0x02 8426 vperm2i128 ymm11,ymm15,ymm11,0x13 8427 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 8428 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 8429 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 8430 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 8431 vmovdqu YMMWORD[(0+0)+rdi],ymm0 8432 vmovdqu YMMWORD[(32+0)+rdi],ymm3 8433 vmovdqu YMMWORD[(64+0)+rdi],ymm7 8434 vmovdqu YMMWORD[(96+0)+rdi],ymm11 8435 8436 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 8437 vperm2i128 ymm3,ymm6,ymm2,0x02 8438 vperm2i128 
ymm6,ymm6,ymm2,0x13 8439 vperm2i128 ymm2,ymm14,ymm10,0x02 8440 vperm2i128 ymm10,ymm14,ymm10,0x13 8441 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 8442 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 8443 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 8444 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 8445 vmovdqu YMMWORD[(0+128)+rdi],ymm3 8446 vmovdqu YMMWORD[(32+128)+rdi],ymm2 8447 vmovdqu YMMWORD[(64+128)+rdi],ymm6 8448 vmovdqu YMMWORD[(96+128)+rdi],ymm10 8449 vperm2i128 ymm3,ymm5,ymm1,0x02 8450 vperm2i128 ymm5,ymm5,ymm1,0x13 8451 vperm2i128 ymm1,ymm13,ymm9,0x02 8452 vperm2i128 ymm9,ymm13,ymm9,0x13 8453 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 8454 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 8455 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 8456 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 8457 vmovdqu YMMWORD[(0+256)+rdi],ymm3 8458 vmovdqu YMMWORD[(32+256)+rdi],ymm1 8459 vmovdqu YMMWORD[(64+256)+rdi],ymm5 8460 vmovdqu YMMWORD[(96+256)+rdi],ymm9 8461 vperm2i128 ymm3,ymm4,ymm0,0x13 8462 vperm2i128 ymm0,ymm4,ymm0,0x02 8463 vperm2i128 ymm4,ymm12,ymm8,0x02 8464 vperm2i128 ymm12,ymm12,ymm8,0x13 8465 vmovdqa ymm8,ymm3 8466 8467 mov rcx,12*32 8468 lea rsi,[384+rsi] 8469 sub rbx,12*32 8470 jmp NEAR $L$seal_avx2_short_hash_remainder 8471 8472$L$seal_avx2_320: 8473 vmovdqa ymm1,ymm0 8474 vmovdqa ymm2,ymm0 8475 vmovdqa ymm5,ymm4 8476 vmovdqa ymm6,ymm4 8477 vmovdqa ymm9,ymm8 8478 vmovdqa ymm10,ymm8 8479 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 8480 vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] 8481 vmovdqa ymm7,ymm4 8482 vmovdqa ymm11,ymm8 8483 vmovdqa YMMWORD[(160+160)+rbp],ymm12 8484 vmovdqa YMMWORD[(160+192)+rbp],ymm13 8485 vmovdqa YMMWORD[(160+224)+rbp],ymm14 8486 mov r10,10 8487$L$seal_avx2_320_rounds: 8488 vpaddd ymm0,ymm0,ymm4 8489 vpxor ymm12,ymm12,ymm0 8490 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8491 vpaddd ymm8,ymm8,ymm12 8492 vpxor ymm4,ymm4,ymm8 8493 vpsrld ymm3,ymm4,20 8494 vpslld ymm4,ymm4,12 8495 vpxor ymm4,ymm4,ymm3 8496 vpaddd ymm0,ymm0,ymm4 8497 vpxor ymm12,ymm12,ymm0 8498 vpshufb 
ymm12,ymm12,YMMWORD[$L$rol8] 8499 vpaddd ymm8,ymm8,ymm12 8500 vpxor ymm4,ymm4,ymm8 8501 vpslld ymm3,ymm4,7 8502 vpsrld ymm4,ymm4,25 8503 vpxor ymm4,ymm4,ymm3 8504 vpalignr ymm12,ymm12,ymm12,12 8505 vpalignr ymm8,ymm8,ymm8,8 8506 vpalignr ymm4,ymm4,ymm4,4 8507 vpaddd ymm1,ymm1,ymm5 8508 vpxor ymm13,ymm13,ymm1 8509 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8510 vpaddd ymm9,ymm9,ymm13 8511 vpxor ymm5,ymm5,ymm9 8512 vpsrld ymm3,ymm5,20 8513 vpslld ymm5,ymm5,12 8514 vpxor ymm5,ymm5,ymm3 8515 vpaddd ymm1,ymm1,ymm5 8516 vpxor ymm13,ymm13,ymm1 8517 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8518 vpaddd ymm9,ymm9,ymm13 8519 vpxor ymm5,ymm5,ymm9 8520 vpslld ymm3,ymm5,7 8521 vpsrld ymm5,ymm5,25 8522 vpxor ymm5,ymm5,ymm3 8523 vpalignr ymm13,ymm13,ymm13,12 8524 vpalignr ymm9,ymm9,ymm9,8 8525 vpalignr ymm5,ymm5,ymm5,4 8526 vpaddd ymm2,ymm2,ymm6 8527 vpxor ymm14,ymm14,ymm2 8528 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 8529 vpaddd ymm10,ymm10,ymm14 8530 vpxor ymm6,ymm6,ymm10 8531 vpsrld ymm3,ymm6,20 8532 vpslld ymm6,ymm6,12 8533 vpxor ymm6,ymm6,ymm3 8534 vpaddd ymm2,ymm2,ymm6 8535 vpxor ymm14,ymm14,ymm2 8536 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8537 vpaddd ymm10,ymm10,ymm14 8538 vpxor ymm6,ymm6,ymm10 8539 vpslld ymm3,ymm6,7 8540 vpsrld ymm6,ymm6,25 8541 vpxor ymm6,ymm6,ymm3 8542 vpalignr ymm14,ymm14,ymm14,12 8543 vpalignr ymm10,ymm10,ymm10,8 8544 vpalignr ymm6,ymm6,ymm6,4 8545 vpaddd ymm0,ymm0,ymm4 8546 vpxor ymm12,ymm12,ymm0 8547 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8548 vpaddd ymm8,ymm8,ymm12 8549 vpxor ymm4,ymm4,ymm8 8550 vpsrld ymm3,ymm4,20 8551 vpslld ymm4,ymm4,12 8552 vpxor ymm4,ymm4,ymm3 8553 vpaddd ymm0,ymm0,ymm4 8554 vpxor ymm12,ymm12,ymm0 8555 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8556 vpaddd ymm8,ymm8,ymm12 8557 vpxor ymm4,ymm4,ymm8 8558 vpslld ymm3,ymm4,7 8559 vpsrld ymm4,ymm4,25 8560 vpxor ymm4,ymm4,ymm3 8561 vpalignr ymm12,ymm12,ymm12,4 8562 vpalignr ymm8,ymm8,ymm8,8 8563 vpalignr ymm4,ymm4,ymm4,12 8564 vpaddd ymm1,ymm1,ymm5 8565 vpxor ymm13,ymm13,ymm1 8566 vpshufb 
ymm13,ymm13,YMMWORD[$L$rol16] 8567 vpaddd ymm9,ymm9,ymm13 8568 vpxor ymm5,ymm5,ymm9 8569 vpsrld ymm3,ymm5,20 8570 vpslld ymm5,ymm5,12 8571 vpxor ymm5,ymm5,ymm3 8572 vpaddd ymm1,ymm1,ymm5 8573 vpxor ymm13,ymm13,ymm1 8574 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8575 vpaddd ymm9,ymm9,ymm13 8576 vpxor ymm5,ymm5,ymm9 8577 vpslld ymm3,ymm5,7 8578 vpsrld ymm5,ymm5,25 8579 vpxor ymm5,ymm5,ymm3 8580 vpalignr ymm13,ymm13,ymm13,4 8581 vpalignr ymm9,ymm9,ymm9,8 8582 vpalignr ymm5,ymm5,ymm5,12 8583 vpaddd ymm2,ymm2,ymm6 8584 vpxor ymm14,ymm14,ymm2 8585 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 8586 vpaddd ymm10,ymm10,ymm14 8587 vpxor ymm6,ymm6,ymm10 8588 vpsrld ymm3,ymm6,20 8589 vpslld ymm6,ymm6,12 8590 vpxor ymm6,ymm6,ymm3 8591 vpaddd ymm2,ymm2,ymm6 8592 vpxor ymm14,ymm14,ymm2 8593 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8594 vpaddd ymm10,ymm10,ymm14 8595 vpxor ymm6,ymm6,ymm10 8596 vpslld ymm3,ymm6,7 8597 vpsrld ymm6,ymm6,25 8598 vpxor ymm6,ymm6,ymm3 8599 vpalignr ymm14,ymm14,ymm14,4 8600 vpalignr ymm10,ymm10,ymm10,8 8601 vpalignr ymm6,ymm6,ymm6,12 8602 8603 dec r10 8604 jne NEAR $L$seal_avx2_320_rounds 8605 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8606 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8607 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8608 vpaddd ymm4,ymm4,ymm7 8609 vpaddd ymm5,ymm5,ymm7 8610 vpaddd ymm6,ymm6,ymm7 8611 vpaddd ymm8,ymm8,ymm11 8612 vpaddd ymm9,ymm9,ymm11 8613 vpaddd ymm10,ymm10,ymm11 8614 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8615 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8616 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8617 vperm2i128 ymm3,ymm4,ymm0,0x02 8618 8619 vpand ymm3,ymm3,YMMWORD[$L$clamp] 8620 vmovdqa YMMWORD[(160+0)+rbp],ymm3 8621 8622 vperm2i128 ymm0,ymm4,ymm0,0x13 8623 vperm2i128 ymm4,ymm12,ymm8,0x13 8624 vperm2i128 ymm8,ymm5,ymm1,0x02 8625 vperm2i128 ymm12,ymm13,ymm9,0x02 8626 vperm2i128 ymm1,ymm5,ymm1,0x13 8627 vperm2i128 ymm5,ymm13,ymm9,0x13 8628 vperm2i128 ymm9,ymm6,ymm2,0x02 8629 vperm2i128 ymm13,ymm14,ymm10,0x02 8630 vperm2i128 
ymm2,ymm6,ymm2,0x13 8631 vperm2i128 ymm6,ymm14,ymm10,0x13 8632 jmp NEAR $L$seal_avx2_short 8633 8634$L$seal_avx2_192: 8635 vmovdqa ymm1,ymm0 8636 vmovdqa ymm2,ymm0 8637 vmovdqa ymm5,ymm4 8638 vmovdqa ymm6,ymm4 8639 vmovdqa ymm9,ymm8 8640 vmovdqa ymm10,ymm8 8641 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 8642 vmovdqa ymm11,ymm12 8643 vmovdqa ymm15,ymm13 8644 mov r10,10 8645$L$seal_avx2_192_rounds: 8646 vpaddd ymm0,ymm0,ymm4 8647 vpxor ymm12,ymm12,ymm0 8648 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8649 vpaddd ymm8,ymm8,ymm12 8650 vpxor ymm4,ymm4,ymm8 8651 vpsrld ymm3,ymm4,20 8652 vpslld ymm4,ymm4,12 8653 vpxor ymm4,ymm4,ymm3 8654 vpaddd ymm0,ymm0,ymm4 8655 vpxor ymm12,ymm12,ymm0 8656 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8657 vpaddd ymm8,ymm8,ymm12 8658 vpxor ymm4,ymm4,ymm8 8659 vpslld ymm3,ymm4,7 8660 vpsrld ymm4,ymm4,25 8661 vpxor ymm4,ymm4,ymm3 8662 vpalignr ymm12,ymm12,ymm12,12 8663 vpalignr ymm8,ymm8,ymm8,8 8664 vpalignr ymm4,ymm4,ymm4,4 8665 vpaddd ymm1,ymm1,ymm5 8666 vpxor ymm13,ymm13,ymm1 8667 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8668 vpaddd ymm9,ymm9,ymm13 8669 vpxor ymm5,ymm5,ymm9 8670 vpsrld ymm3,ymm5,20 8671 vpslld ymm5,ymm5,12 8672 vpxor ymm5,ymm5,ymm3 8673 vpaddd ymm1,ymm1,ymm5 8674 vpxor ymm13,ymm13,ymm1 8675 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8676 vpaddd ymm9,ymm9,ymm13 8677 vpxor ymm5,ymm5,ymm9 8678 vpslld ymm3,ymm5,7 8679 vpsrld ymm5,ymm5,25 8680 vpxor ymm5,ymm5,ymm3 8681 vpalignr ymm13,ymm13,ymm13,12 8682 vpalignr ymm9,ymm9,ymm9,8 8683 vpalignr ymm5,ymm5,ymm5,4 8684 vpaddd ymm0,ymm0,ymm4 8685 vpxor ymm12,ymm12,ymm0 8686 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8687 vpaddd ymm8,ymm8,ymm12 8688 vpxor ymm4,ymm4,ymm8 8689 vpsrld ymm3,ymm4,20 8690 vpslld ymm4,ymm4,12 8691 vpxor ymm4,ymm4,ymm3 8692 vpaddd ymm0,ymm0,ymm4 8693 vpxor ymm12,ymm12,ymm0 8694 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8695 vpaddd ymm8,ymm8,ymm12 8696 vpxor ymm4,ymm4,ymm8 8697 vpslld ymm3,ymm4,7 8698 vpsrld ymm4,ymm4,25 8699 vpxor ymm4,ymm4,ymm3 8700 vpalignr ymm12,ymm12,ymm12,4 8701 
; ---------------------------------------------------------------------------
; Rest of $L$seal_avx2_192, then the shared short path.
;
; Remainder of the $L$seal_avx2_192_rounds body (second half of the round pair
; for both states); `dec r10 / jne` repeats it until r10 reaches zero.
; Afterwards the unmodified copies in ymm2, ymm6, ymm10, ymm11 and ymm15 are
; added back, the low 128-bit halves of ymm0 and ymm4 are combined, masked
; with $L$clamp and stored at [160+rbp], and the remaining output is regrouped
; into ymm0, ymm4, ymm8, ymm12, ymm1, ymm5.
;
; $L$seal_avx2_short:
;   `mov r8,r8` has no effect as written. poly_hash_ad_internal is called next
;   (it takes its data pointer in rcx and its length in r8), then rcx is
;   cleared.
; $L$seal_avx2_short_hash_remainder:
;   while rcx >= 16: hash 16 bytes at [rdi]; rcx -= 16; rdi += 16.
; $L$seal_avx2_short_loop:
;   while rbx >= 32: XOR 32 bytes at [rsi] with ymm0, store them at [rdi],
;   rsi += 32, hash both 16-byte halves just written, rdi += 32. The register
;   shuffle that brings the next 32 bytes into ymm0 follows this range.
;   With fewer than 32 bytes left, control goes to $L$seal_avx2_short_tail.
;
; The hash steps here use the one-operand `mul` (rax/rdx) rather than mulx;
; the add / multiply-by-[160+rbp],[168+rbp] / fold structure is otherwise the
; same as in the tail loops.
; ---------------------------------------------------------------------------
vpalignr ymm8,ymm8,ymm8,8 8702 vpalignr ymm4,ymm4,ymm4,12 8703 vpaddd ymm1,ymm1,ymm5 8704 vpxor ymm13,ymm13,ymm1 8705 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8706 vpaddd ymm9,ymm9,ymm13 8707 vpxor ymm5,ymm5,ymm9 8708 vpsrld ymm3,ymm5,20 8709 vpslld ymm5,ymm5,12 8710 vpxor ymm5,ymm5,ymm3 8711 vpaddd ymm1,ymm1,ymm5 8712 vpxor ymm13,ymm13,ymm1 8713 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8714 vpaddd ymm9,ymm9,ymm13 8715 vpxor ymm5,ymm5,ymm9 8716 vpslld ymm3,ymm5,7 8717 vpsrld ymm5,ymm5,25 8718 vpxor ymm5,ymm5,ymm3 8719 vpalignr ymm13,ymm13,ymm13,4 8720 vpalignr ymm9,ymm9,ymm9,8 8721 vpalignr ymm5,ymm5,ymm5,12 8722 8723 dec r10 8724 jne NEAR $L$seal_avx2_192_rounds 8725 vpaddd ymm0,ymm0,ymm2 8726 vpaddd ymm1,ymm1,ymm2 8727 vpaddd ymm4,ymm4,ymm6 8728 vpaddd ymm5,ymm5,ymm6 8729 vpaddd ymm8,ymm8,ymm10 8730 vpaddd ymm9,ymm9,ymm10 8731 vpaddd ymm12,ymm12,ymm11 8732 vpaddd ymm13,ymm13,ymm15 8733 vperm2i128 ymm3,ymm4,ymm0,0x02 8734 8735 vpand ymm3,ymm3,YMMWORD[$L$clamp] 8736 vmovdqa YMMWORD[(160+0)+rbp],ymm3 8737 8738 vperm2i128 ymm0,ymm4,ymm0,0x13 8739 vperm2i128 ymm4,ymm12,ymm8,0x13 8740 vperm2i128 ymm8,ymm5,ymm1,0x02 8741 vperm2i128 ymm12,ymm13,ymm9,0x02 8742 vperm2i128 ymm1,ymm5,ymm1,0x13 8743 vperm2i128 ymm5,ymm13,ymm9,0x13 8744$L$seal_avx2_short: 8745 mov r8,r8 8746 call poly_hash_ad_internal 8747 xor rcx,rcx 8748$L$seal_avx2_short_hash_remainder: 8749 cmp rcx,16 8750 jb NEAR $L$seal_avx2_short_loop 8751 add r10,QWORD[((0+0))+rdi] 8752 adc r11,QWORD[((8+0))+rdi] 8753 adc r12,1 8754 mov rax,QWORD[((0+160+0))+rbp] 8755 mov r15,rax 8756 mul r10 8757 mov r13,rax 8758 mov r14,rdx 8759 mov rax,QWORD[((0+160+0))+rbp] 8760 mul r11 8761 imul r15,r12 8762 add r14,rax 8763 adc r15,rdx 8764 mov rax,QWORD[((8+160+0))+rbp] 8765 mov r9,rax 8766 mul r10 8767 add r14,rax 8768 adc rdx,0 8769 mov r10,rdx 8770 mov rax,QWORD[((8+160+0))+rbp] 8771 mul r11 8772 add r15,rax 8773 adc rdx,0 8774 imul r9,r12 8775 add r15,r10 8776 adc r9,rdx 8777 mov r10,r13 8778 mov r11,r14 8779 mov r12,r15 8780 and 
r12,3 8781 mov r13,r15 8782 and r13,-4 8783 mov r14,r9 8784 shrd r15,r9,2 8785 shr r9,2 8786 add r15,r13 8787 adc r9,r14 8788 add r10,r15 8789 adc r11,r9 8790 adc r12,0 8791 8792 sub rcx,16 8793 add rdi,16 8794 jmp NEAR $L$seal_avx2_short_hash_remainder 8795$L$seal_avx2_short_loop: 8796 cmp rbx,32 8797 jb NEAR $L$seal_avx2_short_tail 8798 sub rbx,32 8799 8800 vpxor ymm0,ymm0,YMMWORD[rsi] 8801 vmovdqu YMMWORD[rdi],ymm0 8802 lea rsi,[32+rsi] 8803 8804 add r10,QWORD[((0+0))+rdi] 8805 adc r11,QWORD[((8+0))+rdi] 8806 adc r12,1 8807 mov rax,QWORD[((0+160+0))+rbp] 8808 mov r15,rax 8809 mul r10 8810 mov r13,rax 8811 mov r14,rdx 8812 mov rax,QWORD[((0+160+0))+rbp] 8813 mul r11 8814 imul r15,r12 8815 add r14,rax 8816 adc r15,rdx 8817 mov rax,QWORD[((8+160+0))+rbp] 8818 mov r9,rax 8819 mul r10 8820 add r14,rax 8821 adc rdx,0 8822 mov r10,rdx 8823 mov rax,QWORD[((8+160+0))+rbp] 8824 mul r11 8825 add r15,rax 8826 adc rdx,0 8827 imul r9,r12 8828 add r15,r10 8829 adc r9,rdx 8830 mov r10,r13 8831 mov r11,r14 8832 mov r12,r15 8833 and r12,3 8834 mov r13,r15 8835 and r13,-4 8836 mov r14,r9 8837 shrd r15,r9,2 8838 shr r9,2 8839 add r15,r13 8840 adc r9,r14 8841 add r10,r15 8842 adc r11,r9 8843 adc r12,0 8844 add r10,QWORD[((0+16))+rdi] 8845 adc r11,QWORD[((8+16))+rdi] 8846 adc r12,1 8847 mov rax,QWORD[((0+160+0))+rbp] 8848 mov r15,rax 8849 mul r10 8850 mov r13,rax 8851 mov r14,rdx 8852 mov rax,QWORD[((0+160+0))+rbp] 8853 mul r11 8854 imul r15,r12 8855 add r14,rax 8856 adc r15,rdx 8857 mov rax,QWORD[((8+160+0))+rbp] 8858 mov r9,rax 8859 mul r10 8860 add r14,rax 8861 adc rdx,0 8862 mov r10,rdx 8863 mov rax,QWORD[((8+160+0))+rbp] 8864 mul r11 8865 add r15,rax 8866 adc rdx,0 8867 imul r9,r12 8868 add r15,r10 8869 adc r9,rdx 8870 mov r10,r13 8871 mov r11,r14 8872 mov r12,r15 8873 and r12,3 8874 mov r13,r15 8875 and r13,-4 8876 mov r14,r9 8877 shrd r15,r9,2 8878 shr r9,2 8879 add r15,r13 8880 adc r9,r14 8881 add r10,r15 8882 adc r11,r9 8883 adc r12,0 8884 8885 lea rdi,[32+rdi] 8886 8887 
; ---------------------------------------------------------------------------
; End of $L$seal_avx2_short_loop, the 16-byte tail, and the exit.
;
; The vmovdqa chain shifts the queued output forward by one register
; (ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <- ymm9 <- ymm13 <- ymm2 <- ymm6)
; and jumps back to $L$seal_avx2_short_loop.
;
; $L$seal_avx2_short_tail:
;   If rbx >= 16: rbx -= 16, XOR 16 bytes at [rsi] with xmm0 into xmm3, store
;   them at [rdi], advance rsi by 16, hash the 16 bytes just written, advance
;   rdi by 16, and move the upper 128 bits of ymm0 down into xmm0.
;   Otherwise fall straight through to the exit.
; $L$seal_avx2_exit:
;   vzeroupper, then continue at $L$seal_sse_tail_16 (defined outside this
;   range).
;
; The closing %else / %endif pairs with `%ifidn __OUTPUT_FORMAT__, win64` at
; the top of the file: for any other output format the file assembles to a
; lone `ret`, as a workaround for the NASM bug linked in the comment.
; ---------------------------------------------------------------------------
vmovdqa ymm0,ymm4 8888 vmovdqa ymm4,ymm8 8889 vmovdqa ymm8,ymm12 8890 vmovdqa ymm12,ymm1 8891 vmovdqa ymm1,ymm5 8892 vmovdqa ymm5,ymm9 8893 vmovdqa ymm9,ymm13 8894 vmovdqa ymm13,ymm2 8895 vmovdqa ymm2,ymm6 8896 jmp NEAR $L$seal_avx2_short_loop 8897$L$seal_avx2_short_tail: 8898 cmp rbx,16 8899 jb NEAR $L$seal_avx2_exit 8900 sub rbx,16 8901 vpxor xmm3,xmm0,XMMWORD[rsi] 8902 vmovdqu XMMWORD[rdi],xmm3 8903 lea rsi,[16+rsi] 8904 add r10,QWORD[((0+0))+rdi] 8905 adc r11,QWORD[((8+0))+rdi] 8906 adc r12,1 8907 mov rax,QWORD[((0+160+0))+rbp] 8908 mov r15,rax 8909 mul r10 8910 mov r13,rax 8911 mov r14,rdx 8912 mov rax,QWORD[((0+160+0))+rbp] 8913 mul r11 8914 imul r15,r12 8915 add r14,rax 8916 adc r15,rdx 8917 mov rax,QWORD[((8+160+0))+rbp] 8918 mov r9,rax 8919 mul r10 8920 add r14,rax 8921 adc rdx,0 8922 mov r10,rdx 8923 mov rax,QWORD[((8+160+0))+rbp] 8924 mul r11 8925 add r15,rax 8926 adc rdx,0 8927 imul r9,r12 8928 add r15,r10 8929 adc r9,rdx 8930 mov r10,r13 8931 mov r11,r14 8932 mov r12,r15 8933 and r12,3 8934 mov r13,r15 8935 and r13,-4 8936 mov r14,r9 8937 shrd r15,r9,2 8938 shr r9,2 8939 add r15,r13 8940 adc r9,r14 8941 add r10,r15 8942 adc r11,r9 8943 adc r12,0 8944 8945 lea rdi,[16+rdi] 8946 vextracti128 xmm0,ymm0,1 8947$L$seal_avx2_exit: 8948 vzeroupper 8949 jmp NEAR $L$seal_sse_tail_16 8950 8951 8952%else 8953; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 8954ret 8955%endif 8956