1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#include <openssl/asm_base.h> 5 6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) 7.text 8 9 10chacha20_poly1305_constants: 11 12.section __DATA,__const 13.p2align 6 14L$chacha20_consts: 15.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 16.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 17L$rol8: 18.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 19.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 20L$rol16: 21.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 22.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 23L$avx2_init: 24.long 0,0,0,0 25L$sse_inc: 26.long 1,0,0,0 27L$avx2_inc: 28.long 2,0,0,0,2,0,0,0 29L$clamp: 30.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC 31.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF 32.p2align 4 33L$and_masks: 34.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 35.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 36.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 37.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 38.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 39.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 40.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 41.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 42.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 43.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 44.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 45.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 46.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 47.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 48.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 49.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff 50.text 51 52 53.p2align 6 54poly_hash_ad_internal: 55 56 57 xorq %r10,%r10 58 xorq %r11,%r11 59 xorq %r12,%r12 60 cmpq $13,%r8 61 jne L$hash_ad_loop 62L$poly_fast_tls_ad: 63 64 movq (%rcx),%r10 65 movq 5(%rcx),%r11 66 shrq $24,%r11 67 movq $1,%r12 68 movq 0+0+0(%rbp),%rax 69 movq %rax,%r15 70 mulq %r10 71 movq %rax,%r13 72 movq %rdx,%r14 73 movq 0+0+0(%rbp),%rax 74 mulq %r11 75 imulq %r12,%r15 76 addq %rax,%r14 77 adcq %rdx,%r15 78 movq 8+0+0(%rbp),%rax 79 movq %rax,%r9 80 mulq %r10 81 addq %rax,%r14 82 adcq $0,%rdx 83 movq %rdx,%r10 84 movq 8+0+0(%rbp),%rax 85 mulq %r11 86 addq %rax,%r15 87 adcq $0,%rdx 88 imulq %r12,%r9 89 addq %r10,%r15 90 adcq %rdx,%r9 91 movq %r13,%r10 92 movq %r14,%r11 93 movq %r15,%r12 94 andq $3,%r12 95 movq %r15,%r13 96 andq $-4,%r13 97 movq %r9,%r14 98 shrdq $2,%r9,%r15 99 shrq $2,%r9 100 addq %r13,%r15 101 adcq %r14,%r9 102 addq %r15,%r10 103 adcq %r9,%r11 104 adcq $0,%r12 105 106 ret 107L$hash_ad_loop: 108 109 cmpq $16,%r8 110 jb L$hash_ad_tail 111 addq 0+0(%rcx),%r10 112 adcq 8+0(%rcx),%r11 113 adcq $1,%r12 114 movq 0+0+0(%rbp),%rax 115 movq %rax,%r15 116 mulq %r10 117 movq %rax,%r13 118 movq %rdx,%r14 119 movq 0+0+0(%rbp),%rax 120 mulq %r11 121 imulq %r12,%r15 122 
addq %rax,%r14 123 adcq %rdx,%r15 124 movq 8+0+0(%rbp),%rax 125 movq %rax,%r9 126 mulq %r10 127 addq %rax,%r14 128 adcq $0,%rdx 129 movq %rdx,%r10 130 movq 8+0+0(%rbp),%rax 131 mulq %r11 132 addq %rax,%r15 133 adcq $0,%rdx 134 imulq %r12,%r9 135 addq %r10,%r15 136 adcq %rdx,%r9 137 movq %r13,%r10 138 movq %r14,%r11 139 movq %r15,%r12 140 andq $3,%r12 141 movq %r15,%r13 142 andq $-4,%r13 143 movq %r9,%r14 144 shrdq $2,%r9,%r15 145 shrq $2,%r9 146 addq %r13,%r15 147 adcq %r14,%r9 148 addq %r15,%r10 149 adcq %r9,%r11 150 adcq $0,%r12 151 152 leaq 16(%rcx),%rcx 153 subq $16,%r8 154 jmp L$hash_ad_loop 155L$hash_ad_tail: 156 cmpq $0,%r8 157 je L$hash_ad_done 158 159 xorq %r13,%r13 160 xorq %r14,%r14 161 xorq %r15,%r15 162 addq %r8,%rcx 163L$hash_ad_tail_loop: 164 shldq $8,%r13,%r14 165 shlq $8,%r13 166 movzbq -1(%rcx),%r15 167 xorq %r15,%r13 168 decq %rcx 169 decq %r8 170 jne L$hash_ad_tail_loop 171 172 addq %r13,%r10 173 adcq %r14,%r11 174 adcq $1,%r12 175 movq 0+0+0(%rbp),%rax 176 movq %rax,%r15 177 mulq %r10 178 movq %rax,%r13 179 movq %rdx,%r14 180 movq 0+0+0(%rbp),%rax 181 mulq %r11 182 imulq %r12,%r15 183 addq %rax,%r14 184 adcq %rdx,%r15 185 movq 8+0+0(%rbp),%rax 186 movq %rax,%r9 187 mulq %r10 188 addq %rax,%r14 189 adcq $0,%rdx 190 movq %rdx,%r10 191 movq 8+0+0(%rbp),%rax 192 mulq %r11 193 addq %rax,%r15 194 adcq $0,%rdx 195 imulq %r12,%r9 196 addq %r10,%r15 197 adcq %rdx,%r9 198 movq %r13,%r10 199 movq %r14,%r11 200 movq %r15,%r12 201 andq $3,%r12 202 movq %r15,%r13 203 andq $-4,%r13 204 movq %r9,%r14 205 shrdq $2,%r9,%r15 206 shrq $2,%r9 207 addq %r13,%r15 208 adcq %r14,%r9 209 addq %r15,%r10 210 adcq %r9,%r11 211 adcq $0,%r12 212 213 214L$hash_ad_done: 215 ret 216 217 218 219.globl _chacha20_poly1305_open 220.private_extern _chacha20_poly1305_open 221 222.p2align 6 223_chacha20_poly1305_open: 224 225_CET_ENDBR 226 pushq %rbp 227 228 pushq %rbx 229 230 pushq %r12 231 232 pushq %r13 233 234 pushq %r14 235 236 pushq %r15 237 238 239 240 pushq %r9 241 242 subq $288 + 0 + 32,%rsp 243 244 245 leaq 32(%rsp),%rbp 246 andq $-32,%rbp 247 248 movq %rdx,%rbx 249 movq %r8,0+0+32(%rbp) 250 movq %rbx,8+0+32(%rbp) 251 252 movl _OPENSSL_ia32cap_P+8(%rip),%eax 253 andl $288,%eax 254 xorl $288,%eax 255 jz chacha20_poly1305_open_avx2 256 257 cmpq $128,%rbx 258 jbe L$open_sse_128 259 260 movdqa L$chacha20_consts(%rip),%xmm0 261 movdqu 0(%r9),%xmm4 262 movdqu 16(%r9),%xmm8 263 movdqu 32(%r9),%xmm12 264 265 movdqa %xmm12,%xmm7 266 267 movdqa %xmm4,0+48(%rbp) 268 movdqa %xmm8,0+64(%rbp) 269 movdqa %xmm12,0+96(%rbp) 270 movq $10,%r10 271L$open_sse_init_rounds: 272 paddd %xmm4,%xmm0 273 pxor %xmm0,%xmm12 274 pshufb L$rol16(%rip),%xmm12 275 paddd %xmm12,%xmm8 276 pxor %xmm8,%xmm4 277 movdqa %xmm4,%xmm3 278 pslld $12,%xmm3 279 psrld $20,%xmm4 280 pxor %xmm3,%xmm4 281 paddd %xmm4,%xmm0 282 pxor %xmm0,%xmm12 283 pshufb L$rol8(%rip),%xmm12 284 paddd %xmm12,%xmm8 285 pxor %xmm8,%xmm4 286 movdqa %xmm4,%xmm3 287 pslld $7,%xmm3 288 psrld $25,%xmm4 289 pxor %xmm3,%xmm4 290.byte 102,15,58,15,228,4 291.byte 102,69,15,58,15,192,8 292.byte 102,69,15,58,15,228,12 293 paddd %xmm4,%xmm0 294 pxor %xmm0,%xmm12 295 pshufb L$rol16(%rip),%xmm12 296 paddd %xmm12,%xmm8 297 pxor %xmm8,%xmm4 298 movdqa %xmm4,%xmm3 299 pslld $12,%xmm3 300 psrld $20,%xmm4 301 pxor %xmm3,%xmm4 302 paddd %xmm4,%xmm0 303 pxor %xmm0,%xmm12 304 pshufb L$rol8(%rip),%xmm12 305 paddd %xmm12,%xmm8 306 pxor %xmm8,%xmm4 307 movdqa %xmm4,%xmm3 308 pslld $7,%xmm3 309 psrld $25,%xmm4 310 pxor %xmm3,%xmm4 311.byte 102,15,58,15,228,12 312.byte 102,69,15,58,15,192,8 
313.byte 102,69,15,58,15,228,4 314 315 decq %r10 316 jne L$open_sse_init_rounds 317 318 paddd L$chacha20_consts(%rip),%xmm0 319 paddd 0+48(%rbp),%xmm4 320 321 pand L$clamp(%rip),%xmm0 322 movdqa %xmm0,0+0(%rbp) 323 movdqa %xmm4,0+16(%rbp) 324 325 movq %r8,%r8 326 call poly_hash_ad_internal 327L$open_sse_main_loop: 328 cmpq $256,%rbx 329 jb L$open_sse_tail 330 331 movdqa L$chacha20_consts(%rip),%xmm0 332 movdqa 0+48(%rbp),%xmm4 333 movdqa 0+64(%rbp),%xmm8 334 movdqa %xmm0,%xmm1 335 movdqa %xmm4,%xmm5 336 movdqa %xmm8,%xmm9 337 movdqa %xmm0,%xmm2 338 movdqa %xmm4,%xmm6 339 movdqa %xmm8,%xmm10 340 movdqa %xmm0,%xmm3 341 movdqa %xmm4,%xmm7 342 movdqa %xmm8,%xmm11 343 movdqa 0+96(%rbp),%xmm15 344 paddd L$sse_inc(%rip),%xmm15 345 movdqa %xmm15,%xmm14 346 paddd L$sse_inc(%rip),%xmm14 347 movdqa %xmm14,%xmm13 348 paddd L$sse_inc(%rip),%xmm13 349 movdqa %xmm13,%xmm12 350 paddd L$sse_inc(%rip),%xmm12 351 movdqa %xmm12,0+96(%rbp) 352 movdqa %xmm13,0+112(%rbp) 353 movdqa %xmm14,0+128(%rbp) 354 movdqa %xmm15,0+144(%rbp) 355 356 357 358 movq $4,%rcx 359 movq %rsi,%r8 360L$open_sse_main_loop_rounds: 361 movdqa %xmm8,0+80(%rbp) 362 movdqa L$rol16(%rip),%xmm8 363 paddd %xmm7,%xmm3 364 paddd %xmm6,%xmm2 365 paddd %xmm5,%xmm1 366 paddd %xmm4,%xmm0 367 pxor %xmm3,%xmm15 368 pxor %xmm2,%xmm14 369 pxor %xmm1,%xmm13 370 pxor %xmm0,%xmm12 371.byte 102,69,15,56,0,248 372.byte 102,69,15,56,0,240 373.byte 102,69,15,56,0,232 374.byte 102,69,15,56,0,224 375 movdqa 0+80(%rbp),%xmm8 376 paddd %xmm15,%xmm11 377 paddd %xmm14,%xmm10 378 paddd %xmm13,%xmm9 379 paddd %xmm12,%xmm8 380 pxor %xmm11,%xmm7 381 addq 0+0(%r8),%r10 382 adcq 8+0(%r8),%r11 383 adcq $1,%r12 384 385 leaq 16(%r8),%r8 386 pxor %xmm10,%xmm6 387 pxor %xmm9,%xmm5 388 pxor %xmm8,%xmm4 389 movdqa %xmm8,0+80(%rbp) 390 movdqa %xmm7,%xmm8 391 psrld $20,%xmm8 392 pslld $32-20,%xmm7 393 pxor %xmm8,%xmm7 394 movdqa %xmm6,%xmm8 395 psrld $20,%xmm8 396 pslld $32-20,%xmm6 397 pxor %xmm8,%xmm6 398 movdqa %xmm5,%xmm8 399 psrld $20,%xmm8 400 pslld $32-20,%xmm5 401 pxor %xmm8,%xmm5 402 movdqa %xmm4,%xmm8 403 psrld $20,%xmm8 404 pslld $32-20,%xmm4 405 pxor %xmm8,%xmm4 406 movq 0+0+0(%rbp),%rax 407 movq %rax,%r15 408 mulq %r10 409 movq %rax,%r13 410 movq %rdx,%r14 411 movq 0+0+0(%rbp),%rax 412 mulq %r11 413 imulq %r12,%r15 414 addq %rax,%r14 415 adcq %rdx,%r15 416 movdqa L$rol8(%rip),%xmm8 417 paddd %xmm7,%xmm3 418 paddd %xmm6,%xmm2 419 paddd %xmm5,%xmm1 420 paddd %xmm4,%xmm0 421 pxor %xmm3,%xmm15 422 pxor %xmm2,%xmm14 423 pxor %xmm1,%xmm13 424 pxor %xmm0,%xmm12 425.byte 102,69,15,56,0,248 426.byte 102,69,15,56,0,240 427.byte 102,69,15,56,0,232 428.byte 102,69,15,56,0,224 429 movdqa 0+80(%rbp),%xmm8 430 paddd %xmm15,%xmm11 431 paddd %xmm14,%xmm10 432 paddd %xmm13,%xmm9 433 paddd %xmm12,%xmm8 434 pxor %xmm11,%xmm7 435 pxor %xmm10,%xmm6 436 movq 8+0+0(%rbp),%rax 437 movq %rax,%r9 438 mulq %r10 439 addq %rax,%r14 440 adcq $0,%rdx 441 movq %rdx,%r10 442 movq 8+0+0(%rbp),%rax 443 mulq %r11 444 addq %rax,%r15 445 adcq $0,%rdx 446 pxor %xmm9,%xmm5 447 pxor %xmm8,%xmm4 448 movdqa %xmm8,0+80(%rbp) 449 movdqa %xmm7,%xmm8 450 psrld $25,%xmm8 451 pslld $32-25,%xmm7 452 pxor %xmm8,%xmm7 453 movdqa %xmm6,%xmm8 454 psrld $25,%xmm8 455 pslld $32-25,%xmm6 456 pxor %xmm8,%xmm6 457 movdqa %xmm5,%xmm8 458 psrld $25,%xmm8 459 pslld $32-25,%xmm5 460 pxor %xmm8,%xmm5 461 movdqa %xmm4,%xmm8 462 psrld $25,%xmm8 463 pslld $32-25,%xmm4 464 pxor %xmm8,%xmm4 465 movdqa 0+80(%rbp),%xmm8 466 imulq %r12,%r9 467 addq %r10,%r15 468 adcq %rdx,%r9 469.byte 102,15,58,15,255,4 470.byte 102,69,15,58,15,219,8 
471.byte 102,69,15,58,15,255,12 472.byte 102,15,58,15,246,4 473.byte 102,69,15,58,15,210,8 474.byte 102,69,15,58,15,246,12 475.byte 102,15,58,15,237,4 476.byte 102,69,15,58,15,201,8 477.byte 102,69,15,58,15,237,12 478.byte 102,15,58,15,228,4 479.byte 102,69,15,58,15,192,8 480.byte 102,69,15,58,15,228,12 481 movdqa %xmm8,0+80(%rbp) 482 movdqa L$rol16(%rip),%xmm8 483 paddd %xmm7,%xmm3 484 paddd %xmm6,%xmm2 485 paddd %xmm5,%xmm1 486 paddd %xmm4,%xmm0 487 pxor %xmm3,%xmm15 488 pxor %xmm2,%xmm14 489 movq %r13,%r10 490 movq %r14,%r11 491 movq %r15,%r12 492 andq $3,%r12 493 movq %r15,%r13 494 andq $-4,%r13 495 movq %r9,%r14 496 shrdq $2,%r9,%r15 497 shrq $2,%r9 498 addq %r13,%r15 499 adcq %r14,%r9 500 addq %r15,%r10 501 adcq %r9,%r11 502 adcq $0,%r12 503 pxor %xmm1,%xmm13 504 pxor %xmm0,%xmm12 505.byte 102,69,15,56,0,248 506.byte 102,69,15,56,0,240 507.byte 102,69,15,56,0,232 508.byte 102,69,15,56,0,224 509 movdqa 0+80(%rbp),%xmm8 510 paddd %xmm15,%xmm11 511 paddd %xmm14,%xmm10 512 paddd %xmm13,%xmm9 513 paddd %xmm12,%xmm8 514 pxor %xmm11,%xmm7 515 pxor %xmm10,%xmm6 516 pxor %xmm9,%xmm5 517 pxor %xmm8,%xmm4 518 movdqa %xmm8,0+80(%rbp) 519 movdqa %xmm7,%xmm8 520 psrld $20,%xmm8 521 pslld $32-20,%xmm7 522 pxor %xmm8,%xmm7 523 movdqa %xmm6,%xmm8 524 psrld $20,%xmm8 525 pslld $32-20,%xmm6 526 pxor %xmm8,%xmm6 527 movdqa %xmm5,%xmm8 528 psrld $20,%xmm8 529 pslld $32-20,%xmm5 530 pxor %xmm8,%xmm5 531 movdqa %xmm4,%xmm8 532 psrld $20,%xmm8 533 pslld $32-20,%xmm4 534 pxor %xmm8,%xmm4 535 movdqa L$rol8(%rip),%xmm8 536 paddd %xmm7,%xmm3 537 paddd %xmm6,%xmm2 538 paddd %xmm5,%xmm1 539 paddd %xmm4,%xmm0 540 pxor %xmm3,%xmm15 541 pxor %xmm2,%xmm14 542 pxor %xmm1,%xmm13 543 pxor %xmm0,%xmm12 544.byte 102,69,15,56,0,248 545.byte 102,69,15,56,0,240 546.byte 102,69,15,56,0,232 547.byte 102,69,15,56,0,224 548 movdqa 0+80(%rbp),%xmm8 549 paddd %xmm15,%xmm11 550 paddd %xmm14,%xmm10 551 paddd %xmm13,%xmm9 552 paddd %xmm12,%xmm8 553 pxor %xmm11,%xmm7 554 pxor %xmm10,%xmm6 555 pxor %xmm9,%xmm5 556 pxor %xmm8,%xmm4 557 movdqa %xmm8,0+80(%rbp) 558 movdqa %xmm7,%xmm8 559 psrld $25,%xmm8 560 pslld $32-25,%xmm7 561 pxor %xmm8,%xmm7 562 movdqa %xmm6,%xmm8 563 psrld $25,%xmm8 564 pslld $32-25,%xmm6 565 pxor %xmm8,%xmm6 566 movdqa %xmm5,%xmm8 567 psrld $25,%xmm8 568 pslld $32-25,%xmm5 569 pxor %xmm8,%xmm5 570 movdqa %xmm4,%xmm8 571 psrld $25,%xmm8 572 pslld $32-25,%xmm4 573 pxor %xmm8,%xmm4 574 movdqa 0+80(%rbp),%xmm8 575.byte 102,15,58,15,255,12 576.byte 102,69,15,58,15,219,8 577.byte 102,69,15,58,15,255,4 578.byte 102,15,58,15,246,12 579.byte 102,69,15,58,15,210,8 580.byte 102,69,15,58,15,246,4 581.byte 102,15,58,15,237,12 582.byte 102,69,15,58,15,201,8 583.byte 102,69,15,58,15,237,4 584.byte 102,15,58,15,228,12 585.byte 102,69,15,58,15,192,8 586.byte 102,69,15,58,15,228,4 587 588 decq %rcx 589 jge L$open_sse_main_loop_rounds 590 addq 0+0(%r8),%r10 591 adcq 8+0(%r8),%r11 592 adcq $1,%r12 593 movq 0+0+0(%rbp),%rax 594 movq %rax,%r15 595 mulq %r10 596 movq %rax,%r13 597 movq %rdx,%r14 598 movq 0+0+0(%rbp),%rax 599 mulq %r11 600 imulq %r12,%r15 601 addq %rax,%r14 602 adcq %rdx,%r15 603 movq 8+0+0(%rbp),%rax 604 movq %rax,%r9 605 mulq %r10 606 addq %rax,%r14 607 adcq $0,%rdx 608 movq %rdx,%r10 609 movq 8+0+0(%rbp),%rax 610 mulq %r11 611 addq %rax,%r15 612 adcq $0,%rdx 613 imulq %r12,%r9 614 addq %r10,%r15 615 adcq %rdx,%r9 616 movq %r13,%r10 617 movq %r14,%r11 618 movq %r15,%r12 619 andq $3,%r12 620 movq %r15,%r13 621 andq $-4,%r13 622 movq %r9,%r14 623 shrdq $2,%r9,%r15 624 shrq $2,%r9 625 addq %r13,%r15 626 adcq %r14,%r9 627 
addq %r15,%r10 628 adcq %r9,%r11 629 adcq $0,%r12 630 631 leaq 16(%r8),%r8 632 cmpq $-6,%rcx 633 jg L$open_sse_main_loop_rounds 634 paddd L$chacha20_consts(%rip),%xmm3 635 paddd 0+48(%rbp),%xmm7 636 paddd 0+64(%rbp),%xmm11 637 paddd 0+144(%rbp),%xmm15 638 paddd L$chacha20_consts(%rip),%xmm2 639 paddd 0+48(%rbp),%xmm6 640 paddd 0+64(%rbp),%xmm10 641 paddd 0+128(%rbp),%xmm14 642 paddd L$chacha20_consts(%rip),%xmm1 643 paddd 0+48(%rbp),%xmm5 644 paddd 0+64(%rbp),%xmm9 645 paddd 0+112(%rbp),%xmm13 646 paddd L$chacha20_consts(%rip),%xmm0 647 paddd 0+48(%rbp),%xmm4 648 paddd 0+64(%rbp),%xmm8 649 paddd 0+96(%rbp),%xmm12 650 movdqa %xmm12,0+80(%rbp) 651 movdqu 0 + 0(%rsi),%xmm12 652 pxor %xmm3,%xmm12 653 movdqu %xmm12,0 + 0(%rdi) 654 movdqu 16 + 0(%rsi),%xmm12 655 pxor %xmm7,%xmm12 656 movdqu %xmm12,16 + 0(%rdi) 657 movdqu 32 + 0(%rsi),%xmm12 658 pxor %xmm11,%xmm12 659 movdqu %xmm12,32 + 0(%rdi) 660 movdqu 48 + 0(%rsi),%xmm12 661 pxor %xmm15,%xmm12 662 movdqu %xmm12,48 + 0(%rdi) 663 movdqu 0 + 64(%rsi),%xmm3 664 movdqu 16 + 64(%rsi),%xmm7 665 movdqu 32 + 64(%rsi),%xmm11 666 movdqu 48 + 64(%rsi),%xmm15 667 pxor %xmm3,%xmm2 668 pxor %xmm7,%xmm6 669 pxor %xmm11,%xmm10 670 pxor %xmm14,%xmm15 671 movdqu %xmm2,0 + 64(%rdi) 672 movdqu %xmm6,16 + 64(%rdi) 673 movdqu %xmm10,32 + 64(%rdi) 674 movdqu %xmm15,48 + 64(%rdi) 675 movdqu 0 + 128(%rsi),%xmm3 676 movdqu 16 + 128(%rsi),%xmm7 677 movdqu 32 + 128(%rsi),%xmm11 678 movdqu 48 + 128(%rsi),%xmm15 679 pxor %xmm3,%xmm1 680 pxor %xmm7,%xmm5 681 pxor %xmm11,%xmm9 682 pxor %xmm13,%xmm15 683 movdqu %xmm1,0 + 128(%rdi) 684 movdqu %xmm5,16 + 128(%rdi) 685 movdqu %xmm9,32 + 128(%rdi) 686 movdqu %xmm15,48 + 128(%rdi) 687 movdqu 0 + 192(%rsi),%xmm3 688 movdqu 16 + 192(%rsi),%xmm7 689 movdqu 32 + 192(%rsi),%xmm11 690 movdqu 48 + 192(%rsi),%xmm15 691 pxor %xmm3,%xmm0 692 pxor %xmm7,%xmm4 693 pxor %xmm11,%xmm8 694 pxor 0+80(%rbp),%xmm15 695 movdqu %xmm0,0 + 192(%rdi) 696 movdqu %xmm4,16 + 192(%rdi) 697 movdqu %xmm8,32 + 192(%rdi) 698 movdqu %xmm15,48 + 192(%rdi) 699 700 leaq 256(%rsi),%rsi 701 leaq 256(%rdi),%rdi 702 subq $256,%rbx 703 jmp L$open_sse_main_loop 704L$open_sse_tail: 705 706 testq %rbx,%rbx 707 jz L$open_sse_finalize 708 cmpq $192,%rbx 709 ja L$open_sse_tail_256 710 cmpq $128,%rbx 711 ja L$open_sse_tail_192 712 cmpq $64,%rbx 713 ja L$open_sse_tail_128 714 movdqa L$chacha20_consts(%rip),%xmm0 715 movdqa 0+48(%rbp),%xmm4 716 movdqa 0+64(%rbp),%xmm8 717 movdqa 0+96(%rbp),%xmm12 718 paddd L$sse_inc(%rip),%xmm12 719 movdqa %xmm12,0+96(%rbp) 720 721 xorq %r8,%r8 722 movq %rbx,%rcx 723 cmpq $16,%rcx 724 jb L$open_sse_tail_64_rounds 725L$open_sse_tail_64_rounds_and_x1hash: 726 addq 0+0(%rsi,%r8,1),%r10 727 adcq 8+0(%rsi,%r8,1),%r11 728 adcq $1,%r12 729 movq 0+0+0(%rbp),%rax 730 movq %rax,%r15 731 mulq %r10 732 movq %rax,%r13 733 movq %rdx,%r14 734 movq 0+0+0(%rbp),%rax 735 mulq %r11 736 imulq %r12,%r15 737 addq %rax,%r14 738 adcq %rdx,%r15 739 movq 8+0+0(%rbp),%rax 740 movq %rax,%r9 741 mulq %r10 742 addq %rax,%r14 743 adcq $0,%rdx 744 movq %rdx,%r10 745 movq 8+0+0(%rbp),%rax 746 mulq %r11 747 addq %rax,%r15 748 adcq $0,%rdx 749 imulq %r12,%r9 750 addq %r10,%r15 751 adcq %rdx,%r9 752 movq %r13,%r10 753 movq %r14,%r11 754 movq %r15,%r12 755 andq $3,%r12 756 movq %r15,%r13 757 andq $-4,%r13 758 movq %r9,%r14 759 shrdq $2,%r9,%r15 760 shrq $2,%r9 761 addq %r13,%r15 762 adcq %r14,%r9 763 addq %r15,%r10 764 adcq %r9,%r11 765 adcq $0,%r12 766 767 subq $16,%rcx 768L$open_sse_tail_64_rounds: 769 addq $16,%r8 770 paddd %xmm4,%xmm0 771 pxor %xmm0,%xmm12 772 pshufb 
L$rol16(%rip),%xmm12 773 paddd %xmm12,%xmm8 774 pxor %xmm8,%xmm4 775 movdqa %xmm4,%xmm3 776 pslld $12,%xmm3 777 psrld $20,%xmm4 778 pxor %xmm3,%xmm4 779 paddd %xmm4,%xmm0 780 pxor %xmm0,%xmm12 781 pshufb L$rol8(%rip),%xmm12 782 paddd %xmm12,%xmm8 783 pxor %xmm8,%xmm4 784 movdqa %xmm4,%xmm3 785 pslld $7,%xmm3 786 psrld $25,%xmm4 787 pxor %xmm3,%xmm4 788.byte 102,15,58,15,228,4 789.byte 102,69,15,58,15,192,8 790.byte 102,69,15,58,15,228,12 791 paddd %xmm4,%xmm0 792 pxor %xmm0,%xmm12 793 pshufb L$rol16(%rip),%xmm12 794 paddd %xmm12,%xmm8 795 pxor %xmm8,%xmm4 796 movdqa %xmm4,%xmm3 797 pslld $12,%xmm3 798 psrld $20,%xmm4 799 pxor %xmm3,%xmm4 800 paddd %xmm4,%xmm0 801 pxor %xmm0,%xmm12 802 pshufb L$rol8(%rip),%xmm12 803 paddd %xmm12,%xmm8 804 pxor %xmm8,%xmm4 805 movdqa %xmm4,%xmm3 806 pslld $7,%xmm3 807 psrld $25,%xmm4 808 pxor %xmm3,%xmm4 809.byte 102,15,58,15,228,12 810.byte 102,69,15,58,15,192,8 811.byte 102,69,15,58,15,228,4 812 813 cmpq $16,%rcx 814 jae L$open_sse_tail_64_rounds_and_x1hash 815 cmpq $160,%r8 816 jne L$open_sse_tail_64_rounds 817 paddd L$chacha20_consts(%rip),%xmm0 818 paddd 0+48(%rbp),%xmm4 819 paddd 0+64(%rbp),%xmm8 820 paddd 0+96(%rbp),%xmm12 821 822 jmp L$open_sse_tail_64_dec_loop 823 824L$open_sse_tail_128: 825 movdqa L$chacha20_consts(%rip),%xmm0 826 movdqa 0+48(%rbp),%xmm4 827 movdqa 0+64(%rbp),%xmm8 828 movdqa %xmm0,%xmm1 829 movdqa %xmm4,%xmm5 830 movdqa %xmm8,%xmm9 831 movdqa 0+96(%rbp),%xmm13 832 paddd L$sse_inc(%rip),%xmm13 833 movdqa %xmm13,%xmm12 834 paddd L$sse_inc(%rip),%xmm12 835 movdqa %xmm12,0+96(%rbp) 836 movdqa %xmm13,0+112(%rbp) 837 838 movq %rbx,%rcx 839 andq $-16,%rcx 840 xorq %r8,%r8 841L$open_sse_tail_128_rounds_and_x1hash: 842 addq 0+0(%rsi,%r8,1),%r10 843 adcq 8+0(%rsi,%r8,1),%r11 844 adcq $1,%r12 845 movq 0+0+0(%rbp),%rax 846 movq %rax,%r15 847 mulq %r10 848 movq %rax,%r13 849 movq %rdx,%r14 850 movq 0+0+0(%rbp),%rax 851 mulq %r11 852 imulq %r12,%r15 853 addq %rax,%r14 854 adcq %rdx,%r15 855 movq 8+0+0(%rbp),%rax 856 movq %rax,%r9 857 mulq %r10 858 addq %rax,%r14 859 adcq $0,%rdx 860 movq %rdx,%r10 861 movq 8+0+0(%rbp),%rax 862 mulq %r11 863 addq %rax,%r15 864 adcq $0,%rdx 865 imulq %r12,%r9 866 addq %r10,%r15 867 adcq %rdx,%r9 868 movq %r13,%r10 869 movq %r14,%r11 870 movq %r15,%r12 871 andq $3,%r12 872 movq %r15,%r13 873 andq $-4,%r13 874 movq %r9,%r14 875 shrdq $2,%r9,%r15 876 shrq $2,%r9 877 addq %r13,%r15 878 adcq %r14,%r9 879 addq %r15,%r10 880 adcq %r9,%r11 881 adcq $0,%r12 882 883L$open_sse_tail_128_rounds: 884 addq $16,%r8 885 paddd %xmm4,%xmm0 886 pxor %xmm0,%xmm12 887 pshufb L$rol16(%rip),%xmm12 888 paddd %xmm12,%xmm8 889 pxor %xmm8,%xmm4 890 movdqa %xmm4,%xmm3 891 pslld $12,%xmm3 892 psrld $20,%xmm4 893 pxor %xmm3,%xmm4 894 paddd %xmm4,%xmm0 895 pxor %xmm0,%xmm12 896 pshufb L$rol8(%rip),%xmm12 897 paddd %xmm12,%xmm8 898 pxor %xmm8,%xmm4 899 movdqa %xmm4,%xmm3 900 pslld $7,%xmm3 901 psrld $25,%xmm4 902 pxor %xmm3,%xmm4 903.byte 102,15,58,15,228,4 904.byte 102,69,15,58,15,192,8 905.byte 102,69,15,58,15,228,12 906 paddd %xmm5,%xmm1 907 pxor %xmm1,%xmm13 908 pshufb L$rol16(%rip),%xmm13 909 paddd %xmm13,%xmm9 910 pxor %xmm9,%xmm5 911 movdqa %xmm5,%xmm3 912 pslld $12,%xmm3 913 psrld $20,%xmm5 914 pxor %xmm3,%xmm5 915 paddd %xmm5,%xmm1 916 pxor %xmm1,%xmm13 917 pshufb L$rol8(%rip),%xmm13 918 paddd %xmm13,%xmm9 919 pxor %xmm9,%xmm5 920 movdqa %xmm5,%xmm3 921 pslld $7,%xmm3 922 psrld $25,%xmm5 923 pxor %xmm3,%xmm5 924.byte 102,15,58,15,237,4 925.byte 102,69,15,58,15,201,8 926.byte 102,69,15,58,15,237,12 927 paddd %xmm4,%xmm0 928 pxor 
%xmm0,%xmm12 929 pshufb L$rol16(%rip),%xmm12 930 paddd %xmm12,%xmm8 931 pxor %xmm8,%xmm4 932 movdqa %xmm4,%xmm3 933 pslld $12,%xmm3 934 psrld $20,%xmm4 935 pxor %xmm3,%xmm4 936 paddd %xmm4,%xmm0 937 pxor %xmm0,%xmm12 938 pshufb L$rol8(%rip),%xmm12 939 paddd %xmm12,%xmm8 940 pxor %xmm8,%xmm4 941 movdqa %xmm4,%xmm3 942 pslld $7,%xmm3 943 psrld $25,%xmm4 944 pxor %xmm3,%xmm4 945.byte 102,15,58,15,228,12 946.byte 102,69,15,58,15,192,8 947.byte 102,69,15,58,15,228,4 948 paddd %xmm5,%xmm1 949 pxor %xmm1,%xmm13 950 pshufb L$rol16(%rip),%xmm13 951 paddd %xmm13,%xmm9 952 pxor %xmm9,%xmm5 953 movdqa %xmm5,%xmm3 954 pslld $12,%xmm3 955 psrld $20,%xmm5 956 pxor %xmm3,%xmm5 957 paddd %xmm5,%xmm1 958 pxor %xmm1,%xmm13 959 pshufb L$rol8(%rip),%xmm13 960 paddd %xmm13,%xmm9 961 pxor %xmm9,%xmm5 962 movdqa %xmm5,%xmm3 963 pslld $7,%xmm3 964 psrld $25,%xmm5 965 pxor %xmm3,%xmm5 966.byte 102,15,58,15,237,12 967.byte 102,69,15,58,15,201,8 968.byte 102,69,15,58,15,237,4 969 970 cmpq %rcx,%r8 971 jb L$open_sse_tail_128_rounds_and_x1hash 972 cmpq $160,%r8 973 jne L$open_sse_tail_128_rounds 974 paddd L$chacha20_consts(%rip),%xmm1 975 paddd 0+48(%rbp),%xmm5 976 paddd 0+64(%rbp),%xmm9 977 paddd 0+112(%rbp),%xmm13 978 paddd L$chacha20_consts(%rip),%xmm0 979 paddd 0+48(%rbp),%xmm4 980 paddd 0+64(%rbp),%xmm8 981 paddd 0+96(%rbp),%xmm12 982 movdqu 0 + 0(%rsi),%xmm3 983 movdqu 16 + 0(%rsi),%xmm7 984 movdqu 32 + 0(%rsi),%xmm11 985 movdqu 48 + 0(%rsi),%xmm15 986 pxor %xmm3,%xmm1 987 pxor %xmm7,%xmm5 988 pxor %xmm11,%xmm9 989 pxor %xmm13,%xmm15 990 movdqu %xmm1,0 + 0(%rdi) 991 movdqu %xmm5,16 + 0(%rdi) 992 movdqu %xmm9,32 + 0(%rdi) 993 movdqu %xmm15,48 + 0(%rdi) 994 995 subq $64,%rbx 996 leaq 64(%rsi),%rsi 997 leaq 64(%rdi),%rdi 998 jmp L$open_sse_tail_64_dec_loop 999 1000L$open_sse_tail_192: 1001 movdqa L$chacha20_consts(%rip),%xmm0 1002 movdqa 0+48(%rbp),%xmm4 1003 movdqa 0+64(%rbp),%xmm8 1004 movdqa %xmm0,%xmm1 1005 movdqa %xmm4,%xmm5 1006 movdqa %xmm8,%xmm9 1007 movdqa %xmm0,%xmm2 1008 movdqa %xmm4,%xmm6 1009 movdqa %xmm8,%xmm10 1010 movdqa 0+96(%rbp),%xmm14 1011 paddd L$sse_inc(%rip),%xmm14 1012 movdqa %xmm14,%xmm13 1013 paddd L$sse_inc(%rip),%xmm13 1014 movdqa %xmm13,%xmm12 1015 paddd L$sse_inc(%rip),%xmm12 1016 movdqa %xmm12,0+96(%rbp) 1017 movdqa %xmm13,0+112(%rbp) 1018 movdqa %xmm14,0+128(%rbp) 1019 1020 movq %rbx,%rcx 1021 movq $160,%r8 1022 cmpq $160,%rcx 1023 cmovgq %r8,%rcx 1024 andq $-16,%rcx 1025 xorq %r8,%r8 1026L$open_sse_tail_192_rounds_and_x1hash: 1027 addq 0+0(%rsi,%r8,1),%r10 1028 adcq 8+0(%rsi,%r8,1),%r11 1029 adcq $1,%r12 1030 movq 0+0+0(%rbp),%rax 1031 movq %rax,%r15 1032 mulq %r10 1033 movq %rax,%r13 1034 movq %rdx,%r14 1035 movq 0+0+0(%rbp),%rax 1036 mulq %r11 1037 imulq %r12,%r15 1038 addq %rax,%r14 1039 adcq %rdx,%r15 1040 movq 8+0+0(%rbp),%rax 1041 movq %rax,%r9 1042 mulq %r10 1043 addq %rax,%r14 1044 adcq $0,%rdx 1045 movq %rdx,%r10 1046 movq 8+0+0(%rbp),%rax 1047 mulq %r11 1048 addq %rax,%r15 1049 adcq $0,%rdx 1050 imulq %r12,%r9 1051 addq %r10,%r15 1052 adcq %rdx,%r9 1053 movq %r13,%r10 1054 movq %r14,%r11 1055 movq %r15,%r12 1056 andq $3,%r12 1057 movq %r15,%r13 1058 andq $-4,%r13 1059 movq %r9,%r14 1060 shrdq $2,%r9,%r15 1061 shrq $2,%r9 1062 addq %r13,%r15 1063 adcq %r14,%r9 1064 addq %r15,%r10 1065 adcq %r9,%r11 1066 adcq $0,%r12 1067 1068L$open_sse_tail_192_rounds: 1069 addq $16,%r8 1070 paddd %xmm4,%xmm0 1071 pxor %xmm0,%xmm12 1072 pshufb L$rol16(%rip),%xmm12 1073 paddd %xmm12,%xmm8 1074 pxor %xmm8,%xmm4 1075 movdqa %xmm4,%xmm3 1076 pslld $12,%xmm3 1077 psrld $20,%xmm4 1078 pxor 
%xmm3,%xmm4 1079 paddd %xmm4,%xmm0 1080 pxor %xmm0,%xmm12 1081 pshufb L$rol8(%rip),%xmm12 1082 paddd %xmm12,%xmm8 1083 pxor %xmm8,%xmm4 1084 movdqa %xmm4,%xmm3 1085 pslld $7,%xmm3 1086 psrld $25,%xmm4 1087 pxor %xmm3,%xmm4 1088.byte 102,15,58,15,228,4 1089.byte 102,69,15,58,15,192,8 1090.byte 102,69,15,58,15,228,12 1091 paddd %xmm5,%xmm1 1092 pxor %xmm1,%xmm13 1093 pshufb L$rol16(%rip),%xmm13 1094 paddd %xmm13,%xmm9 1095 pxor %xmm9,%xmm5 1096 movdqa %xmm5,%xmm3 1097 pslld $12,%xmm3 1098 psrld $20,%xmm5 1099 pxor %xmm3,%xmm5 1100 paddd %xmm5,%xmm1 1101 pxor %xmm1,%xmm13 1102 pshufb L$rol8(%rip),%xmm13 1103 paddd %xmm13,%xmm9 1104 pxor %xmm9,%xmm5 1105 movdqa %xmm5,%xmm3 1106 pslld $7,%xmm3 1107 psrld $25,%xmm5 1108 pxor %xmm3,%xmm5 1109.byte 102,15,58,15,237,4 1110.byte 102,69,15,58,15,201,8 1111.byte 102,69,15,58,15,237,12 1112 paddd %xmm6,%xmm2 1113 pxor %xmm2,%xmm14 1114 pshufb L$rol16(%rip),%xmm14 1115 paddd %xmm14,%xmm10 1116 pxor %xmm10,%xmm6 1117 movdqa %xmm6,%xmm3 1118 pslld $12,%xmm3 1119 psrld $20,%xmm6 1120 pxor %xmm3,%xmm6 1121 paddd %xmm6,%xmm2 1122 pxor %xmm2,%xmm14 1123 pshufb L$rol8(%rip),%xmm14 1124 paddd %xmm14,%xmm10 1125 pxor %xmm10,%xmm6 1126 movdqa %xmm6,%xmm3 1127 pslld $7,%xmm3 1128 psrld $25,%xmm6 1129 pxor %xmm3,%xmm6 1130.byte 102,15,58,15,246,4 1131.byte 102,69,15,58,15,210,8 1132.byte 102,69,15,58,15,246,12 1133 paddd %xmm4,%xmm0 1134 pxor %xmm0,%xmm12 1135 pshufb L$rol16(%rip),%xmm12 1136 paddd %xmm12,%xmm8 1137 pxor %xmm8,%xmm4 1138 movdqa %xmm4,%xmm3 1139 pslld $12,%xmm3 1140 psrld $20,%xmm4 1141 pxor %xmm3,%xmm4 1142 paddd %xmm4,%xmm0 1143 pxor %xmm0,%xmm12 1144 pshufb L$rol8(%rip),%xmm12 1145 paddd %xmm12,%xmm8 1146 pxor %xmm8,%xmm4 1147 movdqa %xmm4,%xmm3 1148 pslld $7,%xmm3 1149 psrld $25,%xmm4 1150 pxor %xmm3,%xmm4 1151.byte 102,15,58,15,228,12 1152.byte 102,69,15,58,15,192,8 1153.byte 102,69,15,58,15,228,4 1154 paddd %xmm5,%xmm1 1155 pxor %xmm1,%xmm13 1156 pshufb L$rol16(%rip),%xmm13 1157 paddd %xmm13,%xmm9 1158 pxor %xmm9,%xmm5 1159 movdqa %xmm5,%xmm3 1160 pslld $12,%xmm3 1161 psrld $20,%xmm5 1162 pxor %xmm3,%xmm5 1163 paddd %xmm5,%xmm1 1164 pxor %xmm1,%xmm13 1165 pshufb L$rol8(%rip),%xmm13 1166 paddd %xmm13,%xmm9 1167 pxor %xmm9,%xmm5 1168 movdqa %xmm5,%xmm3 1169 pslld $7,%xmm3 1170 psrld $25,%xmm5 1171 pxor %xmm3,%xmm5 1172.byte 102,15,58,15,237,12 1173.byte 102,69,15,58,15,201,8 1174.byte 102,69,15,58,15,237,4 1175 paddd %xmm6,%xmm2 1176 pxor %xmm2,%xmm14 1177 pshufb L$rol16(%rip),%xmm14 1178 paddd %xmm14,%xmm10 1179 pxor %xmm10,%xmm6 1180 movdqa %xmm6,%xmm3 1181 pslld $12,%xmm3 1182 psrld $20,%xmm6 1183 pxor %xmm3,%xmm6 1184 paddd %xmm6,%xmm2 1185 pxor %xmm2,%xmm14 1186 pshufb L$rol8(%rip),%xmm14 1187 paddd %xmm14,%xmm10 1188 pxor %xmm10,%xmm6 1189 movdqa %xmm6,%xmm3 1190 pslld $7,%xmm3 1191 psrld $25,%xmm6 1192 pxor %xmm3,%xmm6 1193.byte 102,15,58,15,246,12 1194.byte 102,69,15,58,15,210,8 1195.byte 102,69,15,58,15,246,4 1196 1197 cmpq %rcx,%r8 1198 jb L$open_sse_tail_192_rounds_and_x1hash 1199 cmpq $160,%r8 1200 jne L$open_sse_tail_192_rounds 1201 cmpq $176,%rbx 1202 jb L$open_sse_tail_192_finish 1203 addq 0+160(%rsi),%r10 1204 adcq 8+160(%rsi),%r11 1205 adcq $1,%r12 1206 movq 0+0+0(%rbp),%rax 1207 movq %rax,%r15 1208 mulq %r10 1209 movq %rax,%r13 1210 movq %rdx,%r14 1211 movq 0+0+0(%rbp),%rax 1212 mulq %r11 1213 imulq %r12,%r15 1214 addq %rax,%r14 1215 adcq %rdx,%r15 1216 movq 8+0+0(%rbp),%rax 1217 movq %rax,%r9 1218 mulq %r10 1219 addq %rax,%r14 1220 adcq $0,%rdx 1221 movq %rdx,%r10 1222 movq 8+0+0(%rbp),%rax 1223 mulq %r11 1224 addq %rax,%r15 
1225 adcq $0,%rdx 1226 imulq %r12,%r9 1227 addq %r10,%r15 1228 adcq %rdx,%r9 1229 movq %r13,%r10 1230 movq %r14,%r11 1231 movq %r15,%r12 1232 andq $3,%r12 1233 movq %r15,%r13 1234 andq $-4,%r13 1235 movq %r9,%r14 1236 shrdq $2,%r9,%r15 1237 shrq $2,%r9 1238 addq %r13,%r15 1239 adcq %r14,%r9 1240 addq %r15,%r10 1241 adcq %r9,%r11 1242 adcq $0,%r12 1243 1244 cmpq $192,%rbx 1245 jb L$open_sse_tail_192_finish 1246 addq 0+176(%rsi),%r10 1247 adcq 8+176(%rsi),%r11 1248 adcq $1,%r12 1249 movq 0+0+0(%rbp),%rax 1250 movq %rax,%r15 1251 mulq %r10 1252 movq %rax,%r13 1253 movq %rdx,%r14 1254 movq 0+0+0(%rbp),%rax 1255 mulq %r11 1256 imulq %r12,%r15 1257 addq %rax,%r14 1258 adcq %rdx,%r15 1259 movq 8+0+0(%rbp),%rax 1260 movq %rax,%r9 1261 mulq %r10 1262 addq %rax,%r14 1263 adcq $0,%rdx 1264 movq %rdx,%r10 1265 movq 8+0+0(%rbp),%rax 1266 mulq %r11 1267 addq %rax,%r15 1268 adcq $0,%rdx 1269 imulq %r12,%r9 1270 addq %r10,%r15 1271 adcq %rdx,%r9 1272 movq %r13,%r10 1273 movq %r14,%r11 1274 movq %r15,%r12 1275 andq $3,%r12 1276 movq %r15,%r13 1277 andq $-4,%r13 1278 movq %r9,%r14 1279 shrdq $2,%r9,%r15 1280 shrq $2,%r9 1281 addq %r13,%r15 1282 adcq %r14,%r9 1283 addq %r15,%r10 1284 adcq %r9,%r11 1285 adcq $0,%r12 1286 1287L$open_sse_tail_192_finish: 1288 paddd L$chacha20_consts(%rip),%xmm2 1289 paddd 0+48(%rbp),%xmm6 1290 paddd 0+64(%rbp),%xmm10 1291 paddd 0+128(%rbp),%xmm14 1292 paddd L$chacha20_consts(%rip),%xmm1 1293 paddd 0+48(%rbp),%xmm5 1294 paddd 0+64(%rbp),%xmm9 1295 paddd 0+112(%rbp),%xmm13 1296 paddd L$chacha20_consts(%rip),%xmm0 1297 paddd 0+48(%rbp),%xmm4 1298 paddd 0+64(%rbp),%xmm8 1299 paddd 0+96(%rbp),%xmm12 1300 movdqu 0 + 0(%rsi),%xmm3 1301 movdqu 16 + 0(%rsi),%xmm7 1302 movdqu 32 + 0(%rsi),%xmm11 1303 movdqu 48 + 0(%rsi),%xmm15 1304 pxor %xmm3,%xmm2 1305 pxor %xmm7,%xmm6 1306 pxor %xmm11,%xmm10 1307 pxor %xmm14,%xmm15 1308 movdqu %xmm2,0 + 0(%rdi) 1309 movdqu %xmm6,16 + 0(%rdi) 1310 movdqu %xmm10,32 + 0(%rdi) 1311 movdqu %xmm15,48 + 0(%rdi) 1312 movdqu 0 + 64(%rsi),%xmm3 1313 movdqu 16 + 64(%rsi),%xmm7 1314 movdqu 32 + 64(%rsi),%xmm11 1315 movdqu 48 + 64(%rsi),%xmm15 1316 pxor %xmm3,%xmm1 1317 pxor %xmm7,%xmm5 1318 pxor %xmm11,%xmm9 1319 pxor %xmm13,%xmm15 1320 movdqu %xmm1,0 + 64(%rdi) 1321 movdqu %xmm5,16 + 64(%rdi) 1322 movdqu %xmm9,32 + 64(%rdi) 1323 movdqu %xmm15,48 + 64(%rdi) 1324 1325 subq $128,%rbx 1326 leaq 128(%rsi),%rsi 1327 leaq 128(%rdi),%rdi 1328 jmp L$open_sse_tail_64_dec_loop 1329 1330L$open_sse_tail_256: 1331 movdqa L$chacha20_consts(%rip),%xmm0 1332 movdqa 0+48(%rbp),%xmm4 1333 movdqa 0+64(%rbp),%xmm8 1334 movdqa %xmm0,%xmm1 1335 movdqa %xmm4,%xmm5 1336 movdqa %xmm8,%xmm9 1337 movdqa %xmm0,%xmm2 1338 movdqa %xmm4,%xmm6 1339 movdqa %xmm8,%xmm10 1340 movdqa %xmm0,%xmm3 1341 movdqa %xmm4,%xmm7 1342 movdqa %xmm8,%xmm11 1343 movdqa 0+96(%rbp),%xmm15 1344 paddd L$sse_inc(%rip),%xmm15 1345 movdqa %xmm15,%xmm14 1346 paddd L$sse_inc(%rip),%xmm14 1347 movdqa %xmm14,%xmm13 1348 paddd L$sse_inc(%rip),%xmm13 1349 movdqa %xmm13,%xmm12 1350 paddd L$sse_inc(%rip),%xmm12 1351 movdqa %xmm12,0+96(%rbp) 1352 movdqa %xmm13,0+112(%rbp) 1353 movdqa %xmm14,0+128(%rbp) 1354 movdqa %xmm15,0+144(%rbp) 1355 1356 xorq %r8,%r8 1357L$open_sse_tail_256_rounds_and_x1hash: 1358 addq 0+0(%rsi,%r8,1),%r10 1359 adcq 8+0(%rsi,%r8,1),%r11 1360 adcq $1,%r12 1361 movdqa %xmm11,0+80(%rbp) 1362 paddd %xmm4,%xmm0 1363 pxor %xmm0,%xmm12 1364 pshufb L$rol16(%rip),%xmm12 1365 paddd %xmm12,%xmm8 1366 pxor %xmm8,%xmm4 1367 movdqa %xmm4,%xmm11 1368 pslld $12,%xmm11 1369 psrld $20,%xmm4 1370 pxor %xmm11,%xmm4 1371 paddd 
%xmm4,%xmm0 1372 pxor %xmm0,%xmm12 1373 pshufb L$rol8(%rip),%xmm12 1374 paddd %xmm12,%xmm8 1375 pxor %xmm8,%xmm4 1376 movdqa %xmm4,%xmm11 1377 pslld $7,%xmm11 1378 psrld $25,%xmm4 1379 pxor %xmm11,%xmm4 1380.byte 102,15,58,15,228,4 1381.byte 102,69,15,58,15,192,8 1382.byte 102,69,15,58,15,228,12 1383 paddd %xmm5,%xmm1 1384 pxor %xmm1,%xmm13 1385 pshufb L$rol16(%rip),%xmm13 1386 paddd %xmm13,%xmm9 1387 pxor %xmm9,%xmm5 1388 movdqa %xmm5,%xmm11 1389 pslld $12,%xmm11 1390 psrld $20,%xmm5 1391 pxor %xmm11,%xmm5 1392 paddd %xmm5,%xmm1 1393 pxor %xmm1,%xmm13 1394 pshufb L$rol8(%rip),%xmm13 1395 paddd %xmm13,%xmm9 1396 pxor %xmm9,%xmm5 1397 movdqa %xmm5,%xmm11 1398 pslld $7,%xmm11 1399 psrld $25,%xmm5 1400 pxor %xmm11,%xmm5 1401.byte 102,15,58,15,237,4 1402.byte 102,69,15,58,15,201,8 1403.byte 102,69,15,58,15,237,12 1404 paddd %xmm6,%xmm2 1405 pxor %xmm2,%xmm14 1406 pshufb L$rol16(%rip),%xmm14 1407 paddd %xmm14,%xmm10 1408 pxor %xmm10,%xmm6 1409 movdqa %xmm6,%xmm11 1410 pslld $12,%xmm11 1411 psrld $20,%xmm6 1412 pxor %xmm11,%xmm6 1413 paddd %xmm6,%xmm2 1414 pxor %xmm2,%xmm14 1415 pshufb L$rol8(%rip),%xmm14 1416 paddd %xmm14,%xmm10 1417 pxor %xmm10,%xmm6 1418 movdqa %xmm6,%xmm11 1419 pslld $7,%xmm11 1420 psrld $25,%xmm6 1421 pxor %xmm11,%xmm6 1422.byte 102,15,58,15,246,4 1423.byte 102,69,15,58,15,210,8 1424.byte 102,69,15,58,15,246,12 1425 movdqa 0+80(%rbp),%xmm11 1426 movq 0+0+0(%rbp),%rax 1427 movq %rax,%r15 1428 mulq %r10 1429 movq %rax,%r13 1430 movq %rdx,%r14 1431 movq 0+0+0(%rbp),%rax 1432 mulq %r11 1433 imulq %r12,%r15 1434 addq %rax,%r14 1435 adcq %rdx,%r15 1436 movdqa %xmm9,0+80(%rbp) 1437 paddd %xmm7,%xmm3 1438 pxor %xmm3,%xmm15 1439 pshufb L$rol16(%rip),%xmm15 1440 paddd %xmm15,%xmm11 1441 pxor %xmm11,%xmm7 1442 movdqa %xmm7,%xmm9 1443 pslld $12,%xmm9 1444 psrld $20,%xmm7 1445 pxor %xmm9,%xmm7 1446 paddd %xmm7,%xmm3 1447 pxor %xmm3,%xmm15 1448 pshufb L$rol8(%rip),%xmm15 1449 paddd %xmm15,%xmm11 1450 pxor %xmm11,%xmm7 1451 movdqa %xmm7,%xmm9 1452 pslld $7,%xmm9 1453 psrld $25,%xmm7 1454 pxor %xmm9,%xmm7 1455.byte 102,15,58,15,255,4 1456.byte 102,69,15,58,15,219,8 1457.byte 102,69,15,58,15,255,12 1458 movdqa 0+80(%rbp),%xmm9 1459 movq 8+0+0(%rbp),%rax 1460 movq %rax,%r9 1461 mulq %r10 1462 addq %rax,%r14 1463 adcq $0,%rdx 1464 movq %rdx,%r10 1465 movq 8+0+0(%rbp),%rax 1466 mulq %r11 1467 addq %rax,%r15 1468 adcq $0,%rdx 1469 movdqa %xmm11,0+80(%rbp) 1470 paddd %xmm4,%xmm0 1471 pxor %xmm0,%xmm12 1472 pshufb L$rol16(%rip),%xmm12 1473 paddd %xmm12,%xmm8 1474 pxor %xmm8,%xmm4 1475 movdqa %xmm4,%xmm11 1476 pslld $12,%xmm11 1477 psrld $20,%xmm4 1478 pxor %xmm11,%xmm4 1479 paddd %xmm4,%xmm0 1480 pxor %xmm0,%xmm12 1481 pshufb L$rol8(%rip),%xmm12 1482 paddd %xmm12,%xmm8 1483 pxor %xmm8,%xmm4 1484 movdqa %xmm4,%xmm11 1485 pslld $7,%xmm11 1486 psrld $25,%xmm4 1487 pxor %xmm11,%xmm4 1488.byte 102,15,58,15,228,12 1489.byte 102,69,15,58,15,192,8 1490.byte 102,69,15,58,15,228,4 1491 paddd %xmm5,%xmm1 1492 pxor %xmm1,%xmm13 1493 pshufb L$rol16(%rip),%xmm13 1494 paddd %xmm13,%xmm9 1495 pxor %xmm9,%xmm5 1496 movdqa %xmm5,%xmm11 1497 pslld $12,%xmm11 1498 psrld $20,%xmm5 1499 pxor %xmm11,%xmm5 1500 paddd %xmm5,%xmm1 1501 pxor %xmm1,%xmm13 1502 pshufb L$rol8(%rip),%xmm13 1503 paddd %xmm13,%xmm9 1504 pxor %xmm9,%xmm5 1505 movdqa %xmm5,%xmm11 1506 pslld $7,%xmm11 1507 psrld $25,%xmm5 1508 pxor %xmm11,%xmm5 1509.byte 102,15,58,15,237,12 1510.byte 102,69,15,58,15,201,8 1511.byte 102,69,15,58,15,237,4 1512 imulq %r12,%r9 1513 addq %r10,%r15 1514 adcq %rdx,%r9 1515 paddd %xmm6,%xmm2 1516 pxor %xmm2,%xmm14 1517 
pshufb L$rol16(%rip),%xmm14 1518 paddd %xmm14,%xmm10 1519 pxor %xmm10,%xmm6 1520 movdqa %xmm6,%xmm11 1521 pslld $12,%xmm11 1522 psrld $20,%xmm6 1523 pxor %xmm11,%xmm6 1524 paddd %xmm6,%xmm2 1525 pxor %xmm2,%xmm14 1526 pshufb L$rol8(%rip),%xmm14 1527 paddd %xmm14,%xmm10 1528 pxor %xmm10,%xmm6 1529 movdqa %xmm6,%xmm11 1530 pslld $7,%xmm11 1531 psrld $25,%xmm6 1532 pxor %xmm11,%xmm6 1533.byte 102,15,58,15,246,12 1534.byte 102,69,15,58,15,210,8 1535.byte 102,69,15,58,15,246,4 1536 movdqa 0+80(%rbp),%xmm11 1537 movq %r13,%r10 1538 movq %r14,%r11 1539 movq %r15,%r12 1540 andq $3,%r12 1541 movq %r15,%r13 1542 andq $-4,%r13 1543 movq %r9,%r14 1544 shrdq $2,%r9,%r15 1545 shrq $2,%r9 1546 addq %r13,%r15 1547 adcq %r14,%r9 1548 addq %r15,%r10 1549 adcq %r9,%r11 1550 adcq $0,%r12 1551 movdqa %xmm9,0+80(%rbp) 1552 paddd %xmm7,%xmm3 1553 pxor %xmm3,%xmm15 1554 pshufb L$rol16(%rip),%xmm15 1555 paddd %xmm15,%xmm11 1556 pxor %xmm11,%xmm7 1557 movdqa %xmm7,%xmm9 1558 pslld $12,%xmm9 1559 psrld $20,%xmm7 1560 pxor %xmm9,%xmm7 1561 paddd %xmm7,%xmm3 1562 pxor %xmm3,%xmm15 1563 pshufb L$rol8(%rip),%xmm15 1564 paddd %xmm15,%xmm11 1565 pxor %xmm11,%xmm7 1566 movdqa %xmm7,%xmm9 1567 pslld $7,%xmm9 1568 psrld $25,%xmm7 1569 pxor %xmm9,%xmm7 1570.byte 102,15,58,15,255,12 1571.byte 102,69,15,58,15,219,8 1572.byte 102,69,15,58,15,255,4 1573 movdqa 0+80(%rbp),%xmm9 1574 1575 addq $16,%r8 1576 cmpq $160,%r8 1577 jb L$open_sse_tail_256_rounds_and_x1hash 1578 1579 movq %rbx,%rcx 1580 andq $-16,%rcx 1581L$open_sse_tail_256_hash: 1582 addq 0+0(%rsi,%r8,1),%r10 1583 adcq 8+0(%rsi,%r8,1),%r11 1584 adcq $1,%r12 1585 movq 0+0+0(%rbp),%rax 1586 movq %rax,%r15 1587 mulq %r10 1588 movq %rax,%r13 1589 movq %rdx,%r14 1590 movq 0+0+0(%rbp),%rax 1591 mulq %r11 1592 imulq %r12,%r15 1593 addq %rax,%r14 1594 adcq %rdx,%r15 1595 movq 8+0+0(%rbp),%rax 1596 movq %rax,%r9 1597 mulq %r10 1598 addq %rax,%r14 1599 adcq $0,%rdx 1600 movq %rdx,%r10 1601 movq 8+0+0(%rbp),%rax 1602 mulq %r11 1603 addq %rax,%r15 1604 adcq $0,%rdx 1605 imulq %r12,%r9 1606 addq %r10,%r15 1607 adcq %rdx,%r9 1608 movq %r13,%r10 1609 movq %r14,%r11 1610 movq %r15,%r12 1611 andq $3,%r12 1612 movq %r15,%r13 1613 andq $-4,%r13 1614 movq %r9,%r14 1615 shrdq $2,%r9,%r15 1616 shrq $2,%r9 1617 addq %r13,%r15 1618 adcq %r14,%r9 1619 addq %r15,%r10 1620 adcq %r9,%r11 1621 adcq $0,%r12 1622 1623 addq $16,%r8 1624 cmpq %rcx,%r8 1625 jb L$open_sse_tail_256_hash 1626 paddd L$chacha20_consts(%rip),%xmm3 1627 paddd 0+48(%rbp),%xmm7 1628 paddd 0+64(%rbp),%xmm11 1629 paddd 0+144(%rbp),%xmm15 1630 paddd L$chacha20_consts(%rip),%xmm2 1631 paddd 0+48(%rbp),%xmm6 1632 paddd 0+64(%rbp),%xmm10 1633 paddd 0+128(%rbp),%xmm14 1634 paddd L$chacha20_consts(%rip),%xmm1 1635 paddd 0+48(%rbp),%xmm5 1636 paddd 0+64(%rbp),%xmm9 1637 paddd 0+112(%rbp),%xmm13 1638 paddd L$chacha20_consts(%rip),%xmm0 1639 paddd 0+48(%rbp),%xmm4 1640 paddd 0+64(%rbp),%xmm8 1641 paddd 0+96(%rbp),%xmm12 1642 movdqa %xmm12,0+80(%rbp) 1643 movdqu 0 + 0(%rsi),%xmm12 1644 pxor %xmm3,%xmm12 1645 movdqu %xmm12,0 + 0(%rdi) 1646 movdqu 16 + 0(%rsi),%xmm12 1647 pxor %xmm7,%xmm12 1648 movdqu %xmm12,16 + 0(%rdi) 1649 movdqu 32 + 0(%rsi),%xmm12 1650 pxor %xmm11,%xmm12 1651 movdqu %xmm12,32 + 0(%rdi) 1652 movdqu 48 + 0(%rsi),%xmm12 1653 pxor %xmm15,%xmm12 1654 movdqu %xmm12,48 + 0(%rdi) 1655 movdqu 0 + 64(%rsi),%xmm3 1656 movdqu 16 + 64(%rsi),%xmm7 1657 movdqu 32 + 64(%rsi),%xmm11 1658 movdqu 48 + 64(%rsi),%xmm15 1659 pxor %xmm3,%xmm2 1660 pxor %xmm7,%xmm6 1661 pxor %xmm11,%xmm10 1662 pxor %xmm14,%xmm15 1663 movdqu %xmm2,0 + 64(%rdi) 
1664 movdqu %xmm6,16 + 64(%rdi) 1665 movdqu %xmm10,32 + 64(%rdi) 1666 movdqu %xmm15,48 + 64(%rdi) 1667 movdqu 0 + 128(%rsi),%xmm3 1668 movdqu 16 + 128(%rsi),%xmm7 1669 movdqu 32 + 128(%rsi),%xmm11 1670 movdqu 48 + 128(%rsi),%xmm15 1671 pxor %xmm3,%xmm1 1672 pxor %xmm7,%xmm5 1673 pxor %xmm11,%xmm9 1674 pxor %xmm13,%xmm15 1675 movdqu %xmm1,0 + 128(%rdi) 1676 movdqu %xmm5,16 + 128(%rdi) 1677 movdqu %xmm9,32 + 128(%rdi) 1678 movdqu %xmm15,48 + 128(%rdi) 1679 1680 movdqa 0+80(%rbp),%xmm12 1681 subq $192,%rbx 1682 leaq 192(%rsi),%rsi 1683 leaq 192(%rdi),%rdi 1684 1685 1686L$open_sse_tail_64_dec_loop: 1687 cmpq $16,%rbx 1688 jb L$open_sse_tail_16_init 1689 subq $16,%rbx 1690 movdqu (%rsi),%xmm3 1691 pxor %xmm3,%xmm0 1692 movdqu %xmm0,(%rdi) 1693 leaq 16(%rsi),%rsi 1694 leaq 16(%rdi),%rdi 1695 movdqa %xmm4,%xmm0 1696 movdqa %xmm8,%xmm4 1697 movdqa %xmm12,%xmm8 1698 jmp L$open_sse_tail_64_dec_loop 1699L$open_sse_tail_16_init: 1700 movdqa %xmm0,%xmm1 1701 1702 1703L$open_sse_tail_16: 1704 testq %rbx,%rbx 1705 jz L$open_sse_finalize 1706 1707 1708 1709 pxor %xmm3,%xmm3 1710 leaq -1(%rsi,%rbx,1),%rsi 1711 movq %rbx,%r8 1712L$open_sse_tail_16_compose: 1713 pslldq $1,%xmm3 1714 pinsrb $0,(%rsi),%xmm3 1715 subq $1,%rsi 1716 subq $1,%r8 1717 jnz L$open_sse_tail_16_compose 1718 1719.byte 102,73,15,126,221 1720 pextrq $1,%xmm3,%r14 1721 1722 pxor %xmm1,%xmm3 1723 1724 1725L$open_sse_tail_16_extract: 1726 pextrb $0,%xmm3,(%rdi) 1727 psrldq $1,%xmm3 1728 addq $1,%rdi 1729 subq $1,%rbx 1730 jne L$open_sse_tail_16_extract 1731 1732 addq %r13,%r10 1733 adcq %r14,%r11 1734 adcq $1,%r12 1735 movq 0+0+0(%rbp),%rax 1736 movq %rax,%r15 1737 mulq %r10 1738 movq %rax,%r13 1739 movq %rdx,%r14 1740 movq 0+0+0(%rbp),%rax 1741 mulq %r11 1742 imulq %r12,%r15 1743 addq %rax,%r14 1744 adcq %rdx,%r15 1745 movq 8+0+0(%rbp),%rax 1746 movq %rax,%r9 1747 mulq %r10 1748 addq %rax,%r14 1749 adcq $0,%rdx 1750 movq %rdx,%r10 1751 movq 8+0+0(%rbp),%rax 1752 mulq %r11 1753 addq %rax,%r15 1754 adcq $0,%rdx 1755 imulq %r12,%r9 1756 addq %r10,%r15 1757 adcq %rdx,%r9 1758 movq %r13,%r10 1759 movq %r14,%r11 1760 movq %r15,%r12 1761 andq $3,%r12 1762 movq %r15,%r13 1763 andq $-4,%r13 1764 movq %r9,%r14 1765 shrdq $2,%r9,%r15 1766 shrq $2,%r9 1767 addq %r13,%r15 1768 adcq %r14,%r9 1769 addq %r15,%r10 1770 adcq %r9,%r11 1771 adcq $0,%r12 1772 1773 1774L$open_sse_finalize: 1775 addq 0+0+32(%rbp),%r10 1776 adcq 8+0+32(%rbp),%r11 1777 adcq $1,%r12 1778 movq 0+0+0(%rbp),%rax 1779 movq %rax,%r15 1780 mulq %r10 1781 movq %rax,%r13 1782 movq %rdx,%r14 1783 movq 0+0+0(%rbp),%rax 1784 mulq %r11 1785 imulq %r12,%r15 1786 addq %rax,%r14 1787 adcq %rdx,%r15 1788 movq 8+0+0(%rbp),%rax 1789 movq %rax,%r9 1790 mulq %r10 1791 addq %rax,%r14 1792 adcq $0,%rdx 1793 movq %rdx,%r10 1794 movq 8+0+0(%rbp),%rax 1795 mulq %r11 1796 addq %rax,%r15 1797 adcq $0,%rdx 1798 imulq %r12,%r9 1799 addq %r10,%r15 1800 adcq %rdx,%r9 1801 movq %r13,%r10 1802 movq %r14,%r11 1803 movq %r15,%r12 1804 andq $3,%r12 1805 movq %r15,%r13 1806 andq $-4,%r13 1807 movq %r9,%r14 1808 shrdq $2,%r9,%r15 1809 shrq $2,%r9 1810 addq %r13,%r15 1811 adcq %r14,%r9 1812 addq %r15,%r10 1813 adcq %r9,%r11 1814 adcq $0,%r12 1815 1816 1817 movq %r10,%r13 1818 movq %r11,%r14 1819 movq %r12,%r15 1820 subq $-5,%r10 1821 sbbq $-1,%r11 1822 sbbq $3,%r12 1823 cmovcq %r13,%r10 1824 cmovcq %r14,%r11 1825 cmovcq %r15,%r12 1826 1827 addq 0+0+16(%rbp),%r10 1828 adcq 8+0+16(%rbp),%r11 1829 1830 1831 addq $288 + 0 + 32,%rsp 1832 1833 1834 popq %r9 1835 1836 movq %r10,(%r9) 1837 movq %r11,8(%r9) 1838 popq %r15 1839 
1840 popq %r14 1841 1842 popq %r13 1843 1844 popq %r12 1845 1846 popq %rbx 1847 1848 popq %rbp 1849 1850 ret 1851 1852L$open_sse_128: 1853 1854 movdqu L$chacha20_consts(%rip),%xmm0 1855 movdqa %xmm0,%xmm1 1856 movdqa %xmm0,%xmm2 1857 movdqu 0(%r9),%xmm4 1858 movdqa %xmm4,%xmm5 1859 movdqa %xmm4,%xmm6 1860 movdqu 16(%r9),%xmm8 1861 movdqa %xmm8,%xmm9 1862 movdqa %xmm8,%xmm10 1863 movdqu 32(%r9),%xmm12 1864 movdqa %xmm12,%xmm13 1865 paddd L$sse_inc(%rip),%xmm13 1866 movdqa %xmm13,%xmm14 1867 paddd L$sse_inc(%rip),%xmm14 1868 movdqa %xmm4,%xmm7 1869 movdqa %xmm8,%xmm11 1870 movdqa %xmm13,%xmm15 1871 movq $10,%r10 1872 1873L$open_sse_128_rounds: 1874 paddd %xmm4,%xmm0 1875 pxor %xmm0,%xmm12 1876 pshufb L$rol16(%rip),%xmm12 1877 paddd %xmm12,%xmm8 1878 pxor %xmm8,%xmm4 1879 movdqa %xmm4,%xmm3 1880 pslld $12,%xmm3 1881 psrld $20,%xmm4 1882 pxor %xmm3,%xmm4 1883 paddd %xmm4,%xmm0 1884 pxor %xmm0,%xmm12 1885 pshufb L$rol8(%rip),%xmm12 1886 paddd %xmm12,%xmm8 1887 pxor %xmm8,%xmm4 1888 movdqa %xmm4,%xmm3 1889 pslld $7,%xmm3 1890 psrld $25,%xmm4 1891 pxor %xmm3,%xmm4 1892.byte 102,15,58,15,228,4 1893.byte 102,69,15,58,15,192,8 1894.byte 102,69,15,58,15,228,12 1895 paddd %xmm5,%xmm1 1896 pxor %xmm1,%xmm13 1897 pshufb L$rol16(%rip),%xmm13 1898 paddd %xmm13,%xmm9 1899 pxor %xmm9,%xmm5 1900 movdqa %xmm5,%xmm3 1901 pslld $12,%xmm3 1902 psrld $20,%xmm5 1903 pxor %xmm3,%xmm5 1904 paddd %xmm5,%xmm1 1905 pxor %xmm1,%xmm13 1906 pshufb L$rol8(%rip),%xmm13 1907 paddd %xmm13,%xmm9 1908 pxor %xmm9,%xmm5 1909 movdqa %xmm5,%xmm3 1910 pslld $7,%xmm3 1911 psrld $25,%xmm5 1912 pxor %xmm3,%xmm5 1913.byte 102,15,58,15,237,4 1914.byte 102,69,15,58,15,201,8 1915.byte 102,69,15,58,15,237,12 1916 paddd %xmm6,%xmm2 1917 pxor %xmm2,%xmm14 1918 pshufb L$rol16(%rip),%xmm14 1919 paddd %xmm14,%xmm10 1920 pxor %xmm10,%xmm6 1921 movdqa %xmm6,%xmm3 1922 pslld $12,%xmm3 1923 psrld $20,%xmm6 1924 pxor %xmm3,%xmm6 1925 paddd %xmm6,%xmm2 1926 pxor %xmm2,%xmm14 1927 pshufb L$rol8(%rip),%xmm14 1928 paddd %xmm14,%xmm10 1929 pxor %xmm10,%xmm6 1930 movdqa %xmm6,%xmm3 1931 pslld $7,%xmm3 1932 psrld $25,%xmm6 1933 pxor %xmm3,%xmm6 1934.byte 102,15,58,15,246,4 1935.byte 102,69,15,58,15,210,8 1936.byte 102,69,15,58,15,246,12 1937 paddd %xmm4,%xmm0 1938 pxor %xmm0,%xmm12 1939 pshufb L$rol16(%rip),%xmm12 1940 paddd %xmm12,%xmm8 1941 pxor %xmm8,%xmm4 1942 movdqa %xmm4,%xmm3 1943 pslld $12,%xmm3 1944 psrld $20,%xmm4 1945 pxor %xmm3,%xmm4 1946 paddd %xmm4,%xmm0 1947 pxor %xmm0,%xmm12 1948 pshufb L$rol8(%rip),%xmm12 1949 paddd %xmm12,%xmm8 1950 pxor %xmm8,%xmm4 1951 movdqa %xmm4,%xmm3 1952 pslld $7,%xmm3 1953 psrld $25,%xmm4 1954 pxor %xmm3,%xmm4 1955.byte 102,15,58,15,228,12 1956.byte 102,69,15,58,15,192,8 1957.byte 102,69,15,58,15,228,4 1958 paddd %xmm5,%xmm1 1959 pxor %xmm1,%xmm13 1960 pshufb L$rol16(%rip),%xmm13 1961 paddd %xmm13,%xmm9 1962 pxor %xmm9,%xmm5 1963 movdqa %xmm5,%xmm3 1964 pslld $12,%xmm3 1965 psrld $20,%xmm5 1966 pxor %xmm3,%xmm5 1967 paddd %xmm5,%xmm1 1968 pxor %xmm1,%xmm13 1969 pshufb L$rol8(%rip),%xmm13 1970 paddd %xmm13,%xmm9 1971 pxor %xmm9,%xmm5 1972 movdqa %xmm5,%xmm3 1973 pslld $7,%xmm3 1974 psrld $25,%xmm5 1975 pxor %xmm3,%xmm5 1976.byte 102,15,58,15,237,12 1977.byte 102,69,15,58,15,201,8 1978.byte 102,69,15,58,15,237,4 1979 paddd %xmm6,%xmm2 1980 pxor %xmm2,%xmm14 1981 pshufb L$rol16(%rip),%xmm14 1982 paddd %xmm14,%xmm10 1983 pxor %xmm10,%xmm6 1984 movdqa %xmm6,%xmm3 1985 pslld $12,%xmm3 1986 psrld $20,%xmm6 1987 pxor %xmm3,%xmm6 1988 paddd %xmm6,%xmm2 1989 pxor %xmm2,%xmm14 1990 pshufb L$rol8(%rip),%xmm14 1991 paddd 
%xmm14,%xmm10 1992 pxor %xmm10,%xmm6 1993 movdqa %xmm6,%xmm3 1994 pslld $7,%xmm3 1995 psrld $25,%xmm6 1996 pxor %xmm3,%xmm6 1997.byte 102,15,58,15,246,12 1998.byte 102,69,15,58,15,210,8 1999.byte 102,69,15,58,15,246,4 2000 2001 decq %r10 2002 jnz L$open_sse_128_rounds 2003 paddd L$chacha20_consts(%rip),%xmm0 2004 paddd L$chacha20_consts(%rip),%xmm1 2005 paddd L$chacha20_consts(%rip),%xmm2 2006 paddd %xmm7,%xmm4 2007 paddd %xmm7,%xmm5 2008 paddd %xmm7,%xmm6 2009 paddd %xmm11,%xmm9 2010 paddd %xmm11,%xmm10 2011 paddd %xmm15,%xmm13 2012 paddd L$sse_inc(%rip),%xmm15 2013 paddd %xmm15,%xmm14 2014 2015 pand L$clamp(%rip),%xmm0 2016 movdqa %xmm0,0+0(%rbp) 2017 movdqa %xmm4,0+16(%rbp) 2018 2019 movq %r8,%r8 2020 call poly_hash_ad_internal 2021L$open_sse_128_xor_hash: 2022 cmpq $16,%rbx 2023 jb L$open_sse_tail_16 2024 subq $16,%rbx 2025 addq 0+0(%rsi),%r10 2026 adcq 8+0(%rsi),%r11 2027 adcq $1,%r12 2028 2029 2030 movdqu 0(%rsi),%xmm3 2031 pxor %xmm3,%xmm1 2032 movdqu %xmm1,0(%rdi) 2033 leaq 16(%rsi),%rsi 2034 leaq 16(%rdi),%rdi 2035 movq 0+0+0(%rbp),%rax 2036 movq %rax,%r15 2037 mulq %r10 2038 movq %rax,%r13 2039 movq %rdx,%r14 2040 movq 0+0+0(%rbp),%rax 2041 mulq %r11 2042 imulq %r12,%r15 2043 addq %rax,%r14 2044 adcq %rdx,%r15 2045 movq 8+0+0(%rbp),%rax 2046 movq %rax,%r9 2047 mulq %r10 2048 addq %rax,%r14 2049 adcq $0,%rdx 2050 movq %rdx,%r10 2051 movq 8+0+0(%rbp),%rax 2052 mulq %r11 2053 addq %rax,%r15 2054 adcq $0,%rdx 2055 imulq %r12,%r9 2056 addq %r10,%r15 2057 adcq %rdx,%r9 2058 movq %r13,%r10 2059 movq %r14,%r11 2060 movq %r15,%r12 2061 andq $3,%r12 2062 movq %r15,%r13 2063 andq $-4,%r13 2064 movq %r9,%r14 2065 shrdq $2,%r9,%r15 2066 shrq $2,%r9 2067 addq %r13,%r15 2068 adcq %r14,%r9 2069 addq %r15,%r10 2070 adcq %r9,%r11 2071 adcq $0,%r12 2072 2073 2074 movdqa %xmm5,%xmm1 2075 movdqa %xmm9,%xmm5 2076 movdqa %xmm13,%xmm9 2077 movdqa %xmm2,%xmm13 2078 movdqa %xmm6,%xmm2 2079 movdqa %xmm10,%xmm6 2080 movdqa %xmm14,%xmm10 2081 jmp L$open_sse_128_xor_hash 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091.globl _chacha20_poly1305_seal 2092.private_extern _chacha20_poly1305_seal 2093 2094.p2align 6 2095_chacha20_poly1305_seal: 2096 2097_CET_ENDBR 2098 pushq %rbp 2099 2100 pushq %rbx 2101 2102 pushq %r12 2103 2104 pushq %r13 2105 2106 pushq %r14 2107 2108 pushq %r15 2109 2110 2111 2112 pushq %r9 2113 2114 subq $288 + 0 + 32,%rsp 2115 2116 leaq 32(%rsp),%rbp 2117 andq $-32,%rbp 2118 2119 movq 56(%r9),%rbx 2120 addq %rdx,%rbx 2121 movq %r8,0+0+32(%rbp) 2122 movq %rbx,8+0+32(%rbp) 2123 movq %rdx,%rbx 2124 2125 movl _OPENSSL_ia32cap_P+8(%rip),%eax 2126 andl $288,%eax 2127 xorl $288,%eax 2128 jz chacha20_poly1305_seal_avx2 2129 2130 cmpq $128,%rbx 2131 jbe L$seal_sse_128 2132 2133 movdqa L$chacha20_consts(%rip),%xmm0 2134 movdqu 0(%r9),%xmm4 2135 movdqu 16(%r9),%xmm8 2136 movdqu 32(%r9),%xmm12 2137 2138 movdqa %xmm0,%xmm1 2139 movdqa %xmm0,%xmm2 2140 movdqa %xmm0,%xmm3 2141 movdqa %xmm4,%xmm5 2142 movdqa %xmm4,%xmm6 2143 movdqa %xmm4,%xmm7 2144 movdqa %xmm8,%xmm9 2145 movdqa %xmm8,%xmm10 2146 movdqa %xmm8,%xmm11 2147 movdqa %xmm12,%xmm15 2148 paddd L$sse_inc(%rip),%xmm12 2149 movdqa %xmm12,%xmm14 2150 paddd L$sse_inc(%rip),%xmm12 2151 movdqa %xmm12,%xmm13 2152 paddd L$sse_inc(%rip),%xmm12 2153 2154 movdqa %xmm4,0+48(%rbp) 2155 movdqa %xmm8,0+64(%rbp) 2156 movdqa %xmm12,0+96(%rbp) 2157 movdqa %xmm13,0+112(%rbp) 2158 movdqa %xmm14,0+128(%rbp) 2159 movdqa %xmm15,0+144(%rbp) 2160 movq $10,%r10 2161L$seal_sse_init_rounds: 2162 movdqa %xmm8,0+80(%rbp) 2163 movdqa L$rol16(%rip),%xmm8 2164 paddd %xmm7,%xmm3 
2165 paddd %xmm6,%xmm2 2166 paddd %xmm5,%xmm1 2167 paddd %xmm4,%xmm0 2168 pxor %xmm3,%xmm15 2169 pxor %xmm2,%xmm14 2170 pxor %xmm1,%xmm13 2171 pxor %xmm0,%xmm12 2172.byte 102,69,15,56,0,248 2173.byte 102,69,15,56,0,240 2174.byte 102,69,15,56,0,232 2175.byte 102,69,15,56,0,224 2176 movdqa 0+80(%rbp),%xmm8 2177 paddd %xmm15,%xmm11 2178 paddd %xmm14,%xmm10 2179 paddd %xmm13,%xmm9 2180 paddd %xmm12,%xmm8 2181 pxor %xmm11,%xmm7 2182 pxor %xmm10,%xmm6 2183 pxor %xmm9,%xmm5 2184 pxor %xmm8,%xmm4 2185 movdqa %xmm8,0+80(%rbp) 2186 movdqa %xmm7,%xmm8 2187 psrld $20,%xmm8 2188 pslld $32-20,%xmm7 2189 pxor %xmm8,%xmm7 2190 movdqa %xmm6,%xmm8 2191 psrld $20,%xmm8 2192 pslld $32-20,%xmm6 2193 pxor %xmm8,%xmm6 2194 movdqa %xmm5,%xmm8 2195 psrld $20,%xmm8 2196 pslld $32-20,%xmm5 2197 pxor %xmm8,%xmm5 2198 movdqa %xmm4,%xmm8 2199 psrld $20,%xmm8 2200 pslld $32-20,%xmm4 2201 pxor %xmm8,%xmm4 2202 movdqa L$rol8(%rip),%xmm8 2203 paddd %xmm7,%xmm3 2204 paddd %xmm6,%xmm2 2205 paddd %xmm5,%xmm1 2206 paddd %xmm4,%xmm0 2207 pxor %xmm3,%xmm15 2208 pxor %xmm2,%xmm14 2209 pxor %xmm1,%xmm13 2210 pxor %xmm0,%xmm12 2211.byte 102,69,15,56,0,248 2212.byte 102,69,15,56,0,240 2213.byte 102,69,15,56,0,232 2214.byte 102,69,15,56,0,224 2215 movdqa 0+80(%rbp),%xmm8 2216 paddd %xmm15,%xmm11 2217 paddd %xmm14,%xmm10 2218 paddd %xmm13,%xmm9 2219 paddd %xmm12,%xmm8 2220 pxor %xmm11,%xmm7 2221 pxor %xmm10,%xmm6 2222 pxor %xmm9,%xmm5 2223 pxor %xmm8,%xmm4 2224 movdqa %xmm8,0+80(%rbp) 2225 movdqa %xmm7,%xmm8 2226 psrld $25,%xmm8 2227 pslld $32-25,%xmm7 2228 pxor %xmm8,%xmm7 2229 movdqa %xmm6,%xmm8 2230 psrld $25,%xmm8 2231 pslld $32-25,%xmm6 2232 pxor %xmm8,%xmm6 2233 movdqa %xmm5,%xmm8 2234 psrld $25,%xmm8 2235 pslld $32-25,%xmm5 2236 pxor %xmm8,%xmm5 2237 movdqa %xmm4,%xmm8 2238 psrld $25,%xmm8 2239 pslld $32-25,%xmm4 2240 pxor %xmm8,%xmm4 2241 movdqa 0+80(%rbp),%xmm8 2242.byte 102,15,58,15,255,4 2243.byte 102,69,15,58,15,219,8 2244.byte 102,69,15,58,15,255,12 2245.byte 102,15,58,15,246,4 2246.byte 102,69,15,58,15,210,8 2247.byte 102,69,15,58,15,246,12 2248.byte 102,15,58,15,237,4 2249.byte 102,69,15,58,15,201,8 2250.byte 102,69,15,58,15,237,12 2251.byte 102,15,58,15,228,4 2252.byte 102,69,15,58,15,192,8 2253.byte 102,69,15,58,15,228,12 2254 movdqa %xmm8,0+80(%rbp) 2255 movdqa L$rol16(%rip),%xmm8 2256 paddd %xmm7,%xmm3 2257 paddd %xmm6,%xmm2 2258 paddd %xmm5,%xmm1 2259 paddd %xmm4,%xmm0 2260 pxor %xmm3,%xmm15 2261 pxor %xmm2,%xmm14 2262 pxor %xmm1,%xmm13 2263 pxor %xmm0,%xmm12 2264.byte 102,69,15,56,0,248 2265.byte 102,69,15,56,0,240 2266.byte 102,69,15,56,0,232 2267.byte 102,69,15,56,0,224 2268 movdqa 0+80(%rbp),%xmm8 2269 paddd %xmm15,%xmm11 2270 paddd %xmm14,%xmm10 2271 paddd %xmm13,%xmm9 2272 paddd %xmm12,%xmm8 2273 pxor %xmm11,%xmm7 2274 pxor %xmm10,%xmm6 2275 pxor %xmm9,%xmm5 2276 pxor %xmm8,%xmm4 2277 movdqa %xmm8,0+80(%rbp) 2278 movdqa %xmm7,%xmm8 2279 psrld $20,%xmm8 2280 pslld $32-20,%xmm7 2281 pxor %xmm8,%xmm7 2282 movdqa %xmm6,%xmm8 2283 psrld $20,%xmm8 2284 pslld $32-20,%xmm6 2285 pxor %xmm8,%xmm6 2286 movdqa %xmm5,%xmm8 2287 psrld $20,%xmm8 2288 pslld $32-20,%xmm5 2289 pxor %xmm8,%xmm5 2290 movdqa %xmm4,%xmm8 2291 psrld $20,%xmm8 2292 pslld $32-20,%xmm4 2293 pxor %xmm8,%xmm4 2294 movdqa L$rol8(%rip),%xmm8 2295 paddd %xmm7,%xmm3 2296 paddd %xmm6,%xmm2 2297 paddd %xmm5,%xmm1 2298 paddd %xmm4,%xmm0 2299 pxor %xmm3,%xmm15 2300 pxor %xmm2,%xmm14 2301 pxor %xmm1,%xmm13 2302 pxor %xmm0,%xmm12 2303.byte 102,69,15,56,0,248 2304.byte 102,69,15,56,0,240 2305.byte 102,69,15,56,0,232 2306.byte 102,69,15,56,0,224 2307 movdqa 
0+80(%rbp),%xmm8 2308 paddd %xmm15,%xmm11 2309 paddd %xmm14,%xmm10 2310 paddd %xmm13,%xmm9 2311 paddd %xmm12,%xmm8 2312 pxor %xmm11,%xmm7 2313 pxor %xmm10,%xmm6 2314 pxor %xmm9,%xmm5 2315 pxor %xmm8,%xmm4 2316 movdqa %xmm8,0+80(%rbp) 2317 movdqa %xmm7,%xmm8 2318 psrld $25,%xmm8 2319 pslld $32-25,%xmm7 2320 pxor %xmm8,%xmm7 2321 movdqa %xmm6,%xmm8 2322 psrld $25,%xmm8 2323 pslld $32-25,%xmm6 2324 pxor %xmm8,%xmm6 2325 movdqa %xmm5,%xmm8 2326 psrld $25,%xmm8 2327 pslld $32-25,%xmm5 2328 pxor %xmm8,%xmm5 2329 movdqa %xmm4,%xmm8 2330 psrld $25,%xmm8 2331 pslld $32-25,%xmm4 2332 pxor %xmm8,%xmm4 2333 movdqa 0+80(%rbp),%xmm8 2334.byte 102,15,58,15,255,12 2335.byte 102,69,15,58,15,219,8 2336.byte 102,69,15,58,15,255,4 2337.byte 102,15,58,15,246,12 2338.byte 102,69,15,58,15,210,8 2339.byte 102,69,15,58,15,246,4 2340.byte 102,15,58,15,237,12 2341.byte 102,69,15,58,15,201,8 2342.byte 102,69,15,58,15,237,4 2343.byte 102,15,58,15,228,12 2344.byte 102,69,15,58,15,192,8 2345.byte 102,69,15,58,15,228,4 2346 2347 decq %r10 2348 jnz L$seal_sse_init_rounds 2349 paddd L$chacha20_consts(%rip),%xmm3 2350 paddd 0+48(%rbp),%xmm7 2351 paddd 0+64(%rbp),%xmm11 2352 paddd 0+144(%rbp),%xmm15 2353 paddd L$chacha20_consts(%rip),%xmm2 2354 paddd 0+48(%rbp),%xmm6 2355 paddd 0+64(%rbp),%xmm10 2356 paddd 0+128(%rbp),%xmm14 2357 paddd L$chacha20_consts(%rip),%xmm1 2358 paddd 0+48(%rbp),%xmm5 2359 paddd 0+64(%rbp),%xmm9 2360 paddd 0+112(%rbp),%xmm13 2361 paddd L$chacha20_consts(%rip),%xmm0 2362 paddd 0+48(%rbp),%xmm4 2363 paddd 0+64(%rbp),%xmm8 2364 paddd 0+96(%rbp),%xmm12 2365 2366 2367 pand L$clamp(%rip),%xmm3 2368 movdqa %xmm3,0+0(%rbp) 2369 movdqa %xmm7,0+16(%rbp) 2370 2371 movq %r8,%r8 2372 call poly_hash_ad_internal 2373 movdqu 0 + 0(%rsi),%xmm3 2374 movdqu 16 + 0(%rsi),%xmm7 2375 movdqu 32 + 0(%rsi),%xmm11 2376 movdqu 48 + 0(%rsi),%xmm15 2377 pxor %xmm3,%xmm2 2378 pxor %xmm7,%xmm6 2379 pxor %xmm11,%xmm10 2380 pxor %xmm14,%xmm15 2381 movdqu %xmm2,0 + 0(%rdi) 2382 movdqu %xmm6,16 + 0(%rdi) 2383 movdqu %xmm10,32 + 0(%rdi) 2384 movdqu %xmm15,48 + 0(%rdi) 2385 movdqu 0 + 64(%rsi),%xmm3 2386 movdqu 16 + 64(%rsi),%xmm7 2387 movdqu 32 + 64(%rsi),%xmm11 2388 movdqu 48 + 64(%rsi),%xmm15 2389 pxor %xmm3,%xmm1 2390 pxor %xmm7,%xmm5 2391 pxor %xmm11,%xmm9 2392 pxor %xmm13,%xmm15 2393 movdqu %xmm1,0 + 64(%rdi) 2394 movdqu %xmm5,16 + 64(%rdi) 2395 movdqu %xmm9,32 + 64(%rdi) 2396 movdqu %xmm15,48 + 64(%rdi) 2397 2398 cmpq $192,%rbx 2399 ja L$seal_sse_main_init 2400 movq $128,%rcx 2401 subq $128,%rbx 2402 leaq 128(%rsi),%rsi 2403 jmp L$seal_sse_128_tail_hash 2404L$seal_sse_main_init: 2405 movdqu 0 + 128(%rsi),%xmm3 2406 movdqu 16 + 128(%rsi),%xmm7 2407 movdqu 32 + 128(%rsi),%xmm11 2408 movdqu 48 + 128(%rsi),%xmm15 2409 pxor %xmm3,%xmm0 2410 pxor %xmm7,%xmm4 2411 pxor %xmm11,%xmm8 2412 pxor %xmm12,%xmm15 2413 movdqu %xmm0,0 + 128(%rdi) 2414 movdqu %xmm4,16 + 128(%rdi) 2415 movdqu %xmm8,32 + 128(%rdi) 2416 movdqu %xmm15,48 + 128(%rdi) 2417 2418 movq $192,%rcx 2419 subq $192,%rbx 2420 leaq 192(%rsi),%rsi 2421 movq $2,%rcx 2422 movq $8,%r8 2423 cmpq $64,%rbx 2424 jbe L$seal_sse_tail_64 2425 cmpq $128,%rbx 2426 jbe L$seal_sse_tail_128 2427 cmpq $192,%rbx 2428 jbe L$seal_sse_tail_192 2429 2430L$seal_sse_main_loop: 2431 movdqa L$chacha20_consts(%rip),%xmm0 2432 movdqa 0+48(%rbp),%xmm4 2433 movdqa 0+64(%rbp),%xmm8 2434 movdqa %xmm0,%xmm1 2435 movdqa %xmm4,%xmm5 2436 movdqa %xmm8,%xmm9 2437 movdqa %xmm0,%xmm2 2438 movdqa %xmm4,%xmm6 2439 movdqa %xmm8,%xmm10 2440 movdqa %xmm0,%xmm3 2441 movdqa %xmm4,%xmm7 2442 movdqa %xmm8,%xmm11 2443 movdqa 
0+96(%rbp),%xmm15 2444 paddd L$sse_inc(%rip),%xmm15 2445 movdqa %xmm15,%xmm14 2446 paddd L$sse_inc(%rip),%xmm14 2447 movdqa %xmm14,%xmm13 2448 paddd L$sse_inc(%rip),%xmm13 2449 movdqa %xmm13,%xmm12 2450 paddd L$sse_inc(%rip),%xmm12 2451 movdqa %xmm12,0+96(%rbp) 2452 movdqa %xmm13,0+112(%rbp) 2453 movdqa %xmm14,0+128(%rbp) 2454 movdqa %xmm15,0+144(%rbp) 2455 2456.p2align 5 2457L$seal_sse_main_rounds: 2458 movdqa %xmm8,0+80(%rbp) 2459 movdqa L$rol16(%rip),%xmm8 2460 paddd %xmm7,%xmm3 2461 paddd %xmm6,%xmm2 2462 paddd %xmm5,%xmm1 2463 paddd %xmm4,%xmm0 2464 pxor %xmm3,%xmm15 2465 pxor %xmm2,%xmm14 2466 pxor %xmm1,%xmm13 2467 pxor %xmm0,%xmm12 2468.byte 102,69,15,56,0,248 2469.byte 102,69,15,56,0,240 2470.byte 102,69,15,56,0,232 2471.byte 102,69,15,56,0,224 2472 movdqa 0+80(%rbp),%xmm8 2473 paddd %xmm15,%xmm11 2474 paddd %xmm14,%xmm10 2475 paddd %xmm13,%xmm9 2476 paddd %xmm12,%xmm8 2477 pxor %xmm11,%xmm7 2478 addq 0+0(%rdi),%r10 2479 adcq 8+0(%rdi),%r11 2480 adcq $1,%r12 2481 pxor %xmm10,%xmm6 2482 pxor %xmm9,%xmm5 2483 pxor %xmm8,%xmm4 2484 movdqa %xmm8,0+80(%rbp) 2485 movdqa %xmm7,%xmm8 2486 psrld $20,%xmm8 2487 pslld $32-20,%xmm7 2488 pxor %xmm8,%xmm7 2489 movdqa %xmm6,%xmm8 2490 psrld $20,%xmm8 2491 pslld $32-20,%xmm6 2492 pxor %xmm8,%xmm6 2493 movdqa %xmm5,%xmm8 2494 psrld $20,%xmm8 2495 pslld $32-20,%xmm5 2496 pxor %xmm8,%xmm5 2497 movdqa %xmm4,%xmm8 2498 psrld $20,%xmm8 2499 pslld $32-20,%xmm4 2500 pxor %xmm8,%xmm4 2501 movq 0+0+0(%rbp),%rax 2502 movq %rax,%r15 2503 mulq %r10 2504 movq %rax,%r13 2505 movq %rdx,%r14 2506 movq 0+0+0(%rbp),%rax 2507 mulq %r11 2508 imulq %r12,%r15 2509 addq %rax,%r14 2510 adcq %rdx,%r15 2511 movdqa L$rol8(%rip),%xmm8 2512 paddd %xmm7,%xmm3 2513 paddd %xmm6,%xmm2 2514 paddd %xmm5,%xmm1 2515 paddd %xmm4,%xmm0 2516 pxor %xmm3,%xmm15 2517 pxor %xmm2,%xmm14 2518 pxor %xmm1,%xmm13 2519 pxor %xmm0,%xmm12 2520.byte 102,69,15,56,0,248 2521.byte 102,69,15,56,0,240 2522.byte 102,69,15,56,0,232 2523.byte 102,69,15,56,0,224 2524 movdqa 0+80(%rbp),%xmm8 2525 paddd %xmm15,%xmm11 2526 paddd %xmm14,%xmm10 2527 paddd %xmm13,%xmm9 2528 paddd %xmm12,%xmm8 2529 pxor %xmm11,%xmm7 2530 pxor %xmm10,%xmm6 2531 movq 8+0+0(%rbp),%rax 2532 movq %rax,%r9 2533 mulq %r10 2534 addq %rax,%r14 2535 adcq $0,%rdx 2536 movq %rdx,%r10 2537 movq 8+0+0(%rbp),%rax 2538 mulq %r11 2539 addq %rax,%r15 2540 adcq $0,%rdx 2541 pxor %xmm9,%xmm5 2542 pxor %xmm8,%xmm4 2543 movdqa %xmm8,0+80(%rbp) 2544 movdqa %xmm7,%xmm8 2545 psrld $25,%xmm8 2546 pslld $32-25,%xmm7 2547 pxor %xmm8,%xmm7 2548 movdqa %xmm6,%xmm8 2549 psrld $25,%xmm8 2550 pslld $32-25,%xmm6 2551 pxor %xmm8,%xmm6 2552 movdqa %xmm5,%xmm8 2553 psrld $25,%xmm8 2554 pslld $32-25,%xmm5 2555 pxor %xmm8,%xmm5 2556 movdqa %xmm4,%xmm8 2557 psrld $25,%xmm8 2558 pslld $32-25,%xmm4 2559 pxor %xmm8,%xmm4 2560 movdqa 0+80(%rbp),%xmm8 2561 imulq %r12,%r9 2562 addq %r10,%r15 2563 adcq %rdx,%r9 2564.byte 102,15,58,15,255,4 2565.byte 102,69,15,58,15,219,8 2566.byte 102,69,15,58,15,255,12 2567.byte 102,15,58,15,246,4 2568.byte 102,69,15,58,15,210,8 2569.byte 102,69,15,58,15,246,12 2570.byte 102,15,58,15,237,4 2571.byte 102,69,15,58,15,201,8 2572.byte 102,69,15,58,15,237,12 2573.byte 102,15,58,15,228,4 2574.byte 102,69,15,58,15,192,8 2575.byte 102,69,15,58,15,228,12 2576 movdqa %xmm8,0+80(%rbp) 2577 movdqa L$rol16(%rip),%xmm8 2578 paddd %xmm7,%xmm3 2579 paddd %xmm6,%xmm2 2580 paddd %xmm5,%xmm1 2581 paddd %xmm4,%xmm0 2582 pxor %xmm3,%xmm15 2583 pxor %xmm2,%xmm14 2584 movq %r13,%r10 2585 movq %r14,%r11 2586 movq %r15,%r12 2587 andq $3,%r12 2588 movq %r15,%r13 2589 
andq $-4,%r13 2590 movq %r9,%r14 2591 shrdq $2,%r9,%r15 2592 shrq $2,%r9 2593 addq %r13,%r15 2594 adcq %r14,%r9 2595 addq %r15,%r10 2596 adcq %r9,%r11 2597 adcq $0,%r12 2598 pxor %xmm1,%xmm13 2599 pxor %xmm0,%xmm12 2600.byte 102,69,15,56,0,248 2601.byte 102,69,15,56,0,240 2602.byte 102,69,15,56,0,232 2603.byte 102,69,15,56,0,224 2604 movdqa 0+80(%rbp),%xmm8 2605 paddd %xmm15,%xmm11 2606 paddd %xmm14,%xmm10 2607 paddd %xmm13,%xmm9 2608 paddd %xmm12,%xmm8 2609 pxor %xmm11,%xmm7 2610 pxor %xmm10,%xmm6 2611 pxor %xmm9,%xmm5 2612 pxor %xmm8,%xmm4 2613 movdqa %xmm8,0+80(%rbp) 2614 movdqa %xmm7,%xmm8 2615 psrld $20,%xmm8 2616 pslld $32-20,%xmm7 2617 pxor %xmm8,%xmm7 2618 movdqa %xmm6,%xmm8 2619 psrld $20,%xmm8 2620 pslld $32-20,%xmm6 2621 pxor %xmm8,%xmm6 2622 movdqa %xmm5,%xmm8 2623 psrld $20,%xmm8 2624 pslld $32-20,%xmm5 2625 pxor %xmm8,%xmm5 2626 movdqa %xmm4,%xmm8 2627 psrld $20,%xmm8 2628 pslld $32-20,%xmm4 2629 pxor %xmm8,%xmm4 2630 movdqa L$rol8(%rip),%xmm8 2631 paddd %xmm7,%xmm3 2632 paddd %xmm6,%xmm2 2633 paddd %xmm5,%xmm1 2634 paddd %xmm4,%xmm0 2635 pxor %xmm3,%xmm15 2636 pxor %xmm2,%xmm14 2637 pxor %xmm1,%xmm13 2638 pxor %xmm0,%xmm12 2639.byte 102,69,15,56,0,248 2640.byte 102,69,15,56,0,240 2641.byte 102,69,15,56,0,232 2642.byte 102,69,15,56,0,224 2643 movdqa 0+80(%rbp),%xmm8 2644 paddd %xmm15,%xmm11 2645 paddd %xmm14,%xmm10 2646 paddd %xmm13,%xmm9 2647 paddd %xmm12,%xmm8 2648 pxor %xmm11,%xmm7 2649 pxor %xmm10,%xmm6 2650 pxor %xmm9,%xmm5 2651 pxor %xmm8,%xmm4 2652 movdqa %xmm8,0+80(%rbp) 2653 movdqa %xmm7,%xmm8 2654 psrld $25,%xmm8 2655 pslld $32-25,%xmm7 2656 pxor %xmm8,%xmm7 2657 movdqa %xmm6,%xmm8 2658 psrld $25,%xmm8 2659 pslld $32-25,%xmm6 2660 pxor %xmm8,%xmm6 2661 movdqa %xmm5,%xmm8 2662 psrld $25,%xmm8 2663 pslld $32-25,%xmm5 2664 pxor %xmm8,%xmm5 2665 movdqa %xmm4,%xmm8 2666 psrld $25,%xmm8 2667 pslld $32-25,%xmm4 2668 pxor %xmm8,%xmm4 2669 movdqa 0+80(%rbp),%xmm8 2670.byte 102,15,58,15,255,12 2671.byte 102,69,15,58,15,219,8 2672.byte 102,69,15,58,15,255,4 2673.byte 102,15,58,15,246,12 2674.byte 102,69,15,58,15,210,8 2675.byte 102,69,15,58,15,246,4 2676.byte 102,15,58,15,237,12 2677.byte 102,69,15,58,15,201,8 2678.byte 102,69,15,58,15,237,4 2679.byte 102,15,58,15,228,12 2680.byte 102,69,15,58,15,192,8 2681.byte 102,69,15,58,15,228,4 2682 2683 leaq 16(%rdi),%rdi 2684 decq %r8 2685 jge L$seal_sse_main_rounds 2686 addq 0+0(%rdi),%r10 2687 adcq 8+0(%rdi),%r11 2688 adcq $1,%r12 2689 movq 0+0+0(%rbp),%rax 2690 movq %rax,%r15 2691 mulq %r10 2692 movq %rax,%r13 2693 movq %rdx,%r14 2694 movq 0+0+0(%rbp),%rax 2695 mulq %r11 2696 imulq %r12,%r15 2697 addq %rax,%r14 2698 adcq %rdx,%r15 2699 movq 8+0+0(%rbp),%rax 2700 movq %rax,%r9 2701 mulq %r10 2702 addq %rax,%r14 2703 adcq $0,%rdx 2704 movq %rdx,%r10 2705 movq 8+0+0(%rbp),%rax 2706 mulq %r11 2707 addq %rax,%r15 2708 adcq $0,%rdx 2709 imulq %r12,%r9 2710 addq %r10,%r15 2711 adcq %rdx,%r9 2712 movq %r13,%r10 2713 movq %r14,%r11 2714 movq %r15,%r12 2715 andq $3,%r12 2716 movq %r15,%r13 2717 andq $-4,%r13 2718 movq %r9,%r14 2719 shrdq $2,%r9,%r15 2720 shrq $2,%r9 2721 addq %r13,%r15 2722 adcq %r14,%r9 2723 addq %r15,%r10 2724 adcq %r9,%r11 2725 adcq $0,%r12 2726 2727 leaq 16(%rdi),%rdi 2728 decq %rcx 2729 jg L$seal_sse_main_rounds 2730 paddd L$chacha20_consts(%rip),%xmm3 2731 paddd 0+48(%rbp),%xmm7 2732 paddd 0+64(%rbp),%xmm11 2733 paddd 0+144(%rbp),%xmm15 2734 paddd L$chacha20_consts(%rip),%xmm2 2735 paddd 0+48(%rbp),%xmm6 2736 paddd 0+64(%rbp),%xmm10 2737 paddd 0+128(%rbp),%xmm14 2738 paddd L$chacha20_consts(%rip),%xmm1 2739 paddd 
0+48(%rbp),%xmm5 2740 paddd 0+64(%rbp),%xmm9 2741 paddd 0+112(%rbp),%xmm13 2742 paddd L$chacha20_consts(%rip),%xmm0 2743 paddd 0+48(%rbp),%xmm4 2744 paddd 0+64(%rbp),%xmm8 2745 paddd 0+96(%rbp),%xmm12 2746 2747 movdqa %xmm14,0+80(%rbp) 2748 movdqa %xmm14,0+80(%rbp) 2749 movdqu 0 + 0(%rsi),%xmm14 2750 pxor %xmm3,%xmm14 2751 movdqu %xmm14,0 + 0(%rdi) 2752 movdqu 16 + 0(%rsi),%xmm14 2753 pxor %xmm7,%xmm14 2754 movdqu %xmm14,16 + 0(%rdi) 2755 movdqu 32 + 0(%rsi),%xmm14 2756 pxor %xmm11,%xmm14 2757 movdqu %xmm14,32 + 0(%rdi) 2758 movdqu 48 + 0(%rsi),%xmm14 2759 pxor %xmm15,%xmm14 2760 movdqu %xmm14,48 + 0(%rdi) 2761 2762 movdqa 0+80(%rbp),%xmm14 2763 movdqu 0 + 64(%rsi),%xmm3 2764 movdqu 16 + 64(%rsi),%xmm7 2765 movdqu 32 + 64(%rsi),%xmm11 2766 movdqu 48 + 64(%rsi),%xmm15 2767 pxor %xmm3,%xmm2 2768 pxor %xmm7,%xmm6 2769 pxor %xmm11,%xmm10 2770 pxor %xmm14,%xmm15 2771 movdqu %xmm2,0 + 64(%rdi) 2772 movdqu %xmm6,16 + 64(%rdi) 2773 movdqu %xmm10,32 + 64(%rdi) 2774 movdqu %xmm15,48 + 64(%rdi) 2775 movdqu 0 + 128(%rsi),%xmm3 2776 movdqu 16 + 128(%rsi),%xmm7 2777 movdqu 32 + 128(%rsi),%xmm11 2778 movdqu 48 + 128(%rsi),%xmm15 2779 pxor %xmm3,%xmm1 2780 pxor %xmm7,%xmm5 2781 pxor %xmm11,%xmm9 2782 pxor %xmm13,%xmm15 2783 movdqu %xmm1,0 + 128(%rdi) 2784 movdqu %xmm5,16 + 128(%rdi) 2785 movdqu %xmm9,32 + 128(%rdi) 2786 movdqu %xmm15,48 + 128(%rdi) 2787 2788 cmpq $256,%rbx 2789 ja L$seal_sse_main_loop_xor 2790 2791 movq $192,%rcx 2792 subq $192,%rbx 2793 leaq 192(%rsi),%rsi 2794 jmp L$seal_sse_128_tail_hash 2795L$seal_sse_main_loop_xor: 2796 movdqu 0 + 192(%rsi),%xmm3 2797 movdqu 16 + 192(%rsi),%xmm7 2798 movdqu 32 + 192(%rsi),%xmm11 2799 movdqu 48 + 192(%rsi),%xmm15 2800 pxor %xmm3,%xmm0 2801 pxor %xmm7,%xmm4 2802 pxor %xmm11,%xmm8 2803 pxor %xmm12,%xmm15 2804 movdqu %xmm0,0 + 192(%rdi) 2805 movdqu %xmm4,16 + 192(%rdi) 2806 movdqu %xmm8,32 + 192(%rdi) 2807 movdqu %xmm15,48 + 192(%rdi) 2808 2809 leaq 256(%rsi),%rsi 2810 subq $256,%rbx 2811 movq $6,%rcx 2812 movq $4,%r8 2813 cmpq $192,%rbx 2814 jg L$seal_sse_main_loop 2815 movq %rbx,%rcx 2816 testq %rbx,%rbx 2817 je L$seal_sse_128_tail_hash 2818 movq $6,%rcx 2819 cmpq $128,%rbx 2820 ja L$seal_sse_tail_192 2821 cmpq $64,%rbx 2822 ja L$seal_sse_tail_128 2823 2824L$seal_sse_tail_64: 2825 movdqa L$chacha20_consts(%rip),%xmm0 2826 movdqa 0+48(%rbp),%xmm4 2827 movdqa 0+64(%rbp),%xmm8 2828 movdqa 0+96(%rbp),%xmm12 2829 paddd L$sse_inc(%rip),%xmm12 2830 movdqa %xmm12,0+96(%rbp) 2831 2832L$seal_sse_tail_64_rounds_and_x2hash: 2833 addq 0+0(%rdi),%r10 2834 adcq 8+0(%rdi),%r11 2835 adcq $1,%r12 2836 movq 0+0+0(%rbp),%rax 2837 movq %rax,%r15 2838 mulq %r10 2839 movq %rax,%r13 2840 movq %rdx,%r14 2841 movq 0+0+0(%rbp),%rax 2842 mulq %r11 2843 imulq %r12,%r15 2844 addq %rax,%r14 2845 adcq %rdx,%r15 2846 movq 8+0+0(%rbp),%rax 2847 movq %rax,%r9 2848 mulq %r10 2849 addq %rax,%r14 2850 adcq $0,%rdx 2851 movq %rdx,%r10 2852 movq 8+0+0(%rbp),%rax 2853 mulq %r11 2854 addq %rax,%r15 2855 adcq $0,%rdx 2856 imulq %r12,%r9 2857 addq %r10,%r15 2858 adcq %rdx,%r9 2859 movq %r13,%r10 2860 movq %r14,%r11 2861 movq %r15,%r12 2862 andq $3,%r12 2863 movq %r15,%r13 2864 andq $-4,%r13 2865 movq %r9,%r14 2866 shrdq $2,%r9,%r15 2867 shrq $2,%r9 2868 addq %r13,%r15 2869 adcq %r14,%r9 2870 addq %r15,%r10 2871 adcq %r9,%r11 2872 adcq $0,%r12 2873 2874 leaq 16(%rdi),%rdi 2875L$seal_sse_tail_64_rounds_and_x1hash: 2876 paddd %xmm4,%xmm0 2877 pxor %xmm0,%xmm12 2878 pshufb L$rol16(%rip),%xmm12 2879 paddd %xmm12,%xmm8 2880 pxor %xmm8,%xmm4 2881 movdqa %xmm4,%xmm3 2882 pslld $12,%xmm3 2883 psrld 
$20,%xmm4 2884 pxor %xmm3,%xmm4 2885 paddd %xmm4,%xmm0 2886 pxor %xmm0,%xmm12 2887 pshufb L$rol8(%rip),%xmm12 2888 paddd %xmm12,%xmm8 2889 pxor %xmm8,%xmm4 2890 movdqa %xmm4,%xmm3 2891 pslld $7,%xmm3 2892 psrld $25,%xmm4 2893 pxor %xmm3,%xmm4 2894.byte 102,15,58,15,228,4 2895.byte 102,69,15,58,15,192,8 2896.byte 102,69,15,58,15,228,12 2897 paddd %xmm4,%xmm0 2898 pxor %xmm0,%xmm12 2899 pshufb L$rol16(%rip),%xmm12 2900 paddd %xmm12,%xmm8 2901 pxor %xmm8,%xmm4 2902 movdqa %xmm4,%xmm3 2903 pslld $12,%xmm3 2904 psrld $20,%xmm4 2905 pxor %xmm3,%xmm4 2906 paddd %xmm4,%xmm0 2907 pxor %xmm0,%xmm12 2908 pshufb L$rol8(%rip),%xmm12 2909 paddd %xmm12,%xmm8 2910 pxor %xmm8,%xmm4 2911 movdqa %xmm4,%xmm3 2912 pslld $7,%xmm3 2913 psrld $25,%xmm4 2914 pxor %xmm3,%xmm4 2915.byte 102,15,58,15,228,12 2916.byte 102,69,15,58,15,192,8 2917.byte 102,69,15,58,15,228,4 2918 addq 0+0(%rdi),%r10 2919 adcq 8+0(%rdi),%r11 2920 adcq $1,%r12 2921 movq 0+0+0(%rbp),%rax 2922 movq %rax,%r15 2923 mulq %r10 2924 movq %rax,%r13 2925 movq %rdx,%r14 2926 movq 0+0+0(%rbp),%rax 2927 mulq %r11 2928 imulq %r12,%r15 2929 addq %rax,%r14 2930 adcq %rdx,%r15 2931 movq 8+0+0(%rbp),%rax 2932 movq %rax,%r9 2933 mulq %r10 2934 addq %rax,%r14 2935 adcq $0,%rdx 2936 movq %rdx,%r10 2937 movq 8+0+0(%rbp),%rax 2938 mulq %r11 2939 addq %rax,%r15 2940 adcq $0,%rdx 2941 imulq %r12,%r9 2942 addq %r10,%r15 2943 adcq %rdx,%r9 2944 movq %r13,%r10 2945 movq %r14,%r11 2946 movq %r15,%r12 2947 andq $3,%r12 2948 movq %r15,%r13 2949 andq $-4,%r13 2950 movq %r9,%r14 2951 shrdq $2,%r9,%r15 2952 shrq $2,%r9 2953 addq %r13,%r15 2954 adcq %r14,%r9 2955 addq %r15,%r10 2956 adcq %r9,%r11 2957 adcq $0,%r12 2958 2959 leaq 16(%rdi),%rdi 2960 decq %rcx 2961 jg L$seal_sse_tail_64_rounds_and_x2hash 2962 decq %r8 2963 jge L$seal_sse_tail_64_rounds_and_x1hash 2964 paddd L$chacha20_consts(%rip),%xmm0 2965 paddd 0+48(%rbp),%xmm4 2966 paddd 0+64(%rbp),%xmm8 2967 paddd 0+96(%rbp),%xmm12 2968 2969 jmp L$seal_sse_128_tail_xor 2970 2971L$seal_sse_tail_128: 2972 movdqa L$chacha20_consts(%rip),%xmm0 2973 movdqa 0+48(%rbp),%xmm4 2974 movdqa 0+64(%rbp),%xmm8 2975 movdqa %xmm0,%xmm1 2976 movdqa %xmm4,%xmm5 2977 movdqa %xmm8,%xmm9 2978 movdqa 0+96(%rbp),%xmm13 2979 paddd L$sse_inc(%rip),%xmm13 2980 movdqa %xmm13,%xmm12 2981 paddd L$sse_inc(%rip),%xmm12 2982 movdqa %xmm12,0+96(%rbp) 2983 movdqa %xmm13,0+112(%rbp) 2984 2985L$seal_sse_tail_128_rounds_and_x2hash: 2986 addq 0+0(%rdi),%r10 2987 adcq 8+0(%rdi),%r11 2988 adcq $1,%r12 2989 movq 0+0+0(%rbp),%rax 2990 movq %rax,%r15 2991 mulq %r10 2992 movq %rax,%r13 2993 movq %rdx,%r14 2994 movq 0+0+0(%rbp),%rax 2995 mulq %r11 2996 imulq %r12,%r15 2997 addq %rax,%r14 2998 adcq %rdx,%r15 2999 movq 8+0+0(%rbp),%rax 3000 movq %rax,%r9 3001 mulq %r10 3002 addq %rax,%r14 3003 adcq $0,%rdx 3004 movq %rdx,%r10 3005 movq 8+0+0(%rbp),%rax 3006 mulq %r11 3007 addq %rax,%r15 3008 adcq $0,%rdx 3009 imulq %r12,%r9 3010 addq %r10,%r15 3011 adcq %rdx,%r9 3012 movq %r13,%r10 3013 movq %r14,%r11 3014 movq %r15,%r12 3015 andq $3,%r12 3016 movq %r15,%r13 3017 andq $-4,%r13 3018 movq %r9,%r14 3019 shrdq $2,%r9,%r15 3020 shrq $2,%r9 3021 addq %r13,%r15 3022 adcq %r14,%r9 3023 addq %r15,%r10 3024 adcq %r9,%r11 3025 adcq $0,%r12 3026 3027 leaq 16(%rdi),%rdi 3028L$seal_sse_tail_128_rounds_and_x1hash: 3029 paddd %xmm4,%xmm0 3030 pxor %xmm0,%xmm12 3031 pshufb L$rol16(%rip),%xmm12 3032 paddd %xmm12,%xmm8 3033 pxor %xmm8,%xmm4 3034 movdqa %xmm4,%xmm3 3035 pslld $12,%xmm3 3036 psrld $20,%xmm4 3037 pxor %xmm3,%xmm4 3038 paddd %xmm4,%xmm0 3039 pxor %xmm0,%xmm12 3040 pshufb 
L$rol8(%rip),%xmm12 3041 paddd %xmm12,%xmm8 3042 pxor %xmm8,%xmm4 3043 movdqa %xmm4,%xmm3 3044 pslld $7,%xmm3 3045 psrld $25,%xmm4 3046 pxor %xmm3,%xmm4 3047.byte 102,15,58,15,228,4 3048.byte 102,69,15,58,15,192,8 3049.byte 102,69,15,58,15,228,12 3050 paddd %xmm5,%xmm1 3051 pxor %xmm1,%xmm13 3052 pshufb L$rol16(%rip),%xmm13 3053 paddd %xmm13,%xmm9 3054 pxor %xmm9,%xmm5 3055 movdqa %xmm5,%xmm3 3056 pslld $12,%xmm3 3057 psrld $20,%xmm5 3058 pxor %xmm3,%xmm5 3059 paddd %xmm5,%xmm1 3060 pxor %xmm1,%xmm13 3061 pshufb L$rol8(%rip),%xmm13 3062 paddd %xmm13,%xmm9 3063 pxor %xmm9,%xmm5 3064 movdqa %xmm5,%xmm3 3065 pslld $7,%xmm3 3066 psrld $25,%xmm5 3067 pxor %xmm3,%xmm5 3068.byte 102,15,58,15,237,4 3069.byte 102,69,15,58,15,201,8 3070.byte 102,69,15,58,15,237,12 3071 addq 0+0(%rdi),%r10 3072 adcq 8+0(%rdi),%r11 3073 adcq $1,%r12 3074 movq 0+0+0(%rbp),%rax 3075 movq %rax,%r15 3076 mulq %r10 3077 movq %rax,%r13 3078 movq %rdx,%r14 3079 movq 0+0+0(%rbp),%rax 3080 mulq %r11 3081 imulq %r12,%r15 3082 addq %rax,%r14 3083 adcq %rdx,%r15 3084 movq 8+0+0(%rbp),%rax 3085 movq %rax,%r9 3086 mulq %r10 3087 addq %rax,%r14 3088 adcq $0,%rdx 3089 movq %rdx,%r10 3090 movq 8+0+0(%rbp),%rax 3091 mulq %r11 3092 addq %rax,%r15 3093 adcq $0,%rdx 3094 imulq %r12,%r9 3095 addq %r10,%r15 3096 adcq %rdx,%r9 3097 movq %r13,%r10 3098 movq %r14,%r11 3099 movq %r15,%r12 3100 andq $3,%r12 3101 movq %r15,%r13 3102 andq $-4,%r13 3103 movq %r9,%r14 3104 shrdq $2,%r9,%r15 3105 shrq $2,%r9 3106 addq %r13,%r15 3107 adcq %r14,%r9 3108 addq %r15,%r10 3109 adcq %r9,%r11 3110 adcq $0,%r12 3111 paddd %xmm4,%xmm0 3112 pxor %xmm0,%xmm12 3113 pshufb L$rol16(%rip),%xmm12 3114 paddd %xmm12,%xmm8 3115 pxor %xmm8,%xmm4 3116 movdqa %xmm4,%xmm3 3117 pslld $12,%xmm3 3118 psrld $20,%xmm4 3119 pxor %xmm3,%xmm4 3120 paddd %xmm4,%xmm0 3121 pxor %xmm0,%xmm12 3122 pshufb L$rol8(%rip),%xmm12 3123 paddd %xmm12,%xmm8 3124 pxor %xmm8,%xmm4 3125 movdqa %xmm4,%xmm3 3126 pslld $7,%xmm3 3127 psrld $25,%xmm4 3128 pxor %xmm3,%xmm4 3129.byte 102,15,58,15,228,12 3130.byte 102,69,15,58,15,192,8 3131.byte 102,69,15,58,15,228,4 3132 paddd %xmm5,%xmm1 3133 pxor %xmm1,%xmm13 3134 pshufb L$rol16(%rip),%xmm13 3135 paddd %xmm13,%xmm9 3136 pxor %xmm9,%xmm5 3137 movdqa %xmm5,%xmm3 3138 pslld $12,%xmm3 3139 psrld $20,%xmm5 3140 pxor %xmm3,%xmm5 3141 paddd %xmm5,%xmm1 3142 pxor %xmm1,%xmm13 3143 pshufb L$rol8(%rip),%xmm13 3144 paddd %xmm13,%xmm9 3145 pxor %xmm9,%xmm5 3146 movdqa %xmm5,%xmm3 3147 pslld $7,%xmm3 3148 psrld $25,%xmm5 3149 pxor %xmm3,%xmm5 3150.byte 102,15,58,15,237,12 3151.byte 102,69,15,58,15,201,8 3152.byte 102,69,15,58,15,237,4 3153 3154 leaq 16(%rdi),%rdi 3155 decq %rcx 3156 jg L$seal_sse_tail_128_rounds_and_x2hash 3157 decq %r8 3158 jge L$seal_sse_tail_128_rounds_and_x1hash 3159 paddd L$chacha20_consts(%rip),%xmm1 3160 paddd 0+48(%rbp),%xmm5 3161 paddd 0+64(%rbp),%xmm9 3162 paddd 0+112(%rbp),%xmm13 3163 paddd L$chacha20_consts(%rip),%xmm0 3164 paddd 0+48(%rbp),%xmm4 3165 paddd 0+64(%rbp),%xmm8 3166 paddd 0+96(%rbp),%xmm12 3167 movdqu 0 + 0(%rsi),%xmm3 3168 movdqu 16 + 0(%rsi),%xmm7 3169 movdqu 32 + 0(%rsi),%xmm11 3170 movdqu 48 + 0(%rsi),%xmm15 3171 pxor %xmm3,%xmm1 3172 pxor %xmm7,%xmm5 3173 pxor %xmm11,%xmm9 3174 pxor %xmm13,%xmm15 3175 movdqu %xmm1,0 + 0(%rdi) 3176 movdqu %xmm5,16 + 0(%rdi) 3177 movdqu %xmm9,32 + 0(%rdi) 3178 movdqu %xmm15,48 + 0(%rdi) 3179 3180 movq $64,%rcx 3181 subq $64,%rbx 3182 leaq 64(%rsi),%rsi 3183 jmp L$seal_sse_128_tail_hash 3184 3185L$seal_sse_tail_192: 3186 movdqa L$chacha20_consts(%rip),%xmm0 3187 movdqa 0+48(%rbp),%xmm4 3188 
movdqa 0+64(%rbp),%xmm8 3189 movdqa %xmm0,%xmm1 3190 movdqa %xmm4,%xmm5 3191 movdqa %xmm8,%xmm9 3192 movdqa %xmm0,%xmm2 3193 movdqa %xmm4,%xmm6 3194 movdqa %xmm8,%xmm10 3195 movdqa 0+96(%rbp),%xmm14 3196 paddd L$sse_inc(%rip),%xmm14 3197 movdqa %xmm14,%xmm13 3198 paddd L$sse_inc(%rip),%xmm13 3199 movdqa %xmm13,%xmm12 3200 paddd L$sse_inc(%rip),%xmm12 3201 movdqa %xmm12,0+96(%rbp) 3202 movdqa %xmm13,0+112(%rbp) 3203 movdqa %xmm14,0+128(%rbp) 3204 3205L$seal_sse_tail_192_rounds_and_x2hash: 3206 addq 0+0(%rdi),%r10 3207 adcq 8+0(%rdi),%r11 3208 adcq $1,%r12 3209 movq 0+0+0(%rbp),%rax 3210 movq %rax,%r15 3211 mulq %r10 3212 movq %rax,%r13 3213 movq %rdx,%r14 3214 movq 0+0+0(%rbp),%rax 3215 mulq %r11 3216 imulq %r12,%r15 3217 addq %rax,%r14 3218 adcq %rdx,%r15 3219 movq 8+0+0(%rbp),%rax 3220 movq %rax,%r9 3221 mulq %r10 3222 addq %rax,%r14 3223 adcq $0,%rdx 3224 movq %rdx,%r10 3225 movq 8+0+0(%rbp),%rax 3226 mulq %r11 3227 addq %rax,%r15 3228 adcq $0,%rdx 3229 imulq %r12,%r9 3230 addq %r10,%r15 3231 adcq %rdx,%r9 3232 movq %r13,%r10 3233 movq %r14,%r11 3234 movq %r15,%r12 3235 andq $3,%r12 3236 movq %r15,%r13 3237 andq $-4,%r13 3238 movq %r9,%r14 3239 shrdq $2,%r9,%r15 3240 shrq $2,%r9 3241 addq %r13,%r15 3242 adcq %r14,%r9 3243 addq %r15,%r10 3244 adcq %r9,%r11 3245 adcq $0,%r12 3246 3247 leaq 16(%rdi),%rdi 3248L$seal_sse_tail_192_rounds_and_x1hash: 3249 paddd %xmm4,%xmm0 3250 pxor %xmm0,%xmm12 3251 pshufb L$rol16(%rip),%xmm12 3252 paddd %xmm12,%xmm8 3253 pxor %xmm8,%xmm4 3254 movdqa %xmm4,%xmm3 3255 pslld $12,%xmm3 3256 psrld $20,%xmm4 3257 pxor %xmm3,%xmm4 3258 paddd %xmm4,%xmm0 3259 pxor %xmm0,%xmm12 3260 pshufb L$rol8(%rip),%xmm12 3261 paddd %xmm12,%xmm8 3262 pxor %xmm8,%xmm4 3263 movdqa %xmm4,%xmm3 3264 pslld $7,%xmm3 3265 psrld $25,%xmm4 3266 pxor %xmm3,%xmm4 3267.byte 102,15,58,15,228,4 3268.byte 102,69,15,58,15,192,8 3269.byte 102,69,15,58,15,228,12 3270 paddd %xmm5,%xmm1 3271 pxor %xmm1,%xmm13 3272 pshufb L$rol16(%rip),%xmm13 3273 paddd %xmm13,%xmm9 3274 pxor %xmm9,%xmm5 3275 movdqa %xmm5,%xmm3 3276 pslld $12,%xmm3 3277 psrld $20,%xmm5 3278 pxor %xmm3,%xmm5 3279 paddd %xmm5,%xmm1 3280 pxor %xmm1,%xmm13 3281 pshufb L$rol8(%rip),%xmm13 3282 paddd %xmm13,%xmm9 3283 pxor %xmm9,%xmm5 3284 movdqa %xmm5,%xmm3 3285 pslld $7,%xmm3 3286 psrld $25,%xmm5 3287 pxor %xmm3,%xmm5 3288.byte 102,15,58,15,237,4 3289.byte 102,69,15,58,15,201,8 3290.byte 102,69,15,58,15,237,12 3291 paddd %xmm6,%xmm2 3292 pxor %xmm2,%xmm14 3293 pshufb L$rol16(%rip),%xmm14 3294 paddd %xmm14,%xmm10 3295 pxor %xmm10,%xmm6 3296 movdqa %xmm6,%xmm3 3297 pslld $12,%xmm3 3298 psrld $20,%xmm6 3299 pxor %xmm3,%xmm6 3300 paddd %xmm6,%xmm2 3301 pxor %xmm2,%xmm14 3302 pshufb L$rol8(%rip),%xmm14 3303 paddd %xmm14,%xmm10 3304 pxor %xmm10,%xmm6 3305 movdqa %xmm6,%xmm3 3306 pslld $7,%xmm3 3307 psrld $25,%xmm6 3308 pxor %xmm3,%xmm6 3309.byte 102,15,58,15,246,4 3310.byte 102,69,15,58,15,210,8 3311.byte 102,69,15,58,15,246,12 3312 addq 0+0(%rdi),%r10 3313 adcq 8+0(%rdi),%r11 3314 adcq $1,%r12 3315 movq 0+0+0(%rbp),%rax 3316 movq %rax,%r15 3317 mulq %r10 3318 movq %rax,%r13 3319 movq %rdx,%r14 3320 movq 0+0+0(%rbp),%rax 3321 mulq %r11 3322 imulq %r12,%r15 3323 addq %rax,%r14 3324 adcq %rdx,%r15 3325 movq 8+0+0(%rbp),%rax 3326 movq %rax,%r9 3327 mulq %r10 3328 addq %rax,%r14 3329 adcq $0,%rdx 3330 movq %rdx,%r10 3331 movq 8+0+0(%rbp),%rax 3332 mulq %r11 3333 addq %rax,%r15 3334 adcq $0,%rdx 3335 imulq %r12,%r9 3336 addq %r10,%r15 3337 adcq %rdx,%r9 3338 movq %r13,%r10 3339 movq %r14,%r11 3340 movq %r15,%r12 3341 andq $3,%r12 3342 movq %r15,%r13 
3343 andq $-4,%r13 3344 movq %r9,%r14 3345 shrdq $2,%r9,%r15 3346 shrq $2,%r9 3347 addq %r13,%r15 3348 adcq %r14,%r9 3349 addq %r15,%r10 3350 adcq %r9,%r11 3351 adcq $0,%r12 3352 paddd %xmm4,%xmm0 3353 pxor %xmm0,%xmm12 3354 pshufb L$rol16(%rip),%xmm12 3355 paddd %xmm12,%xmm8 3356 pxor %xmm8,%xmm4 3357 movdqa %xmm4,%xmm3 3358 pslld $12,%xmm3 3359 psrld $20,%xmm4 3360 pxor %xmm3,%xmm4 3361 paddd %xmm4,%xmm0 3362 pxor %xmm0,%xmm12 3363 pshufb L$rol8(%rip),%xmm12 3364 paddd %xmm12,%xmm8 3365 pxor %xmm8,%xmm4 3366 movdqa %xmm4,%xmm3 3367 pslld $7,%xmm3 3368 psrld $25,%xmm4 3369 pxor %xmm3,%xmm4 3370.byte 102,15,58,15,228,12 3371.byte 102,69,15,58,15,192,8 3372.byte 102,69,15,58,15,228,4 3373 paddd %xmm5,%xmm1 3374 pxor %xmm1,%xmm13 3375 pshufb L$rol16(%rip),%xmm13 3376 paddd %xmm13,%xmm9 3377 pxor %xmm9,%xmm5 3378 movdqa %xmm5,%xmm3 3379 pslld $12,%xmm3 3380 psrld $20,%xmm5 3381 pxor %xmm3,%xmm5 3382 paddd %xmm5,%xmm1 3383 pxor %xmm1,%xmm13 3384 pshufb L$rol8(%rip),%xmm13 3385 paddd %xmm13,%xmm9 3386 pxor %xmm9,%xmm5 3387 movdqa %xmm5,%xmm3 3388 pslld $7,%xmm3 3389 psrld $25,%xmm5 3390 pxor %xmm3,%xmm5 3391.byte 102,15,58,15,237,12 3392.byte 102,69,15,58,15,201,8 3393.byte 102,69,15,58,15,237,4 3394 paddd %xmm6,%xmm2 3395 pxor %xmm2,%xmm14 3396 pshufb L$rol16(%rip),%xmm14 3397 paddd %xmm14,%xmm10 3398 pxor %xmm10,%xmm6 3399 movdqa %xmm6,%xmm3 3400 pslld $12,%xmm3 3401 psrld $20,%xmm6 3402 pxor %xmm3,%xmm6 3403 paddd %xmm6,%xmm2 3404 pxor %xmm2,%xmm14 3405 pshufb L$rol8(%rip),%xmm14 3406 paddd %xmm14,%xmm10 3407 pxor %xmm10,%xmm6 3408 movdqa %xmm6,%xmm3 3409 pslld $7,%xmm3 3410 psrld $25,%xmm6 3411 pxor %xmm3,%xmm6 3412.byte 102,15,58,15,246,12 3413.byte 102,69,15,58,15,210,8 3414.byte 102,69,15,58,15,246,4 3415 3416 leaq 16(%rdi),%rdi 3417 decq %rcx 3418 jg L$seal_sse_tail_192_rounds_and_x2hash 3419 decq %r8 3420 jge L$seal_sse_tail_192_rounds_and_x1hash 3421 paddd L$chacha20_consts(%rip),%xmm2 3422 paddd 0+48(%rbp),%xmm6 3423 paddd 0+64(%rbp),%xmm10 3424 paddd 0+128(%rbp),%xmm14 3425 paddd L$chacha20_consts(%rip),%xmm1 3426 paddd 0+48(%rbp),%xmm5 3427 paddd 0+64(%rbp),%xmm9 3428 paddd 0+112(%rbp),%xmm13 3429 paddd L$chacha20_consts(%rip),%xmm0 3430 paddd 0+48(%rbp),%xmm4 3431 paddd 0+64(%rbp),%xmm8 3432 paddd 0+96(%rbp),%xmm12 3433 movdqu 0 + 0(%rsi),%xmm3 3434 movdqu 16 + 0(%rsi),%xmm7 3435 movdqu 32 + 0(%rsi),%xmm11 3436 movdqu 48 + 0(%rsi),%xmm15 3437 pxor %xmm3,%xmm2 3438 pxor %xmm7,%xmm6 3439 pxor %xmm11,%xmm10 3440 pxor %xmm14,%xmm15 3441 movdqu %xmm2,0 + 0(%rdi) 3442 movdqu %xmm6,16 + 0(%rdi) 3443 movdqu %xmm10,32 + 0(%rdi) 3444 movdqu %xmm15,48 + 0(%rdi) 3445 movdqu 0 + 64(%rsi),%xmm3 3446 movdqu 16 + 64(%rsi),%xmm7 3447 movdqu 32 + 64(%rsi),%xmm11 3448 movdqu 48 + 64(%rsi),%xmm15 3449 pxor %xmm3,%xmm1 3450 pxor %xmm7,%xmm5 3451 pxor %xmm11,%xmm9 3452 pxor %xmm13,%xmm15 3453 movdqu %xmm1,0 + 64(%rdi) 3454 movdqu %xmm5,16 + 64(%rdi) 3455 movdqu %xmm9,32 + 64(%rdi) 3456 movdqu %xmm15,48 + 64(%rdi) 3457 3458 movq $128,%rcx 3459 subq $128,%rbx 3460 leaq 128(%rsi),%rsi 3461 3462L$seal_sse_128_tail_hash: 3463 cmpq $16,%rcx 3464 jb L$seal_sse_128_tail_xor 3465 addq 0+0(%rdi),%r10 3466 adcq 8+0(%rdi),%r11 3467 adcq $1,%r12 3468 movq 0+0+0(%rbp),%rax 3469 movq %rax,%r15 3470 mulq %r10 3471 movq %rax,%r13 3472 movq %rdx,%r14 3473 movq 0+0+0(%rbp),%rax 3474 mulq %r11 3475 imulq %r12,%r15 3476 addq %rax,%r14 3477 adcq %rdx,%r15 3478 movq 8+0+0(%rbp),%rax 3479 movq %rax,%r9 3480 mulq %r10 3481 addq %rax,%r14 3482 adcq $0,%rdx 3483 movq %rdx,%r10 3484 movq 8+0+0(%rbp),%rax 3485 mulq %r11 3486 addq 
%rax,%r15 3487 adcq $0,%rdx 3488 imulq %r12,%r9 3489 addq %r10,%r15 3490 adcq %rdx,%r9 3491 movq %r13,%r10 3492 movq %r14,%r11 3493 movq %r15,%r12 3494 andq $3,%r12 3495 movq %r15,%r13 3496 andq $-4,%r13 3497 movq %r9,%r14 3498 shrdq $2,%r9,%r15 3499 shrq $2,%r9 3500 addq %r13,%r15 3501 adcq %r14,%r9 3502 addq %r15,%r10 3503 adcq %r9,%r11 3504 adcq $0,%r12 3505 3506 subq $16,%rcx 3507 leaq 16(%rdi),%rdi 3508 jmp L$seal_sse_128_tail_hash 3509 3510L$seal_sse_128_tail_xor: 3511 cmpq $16,%rbx 3512 jb L$seal_sse_tail_16 3513 subq $16,%rbx 3514 3515 movdqu 0(%rsi),%xmm3 3516 pxor %xmm3,%xmm0 3517 movdqu %xmm0,0(%rdi) 3518 3519 addq 0(%rdi),%r10 3520 adcq 8(%rdi),%r11 3521 adcq $1,%r12 3522 leaq 16(%rsi),%rsi 3523 leaq 16(%rdi),%rdi 3524 movq 0+0+0(%rbp),%rax 3525 movq %rax,%r15 3526 mulq %r10 3527 movq %rax,%r13 3528 movq %rdx,%r14 3529 movq 0+0+0(%rbp),%rax 3530 mulq %r11 3531 imulq %r12,%r15 3532 addq %rax,%r14 3533 adcq %rdx,%r15 3534 movq 8+0+0(%rbp),%rax 3535 movq %rax,%r9 3536 mulq %r10 3537 addq %rax,%r14 3538 adcq $0,%rdx 3539 movq %rdx,%r10 3540 movq 8+0+0(%rbp),%rax 3541 mulq %r11 3542 addq %rax,%r15 3543 adcq $0,%rdx 3544 imulq %r12,%r9 3545 addq %r10,%r15 3546 adcq %rdx,%r9 3547 movq %r13,%r10 3548 movq %r14,%r11 3549 movq %r15,%r12 3550 andq $3,%r12 3551 movq %r15,%r13 3552 andq $-4,%r13 3553 movq %r9,%r14 3554 shrdq $2,%r9,%r15 3555 shrq $2,%r9 3556 addq %r13,%r15 3557 adcq %r14,%r9 3558 addq %r15,%r10 3559 adcq %r9,%r11 3560 adcq $0,%r12 3561 3562 3563 movdqa %xmm4,%xmm0 3564 movdqa %xmm8,%xmm4 3565 movdqa %xmm12,%xmm8 3566 movdqa %xmm1,%xmm12 3567 movdqa %xmm5,%xmm1 3568 movdqa %xmm9,%xmm5 3569 movdqa %xmm13,%xmm9 3570 jmp L$seal_sse_128_tail_xor 3571 3572L$seal_sse_tail_16: 3573 testq %rbx,%rbx 3574 jz L$process_blocks_of_extra_in 3575 3576 movq %rbx,%r8 3577 movq %rbx,%rcx 3578 leaq -1(%rsi,%rbx,1),%rsi 3579 pxor %xmm15,%xmm15 3580L$seal_sse_tail_16_compose: 3581 pslldq $1,%xmm15 3582 pinsrb $0,(%rsi),%xmm15 3583 leaq -1(%rsi),%rsi 3584 decq %rcx 3585 jne L$seal_sse_tail_16_compose 3586 3587 3588 pxor %xmm0,%xmm15 3589 3590 3591 movq %rbx,%rcx 3592 movdqu %xmm15,%xmm0 3593L$seal_sse_tail_16_extract: 3594 pextrb $0,%xmm0,(%rdi) 3595 psrldq $1,%xmm0 3596 addq $1,%rdi 3597 subq $1,%rcx 3598 jnz L$seal_sse_tail_16_extract 3599 3600 3601 3602 3603 3604 3605 3606 3607 movq 288 + 0 + 32(%rsp),%r9 3608 movq 56(%r9),%r14 3609 movq 48(%r9),%r13 3610 testq %r14,%r14 3611 jz L$process_partial_block 3612 3613 movq $16,%r15 3614 subq %rbx,%r15 3615 cmpq %r15,%r14 3616 3617 jge L$load_extra_in 3618 movq %r14,%r15 3619 3620L$load_extra_in: 3621 3622 3623 leaq -1(%r13,%r15,1),%rsi 3624 3625 3626 addq %r15,%r13 3627 subq %r15,%r14 3628 movq %r13,48(%r9) 3629 movq %r14,56(%r9) 3630 3631 3632 3633 addq %r15,%r8 3634 3635 3636 pxor %xmm11,%xmm11 3637L$load_extra_load_loop: 3638 pslldq $1,%xmm11 3639 pinsrb $0,(%rsi),%xmm11 3640 leaq -1(%rsi),%rsi 3641 subq $1,%r15 3642 jnz L$load_extra_load_loop 3643 3644 3645 3646 3647 movq %rbx,%r15 3648 3649L$load_extra_shift_loop: 3650 pslldq $1,%xmm11 3651 subq $1,%r15 3652 jnz L$load_extra_shift_loop 3653 3654 3655 3656 3657 leaq L$and_masks(%rip),%r15 3658 shlq $4,%rbx 3659 pand -16(%r15,%rbx,1),%xmm15 3660 3661 3662 por %xmm11,%xmm15 3663 3664 3665 3666.byte 102,77,15,126,253 3667 pextrq $1,%xmm15,%r14 3668 addq %r13,%r10 3669 adcq %r14,%r11 3670 adcq $1,%r12 3671 movq 0+0+0(%rbp),%rax 3672 movq %rax,%r15 3673 mulq %r10 3674 movq %rax,%r13 3675 movq %rdx,%r14 3676 movq 0+0+0(%rbp),%rax 3677 mulq %r11 3678 imulq %r12,%r15 3679 addq %rax,%r14 3680 adcq 
%rdx,%r15 3681 movq 8+0+0(%rbp),%rax 3682 movq %rax,%r9 3683 mulq %r10 3684 addq %rax,%r14 3685 adcq $0,%rdx 3686 movq %rdx,%r10 3687 movq 8+0+0(%rbp),%rax 3688 mulq %r11 3689 addq %rax,%r15 3690 adcq $0,%rdx 3691 imulq %r12,%r9 3692 addq %r10,%r15 3693 adcq %rdx,%r9 3694 movq %r13,%r10 3695 movq %r14,%r11 3696 movq %r15,%r12 3697 andq $3,%r12 3698 movq %r15,%r13 3699 andq $-4,%r13 3700 movq %r9,%r14 3701 shrdq $2,%r9,%r15 3702 shrq $2,%r9 3703 addq %r13,%r15 3704 adcq %r14,%r9 3705 addq %r15,%r10 3706 adcq %r9,%r11 3707 adcq $0,%r12 3708 3709 3710L$process_blocks_of_extra_in: 3711 3712 movq 288+32+0 (%rsp),%r9 3713 movq 48(%r9),%rsi 3714 movq 56(%r9),%r8 3715 movq %r8,%rcx 3716 shrq $4,%r8 3717 3718L$process_extra_hash_loop: 3719 jz process_extra_in_trailer 3720 addq 0+0(%rsi),%r10 3721 adcq 8+0(%rsi),%r11 3722 adcq $1,%r12 3723 movq 0+0+0(%rbp),%rax 3724 movq %rax,%r15 3725 mulq %r10 3726 movq %rax,%r13 3727 movq %rdx,%r14 3728 movq 0+0+0(%rbp),%rax 3729 mulq %r11 3730 imulq %r12,%r15 3731 addq %rax,%r14 3732 adcq %rdx,%r15 3733 movq 8+0+0(%rbp),%rax 3734 movq %rax,%r9 3735 mulq %r10 3736 addq %rax,%r14 3737 adcq $0,%rdx 3738 movq %rdx,%r10 3739 movq 8+0+0(%rbp),%rax 3740 mulq %r11 3741 addq %rax,%r15 3742 adcq $0,%rdx 3743 imulq %r12,%r9 3744 addq %r10,%r15 3745 adcq %rdx,%r9 3746 movq %r13,%r10 3747 movq %r14,%r11 3748 movq %r15,%r12 3749 andq $3,%r12 3750 movq %r15,%r13 3751 andq $-4,%r13 3752 movq %r9,%r14 3753 shrdq $2,%r9,%r15 3754 shrq $2,%r9 3755 addq %r13,%r15 3756 adcq %r14,%r9 3757 addq %r15,%r10 3758 adcq %r9,%r11 3759 adcq $0,%r12 3760 3761 leaq 16(%rsi),%rsi 3762 subq $1,%r8 3763 jmp L$process_extra_hash_loop 3764process_extra_in_trailer: 3765 andq $15,%rcx 3766 movq %rcx,%rbx 3767 jz L$do_length_block 3768 leaq -1(%rsi,%rcx,1),%rsi 3769 3770L$process_extra_in_trailer_load: 3771 pslldq $1,%xmm15 3772 pinsrb $0,(%rsi),%xmm15 3773 leaq -1(%rsi),%rsi 3774 subq $1,%rcx 3775 jnz L$process_extra_in_trailer_load 3776 3777L$process_partial_block: 3778 3779 leaq L$and_masks(%rip),%r15 3780 shlq $4,%rbx 3781 pand -16(%r15,%rbx,1),%xmm15 3782.byte 102,77,15,126,253 3783 pextrq $1,%xmm15,%r14 3784 addq %r13,%r10 3785 adcq %r14,%r11 3786 adcq $1,%r12 3787 movq 0+0+0(%rbp),%rax 3788 movq %rax,%r15 3789 mulq %r10 3790 movq %rax,%r13 3791 movq %rdx,%r14 3792 movq 0+0+0(%rbp),%rax 3793 mulq %r11 3794 imulq %r12,%r15 3795 addq %rax,%r14 3796 adcq %rdx,%r15 3797 movq 8+0+0(%rbp),%rax 3798 movq %rax,%r9 3799 mulq %r10 3800 addq %rax,%r14 3801 adcq $0,%rdx 3802 movq %rdx,%r10 3803 movq 8+0+0(%rbp),%rax 3804 mulq %r11 3805 addq %rax,%r15 3806 adcq $0,%rdx 3807 imulq %r12,%r9 3808 addq %r10,%r15 3809 adcq %rdx,%r9 3810 movq %r13,%r10 3811 movq %r14,%r11 3812 movq %r15,%r12 3813 andq $3,%r12 3814 movq %r15,%r13 3815 andq $-4,%r13 3816 movq %r9,%r14 3817 shrdq $2,%r9,%r15 3818 shrq $2,%r9 3819 addq %r13,%r15 3820 adcq %r14,%r9 3821 addq %r15,%r10 3822 adcq %r9,%r11 3823 adcq $0,%r12 3824 3825 3826L$do_length_block: 3827 addq 0+0+32(%rbp),%r10 3828 adcq 8+0+32(%rbp),%r11 3829 adcq $1,%r12 3830 movq 0+0+0(%rbp),%rax 3831 movq %rax,%r15 3832 mulq %r10 3833 movq %rax,%r13 3834 movq %rdx,%r14 3835 movq 0+0+0(%rbp),%rax 3836 mulq %r11 3837 imulq %r12,%r15 3838 addq %rax,%r14 3839 adcq %rdx,%r15 3840 movq 8+0+0(%rbp),%rax 3841 movq %rax,%r9 3842 mulq %r10 3843 addq %rax,%r14 3844 adcq $0,%rdx 3845 movq %rdx,%r10 3846 movq 8+0+0(%rbp),%rax 3847 mulq %r11 3848 addq %rax,%r15 3849 adcq $0,%rdx 3850 imulq %r12,%r9 3851 addq %r10,%r15 3852 adcq %rdx,%r9 3853 movq %r13,%r10 3854 movq %r14,%r11 3855 movq 
%r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12


	movq %r10,%r13
	movq %r11,%r14
	movq %r12,%r15
	subq $-5,%r10
	sbbq $-1,%r11
	sbbq $3,%r12
	cmovcq %r13,%r10
	cmovcq %r14,%r11
	cmovcq %r15,%r12

	addq 0+0+16(%rbp),%r10
	adcq 8+0+16(%rbp),%r11


	addq $288 + 0 + 32,%rsp


	popq %r9

	movq %r10,(%r9)
	movq %r11,8(%r9)
	popq %r15

	popq %r14

	popq %r13

	popq %r12

	popq %rbx

	popq %rbp

	ret

L$seal_sse_128:

	movdqu L$chacha20_consts(%rip),%xmm0
	movdqa %xmm0,%xmm1
	movdqa %xmm0,%xmm2
	movdqu 0(%r9),%xmm4
	movdqa %xmm4,%xmm5
	movdqa %xmm4,%xmm6
	movdqu 16(%r9),%xmm8
	movdqa %xmm8,%xmm9
	movdqa %xmm8,%xmm10
	movdqu 32(%r9),%xmm14
	movdqa %xmm14,%xmm12
	paddd L$sse_inc(%rip),%xmm12
	movdqa %xmm12,%xmm13
	paddd L$sse_inc(%rip),%xmm13
	movdqa %xmm4,%xmm7
	movdqa %xmm8,%xmm11
	movdqa %xmm12,%xmm15
	movq $10,%r10

L$seal_sse_128_rounds:
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb L$rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb L$rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb L$rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm5
	pxor %xmm3,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb L$rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm5
	pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb L$rol16(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm6
	pxor %xmm3,%xmm6
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb L$rol8(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm6
	pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb L$rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb L$rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb L$rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm5
	pxor %xmm3,%xmm5
paddd %xmm5,%xmm1 4020 pxor %xmm1,%xmm13 4021 pshufb L$rol8(%rip),%xmm13 4022 paddd %xmm13,%xmm9 4023 pxor %xmm9,%xmm5 4024 movdqa %xmm5,%xmm3 4025 pslld $7,%xmm3 4026 psrld $25,%xmm5 4027 pxor %xmm3,%xmm5 4028.byte 102,15,58,15,237,12 4029.byte 102,69,15,58,15,201,8 4030.byte 102,69,15,58,15,237,4 4031 paddd %xmm6,%xmm2 4032 pxor %xmm2,%xmm14 4033 pshufb L$rol16(%rip),%xmm14 4034 paddd %xmm14,%xmm10 4035 pxor %xmm10,%xmm6 4036 movdqa %xmm6,%xmm3 4037 pslld $12,%xmm3 4038 psrld $20,%xmm6 4039 pxor %xmm3,%xmm6 4040 paddd %xmm6,%xmm2 4041 pxor %xmm2,%xmm14 4042 pshufb L$rol8(%rip),%xmm14 4043 paddd %xmm14,%xmm10 4044 pxor %xmm10,%xmm6 4045 movdqa %xmm6,%xmm3 4046 pslld $7,%xmm3 4047 psrld $25,%xmm6 4048 pxor %xmm3,%xmm6 4049.byte 102,15,58,15,246,12 4050.byte 102,69,15,58,15,210,8 4051.byte 102,69,15,58,15,246,4 4052 4053 decq %r10 4054 jnz L$seal_sse_128_rounds 4055 paddd L$chacha20_consts(%rip),%xmm0 4056 paddd L$chacha20_consts(%rip),%xmm1 4057 paddd L$chacha20_consts(%rip),%xmm2 4058 paddd %xmm7,%xmm4 4059 paddd %xmm7,%xmm5 4060 paddd %xmm7,%xmm6 4061 paddd %xmm11,%xmm8 4062 paddd %xmm11,%xmm9 4063 paddd %xmm15,%xmm12 4064 paddd L$sse_inc(%rip),%xmm15 4065 paddd %xmm15,%xmm13 4066 4067 pand L$clamp(%rip),%xmm2 4068 movdqa %xmm2,0+0(%rbp) 4069 movdqa %xmm6,0+16(%rbp) 4070 4071 movq %r8,%r8 4072 call poly_hash_ad_internal 4073 jmp L$seal_sse_128_tail_xor 4074 4075 4076 4077 4078 4079.p2align 6 4080chacha20_poly1305_open_avx2: 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 vzeroupper 4094 vmovdqa L$chacha20_consts(%rip),%ymm0 4095 vbroadcasti128 0(%r9),%ymm4 4096 vbroadcasti128 16(%r9),%ymm8 4097 vbroadcasti128 32(%r9),%ymm12 4098 vpaddd L$avx2_init(%rip),%ymm12,%ymm12 4099 cmpq $192,%rbx 4100 jbe L$open_avx2_192 4101 cmpq $320,%rbx 4102 jbe L$open_avx2_320 4103 4104 vmovdqa %ymm4,0+64(%rbp) 4105 vmovdqa %ymm8,0+96(%rbp) 4106 vmovdqa %ymm12,0+160(%rbp) 4107 movq $10,%r10 4108L$open_avx2_init_rounds: 4109 vpaddd %ymm4,%ymm0,%ymm0 4110 vpxor %ymm0,%ymm12,%ymm12 4111 vpshufb L$rol16(%rip),%ymm12,%ymm12 4112 vpaddd %ymm12,%ymm8,%ymm8 4113 vpxor %ymm8,%ymm4,%ymm4 4114 vpsrld $20,%ymm4,%ymm3 4115 vpslld $12,%ymm4,%ymm4 4116 vpxor %ymm3,%ymm4,%ymm4 4117 vpaddd %ymm4,%ymm0,%ymm0 4118 vpxor %ymm0,%ymm12,%ymm12 4119 vpshufb L$rol8(%rip),%ymm12,%ymm12 4120 vpaddd %ymm12,%ymm8,%ymm8 4121 vpxor %ymm8,%ymm4,%ymm4 4122 vpslld $7,%ymm4,%ymm3 4123 vpsrld $25,%ymm4,%ymm4 4124 vpxor %ymm3,%ymm4,%ymm4 4125 vpalignr $12,%ymm12,%ymm12,%ymm12 4126 vpalignr $8,%ymm8,%ymm8,%ymm8 4127 vpalignr $4,%ymm4,%ymm4,%ymm4 4128 vpaddd %ymm4,%ymm0,%ymm0 4129 vpxor %ymm0,%ymm12,%ymm12 4130 vpshufb L$rol16(%rip),%ymm12,%ymm12 4131 vpaddd %ymm12,%ymm8,%ymm8 4132 vpxor %ymm8,%ymm4,%ymm4 4133 vpsrld $20,%ymm4,%ymm3 4134 vpslld $12,%ymm4,%ymm4 4135 vpxor %ymm3,%ymm4,%ymm4 4136 vpaddd %ymm4,%ymm0,%ymm0 4137 vpxor %ymm0,%ymm12,%ymm12 4138 vpshufb L$rol8(%rip),%ymm12,%ymm12 4139 vpaddd %ymm12,%ymm8,%ymm8 4140 vpxor %ymm8,%ymm4,%ymm4 4141 vpslld $7,%ymm4,%ymm3 4142 vpsrld $25,%ymm4,%ymm4 4143 vpxor %ymm3,%ymm4,%ymm4 4144 vpalignr $4,%ymm12,%ymm12,%ymm12 4145 vpalignr $8,%ymm8,%ymm8,%ymm8 4146 vpalignr $12,%ymm4,%ymm4,%ymm4 4147 4148 decq %r10 4149 jne L$open_avx2_init_rounds 4150 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4151 vpaddd 0+64(%rbp),%ymm4,%ymm4 4152 vpaddd 0+96(%rbp),%ymm8,%ymm8 4153 vpaddd 0+160(%rbp),%ymm12,%ymm12 4154 4155 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4156 4157 vpand L$clamp(%rip),%ymm3,%ymm3 4158 vmovdqa %ymm3,0+0(%rbp) 4159 4160 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 4161 vperm2i128 
$0x13,%ymm8,%ymm12,%ymm4 4162 4163 movq %r8,%r8 4164 call poly_hash_ad_internal 4165 4166 xorq %rcx,%rcx 4167L$open_avx2_init_hash: 4168 addq 0+0(%rsi,%rcx,1),%r10 4169 adcq 8+0(%rsi,%rcx,1),%r11 4170 adcq $1,%r12 4171 movq 0+0+0(%rbp),%rax 4172 movq %rax,%r15 4173 mulq %r10 4174 movq %rax,%r13 4175 movq %rdx,%r14 4176 movq 0+0+0(%rbp),%rax 4177 mulq %r11 4178 imulq %r12,%r15 4179 addq %rax,%r14 4180 adcq %rdx,%r15 4181 movq 8+0+0(%rbp),%rax 4182 movq %rax,%r9 4183 mulq %r10 4184 addq %rax,%r14 4185 adcq $0,%rdx 4186 movq %rdx,%r10 4187 movq 8+0+0(%rbp),%rax 4188 mulq %r11 4189 addq %rax,%r15 4190 adcq $0,%rdx 4191 imulq %r12,%r9 4192 addq %r10,%r15 4193 adcq %rdx,%r9 4194 movq %r13,%r10 4195 movq %r14,%r11 4196 movq %r15,%r12 4197 andq $3,%r12 4198 movq %r15,%r13 4199 andq $-4,%r13 4200 movq %r9,%r14 4201 shrdq $2,%r9,%r15 4202 shrq $2,%r9 4203 addq %r13,%r15 4204 adcq %r14,%r9 4205 addq %r15,%r10 4206 adcq %r9,%r11 4207 adcq $0,%r12 4208 4209 addq $16,%rcx 4210 cmpq $64,%rcx 4211 jne L$open_avx2_init_hash 4212 4213 vpxor 0(%rsi),%ymm0,%ymm0 4214 vpxor 32(%rsi),%ymm4,%ymm4 4215 4216 vmovdqu %ymm0,0(%rdi) 4217 vmovdqu %ymm4,32(%rdi) 4218 leaq 64(%rsi),%rsi 4219 leaq 64(%rdi),%rdi 4220 subq $64,%rbx 4221L$open_avx2_main_loop: 4222 4223 cmpq $512,%rbx 4224 jb L$open_avx2_main_loop_done 4225 vmovdqa L$chacha20_consts(%rip),%ymm0 4226 vmovdqa 0+64(%rbp),%ymm4 4227 vmovdqa 0+96(%rbp),%ymm8 4228 vmovdqa %ymm0,%ymm1 4229 vmovdqa %ymm4,%ymm5 4230 vmovdqa %ymm8,%ymm9 4231 vmovdqa %ymm0,%ymm2 4232 vmovdqa %ymm4,%ymm6 4233 vmovdqa %ymm8,%ymm10 4234 vmovdqa %ymm0,%ymm3 4235 vmovdqa %ymm4,%ymm7 4236 vmovdqa %ymm8,%ymm11 4237 vmovdqa L$avx2_inc(%rip),%ymm12 4238 vpaddd 0+160(%rbp),%ymm12,%ymm15 4239 vpaddd %ymm15,%ymm12,%ymm14 4240 vpaddd %ymm14,%ymm12,%ymm13 4241 vpaddd %ymm13,%ymm12,%ymm12 4242 vmovdqa %ymm15,0+256(%rbp) 4243 vmovdqa %ymm14,0+224(%rbp) 4244 vmovdqa %ymm13,0+192(%rbp) 4245 vmovdqa %ymm12,0+160(%rbp) 4246 4247 xorq %rcx,%rcx 4248L$open_avx2_main_loop_rounds: 4249 addq 0+0(%rsi,%rcx,1),%r10 4250 adcq 8+0(%rsi,%rcx,1),%r11 4251 adcq $1,%r12 4252 vmovdqa %ymm8,0+128(%rbp) 4253 vmovdqa L$rol16(%rip),%ymm8 4254 vpaddd %ymm7,%ymm3,%ymm3 4255 vpaddd %ymm6,%ymm2,%ymm2 4256 vpaddd %ymm5,%ymm1,%ymm1 4257 vpaddd %ymm4,%ymm0,%ymm0 4258 vpxor %ymm3,%ymm15,%ymm15 4259 vpxor %ymm2,%ymm14,%ymm14 4260 vpxor %ymm1,%ymm13,%ymm13 4261 vpxor %ymm0,%ymm12,%ymm12 4262 movq 0+0+0(%rbp),%rdx 4263 movq %rdx,%r15 4264 mulxq %r10,%r13,%r14 4265 mulxq %r11,%rax,%rdx 4266 imulq %r12,%r15 4267 addq %rax,%r14 4268 adcq %rdx,%r15 4269 vpshufb %ymm8,%ymm15,%ymm15 4270 vpshufb %ymm8,%ymm14,%ymm14 4271 vpshufb %ymm8,%ymm13,%ymm13 4272 vpshufb %ymm8,%ymm12,%ymm12 4273 vpaddd %ymm15,%ymm11,%ymm11 4274 vpaddd %ymm14,%ymm10,%ymm10 4275 vpaddd %ymm13,%ymm9,%ymm9 4276 vpaddd 0+128(%rbp),%ymm12,%ymm8 4277 vpxor %ymm11,%ymm7,%ymm7 4278 movq 8+0+0(%rbp),%rdx 4279 mulxq %r10,%r10,%rax 4280 addq %r10,%r14 4281 mulxq %r11,%r11,%r9 4282 adcq %r11,%r15 4283 adcq $0,%r9 4284 imulq %r12,%rdx 4285 vpxor %ymm10,%ymm6,%ymm6 4286 vpxor %ymm9,%ymm5,%ymm5 4287 vpxor %ymm8,%ymm4,%ymm4 4288 vmovdqa %ymm8,0+128(%rbp) 4289 vpsrld $20,%ymm7,%ymm8 4290 vpslld $32-20,%ymm7,%ymm7 4291 vpxor %ymm8,%ymm7,%ymm7 4292 vpsrld $20,%ymm6,%ymm8 4293 vpslld $32-20,%ymm6,%ymm6 4294 vpxor %ymm8,%ymm6,%ymm6 4295 vpsrld $20,%ymm5,%ymm8 4296 vpslld $32-20,%ymm5,%ymm5 4297 addq %rax,%r15 4298 adcq %rdx,%r9 4299 vpxor %ymm8,%ymm5,%ymm5 4300 vpsrld $20,%ymm4,%ymm8 4301 vpslld $32-20,%ymm4,%ymm4 4302 vpxor %ymm8,%ymm4,%ymm4 4303 vmovdqa L$rol8(%rip),%ymm8 4304 vpaddd 
%ymm7,%ymm3,%ymm3 4305 vpaddd %ymm6,%ymm2,%ymm2 4306 vpaddd %ymm5,%ymm1,%ymm1 4307 vpaddd %ymm4,%ymm0,%ymm0 4308 vpxor %ymm3,%ymm15,%ymm15 4309 movq %r13,%r10 4310 movq %r14,%r11 4311 movq %r15,%r12 4312 andq $3,%r12 4313 movq %r15,%r13 4314 andq $-4,%r13 4315 movq %r9,%r14 4316 shrdq $2,%r9,%r15 4317 shrq $2,%r9 4318 addq %r13,%r15 4319 adcq %r14,%r9 4320 addq %r15,%r10 4321 adcq %r9,%r11 4322 adcq $0,%r12 4323 vpxor %ymm2,%ymm14,%ymm14 4324 vpxor %ymm1,%ymm13,%ymm13 4325 vpxor %ymm0,%ymm12,%ymm12 4326 vpshufb %ymm8,%ymm15,%ymm15 4327 vpshufb %ymm8,%ymm14,%ymm14 4328 vpshufb %ymm8,%ymm13,%ymm13 4329 vpshufb %ymm8,%ymm12,%ymm12 4330 vpaddd %ymm15,%ymm11,%ymm11 4331 vpaddd %ymm14,%ymm10,%ymm10 4332 addq 0+16(%rsi,%rcx,1),%r10 4333 adcq 8+16(%rsi,%rcx,1),%r11 4334 adcq $1,%r12 4335 vpaddd %ymm13,%ymm9,%ymm9 4336 vpaddd 0+128(%rbp),%ymm12,%ymm8 4337 vpxor %ymm11,%ymm7,%ymm7 4338 vpxor %ymm10,%ymm6,%ymm6 4339 vpxor %ymm9,%ymm5,%ymm5 4340 vpxor %ymm8,%ymm4,%ymm4 4341 vmovdqa %ymm8,0+128(%rbp) 4342 vpsrld $25,%ymm7,%ymm8 4343 movq 0+0+0(%rbp),%rdx 4344 movq %rdx,%r15 4345 mulxq %r10,%r13,%r14 4346 mulxq %r11,%rax,%rdx 4347 imulq %r12,%r15 4348 addq %rax,%r14 4349 adcq %rdx,%r15 4350 vpslld $32-25,%ymm7,%ymm7 4351 vpxor %ymm8,%ymm7,%ymm7 4352 vpsrld $25,%ymm6,%ymm8 4353 vpslld $32-25,%ymm6,%ymm6 4354 vpxor %ymm8,%ymm6,%ymm6 4355 vpsrld $25,%ymm5,%ymm8 4356 vpslld $32-25,%ymm5,%ymm5 4357 vpxor %ymm8,%ymm5,%ymm5 4358 vpsrld $25,%ymm4,%ymm8 4359 vpslld $32-25,%ymm4,%ymm4 4360 vpxor %ymm8,%ymm4,%ymm4 4361 vmovdqa 0+128(%rbp),%ymm8 4362 vpalignr $4,%ymm7,%ymm7,%ymm7 4363 vpalignr $8,%ymm11,%ymm11,%ymm11 4364 vpalignr $12,%ymm15,%ymm15,%ymm15 4365 vpalignr $4,%ymm6,%ymm6,%ymm6 4366 vpalignr $8,%ymm10,%ymm10,%ymm10 4367 vpalignr $12,%ymm14,%ymm14,%ymm14 4368 movq 8+0+0(%rbp),%rdx 4369 mulxq %r10,%r10,%rax 4370 addq %r10,%r14 4371 mulxq %r11,%r11,%r9 4372 adcq %r11,%r15 4373 adcq $0,%r9 4374 imulq %r12,%rdx 4375 vpalignr $4,%ymm5,%ymm5,%ymm5 4376 vpalignr $8,%ymm9,%ymm9,%ymm9 4377 vpalignr $12,%ymm13,%ymm13,%ymm13 4378 vpalignr $4,%ymm4,%ymm4,%ymm4 4379 vpalignr $8,%ymm8,%ymm8,%ymm8 4380 vpalignr $12,%ymm12,%ymm12,%ymm12 4381 vmovdqa %ymm8,0+128(%rbp) 4382 vmovdqa L$rol16(%rip),%ymm8 4383 vpaddd %ymm7,%ymm3,%ymm3 4384 vpaddd %ymm6,%ymm2,%ymm2 4385 vpaddd %ymm5,%ymm1,%ymm1 4386 vpaddd %ymm4,%ymm0,%ymm0 4387 vpxor %ymm3,%ymm15,%ymm15 4388 vpxor %ymm2,%ymm14,%ymm14 4389 vpxor %ymm1,%ymm13,%ymm13 4390 vpxor %ymm0,%ymm12,%ymm12 4391 vpshufb %ymm8,%ymm15,%ymm15 4392 vpshufb %ymm8,%ymm14,%ymm14 4393 addq %rax,%r15 4394 adcq %rdx,%r9 4395 vpshufb %ymm8,%ymm13,%ymm13 4396 vpshufb %ymm8,%ymm12,%ymm12 4397 vpaddd %ymm15,%ymm11,%ymm11 4398 vpaddd %ymm14,%ymm10,%ymm10 4399 vpaddd %ymm13,%ymm9,%ymm9 4400 vpaddd 0+128(%rbp),%ymm12,%ymm8 4401 vpxor %ymm11,%ymm7,%ymm7 4402 vpxor %ymm10,%ymm6,%ymm6 4403 vpxor %ymm9,%ymm5,%ymm5 4404 movq %r13,%r10 4405 movq %r14,%r11 4406 movq %r15,%r12 4407 andq $3,%r12 4408 movq %r15,%r13 4409 andq $-4,%r13 4410 movq %r9,%r14 4411 shrdq $2,%r9,%r15 4412 shrq $2,%r9 4413 addq %r13,%r15 4414 adcq %r14,%r9 4415 addq %r15,%r10 4416 adcq %r9,%r11 4417 adcq $0,%r12 4418 vpxor %ymm8,%ymm4,%ymm4 4419 vmovdqa %ymm8,0+128(%rbp) 4420 vpsrld $20,%ymm7,%ymm8 4421 vpslld $32-20,%ymm7,%ymm7 4422 vpxor %ymm8,%ymm7,%ymm7 4423 vpsrld $20,%ymm6,%ymm8 4424 vpslld $32-20,%ymm6,%ymm6 4425 vpxor %ymm8,%ymm6,%ymm6 4426 addq 0+32(%rsi,%rcx,1),%r10 4427 adcq 8+32(%rsi,%rcx,1),%r11 4428 adcq $1,%r12 4429 4430 leaq 48(%rcx),%rcx 4431 vpsrld $20,%ymm5,%ymm8 4432 vpslld $32-20,%ymm5,%ymm5 4433 vpxor %ymm8,%ymm5,%ymm5 
4434 vpsrld $20,%ymm4,%ymm8 4435 vpslld $32-20,%ymm4,%ymm4 4436 vpxor %ymm8,%ymm4,%ymm4 4437 vmovdqa L$rol8(%rip),%ymm8 4438 vpaddd %ymm7,%ymm3,%ymm3 4439 vpaddd %ymm6,%ymm2,%ymm2 4440 vpaddd %ymm5,%ymm1,%ymm1 4441 vpaddd %ymm4,%ymm0,%ymm0 4442 vpxor %ymm3,%ymm15,%ymm15 4443 vpxor %ymm2,%ymm14,%ymm14 4444 vpxor %ymm1,%ymm13,%ymm13 4445 vpxor %ymm0,%ymm12,%ymm12 4446 vpshufb %ymm8,%ymm15,%ymm15 4447 vpshufb %ymm8,%ymm14,%ymm14 4448 vpshufb %ymm8,%ymm13,%ymm13 4449 movq 0+0+0(%rbp),%rdx 4450 movq %rdx,%r15 4451 mulxq %r10,%r13,%r14 4452 mulxq %r11,%rax,%rdx 4453 imulq %r12,%r15 4454 addq %rax,%r14 4455 adcq %rdx,%r15 4456 vpshufb %ymm8,%ymm12,%ymm12 4457 vpaddd %ymm15,%ymm11,%ymm11 4458 vpaddd %ymm14,%ymm10,%ymm10 4459 vpaddd %ymm13,%ymm9,%ymm9 4460 vpaddd 0+128(%rbp),%ymm12,%ymm8 4461 vpxor %ymm11,%ymm7,%ymm7 4462 vpxor %ymm10,%ymm6,%ymm6 4463 vpxor %ymm9,%ymm5,%ymm5 4464 movq 8+0+0(%rbp),%rdx 4465 mulxq %r10,%r10,%rax 4466 addq %r10,%r14 4467 mulxq %r11,%r11,%r9 4468 adcq %r11,%r15 4469 adcq $0,%r9 4470 imulq %r12,%rdx 4471 vpxor %ymm8,%ymm4,%ymm4 4472 vmovdqa %ymm8,0+128(%rbp) 4473 vpsrld $25,%ymm7,%ymm8 4474 vpslld $32-25,%ymm7,%ymm7 4475 vpxor %ymm8,%ymm7,%ymm7 4476 vpsrld $25,%ymm6,%ymm8 4477 vpslld $32-25,%ymm6,%ymm6 4478 vpxor %ymm8,%ymm6,%ymm6 4479 addq %rax,%r15 4480 adcq %rdx,%r9 4481 vpsrld $25,%ymm5,%ymm8 4482 vpslld $32-25,%ymm5,%ymm5 4483 vpxor %ymm8,%ymm5,%ymm5 4484 vpsrld $25,%ymm4,%ymm8 4485 vpslld $32-25,%ymm4,%ymm4 4486 vpxor %ymm8,%ymm4,%ymm4 4487 vmovdqa 0+128(%rbp),%ymm8 4488 vpalignr $12,%ymm7,%ymm7,%ymm7 4489 vpalignr $8,%ymm11,%ymm11,%ymm11 4490 vpalignr $4,%ymm15,%ymm15,%ymm15 4491 vpalignr $12,%ymm6,%ymm6,%ymm6 4492 vpalignr $8,%ymm10,%ymm10,%ymm10 4493 vpalignr $4,%ymm14,%ymm14,%ymm14 4494 vpalignr $12,%ymm5,%ymm5,%ymm5 4495 vpalignr $8,%ymm9,%ymm9,%ymm9 4496 vpalignr $4,%ymm13,%ymm13,%ymm13 4497 vpalignr $12,%ymm4,%ymm4,%ymm4 4498 vpalignr $8,%ymm8,%ymm8,%ymm8 4499 movq %r13,%r10 4500 movq %r14,%r11 4501 movq %r15,%r12 4502 andq $3,%r12 4503 movq %r15,%r13 4504 andq $-4,%r13 4505 movq %r9,%r14 4506 shrdq $2,%r9,%r15 4507 shrq $2,%r9 4508 addq %r13,%r15 4509 adcq %r14,%r9 4510 addq %r15,%r10 4511 adcq %r9,%r11 4512 adcq $0,%r12 4513 vpalignr $4,%ymm12,%ymm12,%ymm12 4514 4515 cmpq $60*8,%rcx 4516 jne L$open_avx2_main_loop_rounds 4517 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 4518 vpaddd 0+64(%rbp),%ymm7,%ymm7 4519 vpaddd 0+96(%rbp),%ymm11,%ymm11 4520 vpaddd 0+256(%rbp),%ymm15,%ymm15 4521 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 4522 vpaddd 0+64(%rbp),%ymm6,%ymm6 4523 vpaddd 0+96(%rbp),%ymm10,%ymm10 4524 vpaddd 0+224(%rbp),%ymm14,%ymm14 4525 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 4526 vpaddd 0+64(%rbp),%ymm5,%ymm5 4527 vpaddd 0+96(%rbp),%ymm9,%ymm9 4528 vpaddd 0+192(%rbp),%ymm13,%ymm13 4529 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4530 vpaddd 0+64(%rbp),%ymm4,%ymm4 4531 vpaddd 0+96(%rbp),%ymm8,%ymm8 4532 vpaddd 0+160(%rbp),%ymm12,%ymm12 4533 4534 vmovdqa %ymm0,0+128(%rbp) 4535 addq 0+60*8(%rsi),%r10 4536 adcq 8+60*8(%rsi),%r11 4537 adcq $1,%r12 4538 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4539 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4540 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4541 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4542 vpxor 0+0(%rsi),%ymm0,%ymm0 4543 vpxor 32+0(%rsi),%ymm3,%ymm3 4544 vpxor 64+0(%rsi),%ymm7,%ymm7 4545 vpxor 96+0(%rsi),%ymm11,%ymm11 4546 vmovdqu %ymm0,0+0(%rdi) 4547 vmovdqu %ymm3,32+0(%rdi) 4548 vmovdqu %ymm7,64+0(%rdi) 4549 vmovdqu %ymm11,96+0(%rdi) 4550 4551 vmovdqa 0+128(%rbp),%ymm0 4552 movq 0+0+0(%rbp),%rax 4553 movq %rax,%r15 4554 mulq %r10 
4555 movq %rax,%r13 4556 movq %rdx,%r14 4557 movq 0+0+0(%rbp),%rax 4558 mulq %r11 4559 imulq %r12,%r15 4560 addq %rax,%r14 4561 adcq %rdx,%r15 4562 movq 8+0+0(%rbp),%rax 4563 movq %rax,%r9 4564 mulq %r10 4565 addq %rax,%r14 4566 adcq $0,%rdx 4567 movq %rdx,%r10 4568 movq 8+0+0(%rbp),%rax 4569 mulq %r11 4570 addq %rax,%r15 4571 adcq $0,%rdx 4572 imulq %r12,%r9 4573 addq %r10,%r15 4574 adcq %rdx,%r9 4575 movq %r13,%r10 4576 movq %r14,%r11 4577 movq %r15,%r12 4578 andq $3,%r12 4579 movq %r15,%r13 4580 andq $-4,%r13 4581 movq %r9,%r14 4582 shrdq $2,%r9,%r15 4583 shrq $2,%r9 4584 addq %r13,%r15 4585 adcq %r14,%r9 4586 addq %r15,%r10 4587 adcq %r9,%r11 4588 adcq $0,%r12 4589 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4590 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4591 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4592 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4593 vpxor 0+128(%rsi),%ymm3,%ymm3 4594 vpxor 32+128(%rsi),%ymm2,%ymm2 4595 vpxor 64+128(%rsi),%ymm6,%ymm6 4596 vpxor 96+128(%rsi),%ymm10,%ymm10 4597 vmovdqu %ymm3,0+128(%rdi) 4598 vmovdqu %ymm2,32+128(%rdi) 4599 vmovdqu %ymm6,64+128(%rdi) 4600 vmovdqu %ymm10,96+128(%rdi) 4601 addq 0+60*8+16(%rsi),%r10 4602 adcq 8+60*8+16(%rsi),%r11 4603 adcq $1,%r12 4604 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4605 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4606 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4607 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4608 vpxor 0+256(%rsi),%ymm3,%ymm3 4609 vpxor 32+256(%rsi),%ymm1,%ymm1 4610 vpxor 64+256(%rsi),%ymm5,%ymm5 4611 vpxor 96+256(%rsi),%ymm9,%ymm9 4612 vmovdqu %ymm3,0+256(%rdi) 4613 vmovdqu %ymm1,32+256(%rdi) 4614 vmovdqu %ymm5,64+256(%rdi) 4615 vmovdqu %ymm9,96+256(%rdi) 4616 movq 0+0+0(%rbp),%rax 4617 movq %rax,%r15 4618 mulq %r10 4619 movq %rax,%r13 4620 movq %rdx,%r14 4621 movq 0+0+0(%rbp),%rax 4622 mulq %r11 4623 imulq %r12,%r15 4624 addq %rax,%r14 4625 adcq %rdx,%r15 4626 movq 8+0+0(%rbp),%rax 4627 movq %rax,%r9 4628 mulq %r10 4629 addq %rax,%r14 4630 adcq $0,%rdx 4631 movq %rdx,%r10 4632 movq 8+0+0(%rbp),%rax 4633 mulq %r11 4634 addq %rax,%r15 4635 adcq $0,%rdx 4636 imulq %r12,%r9 4637 addq %r10,%r15 4638 adcq %rdx,%r9 4639 movq %r13,%r10 4640 movq %r14,%r11 4641 movq %r15,%r12 4642 andq $3,%r12 4643 movq %r15,%r13 4644 andq $-4,%r13 4645 movq %r9,%r14 4646 shrdq $2,%r9,%r15 4647 shrq $2,%r9 4648 addq %r13,%r15 4649 adcq %r14,%r9 4650 addq %r15,%r10 4651 adcq %r9,%r11 4652 adcq $0,%r12 4653 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4654 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4655 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4656 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4657 vpxor 0+384(%rsi),%ymm3,%ymm3 4658 vpxor 32+384(%rsi),%ymm0,%ymm0 4659 vpxor 64+384(%rsi),%ymm4,%ymm4 4660 vpxor 96+384(%rsi),%ymm8,%ymm8 4661 vmovdqu %ymm3,0+384(%rdi) 4662 vmovdqu %ymm0,32+384(%rdi) 4663 vmovdqu %ymm4,64+384(%rdi) 4664 vmovdqu %ymm8,96+384(%rdi) 4665 4666 leaq 512(%rsi),%rsi 4667 leaq 512(%rdi),%rdi 4668 subq $512,%rbx 4669 jmp L$open_avx2_main_loop 4670L$open_avx2_main_loop_done: 4671 testq %rbx,%rbx 4672 vzeroupper 4673 je L$open_sse_finalize 4674 4675 cmpq $384,%rbx 4676 ja L$open_avx2_tail_512 4677 cmpq $256,%rbx 4678 ja L$open_avx2_tail_384 4679 cmpq $128,%rbx 4680 ja L$open_avx2_tail_256 4681 vmovdqa L$chacha20_consts(%rip),%ymm0 4682 vmovdqa 0+64(%rbp),%ymm4 4683 vmovdqa 0+96(%rbp),%ymm8 4684 vmovdqa L$avx2_inc(%rip),%ymm12 4685 vpaddd 0+160(%rbp),%ymm12,%ymm12 4686 vmovdqa %ymm12,0+160(%rbp) 4687 4688 xorq %r8,%r8 4689 movq %rbx,%rcx 4690 andq $-16,%rcx 4691 testq %rcx,%rcx 4692 je L$open_avx2_tail_128_rounds 4693L$open_avx2_tail_128_rounds_and_x1hash: 4694 addq 0+0(%rsi,%r8,1),%r10 4695 
adcq 8+0(%rsi,%r8,1),%r11 4696 adcq $1,%r12 4697 movq 0+0+0(%rbp),%rax 4698 movq %rax,%r15 4699 mulq %r10 4700 movq %rax,%r13 4701 movq %rdx,%r14 4702 movq 0+0+0(%rbp),%rax 4703 mulq %r11 4704 imulq %r12,%r15 4705 addq %rax,%r14 4706 adcq %rdx,%r15 4707 movq 8+0+0(%rbp),%rax 4708 movq %rax,%r9 4709 mulq %r10 4710 addq %rax,%r14 4711 adcq $0,%rdx 4712 movq %rdx,%r10 4713 movq 8+0+0(%rbp),%rax 4714 mulq %r11 4715 addq %rax,%r15 4716 adcq $0,%rdx 4717 imulq %r12,%r9 4718 addq %r10,%r15 4719 adcq %rdx,%r9 4720 movq %r13,%r10 4721 movq %r14,%r11 4722 movq %r15,%r12 4723 andq $3,%r12 4724 movq %r15,%r13 4725 andq $-4,%r13 4726 movq %r9,%r14 4727 shrdq $2,%r9,%r15 4728 shrq $2,%r9 4729 addq %r13,%r15 4730 adcq %r14,%r9 4731 addq %r15,%r10 4732 adcq %r9,%r11 4733 adcq $0,%r12 4734 4735L$open_avx2_tail_128_rounds: 4736 addq $16,%r8 4737 vpaddd %ymm4,%ymm0,%ymm0 4738 vpxor %ymm0,%ymm12,%ymm12 4739 vpshufb L$rol16(%rip),%ymm12,%ymm12 4740 vpaddd %ymm12,%ymm8,%ymm8 4741 vpxor %ymm8,%ymm4,%ymm4 4742 vpsrld $20,%ymm4,%ymm3 4743 vpslld $12,%ymm4,%ymm4 4744 vpxor %ymm3,%ymm4,%ymm4 4745 vpaddd %ymm4,%ymm0,%ymm0 4746 vpxor %ymm0,%ymm12,%ymm12 4747 vpshufb L$rol8(%rip),%ymm12,%ymm12 4748 vpaddd %ymm12,%ymm8,%ymm8 4749 vpxor %ymm8,%ymm4,%ymm4 4750 vpslld $7,%ymm4,%ymm3 4751 vpsrld $25,%ymm4,%ymm4 4752 vpxor %ymm3,%ymm4,%ymm4 4753 vpalignr $12,%ymm12,%ymm12,%ymm12 4754 vpalignr $8,%ymm8,%ymm8,%ymm8 4755 vpalignr $4,%ymm4,%ymm4,%ymm4 4756 vpaddd %ymm4,%ymm0,%ymm0 4757 vpxor %ymm0,%ymm12,%ymm12 4758 vpshufb L$rol16(%rip),%ymm12,%ymm12 4759 vpaddd %ymm12,%ymm8,%ymm8 4760 vpxor %ymm8,%ymm4,%ymm4 4761 vpsrld $20,%ymm4,%ymm3 4762 vpslld $12,%ymm4,%ymm4 4763 vpxor %ymm3,%ymm4,%ymm4 4764 vpaddd %ymm4,%ymm0,%ymm0 4765 vpxor %ymm0,%ymm12,%ymm12 4766 vpshufb L$rol8(%rip),%ymm12,%ymm12 4767 vpaddd %ymm12,%ymm8,%ymm8 4768 vpxor %ymm8,%ymm4,%ymm4 4769 vpslld $7,%ymm4,%ymm3 4770 vpsrld $25,%ymm4,%ymm4 4771 vpxor %ymm3,%ymm4,%ymm4 4772 vpalignr $4,%ymm12,%ymm12,%ymm12 4773 vpalignr $8,%ymm8,%ymm8,%ymm8 4774 vpalignr $12,%ymm4,%ymm4,%ymm4 4775 4776 cmpq %rcx,%r8 4777 jb L$open_avx2_tail_128_rounds_and_x1hash 4778 cmpq $160,%r8 4779 jne L$open_avx2_tail_128_rounds 4780 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4781 vpaddd 0+64(%rbp),%ymm4,%ymm4 4782 vpaddd 0+96(%rbp),%ymm8,%ymm8 4783 vpaddd 0+160(%rbp),%ymm12,%ymm12 4784 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4785 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4786 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4787 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4788 vmovdqa %ymm3,%ymm8 4789 4790 jmp L$open_avx2_tail_128_xor 4791 4792L$open_avx2_tail_256: 4793 vmovdqa L$chacha20_consts(%rip),%ymm0 4794 vmovdqa 0+64(%rbp),%ymm4 4795 vmovdqa 0+96(%rbp),%ymm8 4796 vmovdqa %ymm0,%ymm1 4797 vmovdqa %ymm4,%ymm5 4798 vmovdqa %ymm8,%ymm9 4799 vmovdqa L$avx2_inc(%rip),%ymm12 4800 vpaddd 0+160(%rbp),%ymm12,%ymm13 4801 vpaddd %ymm13,%ymm12,%ymm12 4802 vmovdqa %ymm12,0+160(%rbp) 4803 vmovdqa %ymm13,0+192(%rbp) 4804 4805 movq %rbx,0+128(%rbp) 4806 movq %rbx,%rcx 4807 subq $128,%rcx 4808 shrq $4,%rcx 4809 movq $10,%r8 4810 cmpq $10,%rcx 4811 cmovgq %r8,%rcx 4812 movq %rsi,%rbx 4813 xorq %r8,%r8 4814L$open_avx2_tail_256_rounds_and_x1hash: 4815 addq 0+0(%rbx),%r10 4816 adcq 8+0(%rbx),%r11 4817 adcq $1,%r12 4818 movq 0+0+0(%rbp),%rdx 4819 movq %rdx,%r15 4820 mulxq %r10,%r13,%r14 4821 mulxq %r11,%rax,%rdx 4822 imulq %r12,%r15 4823 addq %rax,%r14 4824 adcq %rdx,%r15 4825 movq 8+0+0(%rbp),%rdx 4826 mulxq %r10,%r10,%rax 4827 addq %r10,%r14 4828 mulxq %r11,%r11,%r9 4829 adcq %r11,%r15 4830 adcq $0,%r9 4831 imulq %r12,%rdx 4832 addq 
%rax,%r15 4833 adcq %rdx,%r9 4834 movq %r13,%r10 4835 movq %r14,%r11 4836 movq %r15,%r12 4837 andq $3,%r12 4838 movq %r15,%r13 4839 andq $-4,%r13 4840 movq %r9,%r14 4841 shrdq $2,%r9,%r15 4842 shrq $2,%r9 4843 addq %r13,%r15 4844 adcq %r14,%r9 4845 addq %r15,%r10 4846 adcq %r9,%r11 4847 adcq $0,%r12 4848 4849 leaq 16(%rbx),%rbx 4850L$open_avx2_tail_256_rounds: 4851 vpaddd %ymm4,%ymm0,%ymm0 4852 vpxor %ymm0,%ymm12,%ymm12 4853 vpshufb L$rol16(%rip),%ymm12,%ymm12 4854 vpaddd %ymm12,%ymm8,%ymm8 4855 vpxor %ymm8,%ymm4,%ymm4 4856 vpsrld $20,%ymm4,%ymm3 4857 vpslld $12,%ymm4,%ymm4 4858 vpxor %ymm3,%ymm4,%ymm4 4859 vpaddd %ymm4,%ymm0,%ymm0 4860 vpxor %ymm0,%ymm12,%ymm12 4861 vpshufb L$rol8(%rip),%ymm12,%ymm12 4862 vpaddd %ymm12,%ymm8,%ymm8 4863 vpxor %ymm8,%ymm4,%ymm4 4864 vpslld $7,%ymm4,%ymm3 4865 vpsrld $25,%ymm4,%ymm4 4866 vpxor %ymm3,%ymm4,%ymm4 4867 vpalignr $12,%ymm12,%ymm12,%ymm12 4868 vpalignr $8,%ymm8,%ymm8,%ymm8 4869 vpalignr $4,%ymm4,%ymm4,%ymm4 4870 vpaddd %ymm5,%ymm1,%ymm1 4871 vpxor %ymm1,%ymm13,%ymm13 4872 vpshufb L$rol16(%rip),%ymm13,%ymm13 4873 vpaddd %ymm13,%ymm9,%ymm9 4874 vpxor %ymm9,%ymm5,%ymm5 4875 vpsrld $20,%ymm5,%ymm3 4876 vpslld $12,%ymm5,%ymm5 4877 vpxor %ymm3,%ymm5,%ymm5 4878 vpaddd %ymm5,%ymm1,%ymm1 4879 vpxor %ymm1,%ymm13,%ymm13 4880 vpshufb L$rol8(%rip),%ymm13,%ymm13 4881 vpaddd %ymm13,%ymm9,%ymm9 4882 vpxor %ymm9,%ymm5,%ymm5 4883 vpslld $7,%ymm5,%ymm3 4884 vpsrld $25,%ymm5,%ymm5 4885 vpxor %ymm3,%ymm5,%ymm5 4886 vpalignr $12,%ymm13,%ymm13,%ymm13 4887 vpalignr $8,%ymm9,%ymm9,%ymm9 4888 vpalignr $4,%ymm5,%ymm5,%ymm5 4889 4890 incq %r8 4891 vpaddd %ymm4,%ymm0,%ymm0 4892 vpxor %ymm0,%ymm12,%ymm12 4893 vpshufb L$rol16(%rip),%ymm12,%ymm12 4894 vpaddd %ymm12,%ymm8,%ymm8 4895 vpxor %ymm8,%ymm4,%ymm4 4896 vpsrld $20,%ymm4,%ymm3 4897 vpslld $12,%ymm4,%ymm4 4898 vpxor %ymm3,%ymm4,%ymm4 4899 vpaddd %ymm4,%ymm0,%ymm0 4900 vpxor %ymm0,%ymm12,%ymm12 4901 vpshufb L$rol8(%rip),%ymm12,%ymm12 4902 vpaddd %ymm12,%ymm8,%ymm8 4903 vpxor %ymm8,%ymm4,%ymm4 4904 vpslld $7,%ymm4,%ymm3 4905 vpsrld $25,%ymm4,%ymm4 4906 vpxor %ymm3,%ymm4,%ymm4 4907 vpalignr $4,%ymm12,%ymm12,%ymm12 4908 vpalignr $8,%ymm8,%ymm8,%ymm8 4909 vpalignr $12,%ymm4,%ymm4,%ymm4 4910 vpaddd %ymm5,%ymm1,%ymm1 4911 vpxor %ymm1,%ymm13,%ymm13 4912 vpshufb L$rol16(%rip),%ymm13,%ymm13 4913 vpaddd %ymm13,%ymm9,%ymm9 4914 vpxor %ymm9,%ymm5,%ymm5 4915 vpsrld $20,%ymm5,%ymm3 4916 vpslld $12,%ymm5,%ymm5 4917 vpxor %ymm3,%ymm5,%ymm5 4918 vpaddd %ymm5,%ymm1,%ymm1 4919 vpxor %ymm1,%ymm13,%ymm13 4920 vpshufb L$rol8(%rip),%ymm13,%ymm13 4921 vpaddd %ymm13,%ymm9,%ymm9 4922 vpxor %ymm9,%ymm5,%ymm5 4923 vpslld $7,%ymm5,%ymm3 4924 vpsrld $25,%ymm5,%ymm5 4925 vpxor %ymm3,%ymm5,%ymm5 4926 vpalignr $4,%ymm13,%ymm13,%ymm13 4927 vpalignr $8,%ymm9,%ymm9,%ymm9 4928 vpalignr $12,%ymm5,%ymm5,%ymm5 4929 vpaddd %ymm6,%ymm2,%ymm2 4930 vpxor %ymm2,%ymm14,%ymm14 4931 vpshufb L$rol16(%rip),%ymm14,%ymm14 4932 vpaddd %ymm14,%ymm10,%ymm10 4933 vpxor %ymm10,%ymm6,%ymm6 4934 vpsrld $20,%ymm6,%ymm3 4935 vpslld $12,%ymm6,%ymm6 4936 vpxor %ymm3,%ymm6,%ymm6 4937 vpaddd %ymm6,%ymm2,%ymm2 4938 vpxor %ymm2,%ymm14,%ymm14 4939 vpshufb L$rol8(%rip),%ymm14,%ymm14 4940 vpaddd %ymm14,%ymm10,%ymm10 4941 vpxor %ymm10,%ymm6,%ymm6 4942 vpslld $7,%ymm6,%ymm3 4943 vpsrld $25,%ymm6,%ymm6 4944 vpxor %ymm3,%ymm6,%ymm6 4945 vpalignr $4,%ymm14,%ymm14,%ymm14 4946 vpalignr $8,%ymm10,%ymm10,%ymm10 4947 vpalignr $12,%ymm6,%ymm6,%ymm6 4948 4949 cmpq %rcx,%r8 4950 jb L$open_avx2_tail_256_rounds_and_x1hash 4951 cmpq $10,%r8 4952 jne L$open_avx2_tail_256_rounds 4953 movq %rbx,%r8 4954 subq 
%rsi,%rbx 4955 movq %rbx,%rcx 4956 movq 0+128(%rbp),%rbx 4957L$open_avx2_tail_256_hash: 4958 addq $16,%rcx 4959 cmpq %rbx,%rcx 4960 jg L$open_avx2_tail_256_done 4961 addq 0+0(%r8),%r10 4962 adcq 8+0(%r8),%r11 4963 adcq $1,%r12 4964 movq 0+0+0(%rbp),%rdx 4965 movq %rdx,%r15 4966 mulxq %r10,%r13,%r14 4967 mulxq %r11,%rax,%rdx 4968 imulq %r12,%r15 4969 addq %rax,%r14 4970 adcq %rdx,%r15 4971 movq 8+0+0(%rbp),%rdx 4972 mulxq %r10,%r10,%rax 4973 addq %r10,%r14 4974 mulxq %r11,%r11,%r9 4975 adcq %r11,%r15 4976 adcq $0,%r9 4977 imulq %r12,%rdx 4978 addq %rax,%r15 4979 adcq %rdx,%r9 4980 movq %r13,%r10 4981 movq %r14,%r11 4982 movq %r15,%r12 4983 andq $3,%r12 4984 movq %r15,%r13 4985 andq $-4,%r13 4986 movq %r9,%r14 4987 shrdq $2,%r9,%r15 4988 shrq $2,%r9 4989 addq %r13,%r15 4990 adcq %r14,%r9 4991 addq %r15,%r10 4992 adcq %r9,%r11 4993 adcq $0,%r12 4994 4995 leaq 16(%r8),%r8 4996 jmp L$open_avx2_tail_256_hash 4997L$open_avx2_tail_256_done: 4998 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 4999 vpaddd 0+64(%rbp),%ymm5,%ymm5 5000 vpaddd 0+96(%rbp),%ymm9,%ymm9 5001 vpaddd 0+192(%rbp),%ymm13,%ymm13 5002 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5003 vpaddd 0+64(%rbp),%ymm4,%ymm4 5004 vpaddd 0+96(%rbp),%ymm8,%ymm8 5005 vpaddd 0+160(%rbp),%ymm12,%ymm12 5006 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5007 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5008 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5009 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5010 vpxor 0+0(%rsi),%ymm3,%ymm3 5011 vpxor 32+0(%rsi),%ymm1,%ymm1 5012 vpxor 64+0(%rsi),%ymm5,%ymm5 5013 vpxor 96+0(%rsi),%ymm9,%ymm9 5014 vmovdqu %ymm3,0+0(%rdi) 5015 vmovdqu %ymm1,32+0(%rdi) 5016 vmovdqu %ymm5,64+0(%rdi) 5017 vmovdqu %ymm9,96+0(%rdi) 5018 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5019 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5020 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5021 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5022 vmovdqa %ymm3,%ymm8 5023 5024 leaq 128(%rsi),%rsi 5025 leaq 128(%rdi),%rdi 5026 subq $128,%rbx 5027 jmp L$open_avx2_tail_128_xor 5028 5029L$open_avx2_tail_384: 5030 vmovdqa L$chacha20_consts(%rip),%ymm0 5031 vmovdqa 0+64(%rbp),%ymm4 5032 vmovdqa 0+96(%rbp),%ymm8 5033 vmovdqa %ymm0,%ymm1 5034 vmovdqa %ymm4,%ymm5 5035 vmovdqa %ymm8,%ymm9 5036 vmovdqa %ymm0,%ymm2 5037 vmovdqa %ymm4,%ymm6 5038 vmovdqa %ymm8,%ymm10 5039 vmovdqa L$avx2_inc(%rip),%ymm12 5040 vpaddd 0+160(%rbp),%ymm12,%ymm14 5041 vpaddd %ymm14,%ymm12,%ymm13 5042 vpaddd %ymm13,%ymm12,%ymm12 5043 vmovdqa %ymm12,0+160(%rbp) 5044 vmovdqa %ymm13,0+192(%rbp) 5045 vmovdqa %ymm14,0+224(%rbp) 5046 5047 movq %rbx,0+128(%rbp) 5048 movq %rbx,%rcx 5049 subq $256,%rcx 5050 shrq $4,%rcx 5051 addq $6,%rcx 5052 movq $10,%r8 5053 cmpq $10,%rcx 5054 cmovgq %r8,%rcx 5055 movq %rsi,%rbx 5056 xorq %r8,%r8 5057L$open_avx2_tail_384_rounds_and_x2hash: 5058 addq 0+0(%rbx),%r10 5059 adcq 8+0(%rbx),%r11 5060 adcq $1,%r12 5061 movq 0+0+0(%rbp),%rdx 5062 movq %rdx,%r15 5063 mulxq %r10,%r13,%r14 5064 mulxq %r11,%rax,%rdx 5065 imulq %r12,%r15 5066 addq %rax,%r14 5067 adcq %rdx,%r15 5068 movq 8+0+0(%rbp),%rdx 5069 mulxq %r10,%r10,%rax 5070 addq %r10,%r14 5071 mulxq %r11,%r11,%r9 5072 adcq %r11,%r15 5073 adcq $0,%r9 5074 imulq %r12,%rdx 5075 addq %rax,%r15 5076 adcq %rdx,%r9 5077 movq %r13,%r10 5078 movq %r14,%r11 5079 movq %r15,%r12 5080 andq $3,%r12 5081 movq %r15,%r13 5082 andq $-4,%r13 5083 movq %r9,%r14 5084 shrdq $2,%r9,%r15 5085 shrq $2,%r9 5086 addq %r13,%r15 5087 adcq %r14,%r9 5088 addq %r15,%r10 5089 adcq %r9,%r11 5090 adcq $0,%r12 5091 5092 leaq 16(%rbx),%rbx 5093L$open_avx2_tail_384_rounds_and_x1hash: 5094 vpaddd %ymm6,%ymm2,%ymm2 5095 vpxor 
%ymm2,%ymm14,%ymm14 5096 vpshufb L$rol16(%rip),%ymm14,%ymm14 5097 vpaddd %ymm14,%ymm10,%ymm10 5098 vpxor %ymm10,%ymm6,%ymm6 5099 vpsrld $20,%ymm6,%ymm3 5100 vpslld $12,%ymm6,%ymm6 5101 vpxor %ymm3,%ymm6,%ymm6 5102 vpaddd %ymm6,%ymm2,%ymm2 5103 vpxor %ymm2,%ymm14,%ymm14 5104 vpshufb L$rol8(%rip),%ymm14,%ymm14 5105 vpaddd %ymm14,%ymm10,%ymm10 5106 vpxor %ymm10,%ymm6,%ymm6 5107 vpslld $7,%ymm6,%ymm3 5108 vpsrld $25,%ymm6,%ymm6 5109 vpxor %ymm3,%ymm6,%ymm6 5110 vpalignr $12,%ymm14,%ymm14,%ymm14 5111 vpalignr $8,%ymm10,%ymm10,%ymm10 5112 vpalignr $4,%ymm6,%ymm6,%ymm6 5113 vpaddd %ymm5,%ymm1,%ymm1 5114 vpxor %ymm1,%ymm13,%ymm13 5115 vpshufb L$rol16(%rip),%ymm13,%ymm13 5116 vpaddd %ymm13,%ymm9,%ymm9 5117 vpxor %ymm9,%ymm5,%ymm5 5118 vpsrld $20,%ymm5,%ymm3 5119 vpslld $12,%ymm5,%ymm5 5120 vpxor %ymm3,%ymm5,%ymm5 5121 vpaddd %ymm5,%ymm1,%ymm1 5122 vpxor %ymm1,%ymm13,%ymm13 5123 vpshufb L$rol8(%rip),%ymm13,%ymm13 5124 vpaddd %ymm13,%ymm9,%ymm9 5125 vpxor %ymm9,%ymm5,%ymm5 5126 vpslld $7,%ymm5,%ymm3 5127 vpsrld $25,%ymm5,%ymm5 5128 vpxor %ymm3,%ymm5,%ymm5 5129 vpalignr $12,%ymm13,%ymm13,%ymm13 5130 vpalignr $8,%ymm9,%ymm9,%ymm9 5131 vpalignr $4,%ymm5,%ymm5,%ymm5 5132 vpaddd %ymm4,%ymm0,%ymm0 5133 vpxor %ymm0,%ymm12,%ymm12 5134 vpshufb L$rol16(%rip),%ymm12,%ymm12 5135 vpaddd %ymm12,%ymm8,%ymm8 5136 vpxor %ymm8,%ymm4,%ymm4 5137 vpsrld $20,%ymm4,%ymm3 5138 vpslld $12,%ymm4,%ymm4 5139 vpxor %ymm3,%ymm4,%ymm4 5140 vpaddd %ymm4,%ymm0,%ymm0 5141 vpxor %ymm0,%ymm12,%ymm12 5142 vpshufb L$rol8(%rip),%ymm12,%ymm12 5143 vpaddd %ymm12,%ymm8,%ymm8 5144 vpxor %ymm8,%ymm4,%ymm4 5145 vpslld $7,%ymm4,%ymm3 5146 vpsrld $25,%ymm4,%ymm4 5147 vpxor %ymm3,%ymm4,%ymm4 5148 vpalignr $12,%ymm12,%ymm12,%ymm12 5149 vpalignr $8,%ymm8,%ymm8,%ymm8 5150 vpalignr $4,%ymm4,%ymm4,%ymm4 5151 addq 0+0(%rbx),%r10 5152 adcq 8+0(%rbx),%r11 5153 adcq $1,%r12 5154 movq 0+0+0(%rbp),%rax 5155 movq %rax,%r15 5156 mulq %r10 5157 movq %rax,%r13 5158 movq %rdx,%r14 5159 movq 0+0+0(%rbp),%rax 5160 mulq %r11 5161 imulq %r12,%r15 5162 addq %rax,%r14 5163 adcq %rdx,%r15 5164 movq 8+0+0(%rbp),%rax 5165 movq %rax,%r9 5166 mulq %r10 5167 addq %rax,%r14 5168 adcq $0,%rdx 5169 movq %rdx,%r10 5170 movq 8+0+0(%rbp),%rax 5171 mulq %r11 5172 addq %rax,%r15 5173 adcq $0,%rdx 5174 imulq %r12,%r9 5175 addq %r10,%r15 5176 adcq %rdx,%r9 5177 movq %r13,%r10 5178 movq %r14,%r11 5179 movq %r15,%r12 5180 andq $3,%r12 5181 movq %r15,%r13 5182 andq $-4,%r13 5183 movq %r9,%r14 5184 shrdq $2,%r9,%r15 5185 shrq $2,%r9 5186 addq %r13,%r15 5187 adcq %r14,%r9 5188 addq %r15,%r10 5189 adcq %r9,%r11 5190 adcq $0,%r12 5191 5192 leaq 16(%rbx),%rbx 5193 incq %r8 5194 vpaddd %ymm6,%ymm2,%ymm2 5195 vpxor %ymm2,%ymm14,%ymm14 5196 vpshufb L$rol16(%rip),%ymm14,%ymm14 5197 vpaddd %ymm14,%ymm10,%ymm10 5198 vpxor %ymm10,%ymm6,%ymm6 5199 vpsrld $20,%ymm6,%ymm3 5200 vpslld $12,%ymm6,%ymm6 5201 vpxor %ymm3,%ymm6,%ymm6 5202 vpaddd %ymm6,%ymm2,%ymm2 5203 vpxor %ymm2,%ymm14,%ymm14 5204 vpshufb L$rol8(%rip),%ymm14,%ymm14 5205 vpaddd %ymm14,%ymm10,%ymm10 5206 vpxor %ymm10,%ymm6,%ymm6 5207 vpslld $7,%ymm6,%ymm3 5208 vpsrld $25,%ymm6,%ymm6 5209 vpxor %ymm3,%ymm6,%ymm6 5210 vpalignr $4,%ymm14,%ymm14,%ymm14 5211 vpalignr $8,%ymm10,%ymm10,%ymm10 5212 vpalignr $12,%ymm6,%ymm6,%ymm6 5213 vpaddd %ymm5,%ymm1,%ymm1 5214 vpxor %ymm1,%ymm13,%ymm13 5215 vpshufb L$rol16(%rip),%ymm13,%ymm13 5216 vpaddd %ymm13,%ymm9,%ymm9 5217 vpxor %ymm9,%ymm5,%ymm5 5218 vpsrld $20,%ymm5,%ymm3 5219 vpslld $12,%ymm5,%ymm5 5220 vpxor %ymm3,%ymm5,%ymm5 5221 vpaddd %ymm5,%ymm1,%ymm1 5222 vpxor %ymm1,%ymm13,%ymm13 5223 vpshufb 
L$rol8(%rip),%ymm13,%ymm13 5224 vpaddd %ymm13,%ymm9,%ymm9 5225 vpxor %ymm9,%ymm5,%ymm5 5226 vpslld $7,%ymm5,%ymm3 5227 vpsrld $25,%ymm5,%ymm5 5228 vpxor %ymm3,%ymm5,%ymm5 5229 vpalignr $4,%ymm13,%ymm13,%ymm13 5230 vpalignr $8,%ymm9,%ymm9,%ymm9 5231 vpalignr $12,%ymm5,%ymm5,%ymm5 5232 vpaddd %ymm4,%ymm0,%ymm0 5233 vpxor %ymm0,%ymm12,%ymm12 5234 vpshufb L$rol16(%rip),%ymm12,%ymm12 5235 vpaddd %ymm12,%ymm8,%ymm8 5236 vpxor %ymm8,%ymm4,%ymm4 5237 vpsrld $20,%ymm4,%ymm3 5238 vpslld $12,%ymm4,%ymm4 5239 vpxor %ymm3,%ymm4,%ymm4 5240 vpaddd %ymm4,%ymm0,%ymm0 5241 vpxor %ymm0,%ymm12,%ymm12 5242 vpshufb L$rol8(%rip),%ymm12,%ymm12 5243 vpaddd %ymm12,%ymm8,%ymm8 5244 vpxor %ymm8,%ymm4,%ymm4 5245 vpslld $7,%ymm4,%ymm3 5246 vpsrld $25,%ymm4,%ymm4 5247 vpxor %ymm3,%ymm4,%ymm4 5248 vpalignr $4,%ymm12,%ymm12,%ymm12 5249 vpalignr $8,%ymm8,%ymm8,%ymm8 5250 vpalignr $12,%ymm4,%ymm4,%ymm4 5251 5252 cmpq %rcx,%r8 5253 jb L$open_avx2_tail_384_rounds_and_x2hash 5254 cmpq $10,%r8 5255 jne L$open_avx2_tail_384_rounds_and_x1hash 5256 movq %rbx,%r8 5257 subq %rsi,%rbx 5258 movq %rbx,%rcx 5259 movq 0+128(%rbp),%rbx 5260L$open_avx2_384_tail_hash: 5261 addq $16,%rcx 5262 cmpq %rbx,%rcx 5263 jg L$open_avx2_384_tail_done 5264 addq 0+0(%r8),%r10 5265 adcq 8+0(%r8),%r11 5266 adcq $1,%r12 5267 movq 0+0+0(%rbp),%rdx 5268 movq %rdx,%r15 5269 mulxq %r10,%r13,%r14 5270 mulxq %r11,%rax,%rdx 5271 imulq %r12,%r15 5272 addq %rax,%r14 5273 adcq %rdx,%r15 5274 movq 8+0+0(%rbp),%rdx 5275 mulxq %r10,%r10,%rax 5276 addq %r10,%r14 5277 mulxq %r11,%r11,%r9 5278 adcq %r11,%r15 5279 adcq $0,%r9 5280 imulq %r12,%rdx 5281 addq %rax,%r15 5282 adcq %rdx,%r9 5283 movq %r13,%r10 5284 movq %r14,%r11 5285 movq %r15,%r12 5286 andq $3,%r12 5287 movq %r15,%r13 5288 andq $-4,%r13 5289 movq %r9,%r14 5290 shrdq $2,%r9,%r15 5291 shrq $2,%r9 5292 addq %r13,%r15 5293 adcq %r14,%r9 5294 addq %r15,%r10 5295 adcq %r9,%r11 5296 adcq $0,%r12 5297 5298 leaq 16(%r8),%r8 5299 jmp L$open_avx2_384_tail_hash 5300L$open_avx2_384_tail_done: 5301 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 5302 vpaddd 0+64(%rbp),%ymm6,%ymm6 5303 vpaddd 0+96(%rbp),%ymm10,%ymm10 5304 vpaddd 0+224(%rbp),%ymm14,%ymm14 5305 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 5306 vpaddd 0+64(%rbp),%ymm5,%ymm5 5307 vpaddd 0+96(%rbp),%ymm9,%ymm9 5308 vpaddd 0+192(%rbp),%ymm13,%ymm13 5309 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5310 vpaddd 0+64(%rbp),%ymm4,%ymm4 5311 vpaddd 0+96(%rbp),%ymm8,%ymm8 5312 vpaddd 0+160(%rbp),%ymm12,%ymm12 5313 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5314 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5315 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5316 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5317 vpxor 0+0(%rsi),%ymm3,%ymm3 5318 vpxor 32+0(%rsi),%ymm2,%ymm2 5319 vpxor 64+0(%rsi),%ymm6,%ymm6 5320 vpxor 96+0(%rsi),%ymm10,%ymm10 5321 vmovdqu %ymm3,0+0(%rdi) 5322 vmovdqu %ymm2,32+0(%rdi) 5323 vmovdqu %ymm6,64+0(%rdi) 5324 vmovdqu %ymm10,96+0(%rdi) 5325 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5326 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5327 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5328 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5329 vpxor 0+128(%rsi),%ymm3,%ymm3 5330 vpxor 32+128(%rsi),%ymm1,%ymm1 5331 vpxor 64+128(%rsi),%ymm5,%ymm5 5332 vpxor 96+128(%rsi),%ymm9,%ymm9 5333 vmovdqu %ymm3,0+128(%rdi) 5334 vmovdqu %ymm1,32+128(%rdi) 5335 vmovdqu %ymm5,64+128(%rdi) 5336 vmovdqu %ymm9,96+128(%rdi) 5337 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5338 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5339 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5340 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5341 vmovdqa %ymm3,%ymm8 5342 5343 leaq 256(%rsi),%rsi 5344 leaq 
256(%rdi),%rdi
subq $256,%rbx
jmp L$open_avx2_tail_128_xor

L$open_avx2_tail_512:
vmovdqa L$chacha20_consts(%rip),%ymm0
vmovdqa 0+64(%rbp),%ymm4
vmovdqa 0+96(%rbp),%ymm8
vmovdqa %ymm0,%ymm1
vmovdqa %ymm4,%ymm5
vmovdqa %ymm8,%ymm9
vmovdqa %ymm0,%ymm2
vmovdqa %ymm4,%ymm6
vmovdqa %ymm8,%ymm10
vmovdqa %ymm0,%ymm3
vmovdqa %ymm4,%ymm7
vmovdqa %ymm8,%ymm11
vmovdqa L$avx2_inc(%rip),%ymm12
vpaddd 0+160(%rbp),%ymm12,%ymm15
vpaddd %ymm15,%ymm12,%ymm14
vpaddd %ymm14,%ymm12,%ymm13
vpaddd %ymm13,%ymm12,%ymm12
vmovdqa %ymm15,0+256(%rbp)
vmovdqa %ymm14,0+224(%rbp)
vmovdqa %ymm13,0+192(%rbp)
vmovdqa %ymm12,0+160(%rbp)

xorq %rcx,%rcx
movq %rsi,%r8
L$open_avx2_tail_512_rounds_and_x2hash:
addq 0+0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12

leaq 16(%r8),%r8
L$open_avx2_tail_512_rounds_and_x1hash:
vmovdqa %ymm8,0+128(%rbp)
vmovdqa L$rol16(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
vpshufb %ymm8,%ymm12,%ymm12
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd 0+128(%rbp),%ymm12,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,0+128(%rbp)
vpsrld $20,%ymm7,%ymm8
vpslld $32-20,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $20,%ymm6,%ymm8
vpslld $32-20,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
vpsrld $20,%ymm5,%ymm8
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa L$rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
addq 0+0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpaddd %ymm6,%ymm2,%ymm2
5488 vpaddd %ymm5,%ymm1,%ymm1 5489 vpaddd %ymm4,%ymm0,%ymm0 5490 vpxor %ymm3,%ymm15,%ymm15 5491 vpxor %ymm2,%ymm14,%ymm14 5492 vpxor %ymm1,%ymm13,%ymm13 5493 vpxor %ymm0,%ymm12,%ymm12 5494 vpshufb %ymm8,%ymm15,%ymm15 5495 vpshufb %ymm8,%ymm14,%ymm14 5496 vpshufb %ymm8,%ymm13,%ymm13 5497 vpshufb %ymm8,%ymm12,%ymm12 5498 vpaddd %ymm15,%ymm11,%ymm11 5499 vpaddd %ymm14,%ymm10,%ymm10 5500 vpaddd %ymm13,%ymm9,%ymm9 5501 vpaddd 0+128(%rbp),%ymm12,%ymm8 5502 vpxor %ymm11,%ymm7,%ymm7 5503 vpxor %ymm10,%ymm6,%ymm6 5504 vpxor %ymm9,%ymm5,%ymm5 5505 vpxor %ymm8,%ymm4,%ymm4 5506 vmovdqa %ymm8,0+128(%rbp) 5507 vpsrld $25,%ymm7,%ymm8 5508 vpslld $32-25,%ymm7,%ymm7 5509 vpxor %ymm8,%ymm7,%ymm7 5510 vpsrld $25,%ymm6,%ymm8 5511 vpslld $32-25,%ymm6,%ymm6 5512 vpxor %ymm8,%ymm6,%ymm6 5513 vpsrld $25,%ymm5,%ymm8 5514 vpslld $32-25,%ymm5,%ymm5 5515 vpxor %ymm8,%ymm5,%ymm5 5516 vpsrld $25,%ymm4,%ymm8 5517 vpslld $32-25,%ymm4,%ymm4 5518 vpxor %ymm8,%ymm4,%ymm4 5519 vmovdqa 0+128(%rbp),%ymm8 5520 vpalignr $4,%ymm7,%ymm7,%ymm7 5521 vpalignr $8,%ymm11,%ymm11,%ymm11 5522 vpalignr $12,%ymm15,%ymm15,%ymm15 5523 vpalignr $4,%ymm6,%ymm6,%ymm6 5524 vpalignr $8,%ymm10,%ymm10,%ymm10 5525 vpalignr $12,%ymm14,%ymm14,%ymm14 5526 vpalignr $4,%ymm5,%ymm5,%ymm5 5527 vpalignr $8,%ymm9,%ymm9,%ymm9 5528 vpalignr $12,%ymm13,%ymm13,%ymm13 5529 vpalignr $4,%ymm4,%ymm4,%ymm4 5530 vpalignr $8,%ymm8,%ymm8,%ymm8 5531 vpalignr $12,%ymm12,%ymm12,%ymm12 5532 vmovdqa %ymm8,0+128(%rbp) 5533 vmovdqa L$rol16(%rip),%ymm8 5534 vpaddd %ymm7,%ymm3,%ymm3 5535 addq 0+16(%r8),%r10 5536 adcq 8+16(%r8),%r11 5537 adcq $1,%r12 5538 movq 0+0+0(%rbp),%rdx 5539 movq %rdx,%r15 5540 mulxq %r10,%r13,%r14 5541 mulxq %r11,%rax,%rdx 5542 imulq %r12,%r15 5543 addq %rax,%r14 5544 adcq %rdx,%r15 5545 movq 8+0+0(%rbp),%rdx 5546 mulxq %r10,%r10,%rax 5547 addq %r10,%r14 5548 mulxq %r11,%r11,%r9 5549 adcq %r11,%r15 5550 adcq $0,%r9 5551 imulq %r12,%rdx 5552 addq %rax,%r15 5553 adcq %rdx,%r9 5554 movq %r13,%r10 5555 movq %r14,%r11 5556 movq %r15,%r12 5557 andq $3,%r12 5558 movq %r15,%r13 5559 andq $-4,%r13 5560 movq %r9,%r14 5561 shrdq $2,%r9,%r15 5562 shrq $2,%r9 5563 addq %r13,%r15 5564 adcq %r14,%r9 5565 addq %r15,%r10 5566 adcq %r9,%r11 5567 adcq $0,%r12 5568 5569 leaq 32(%r8),%r8 5570 vpaddd %ymm6,%ymm2,%ymm2 5571 vpaddd %ymm5,%ymm1,%ymm1 5572 vpaddd %ymm4,%ymm0,%ymm0 5573 vpxor %ymm3,%ymm15,%ymm15 5574 vpxor %ymm2,%ymm14,%ymm14 5575 vpxor %ymm1,%ymm13,%ymm13 5576 vpxor %ymm0,%ymm12,%ymm12 5577 vpshufb %ymm8,%ymm15,%ymm15 5578 vpshufb %ymm8,%ymm14,%ymm14 5579 vpshufb %ymm8,%ymm13,%ymm13 5580 vpshufb %ymm8,%ymm12,%ymm12 5581 vpaddd %ymm15,%ymm11,%ymm11 5582 vpaddd %ymm14,%ymm10,%ymm10 5583 vpaddd %ymm13,%ymm9,%ymm9 5584 vpaddd 0+128(%rbp),%ymm12,%ymm8 5585 vpxor %ymm11,%ymm7,%ymm7 5586 vpxor %ymm10,%ymm6,%ymm6 5587 vpxor %ymm9,%ymm5,%ymm5 5588 vpxor %ymm8,%ymm4,%ymm4 5589 vmovdqa %ymm8,0+128(%rbp) 5590 vpsrld $20,%ymm7,%ymm8 5591 vpslld $32-20,%ymm7,%ymm7 5592 vpxor %ymm8,%ymm7,%ymm7 5593 vpsrld $20,%ymm6,%ymm8 5594 vpslld $32-20,%ymm6,%ymm6 5595 vpxor %ymm8,%ymm6,%ymm6 5596 vpsrld $20,%ymm5,%ymm8 5597 vpslld $32-20,%ymm5,%ymm5 5598 vpxor %ymm8,%ymm5,%ymm5 5599 vpsrld $20,%ymm4,%ymm8 5600 vpslld $32-20,%ymm4,%ymm4 5601 vpxor %ymm8,%ymm4,%ymm4 5602 vmovdqa L$rol8(%rip),%ymm8 5603 vpaddd %ymm7,%ymm3,%ymm3 5604 vpaddd %ymm6,%ymm2,%ymm2 5605 vpaddd %ymm5,%ymm1,%ymm1 5606 vpaddd %ymm4,%ymm0,%ymm0 5607 vpxor %ymm3,%ymm15,%ymm15 5608 vpxor %ymm2,%ymm14,%ymm14 5609 vpxor %ymm1,%ymm13,%ymm13 5610 vpxor %ymm0,%ymm12,%ymm12 5611 vpshufb %ymm8,%ymm15,%ymm15 5612 vpshufb 
%ymm8,%ymm14,%ymm14 5613 vpshufb %ymm8,%ymm13,%ymm13 5614 vpshufb %ymm8,%ymm12,%ymm12 5615 vpaddd %ymm15,%ymm11,%ymm11 5616 vpaddd %ymm14,%ymm10,%ymm10 5617 vpaddd %ymm13,%ymm9,%ymm9 5618 vpaddd 0+128(%rbp),%ymm12,%ymm8 5619 vpxor %ymm11,%ymm7,%ymm7 5620 vpxor %ymm10,%ymm6,%ymm6 5621 vpxor %ymm9,%ymm5,%ymm5 5622 vpxor %ymm8,%ymm4,%ymm4 5623 vmovdqa %ymm8,0+128(%rbp) 5624 vpsrld $25,%ymm7,%ymm8 5625 vpslld $32-25,%ymm7,%ymm7 5626 vpxor %ymm8,%ymm7,%ymm7 5627 vpsrld $25,%ymm6,%ymm8 5628 vpslld $32-25,%ymm6,%ymm6 5629 vpxor %ymm8,%ymm6,%ymm6 5630 vpsrld $25,%ymm5,%ymm8 5631 vpslld $32-25,%ymm5,%ymm5 5632 vpxor %ymm8,%ymm5,%ymm5 5633 vpsrld $25,%ymm4,%ymm8 5634 vpslld $32-25,%ymm4,%ymm4 5635 vpxor %ymm8,%ymm4,%ymm4 5636 vmovdqa 0+128(%rbp),%ymm8 5637 vpalignr $12,%ymm7,%ymm7,%ymm7 5638 vpalignr $8,%ymm11,%ymm11,%ymm11 5639 vpalignr $4,%ymm15,%ymm15,%ymm15 5640 vpalignr $12,%ymm6,%ymm6,%ymm6 5641 vpalignr $8,%ymm10,%ymm10,%ymm10 5642 vpalignr $4,%ymm14,%ymm14,%ymm14 5643 vpalignr $12,%ymm5,%ymm5,%ymm5 5644 vpalignr $8,%ymm9,%ymm9,%ymm9 5645 vpalignr $4,%ymm13,%ymm13,%ymm13 5646 vpalignr $12,%ymm4,%ymm4,%ymm4 5647 vpalignr $8,%ymm8,%ymm8,%ymm8 5648 vpalignr $4,%ymm12,%ymm12,%ymm12 5649 5650 incq %rcx 5651 cmpq $4,%rcx 5652 jl L$open_avx2_tail_512_rounds_and_x2hash 5653 cmpq $10,%rcx 5654 jne L$open_avx2_tail_512_rounds_and_x1hash 5655 movq %rbx,%rcx 5656 subq $384,%rcx 5657 andq $-16,%rcx 5658L$open_avx2_tail_512_hash: 5659 testq %rcx,%rcx 5660 je L$open_avx2_tail_512_done 5661 addq 0+0(%r8),%r10 5662 adcq 8+0(%r8),%r11 5663 adcq $1,%r12 5664 movq 0+0+0(%rbp),%rdx 5665 movq %rdx,%r15 5666 mulxq %r10,%r13,%r14 5667 mulxq %r11,%rax,%rdx 5668 imulq %r12,%r15 5669 addq %rax,%r14 5670 adcq %rdx,%r15 5671 movq 8+0+0(%rbp),%rdx 5672 mulxq %r10,%r10,%rax 5673 addq %r10,%r14 5674 mulxq %r11,%r11,%r9 5675 adcq %r11,%r15 5676 adcq $0,%r9 5677 imulq %r12,%rdx 5678 addq %rax,%r15 5679 adcq %rdx,%r9 5680 movq %r13,%r10 5681 movq %r14,%r11 5682 movq %r15,%r12 5683 andq $3,%r12 5684 movq %r15,%r13 5685 andq $-4,%r13 5686 movq %r9,%r14 5687 shrdq $2,%r9,%r15 5688 shrq $2,%r9 5689 addq %r13,%r15 5690 adcq %r14,%r9 5691 addq %r15,%r10 5692 adcq %r9,%r11 5693 adcq $0,%r12 5694 5695 leaq 16(%r8),%r8 5696 subq $16,%rcx 5697 jmp L$open_avx2_tail_512_hash 5698L$open_avx2_tail_512_done: 5699 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 5700 vpaddd 0+64(%rbp),%ymm7,%ymm7 5701 vpaddd 0+96(%rbp),%ymm11,%ymm11 5702 vpaddd 0+256(%rbp),%ymm15,%ymm15 5703 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 5704 vpaddd 0+64(%rbp),%ymm6,%ymm6 5705 vpaddd 0+96(%rbp),%ymm10,%ymm10 5706 vpaddd 0+224(%rbp),%ymm14,%ymm14 5707 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 5708 vpaddd 0+64(%rbp),%ymm5,%ymm5 5709 vpaddd 0+96(%rbp),%ymm9,%ymm9 5710 vpaddd 0+192(%rbp),%ymm13,%ymm13 5711 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5712 vpaddd 0+64(%rbp),%ymm4,%ymm4 5713 vpaddd 0+96(%rbp),%ymm8,%ymm8 5714 vpaddd 0+160(%rbp),%ymm12,%ymm12 5715 5716 vmovdqa %ymm0,0+128(%rbp) 5717 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5718 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5719 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5720 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5721 vpxor 0+0(%rsi),%ymm0,%ymm0 5722 vpxor 32+0(%rsi),%ymm3,%ymm3 5723 vpxor 64+0(%rsi),%ymm7,%ymm7 5724 vpxor 96+0(%rsi),%ymm11,%ymm11 5725 vmovdqu %ymm0,0+0(%rdi) 5726 vmovdqu %ymm3,32+0(%rdi) 5727 vmovdqu %ymm7,64+0(%rdi) 5728 vmovdqu %ymm11,96+0(%rdi) 5729 5730 vmovdqa 0+128(%rbp),%ymm0 5731 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5732 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5733 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5734 
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5735 vpxor 0+128(%rsi),%ymm3,%ymm3 5736 vpxor 32+128(%rsi),%ymm2,%ymm2 5737 vpxor 64+128(%rsi),%ymm6,%ymm6 5738 vpxor 96+128(%rsi),%ymm10,%ymm10 5739 vmovdqu %ymm3,0+128(%rdi) 5740 vmovdqu %ymm2,32+128(%rdi) 5741 vmovdqu %ymm6,64+128(%rdi) 5742 vmovdqu %ymm10,96+128(%rdi) 5743 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5744 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5745 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5746 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5747 vpxor 0+256(%rsi),%ymm3,%ymm3 5748 vpxor 32+256(%rsi),%ymm1,%ymm1 5749 vpxor 64+256(%rsi),%ymm5,%ymm5 5750 vpxor 96+256(%rsi),%ymm9,%ymm9 5751 vmovdqu %ymm3,0+256(%rdi) 5752 vmovdqu %ymm1,32+256(%rdi) 5753 vmovdqu %ymm5,64+256(%rdi) 5754 vmovdqu %ymm9,96+256(%rdi) 5755 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5756 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5757 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5758 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5759 vmovdqa %ymm3,%ymm8 5760 5761 leaq 384(%rsi),%rsi 5762 leaq 384(%rdi),%rdi 5763 subq $384,%rbx 5764L$open_avx2_tail_128_xor: 5765 cmpq $32,%rbx 5766 jb L$open_avx2_tail_32_xor 5767 subq $32,%rbx 5768 vpxor (%rsi),%ymm0,%ymm0 5769 vmovdqu %ymm0,(%rdi) 5770 leaq 32(%rsi),%rsi 5771 leaq 32(%rdi),%rdi 5772 vmovdqa %ymm4,%ymm0 5773 vmovdqa %ymm8,%ymm4 5774 vmovdqa %ymm12,%ymm8 5775 jmp L$open_avx2_tail_128_xor 5776L$open_avx2_tail_32_xor: 5777 cmpq $16,%rbx 5778 vmovdqa %xmm0,%xmm1 5779 jb L$open_avx2_exit 5780 subq $16,%rbx 5781 5782 vpxor (%rsi),%xmm0,%xmm1 5783 vmovdqu %xmm1,(%rdi) 5784 leaq 16(%rsi),%rsi 5785 leaq 16(%rdi),%rdi 5786 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 5787 vmovdqa %xmm0,%xmm1 5788L$open_avx2_exit: 5789 vzeroupper 5790 jmp L$open_sse_tail_16 5791 5792L$open_avx2_192: 5793 vmovdqa %ymm0,%ymm1 5794 vmovdqa %ymm0,%ymm2 5795 vmovdqa %ymm4,%ymm5 5796 vmovdqa %ymm4,%ymm6 5797 vmovdqa %ymm8,%ymm9 5798 vmovdqa %ymm8,%ymm10 5799 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 5800 vmovdqa %ymm12,%ymm11 5801 vmovdqa %ymm13,%ymm15 5802 movq $10,%r10 5803L$open_avx2_192_rounds: 5804 vpaddd %ymm4,%ymm0,%ymm0 5805 vpxor %ymm0,%ymm12,%ymm12 5806 vpshufb L$rol16(%rip),%ymm12,%ymm12 5807 vpaddd %ymm12,%ymm8,%ymm8 5808 vpxor %ymm8,%ymm4,%ymm4 5809 vpsrld $20,%ymm4,%ymm3 5810 vpslld $12,%ymm4,%ymm4 5811 vpxor %ymm3,%ymm4,%ymm4 5812 vpaddd %ymm4,%ymm0,%ymm0 5813 vpxor %ymm0,%ymm12,%ymm12 5814 vpshufb L$rol8(%rip),%ymm12,%ymm12 5815 vpaddd %ymm12,%ymm8,%ymm8 5816 vpxor %ymm8,%ymm4,%ymm4 5817 vpslld $7,%ymm4,%ymm3 5818 vpsrld $25,%ymm4,%ymm4 5819 vpxor %ymm3,%ymm4,%ymm4 5820 vpalignr $12,%ymm12,%ymm12,%ymm12 5821 vpalignr $8,%ymm8,%ymm8,%ymm8 5822 vpalignr $4,%ymm4,%ymm4,%ymm4 5823 vpaddd %ymm5,%ymm1,%ymm1 5824 vpxor %ymm1,%ymm13,%ymm13 5825 vpshufb L$rol16(%rip),%ymm13,%ymm13 5826 vpaddd %ymm13,%ymm9,%ymm9 5827 vpxor %ymm9,%ymm5,%ymm5 5828 vpsrld $20,%ymm5,%ymm3 5829 vpslld $12,%ymm5,%ymm5 5830 vpxor %ymm3,%ymm5,%ymm5 5831 vpaddd %ymm5,%ymm1,%ymm1 5832 vpxor %ymm1,%ymm13,%ymm13 5833 vpshufb L$rol8(%rip),%ymm13,%ymm13 5834 vpaddd %ymm13,%ymm9,%ymm9 5835 vpxor %ymm9,%ymm5,%ymm5 5836 vpslld $7,%ymm5,%ymm3 5837 vpsrld $25,%ymm5,%ymm5 5838 vpxor %ymm3,%ymm5,%ymm5 5839 vpalignr $12,%ymm13,%ymm13,%ymm13 5840 vpalignr $8,%ymm9,%ymm9,%ymm9 5841 vpalignr $4,%ymm5,%ymm5,%ymm5 5842 vpaddd %ymm4,%ymm0,%ymm0 5843 vpxor %ymm0,%ymm12,%ymm12 5844 vpshufb L$rol16(%rip),%ymm12,%ymm12 5845 vpaddd %ymm12,%ymm8,%ymm8 5846 vpxor %ymm8,%ymm4,%ymm4 5847 vpsrld $20,%ymm4,%ymm3 5848 vpslld $12,%ymm4,%ymm4 5849 vpxor %ymm3,%ymm4,%ymm4 5850 vpaddd %ymm4,%ymm0,%ymm0 5851 vpxor %ymm0,%ymm12,%ymm12 5852 vpshufb L$rol8(%rip),%ymm12,%ymm12 5853 
vpaddd %ymm12,%ymm8,%ymm8 5854 vpxor %ymm8,%ymm4,%ymm4 5855 vpslld $7,%ymm4,%ymm3 5856 vpsrld $25,%ymm4,%ymm4 5857 vpxor %ymm3,%ymm4,%ymm4 5858 vpalignr $4,%ymm12,%ymm12,%ymm12 5859 vpalignr $8,%ymm8,%ymm8,%ymm8 5860 vpalignr $12,%ymm4,%ymm4,%ymm4 5861 vpaddd %ymm5,%ymm1,%ymm1 5862 vpxor %ymm1,%ymm13,%ymm13 5863 vpshufb L$rol16(%rip),%ymm13,%ymm13 5864 vpaddd %ymm13,%ymm9,%ymm9 5865 vpxor %ymm9,%ymm5,%ymm5 5866 vpsrld $20,%ymm5,%ymm3 5867 vpslld $12,%ymm5,%ymm5 5868 vpxor %ymm3,%ymm5,%ymm5 5869 vpaddd %ymm5,%ymm1,%ymm1 5870 vpxor %ymm1,%ymm13,%ymm13 5871 vpshufb L$rol8(%rip),%ymm13,%ymm13 5872 vpaddd %ymm13,%ymm9,%ymm9 5873 vpxor %ymm9,%ymm5,%ymm5 5874 vpslld $7,%ymm5,%ymm3 5875 vpsrld $25,%ymm5,%ymm5 5876 vpxor %ymm3,%ymm5,%ymm5 5877 vpalignr $4,%ymm13,%ymm13,%ymm13 5878 vpalignr $8,%ymm9,%ymm9,%ymm9 5879 vpalignr $12,%ymm5,%ymm5,%ymm5 5880 5881 decq %r10 5882 jne L$open_avx2_192_rounds 5883 vpaddd %ymm2,%ymm0,%ymm0 5884 vpaddd %ymm2,%ymm1,%ymm1 5885 vpaddd %ymm6,%ymm4,%ymm4 5886 vpaddd %ymm6,%ymm5,%ymm5 5887 vpaddd %ymm10,%ymm8,%ymm8 5888 vpaddd %ymm10,%ymm9,%ymm9 5889 vpaddd %ymm11,%ymm12,%ymm12 5890 vpaddd %ymm15,%ymm13,%ymm13 5891 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 5892 5893 vpand L$clamp(%rip),%ymm3,%ymm3 5894 vmovdqa %ymm3,0+0(%rbp) 5895 5896 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 5897 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 5898 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 5899 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 5900 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 5901 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 5902L$open_avx2_short: 5903 movq %r8,%r8 5904 call poly_hash_ad_internal 5905L$open_avx2_short_hash_and_xor_loop: 5906 cmpq $32,%rbx 5907 jb L$open_avx2_short_tail_32 5908 subq $32,%rbx 5909 addq 0+0(%rsi),%r10 5910 adcq 8+0(%rsi),%r11 5911 adcq $1,%r12 5912 movq 0+0+0(%rbp),%rax 5913 movq %rax,%r15 5914 mulq %r10 5915 movq %rax,%r13 5916 movq %rdx,%r14 5917 movq 0+0+0(%rbp),%rax 5918 mulq %r11 5919 imulq %r12,%r15 5920 addq %rax,%r14 5921 adcq %rdx,%r15 5922 movq 8+0+0(%rbp),%rax 5923 movq %rax,%r9 5924 mulq %r10 5925 addq %rax,%r14 5926 adcq $0,%rdx 5927 movq %rdx,%r10 5928 movq 8+0+0(%rbp),%rax 5929 mulq %r11 5930 addq %rax,%r15 5931 adcq $0,%rdx 5932 imulq %r12,%r9 5933 addq %r10,%r15 5934 adcq %rdx,%r9 5935 movq %r13,%r10 5936 movq %r14,%r11 5937 movq %r15,%r12 5938 andq $3,%r12 5939 movq %r15,%r13 5940 andq $-4,%r13 5941 movq %r9,%r14 5942 shrdq $2,%r9,%r15 5943 shrq $2,%r9 5944 addq %r13,%r15 5945 adcq %r14,%r9 5946 addq %r15,%r10 5947 adcq %r9,%r11 5948 adcq $0,%r12 5949 addq 0+16(%rsi),%r10 5950 adcq 8+16(%rsi),%r11 5951 adcq $1,%r12 5952 movq 0+0+0(%rbp),%rax 5953 movq %rax,%r15 5954 mulq %r10 5955 movq %rax,%r13 5956 movq %rdx,%r14 5957 movq 0+0+0(%rbp),%rax 5958 mulq %r11 5959 imulq %r12,%r15 5960 addq %rax,%r14 5961 adcq %rdx,%r15 5962 movq 8+0+0(%rbp),%rax 5963 movq %rax,%r9 5964 mulq %r10 5965 addq %rax,%r14 5966 adcq $0,%rdx 5967 movq %rdx,%r10 5968 movq 8+0+0(%rbp),%rax 5969 mulq %r11 5970 addq %rax,%r15 5971 adcq $0,%rdx 5972 imulq %r12,%r9 5973 addq %r10,%r15 5974 adcq %rdx,%r9 5975 movq %r13,%r10 5976 movq %r14,%r11 5977 movq %r15,%r12 5978 andq $3,%r12 5979 movq %r15,%r13 5980 andq $-4,%r13 5981 movq %r9,%r14 5982 shrdq $2,%r9,%r15 5983 shrq $2,%r9 5984 addq %r13,%r15 5985 adcq %r14,%r9 5986 addq %r15,%r10 5987 adcq %r9,%r11 5988 adcq $0,%r12 5989 5990 5991 vpxor (%rsi),%ymm0,%ymm0 5992 vmovdqu %ymm0,(%rdi) 5993 leaq 32(%rsi),%rsi 5994 leaq 32(%rdi),%rdi 5995 5996 vmovdqa %ymm4,%ymm0 5997 vmovdqa %ymm8,%ymm4 5998 vmovdqa %ymm12,%ymm8 5999 vmovdqa %ymm1,%ymm12 6000 vmovdqa %ymm5,%ymm1 6001 
vmovdqa %ymm9,%ymm5 6002 vmovdqa %ymm13,%ymm9 6003 vmovdqa %ymm2,%ymm13 6004 vmovdqa %ymm6,%ymm2 6005 jmp L$open_avx2_short_hash_and_xor_loop 6006L$open_avx2_short_tail_32: 6007 cmpq $16,%rbx 6008 vmovdqa %xmm0,%xmm1 6009 jb L$open_avx2_short_tail_32_exit 6010 subq $16,%rbx 6011 addq 0+0(%rsi),%r10 6012 adcq 8+0(%rsi),%r11 6013 adcq $1,%r12 6014 movq 0+0+0(%rbp),%rax 6015 movq %rax,%r15 6016 mulq %r10 6017 movq %rax,%r13 6018 movq %rdx,%r14 6019 movq 0+0+0(%rbp),%rax 6020 mulq %r11 6021 imulq %r12,%r15 6022 addq %rax,%r14 6023 adcq %rdx,%r15 6024 movq 8+0+0(%rbp),%rax 6025 movq %rax,%r9 6026 mulq %r10 6027 addq %rax,%r14 6028 adcq $0,%rdx 6029 movq %rdx,%r10 6030 movq 8+0+0(%rbp),%rax 6031 mulq %r11 6032 addq %rax,%r15 6033 adcq $0,%rdx 6034 imulq %r12,%r9 6035 addq %r10,%r15 6036 adcq %rdx,%r9 6037 movq %r13,%r10 6038 movq %r14,%r11 6039 movq %r15,%r12 6040 andq $3,%r12 6041 movq %r15,%r13 6042 andq $-4,%r13 6043 movq %r9,%r14 6044 shrdq $2,%r9,%r15 6045 shrq $2,%r9 6046 addq %r13,%r15 6047 adcq %r14,%r9 6048 addq %r15,%r10 6049 adcq %r9,%r11 6050 adcq $0,%r12 6051 6052 vpxor (%rsi),%xmm0,%xmm3 6053 vmovdqu %xmm3,(%rdi) 6054 leaq 16(%rsi),%rsi 6055 leaq 16(%rdi),%rdi 6056 vextracti128 $1,%ymm0,%xmm1 6057L$open_avx2_short_tail_32_exit: 6058 vzeroupper 6059 jmp L$open_sse_tail_16 6060 6061L$open_avx2_320: 6062 vmovdqa %ymm0,%ymm1 6063 vmovdqa %ymm0,%ymm2 6064 vmovdqa %ymm4,%ymm5 6065 vmovdqa %ymm4,%ymm6 6066 vmovdqa %ymm8,%ymm9 6067 vmovdqa %ymm8,%ymm10 6068 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 6069 vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 6070 vmovdqa %ymm4,%ymm7 6071 vmovdqa %ymm8,%ymm11 6072 vmovdqa %ymm12,0+160(%rbp) 6073 vmovdqa %ymm13,0+192(%rbp) 6074 vmovdqa %ymm14,0+224(%rbp) 6075 movq $10,%r10 6076L$open_avx2_320_rounds: 6077 vpaddd %ymm4,%ymm0,%ymm0 6078 vpxor %ymm0,%ymm12,%ymm12 6079 vpshufb L$rol16(%rip),%ymm12,%ymm12 6080 vpaddd %ymm12,%ymm8,%ymm8 6081 vpxor %ymm8,%ymm4,%ymm4 6082 vpsrld $20,%ymm4,%ymm3 6083 vpslld $12,%ymm4,%ymm4 6084 vpxor %ymm3,%ymm4,%ymm4 6085 vpaddd %ymm4,%ymm0,%ymm0 6086 vpxor %ymm0,%ymm12,%ymm12 6087 vpshufb L$rol8(%rip),%ymm12,%ymm12 6088 vpaddd %ymm12,%ymm8,%ymm8 6089 vpxor %ymm8,%ymm4,%ymm4 6090 vpslld $7,%ymm4,%ymm3 6091 vpsrld $25,%ymm4,%ymm4 6092 vpxor %ymm3,%ymm4,%ymm4 6093 vpalignr $12,%ymm12,%ymm12,%ymm12 6094 vpalignr $8,%ymm8,%ymm8,%ymm8 6095 vpalignr $4,%ymm4,%ymm4,%ymm4 6096 vpaddd %ymm5,%ymm1,%ymm1 6097 vpxor %ymm1,%ymm13,%ymm13 6098 vpshufb L$rol16(%rip),%ymm13,%ymm13 6099 vpaddd %ymm13,%ymm9,%ymm9 6100 vpxor %ymm9,%ymm5,%ymm5 6101 vpsrld $20,%ymm5,%ymm3 6102 vpslld $12,%ymm5,%ymm5 6103 vpxor %ymm3,%ymm5,%ymm5 6104 vpaddd %ymm5,%ymm1,%ymm1 6105 vpxor %ymm1,%ymm13,%ymm13 6106 vpshufb L$rol8(%rip),%ymm13,%ymm13 6107 vpaddd %ymm13,%ymm9,%ymm9 6108 vpxor %ymm9,%ymm5,%ymm5 6109 vpslld $7,%ymm5,%ymm3 6110 vpsrld $25,%ymm5,%ymm5 6111 vpxor %ymm3,%ymm5,%ymm5 6112 vpalignr $12,%ymm13,%ymm13,%ymm13 6113 vpalignr $8,%ymm9,%ymm9,%ymm9 6114 vpalignr $4,%ymm5,%ymm5,%ymm5 6115 vpaddd %ymm6,%ymm2,%ymm2 6116 vpxor %ymm2,%ymm14,%ymm14 6117 vpshufb L$rol16(%rip),%ymm14,%ymm14 6118 vpaddd %ymm14,%ymm10,%ymm10 6119 vpxor %ymm10,%ymm6,%ymm6 6120 vpsrld $20,%ymm6,%ymm3 6121 vpslld $12,%ymm6,%ymm6 6122 vpxor %ymm3,%ymm6,%ymm6 6123 vpaddd %ymm6,%ymm2,%ymm2 6124 vpxor %ymm2,%ymm14,%ymm14 6125 vpshufb L$rol8(%rip),%ymm14,%ymm14 6126 vpaddd %ymm14,%ymm10,%ymm10 6127 vpxor %ymm10,%ymm6,%ymm6 6128 vpslld $7,%ymm6,%ymm3 6129 vpsrld $25,%ymm6,%ymm6 6130 vpxor %ymm3,%ymm6,%ymm6 6131 vpalignr $12,%ymm14,%ymm14,%ymm14 6132 vpalignr $8,%ymm10,%ymm10,%ymm10 6133 vpalignr 
$4,%ymm6,%ymm6,%ymm6 6134 vpaddd %ymm4,%ymm0,%ymm0 6135 vpxor %ymm0,%ymm12,%ymm12 6136 vpshufb L$rol16(%rip),%ymm12,%ymm12 6137 vpaddd %ymm12,%ymm8,%ymm8 6138 vpxor %ymm8,%ymm4,%ymm4 6139 vpsrld $20,%ymm4,%ymm3 6140 vpslld $12,%ymm4,%ymm4 6141 vpxor %ymm3,%ymm4,%ymm4 6142 vpaddd %ymm4,%ymm0,%ymm0 6143 vpxor %ymm0,%ymm12,%ymm12 6144 vpshufb L$rol8(%rip),%ymm12,%ymm12 6145 vpaddd %ymm12,%ymm8,%ymm8 6146 vpxor %ymm8,%ymm4,%ymm4 6147 vpslld $7,%ymm4,%ymm3 6148 vpsrld $25,%ymm4,%ymm4 6149 vpxor %ymm3,%ymm4,%ymm4 6150 vpalignr $4,%ymm12,%ymm12,%ymm12 6151 vpalignr $8,%ymm8,%ymm8,%ymm8 6152 vpalignr $12,%ymm4,%ymm4,%ymm4 6153 vpaddd %ymm5,%ymm1,%ymm1 6154 vpxor %ymm1,%ymm13,%ymm13 6155 vpshufb L$rol16(%rip),%ymm13,%ymm13 6156 vpaddd %ymm13,%ymm9,%ymm9 6157 vpxor %ymm9,%ymm5,%ymm5 6158 vpsrld $20,%ymm5,%ymm3 6159 vpslld $12,%ymm5,%ymm5 6160 vpxor %ymm3,%ymm5,%ymm5 6161 vpaddd %ymm5,%ymm1,%ymm1 6162 vpxor %ymm1,%ymm13,%ymm13 6163 vpshufb L$rol8(%rip),%ymm13,%ymm13 6164 vpaddd %ymm13,%ymm9,%ymm9 6165 vpxor %ymm9,%ymm5,%ymm5 6166 vpslld $7,%ymm5,%ymm3 6167 vpsrld $25,%ymm5,%ymm5 6168 vpxor %ymm3,%ymm5,%ymm5 6169 vpalignr $4,%ymm13,%ymm13,%ymm13 6170 vpalignr $8,%ymm9,%ymm9,%ymm9 6171 vpalignr $12,%ymm5,%ymm5,%ymm5 6172 vpaddd %ymm6,%ymm2,%ymm2 6173 vpxor %ymm2,%ymm14,%ymm14 6174 vpshufb L$rol16(%rip),%ymm14,%ymm14 6175 vpaddd %ymm14,%ymm10,%ymm10 6176 vpxor %ymm10,%ymm6,%ymm6 6177 vpsrld $20,%ymm6,%ymm3 6178 vpslld $12,%ymm6,%ymm6 6179 vpxor %ymm3,%ymm6,%ymm6 6180 vpaddd %ymm6,%ymm2,%ymm2 6181 vpxor %ymm2,%ymm14,%ymm14 6182 vpshufb L$rol8(%rip),%ymm14,%ymm14 6183 vpaddd %ymm14,%ymm10,%ymm10 6184 vpxor %ymm10,%ymm6,%ymm6 6185 vpslld $7,%ymm6,%ymm3 6186 vpsrld $25,%ymm6,%ymm6 6187 vpxor %ymm3,%ymm6,%ymm6 6188 vpalignr $4,%ymm14,%ymm14,%ymm14 6189 vpalignr $8,%ymm10,%ymm10,%ymm10 6190 vpalignr $12,%ymm6,%ymm6,%ymm6 6191 6192 decq %r10 6193 jne L$open_avx2_320_rounds 6194 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 6195 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 6196 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 6197 vpaddd %ymm7,%ymm4,%ymm4 6198 vpaddd %ymm7,%ymm5,%ymm5 6199 vpaddd %ymm7,%ymm6,%ymm6 6200 vpaddd %ymm11,%ymm8,%ymm8 6201 vpaddd %ymm11,%ymm9,%ymm9 6202 vpaddd %ymm11,%ymm10,%ymm10 6203 vpaddd 0+160(%rbp),%ymm12,%ymm12 6204 vpaddd 0+192(%rbp),%ymm13,%ymm13 6205 vpaddd 0+224(%rbp),%ymm14,%ymm14 6206 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 6207 6208 vpand L$clamp(%rip),%ymm3,%ymm3 6209 vmovdqa %ymm3,0+0(%rbp) 6210 6211 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 6212 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 6213 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 6214 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 6215 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 6216 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 6217 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 6218 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 6219 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 6220 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 6221 jmp L$open_avx2_short 6222 6223 6224 6225 6226 6227.p2align 6 6228chacha20_poly1305_seal_avx2: 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 vzeroupper 6242 vmovdqa L$chacha20_consts(%rip),%ymm0 6243 vbroadcasti128 0(%r9),%ymm4 6244 vbroadcasti128 16(%r9),%ymm8 6245 vbroadcasti128 32(%r9),%ymm12 6246 vpaddd L$avx2_init(%rip),%ymm12,%ymm12 6247 cmpq $192,%rbx 6248 jbe L$seal_avx2_192 6249 cmpq $320,%rbx 6250 jbe L$seal_avx2_320 6251 vmovdqa %ymm0,%ymm1 6252 vmovdqa %ymm0,%ymm2 6253 vmovdqa %ymm0,%ymm3 6254 vmovdqa %ymm4,%ymm5 6255 vmovdqa %ymm4,%ymm6 6256 vmovdqa %ymm4,%ymm7 6257 vmovdqa %ymm4,0+64(%rbp) 6258 vmovdqa %ymm8,%ymm9 6259 vmovdqa %ymm8,%ymm10 6260 vmovdqa 
%ymm8,%ymm11 6261 vmovdqa %ymm8,0+96(%rbp) 6262 vmovdqa %ymm12,%ymm15 6263 vpaddd L$avx2_inc(%rip),%ymm15,%ymm14 6264 vpaddd L$avx2_inc(%rip),%ymm14,%ymm13 6265 vpaddd L$avx2_inc(%rip),%ymm13,%ymm12 6266 vmovdqa %ymm12,0+160(%rbp) 6267 vmovdqa %ymm13,0+192(%rbp) 6268 vmovdqa %ymm14,0+224(%rbp) 6269 vmovdqa %ymm15,0+256(%rbp) 6270 movq $10,%r10 6271L$seal_avx2_init_rounds: 6272 vmovdqa %ymm8,0+128(%rbp) 6273 vmovdqa L$rol16(%rip),%ymm8 6274 vpaddd %ymm7,%ymm3,%ymm3 6275 vpaddd %ymm6,%ymm2,%ymm2 6276 vpaddd %ymm5,%ymm1,%ymm1 6277 vpaddd %ymm4,%ymm0,%ymm0 6278 vpxor %ymm3,%ymm15,%ymm15 6279 vpxor %ymm2,%ymm14,%ymm14 6280 vpxor %ymm1,%ymm13,%ymm13 6281 vpxor %ymm0,%ymm12,%ymm12 6282 vpshufb %ymm8,%ymm15,%ymm15 6283 vpshufb %ymm8,%ymm14,%ymm14 6284 vpshufb %ymm8,%ymm13,%ymm13 6285 vpshufb %ymm8,%ymm12,%ymm12 6286 vpaddd %ymm15,%ymm11,%ymm11 6287 vpaddd %ymm14,%ymm10,%ymm10 6288 vpaddd %ymm13,%ymm9,%ymm9 6289 vpaddd 0+128(%rbp),%ymm12,%ymm8 6290 vpxor %ymm11,%ymm7,%ymm7 6291 vpxor %ymm10,%ymm6,%ymm6 6292 vpxor %ymm9,%ymm5,%ymm5 6293 vpxor %ymm8,%ymm4,%ymm4 6294 vmovdqa %ymm8,0+128(%rbp) 6295 vpsrld $20,%ymm7,%ymm8 6296 vpslld $32-20,%ymm7,%ymm7 6297 vpxor %ymm8,%ymm7,%ymm7 6298 vpsrld $20,%ymm6,%ymm8 6299 vpslld $32-20,%ymm6,%ymm6 6300 vpxor %ymm8,%ymm6,%ymm6 6301 vpsrld $20,%ymm5,%ymm8 6302 vpslld $32-20,%ymm5,%ymm5 6303 vpxor %ymm8,%ymm5,%ymm5 6304 vpsrld $20,%ymm4,%ymm8 6305 vpslld $32-20,%ymm4,%ymm4 6306 vpxor %ymm8,%ymm4,%ymm4 6307 vmovdqa L$rol8(%rip),%ymm8 6308 vpaddd %ymm7,%ymm3,%ymm3 6309 vpaddd %ymm6,%ymm2,%ymm2 6310 vpaddd %ymm5,%ymm1,%ymm1 6311 vpaddd %ymm4,%ymm0,%ymm0 6312 vpxor %ymm3,%ymm15,%ymm15 6313 vpxor %ymm2,%ymm14,%ymm14 6314 vpxor %ymm1,%ymm13,%ymm13 6315 vpxor %ymm0,%ymm12,%ymm12 6316 vpshufb %ymm8,%ymm15,%ymm15 6317 vpshufb %ymm8,%ymm14,%ymm14 6318 vpshufb %ymm8,%ymm13,%ymm13 6319 vpshufb %ymm8,%ymm12,%ymm12 6320 vpaddd %ymm15,%ymm11,%ymm11 6321 vpaddd %ymm14,%ymm10,%ymm10 6322 vpaddd %ymm13,%ymm9,%ymm9 6323 vpaddd 0+128(%rbp),%ymm12,%ymm8 6324 vpxor %ymm11,%ymm7,%ymm7 6325 vpxor %ymm10,%ymm6,%ymm6 6326 vpxor %ymm9,%ymm5,%ymm5 6327 vpxor %ymm8,%ymm4,%ymm4 6328 vmovdqa %ymm8,0+128(%rbp) 6329 vpsrld $25,%ymm7,%ymm8 6330 vpslld $32-25,%ymm7,%ymm7 6331 vpxor %ymm8,%ymm7,%ymm7 6332 vpsrld $25,%ymm6,%ymm8 6333 vpslld $32-25,%ymm6,%ymm6 6334 vpxor %ymm8,%ymm6,%ymm6 6335 vpsrld $25,%ymm5,%ymm8 6336 vpslld $32-25,%ymm5,%ymm5 6337 vpxor %ymm8,%ymm5,%ymm5 6338 vpsrld $25,%ymm4,%ymm8 6339 vpslld $32-25,%ymm4,%ymm4 6340 vpxor %ymm8,%ymm4,%ymm4 6341 vmovdqa 0+128(%rbp),%ymm8 6342 vpalignr $4,%ymm7,%ymm7,%ymm7 6343 vpalignr $8,%ymm11,%ymm11,%ymm11 6344 vpalignr $12,%ymm15,%ymm15,%ymm15 6345 vpalignr $4,%ymm6,%ymm6,%ymm6 6346 vpalignr $8,%ymm10,%ymm10,%ymm10 6347 vpalignr $12,%ymm14,%ymm14,%ymm14 6348 vpalignr $4,%ymm5,%ymm5,%ymm5 6349 vpalignr $8,%ymm9,%ymm9,%ymm9 6350 vpalignr $12,%ymm13,%ymm13,%ymm13 6351 vpalignr $4,%ymm4,%ymm4,%ymm4 6352 vpalignr $8,%ymm8,%ymm8,%ymm8 6353 vpalignr $12,%ymm12,%ymm12,%ymm12 6354 vmovdqa %ymm8,0+128(%rbp) 6355 vmovdqa L$rol16(%rip),%ymm8 6356 vpaddd %ymm7,%ymm3,%ymm3 6357 vpaddd %ymm6,%ymm2,%ymm2 6358 vpaddd %ymm5,%ymm1,%ymm1 6359 vpaddd %ymm4,%ymm0,%ymm0 6360 vpxor %ymm3,%ymm15,%ymm15 6361 vpxor %ymm2,%ymm14,%ymm14 6362 vpxor %ymm1,%ymm13,%ymm13 6363 vpxor %ymm0,%ymm12,%ymm12 6364 vpshufb %ymm8,%ymm15,%ymm15 6365 vpshufb %ymm8,%ymm14,%ymm14 6366 vpshufb %ymm8,%ymm13,%ymm13 6367 vpshufb %ymm8,%ymm12,%ymm12 6368 vpaddd %ymm15,%ymm11,%ymm11 6369 vpaddd %ymm14,%ymm10,%ymm10 6370 vpaddd %ymm13,%ymm9,%ymm9 6371 vpaddd 0+128(%rbp),%ymm12,%ymm8 6372 vpxor 
%ymm11,%ymm7,%ymm7 6373 vpxor %ymm10,%ymm6,%ymm6 6374 vpxor %ymm9,%ymm5,%ymm5 6375 vpxor %ymm8,%ymm4,%ymm4 6376 vmovdqa %ymm8,0+128(%rbp) 6377 vpsrld $20,%ymm7,%ymm8 6378 vpslld $32-20,%ymm7,%ymm7 6379 vpxor %ymm8,%ymm7,%ymm7 6380 vpsrld $20,%ymm6,%ymm8 6381 vpslld $32-20,%ymm6,%ymm6 6382 vpxor %ymm8,%ymm6,%ymm6 6383 vpsrld $20,%ymm5,%ymm8 6384 vpslld $32-20,%ymm5,%ymm5 6385 vpxor %ymm8,%ymm5,%ymm5 6386 vpsrld $20,%ymm4,%ymm8 6387 vpslld $32-20,%ymm4,%ymm4 6388 vpxor %ymm8,%ymm4,%ymm4 6389 vmovdqa L$rol8(%rip),%ymm8 6390 vpaddd %ymm7,%ymm3,%ymm3 6391 vpaddd %ymm6,%ymm2,%ymm2 6392 vpaddd %ymm5,%ymm1,%ymm1 6393 vpaddd %ymm4,%ymm0,%ymm0 6394 vpxor %ymm3,%ymm15,%ymm15 6395 vpxor %ymm2,%ymm14,%ymm14 6396 vpxor %ymm1,%ymm13,%ymm13 6397 vpxor %ymm0,%ymm12,%ymm12 6398 vpshufb %ymm8,%ymm15,%ymm15 6399 vpshufb %ymm8,%ymm14,%ymm14 6400 vpshufb %ymm8,%ymm13,%ymm13 6401 vpshufb %ymm8,%ymm12,%ymm12 6402 vpaddd %ymm15,%ymm11,%ymm11 6403 vpaddd %ymm14,%ymm10,%ymm10 6404 vpaddd %ymm13,%ymm9,%ymm9 6405 vpaddd 0+128(%rbp),%ymm12,%ymm8 6406 vpxor %ymm11,%ymm7,%ymm7 6407 vpxor %ymm10,%ymm6,%ymm6 6408 vpxor %ymm9,%ymm5,%ymm5 6409 vpxor %ymm8,%ymm4,%ymm4 6410 vmovdqa %ymm8,0+128(%rbp) 6411 vpsrld $25,%ymm7,%ymm8 6412 vpslld $32-25,%ymm7,%ymm7 6413 vpxor %ymm8,%ymm7,%ymm7 6414 vpsrld $25,%ymm6,%ymm8 6415 vpslld $32-25,%ymm6,%ymm6 6416 vpxor %ymm8,%ymm6,%ymm6 6417 vpsrld $25,%ymm5,%ymm8 6418 vpslld $32-25,%ymm5,%ymm5 6419 vpxor %ymm8,%ymm5,%ymm5 6420 vpsrld $25,%ymm4,%ymm8 6421 vpslld $32-25,%ymm4,%ymm4 6422 vpxor %ymm8,%ymm4,%ymm4 6423 vmovdqa 0+128(%rbp),%ymm8 6424 vpalignr $12,%ymm7,%ymm7,%ymm7 6425 vpalignr $8,%ymm11,%ymm11,%ymm11 6426 vpalignr $4,%ymm15,%ymm15,%ymm15 6427 vpalignr $12,%ymm6,%ymm6,%ymm6 6428 vpalignr $8,%ymm10,%ymm10,%ymm10 6429 vpalignr $4,%ymm14,%ymm14,%ymm14 6430 vpalignr $12,%ymm5,%ymm5,%ymm5 6431 vpalignr $8,%ymm9,%ymm9,%ymm9 6432 vpalignr $4,%ymm13,%ymm13,%ymm13 6433 vpalignr $12,%ymm4,%ymm4,%ymm4 6434 vpalignr $8,%ymm8,%ymm8,%ymm8 6435 vpalignr $4,%ymm12,%ymm12,%ymm12 6436 6437 decq %r10 6438 jnz L$seal_avx2_init_rounds 6439 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 6440 vpaddd 0+64(%rbp),%ymm7,%ymm7 6441 vpaddd 0+96(%rbp),%ymm11,%ymm11 6442 vpaddd 0+256(%rbp),%ymm15,%ymm15 6443 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 6444 vpaddd 0+64(%rbp),%ymm6,%ymm6 6445 vpaddd 0+96(%rbp),%ymm10,%ymm10 6446 vpaddd 0+224(%rbp),%ymm14,%ymm14 6447 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 6448 vpaddd 0+64(%rbp),%ymm5,%ymm5 6449 vpaddd 0+96(%rbp),%ymm9,%ymm9 6450 vpaddd 0+192(%rbp),%ymm13,%ymm13 6451 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 6452 vpaddd 0+64(%rbp),%ymm4,%ymm4 6453 vpaddd 0+96(%rbp),%ymm8,%ymm8 6454 vpaddd 0+160(%rbp),%ymm12,%ymm12 6455 6456 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6457 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6458 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6459 vpand L$clamp(%rip),%ymm15,%ymm15 6460 vmovdqa %ymm15,0+0(%rbp) 6461 movq %r8,%r8 6462 call poly_hash_ad_internal 6463 6464 vpxor 0(%rsi),%ymm3,%ymm3 6465 vpxor 32(%rsi),%ymm11,%ymm11 6466 vmovdqu %ymm3,0(%rdi) 6467 vmovdqu %ymm11,32(%rdi) 6468 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6469 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6470 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6471 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6472 vpxor 0+64(%rsi),%ymm15,%ymm15 6473 vpxor 32+64(%rsi),%ymm2,%ymm2 6474 vpxor 64+64(%rsi),%ymm6,%ymm6 6475 vpxor 96+64(%rsi),%ymm10,%ymm10 6476 vmovdqu %ymm15,0+64(%rdi) 6477 vmovdqu %ymm2,32+64(%rdi) 6478 vmovdqu %ymm6,64+64(%rdi) 6479 vmovdqu %ymm10,96+64(%rdi) 6480 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 6481 vperm2i128 
$0x13,%ymm1,%ymm5,%ymm5 6482 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6483 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6484 vpxor 0+192(%rsi),%ymm15,%ymm15 6485 vpxor 32+192(%rsi),%ymm1,%ymm1 6486 vpxor 64+192(%rsi),%ymm5,%ymm5 6487 vpxor 96+192(%rsi),%ymm9,%ymm9 6488 vmovdqu %ymm15,0+192(%rdi) 6489 vmovdqu %ymm1,32+192(%rdi) 6490 vmovdqu %ymm5,64+192(%rdi) 6491 vmovdqu %ymm9,96+192(%rdi) 6492 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6493 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6494 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6495 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6496 vmovdqa %ymm15,%ymm8 6497 6498 leaq 320(%rsi),%rsi 6499 subq $320,%rbx 6500 movq $320,%rcx 6501 cmpq $128,%rbx 6502 jbe L$seal_avx2_short_hash_remainder 6503 vpxor 0(%rsi),%ymm0,%ymm0 6504 vpxor 32(%rsi),%ymm4,%ymm4 6505 vpxor 64(%rsi),%ymm8,%ymm8 6506 vpxor 96(%rsi),%ymm12,%ymm12 6507 vmovdqu %ymm0,320(%rdi) 6508 vmovdqu %ymm4,352(%rdi) 6509 vmovdqu %ymm8,384(%rdi) 6510 vmovdqu %ymm12,416(%rdi) 6511 leaq 128(%rsi),%rsi 6512 subq $128,%rbx 6513 movq $8,%rcx 6514 movq $2,%r8 6515 cmpq $128,%rbx 6516 jbe L$seal_avx2_tail_128 6517 cmpq $256,%rbx 6518 jbe L$seal_avx2_tail_256 6519 cmpq $384,%rbx 6520 jbe L$seal_avx2_tail_384 6521 cmpq $512,%rbx 6522 jbe L$seal_avx2_tail_512 6523 vmovdqa L$chacha20_consts(%rip),%ymm0 6524 vmovdqa 0+64(%rbp),%ymm4 6525 vmovdqa 0+96(%rbp),%ymm8 6526 vmovdqa %ymm0,%ymm1 6527 vmovdqa %ymm4,%ymm5 6528 vmovdqa %ymm8,%ymm9 6529 vmovdqa %ymm0,%ymm2 6530 vmovdqa %ymm4,%ymm6 6531 vmovdqa %ymm8,%ymm10 6532 vmovdqa %ymm0,%ymm3 6533 vmovdqa %ymm4,%ymm7 6534 vmovdqa %ymm8,%ymm11 6535 vmovdqa L$avx2_inc(%rip),%ymm12 6536 vpaddd 0+160(%rbp),%ymm12,%ymm15 6537 vpaddd %ymm15,%ymm12,%ymm14 6538 vpaddd %ymm14,%ymm12,%ymm13 6539 vpaddd %ymm13,%ymm12,%ymm12 6540 vmovdqa %ymm15,0+256(%rbp) 6541 vmovdqa %ymm14,0+224(%rbp) 6542 vmovdqa %ymm13,0+192(%rbp) 6543 vmovdqa %ymm12,0+160(%rbp) 6544 vmovdqa %ymm8,0+128(%rbp) 6545 vmovdqa L$rol16(%rip),%ymm8 6546 vpaddd %ymm7,%ymm3,%ymm3 6547 vpaddd %ymm6,%ymm2,%ymm2 6548 vpaddd %ymm5,%ymm1,%ymm1 6549 vpaddd %ymm4,%ymm0,%ymm0 6550 vpxor %ymm3,%ymm15,%ymm15 6551 vpxor %ymm2,%ymm14,%ymm14 6552 vpxor %ymm1,%ymm13,%ymm13 6553 vpxor %ymm0,%ymm12,%ymm12 6554 vpshufb %ymm8,%ymm15,%ymm15 6555 vpshufb %ymm8,%ymm14,%ymm14 6556 vpshufb %ymm8,%ymm13,%ymm13 6557 vpshufb %ymm8,%ymm12,%ymm12 6558 vpaddd %ymm15,%ymm11,%ymm11 6559 vpaddd %ymm14,%ymm10,%ymm10 6560 vpaddd %ymm13,%ymm9,%ymm9 6561 vpaddd 0+128(%rbp),%ymm12,%ymm8 6562 vpxor %ymm11,%ymm7,%ymm7 6563 vpxor %ymm10,%ymm6,%ymm6 6564 vpxor %ymm9,%ymm5,%ymm5 6565 vpxor %ymm8,%ymm4,%ymm4 6566 vmovdqa %ymm8,0+128(%rbp) 6567 vpsrld $20,%ymm7,%ymm8 6568 vpslld $32-20,%ymm7,%ymm7 6569 vpxor %ymm8,%ymm7,%ymm7 6570 vpsrld $20,%ymm6,%ymm8 6571 vpslld $32-20,%ymm6,%ymm6 6572 vpxor %ymm8,%ymm6,%ymm6 6573 vpsrld $20,%ymm5,%ymm8 6574 vpslld $32-20,%ymm5,%ymm5 6575 vpxor %ymm8,%ymm5,%ymm5 6576 vpsrld $20,%ymm4,%ymm8 6577 vpslld $32-20,%ymm4,%ymm4 6578 vpxor %ymm8,%ymm4,%ymm4 6579 vmovdqa L$rol8(%rip),%ymm8 6580 vpaddd %ymm7,%ymm3,%ymm3 6581 vpaddd %ymm6,%ymm2,%ymm2 6582 vpaddd %ymm5,%ymm1,%ymm1 6583 vpaddd %ymm4,%ymm0,%ymm0 6584 vpxor %ymm3,%ymm15,%ymm15 6585 vpxor %ymm2,%ymm14,%ymm14 6586 vpxor %ymm1,%ymm13,%ymm13 6587 vpxor %ymm0,%ymm12,%ymm12 6588 vpshufb %ymm8,%ymm15,%ymm15 6589 vpshufb %ymm8,%ymm14,%ymm14 6590 vpshufb %ymm8,%ymm13,%ymm13 6591 vpshufb %ymm8,%ymm12,%ymm12 6592 vpaddd %ymm15,%ymm11,%ymm11 6593 vpaddd %ymm14,%ymm10,%ymm10 6594 vpaddd %ymm13,%ymm9,%ymm9 6595 vpaddd 0+128(%rbp),%ymm12,%ymm8 6596 vpxor %ymm11,%ymm7,%ymm7 6597 vpxor %ymm10,%ymm6,%ymm6 6598 vpxor 
%ymm9,%ymm5,%ymm5 6599 vpxor %ymm8,%ymm4,%ymm4 6600 vmovdqa %ymm8,0+128(%rbp) 6601 vpsrld $25,%ymm7,%ymm8 6602 vpslld $32-25,%ymm7,%ymm7 6603 vpxor %ymm8,%ymm7,%ymm7 6604 vpsrld $25,%ymm6,%ymm8 6605 vpslld $32-25,%ymm6,%ymm6 6606 vpxor %ymm8,%ymm6,%ymm6 6607 vpsrld $25,%ymm5,%ymm8 6608 vpslld $32-25,%ymm5,%ymm5 6609 vpxor %ymm8,%ymm5,%ymm5 6610 vpsrld $25,%ymm4,%ymm8 6611 vpslld $32-25,%ymm4,%ymm4 6612 vpxor %ymm8,%ymm4,%ymm4 6613 vmovdqa 0+128(%rbp),%ymm8 6614 vpalignr $4,%ymm7,%ymm7,%ymm7 6615 vpalignr $8,%ymm11,%ymm11,%ymm11 6616 vpalignr $12,%ymm15,%ymm15,%ymm15 6617 vpalignr $4,%ymm6,%ymm6,%ymm6 6618 vpalignr $8,%ymm10,%ymm10,%ymm10 6619 vpalignr $12,%ymm14,%ymm14,%ymm14 6620 vpalignr $4,%ymm5,%ymm5,%ymm5 6621 vpalignr $8,%ymm9,%ymm9,%ymm9 6622 vpalignr $12,%ymm13,%ymm13,%ymm13 6623 vpalignr $4,%ymm4,%ymm4,%ymm4 6624 vpalignr $8,%ymm8,%ymm8,%ymm8 6625 vpalignr $12,%ymm12,%ymm12,%ymm12 6626 vmovdqa %ymm8,0+128(%rbp) 6627 vmovdqa L$rol16(%rip),%ymm8 6628 vpaddd %ymm7,%ymm3,%ymm3 6629 vpaddd %ymm6,%ymm2,%ymm2 6630 vpaddd %ymm5,%ymm1,%ymm1 6631 vpaddd %ymm4,%ymm0,%ymm0 6632 vpxor %ymm3,%ymm15,%ymm15 6633 vpxor %ymm2,%ymm14,%ymm14 6634 vpxor %ymm1,%ymm13,%ymm13 6635 vpxor %ymm0,%ymm12,%ymm12 6636 vpshufb %ymm8,%ymm15,%ymm15 6637 vpshufb %ymm8,%ymm14,%ymm14 6638 vpshufb %ymm8,%ymm13,%ymm13 6639 vpshufb %ymm8,%ymm12,%ymm12 6640 vpaddd %ymm15,%ymm11,%ymm11 6641 vpaddd %ymm14,%ymm10,%ymm10 6642 vpaddd %ymm13,%ymm9,%ymm9 6643 vpaddd 0+128(%rbp),%ymm12,%ymm8 6644 vpxor %ymm11,%ymm7,%ymm7 6645 vpxor %ymm10,%ymm6,%ymm6 6646 vpxor %ymm9,%ymm5,%ymm5 6647 vpxor %ymm8,%ymm4,%ymm4 6648 vmovdqa %ymm8,0+128(%rbp) 6649 vpsrld $20,%ymm7,%ymm8 6650 vpslld $32-20,%ymm7,%ymm7 6651 vpxor %ymm8,%ymm7,%ymm7 6652 vpsrld $20,%ymm6,%ymm8 6653 vpslld $32-20,%ymm6,%ymm6 6654 vpxor %ymm8,%ymm6,%ymm6 6655 vpsrld $20,%ymm5,%ymm8 6656 vpslld $32-20,%ymm5,%ymm5 6657 vpxor %ymm8,%ymm5,%ymm5 6658 vpsrld $20,%ymm4,%ymm8 6659 vpslld $32-20,%ymm4,%ymm4 6660 vpxor %ymm8,%ymm4,%ymm4 6661 vmovdqa L$rol8(%rip),%ymm8 6662 vpaddd %ymm7,%ymm3,%ymm3 6663 vpaddd %ymm6,%ymm2,%ymm2 6664 vpaddd %ymm5,%ymm1,%ymm1 6665 vpaddd %ymm4,%ymm0,%ymm0 6666 vpxor %ymm3,%ymm15,%ymm15 6667 vpxor %ymm2,%ymm14,%ymm14 6668 vpxor %ymm1,%ymm13,%ymm13 6669 vpxor %ymm0,%ymm12,%ymm12 6670 vpshufb %ymm8,%ymm15,%ymm15 6671 vpshufb %ymm8,%ymm14,%ymm14 6672 vpshufb %ymm8,%ymm13,%ymm13 6673 vpshufb %ymm8,%ymm12,%ymm12 6674 vpaddd %ymm15,%ymm11,%ymm11 6675 vpaddd %ymm14,%ymm10,%ymm10 6676 vpaddd %ymm13,%ymm9,%ymm9 6677 vpaddd 0+128(%rbp),%ymm12,%ymm8 6678 vpxor %ymm11,%ymm7,%ymm7 6679 vpxor %ymm10,%ymm6,%ymm6 6680 vpxor %ymm9,%ymm5,%ymm5 6681 vpxor %ymm8,%ymm4,%ymm4 6682 vmovdqa %ymm8,0+128(%rbp) 6683 vpsrld $25,%ymm7,%ymm8 6684 vpslld $32-25,%ymm7,%ymm7 6685 vpxor %ymm8,%ymm7,%ymm7 6686 vpsrld $25,%ymm6,%ymm8 6687 vpslld $32-25,%ymm6,%ymm6 6688 vpxor %ymm8,%ymm6,%ymm6 6689 vpsrld $25,%ymm5,%ymm8 6690 vpslld $32-25,%ymm5,%ymm5 6691 vpxor %ymm8,%ymm5,%ymm5 6692 vpsrld $25,%ymm4,%ymm8 6693 vpslld $32-25,%ymm4,%ymm4 6694 vpxor %ymm8,%ymm4,%ymm4 6695 vmovdqa 0+128(%rbp),%ymm8 6696 vpalignr $12,%ymm7,%ymm7,%ymm7 6697 vpalignr $8,%ymm11,%ymm11,%ymm11 6698 vpalignr $4,%ymm15,%ymm15,%ymm15 6699 vpalignr $12,%ymm6,%ymm6,%ymm6 6700 vpalignr $8,%ymm10,%ymm10,%ymm10 6701 vpalignr $4,%ymm14,%ymm14,%ymm14 6702 vpalignr $12,%ymm5,%ymm5,%ymm5 6703 vpalignr $8,%ymm9,%ymm9,%ymm9 6704 vpalignr $4,%ymm13,%ymm13,%ymm13 6705 vpalignr $12,%ymm4,%ymm4,%ymm4 6706 vpalignr $8,%ymm8,%ymm8,%ymm8 6707 vpalignr $4,%ymm12,%ymm12,%ymm12 6708 vmovdqa %ymm8,0+128(%rbp) 6709 vmovdqa 
L$rol16(%rip),%ymm8 6710 vpaddd %ymm7,%ymm3,%ymm3 6711 vpaddd %ymm6,%ymm2,%ymm2 6712 vpaddd %ymm5,%ymm1,%ymm1 6713 vpaddd %ymm4,%ymm0,%ymm0 6714 vpxor %ymm3,%ymm15,%ymm15 6715 vpxor %ymm2,%ymm14,%ymm14 6716 vpxor %ymm1,%ymm13,%ymm13 6717 vpxor %ymm0,%ymm12,%ymm12 6718 vpshufb %ymm8,%ymm15,%ymm15 6719 vpshufb %ymm8,%ymm14,%ymm14 6720 vpshufb %ymm8,%ymm13,%ymm13 6721 vpshufb %ymm8,%ymm12,%ymm12 6722 vpaddd %ymm15,%ymm11,%ymm11 6723 vpaddd %ymm14,%ymm10,%ymm10 6724 vpaddd %ymm13,%ymm9,%ymm9 6725 vpaddd 0+128(%rbp),%ymm12,%ymm8 6726 vpxor %ymm11,%ymm7,%ymm7 6727 vpxor %ymm10,%ymm6,%ymm6 6728 vpxor %ymm9,%ymm5,%ymm5 6729 vpxor %ymm8,%ymm4,%ymm4 6730 vmovdqa %ymm8,0+128(%rbp) 6731 vpsrld $20,%ymm7,%ymm8 6732 vpslld $32-20,%ymm7,%ymm7 6733 vpxor %ymm8,%ymm7,%ymm7 6734 vpsrld $20,%ymm6,%ymm8 6735 vpslld $32-20,%ymm6,%ymm6 6736 vpxor %ymm8,%ymm6,%ymm6 6737 vpsrld $20,%ymm5,%ymm8 6738 vpslld $32-20,%ymm5,%ymm5 6739 vpxor %ymm8,%ymm5,%ymm5 6740 vpsrld $20,%ymm4,%ymm8 6741 vpslld $32-20,%ymm4,%ymm4 6742 vpxor %ymm8,%ymm4,%ymm4 6743 vmovdqa L$rol8(%rip),%ymm8 6744 vpaddd %ymm7,%ymm3,%ymm3 6745 vpaddd %ymm6,%ymm2,%ymm2 6746 vpaddd %ymm5,%ymm1,%ymm1 6747 vpaddd %ymm4,%ymm0,%ymm0 6748 vpxor %ymm3,%ymm15,%ymm15 6749 6750 subq $16,%rdi 6751 movq $9,%rcx 6752 jmp L$seal_avx2_main_loop_rounds_entry 6753.p2align 5 6754L$seal_avx2_main_loop: 6755 vmovdqa L$chacha20_consts(%rip),%ymm0 6756 vmovdqa 0+64(%rbp),%ymm4 6757 vmovdqa 0+96(%rbp),%ymm8 6758 vmovdqa %ymm0,%ymm1 6759 vmovdqa %ymm4,%ymm5 6760 vmovdqa %ymm8,%ymm9 6761 vmovdqa %ymm0,%ymm2 6762 vmovdqa %ymm4,%ymm6 6763 vmovdqa %ymm8,%ymm10 6764 vmovdqa %ymm0,%ymm3 6765 vmovdqa %ymm4,%ymm7 6766 vmovdqa %ymm8,%ymm11 6767 vmovdqa L$avx2_inc(%rip),%ymm12 6768 vpaddd 0+160(%rbp),%ymm12,%ymm15 6769 vpaddd %ymm15,%ymm12,%ymm14 6770 vpaddd %ymm14,%ymm12,%ymm13 6771 vpaddd %ymm13,%ymm12,%ymm12 6772 vmovdqa %ymm15,0+256(%rbp) 6773 vmovdqa %ymm14,0+224(%rbp) 6774 vmovdqa %ymm13,0+192(%rbp) 6775 vmovdqa %ymm12,0+160(%rbp) 6776 6777 movq $10,%rcx 6778.p2align 5 6779L$seal_avx2_main_loop_rounds: 6780 addq 0+0(%rdi),%r10 6781 adcq 8+0(%rdi),%r11 6782 adcq $1,%r12 6783 vmovdqa %ymm8,0+128(%rbp) 6784 vmovdqa L$rol16(%rip),%ymm8 6785 vpaddd %ymm7,%ymm3,%ymm3 6786 vpaddd %ymm6,%ymm2,%ymm2 6787 vpaddd %ymm5,%ymm1,%ymm1 6788 vpaddd %ymm4,%ymm0,%ymm0 6789 vpxor %ymm3,%ymm15,%ymm15 6790 vpxor %ymm2,%ymm14,%ymm14 6791 vpxor %ymm1,%ymm13,%ymm13 6792 vpxor %ymm0,%ymm12,%ymm12 6793 movq 0+0+0(%rbp),%rdx 6794 movq %rdx,%r15 6795 mulxq %r10,%r13,%r14 6796 mulxq %r11,%rax,%rdx 6797 imulq %r12,%r15 6798 addq %rax,%r14 6799 adcq %rdx,%r15 6800 vpshufb %ymm8,%ymm15,%ymm15 6801 vpshufb %ymm8,%ymm14,%ymm14 6802 vpshufb %ymm8,%ymm13,%ymm13 6803 vpshufb %ymm8,%ymm12,%ymm12 6804 vpaddd %ymm15,%ymm11,%ymm11 6805 vpaddd %ymm14,%ymm10,%ymm10 6806 vpaddd %ymm13,%ymm9,%ymm9 6807 vpaddd 0+128(%rbp),%ymm12,%ymm8 6808 vpxor %ymm11,%ymm7,%ymm7 6809 movq 8+0+0(%rbp),%rdx 6810 mulxq %r10,%r10,%rax 6811 addq %r10,%r14 6812 mulxq %r11,%r11,%r9 6813 adcq %r11,%r15 6814 adcq $0,%r9 6815 imulq %r12,%rdx 6816 vpxor %ymm10,%ymm6,%ymm6 6817 vpxor %ymm9,%ymm5,%ymm5 6818 vpxor %ymm8,%ymm4,%ymm4 6819 vmovdqa %ymm8,0+128(%rbp) 6820 vpsrld $20,%ymm7,%ymm8 6821 vpslld $32-20,%ymm7,%ymm7 6822 vpxor %ymm8,%ymm7,%ymm7 6823 vpsrld $20,%ymm6,%ymm8 6824 vpslld $32-20,%ymm6,%ymm6 6825 vpxor %ymm8,%ymm6,%ymm6 6826 vpsrld $20,%ymm5,%ymm8 6827 vpslld $32-20,%ymm5,%ymm5 6828 addq %rax,%r15 6829 adcq %rdx,%r9 6830 vpxor %ymm8,%ymm5,%ymm5 6831 vpsrld $20,%ymm4,%ymm8 6832 vpslld $32-20,%ymm4,%ymm4 6833 vpxor %ymm8,%ymm4,%ymm4 6834 
vmovdqa L$rol8(%rip),%ymm8 6835 vpaddd %ymm7,%ymm3,%ymm3 6836 vpaddd %ymm6,%ymm2,%ymm2 6837 vpaddd %ymm5,%ymm1,%ymm1 6838 vpaddd %ymm4,%ymm0,%ymm0 6839 vpxor %ymm3,%ymm15,%ymm15 6840 movq %r13,%r10 6841 movq %r14,%r11 6842 movq %r15,%r12 6843 andq $3,%r12 6844 movq %r15,%r13 6845 andq $-4,%r13 6846 movq %r9,%r14 6847 shrdq $2,%r9,%r15 6848 shrq $2,%r9 6849 addq %r13,%r15 6850 adcq %r14,%r9 6851 addq %r15,%r10 6852 adcq %r9,%r11 6853 adcq $0,%r12 6854 6855L$seal_avx2_main_loop_rounds_entry: 6856 vpxor %ymm2,%ymm14,%ymm14 6857 vpxor %ymm1,%ymm13,%ymm13 6858 vpxor %ymm0,%ymm12,%ymm12 6859 vpshufb %ymm8,%ymm15,%ymm15 6860 vpshufb %ymm8,%ymm14,%ymm14 6861 vpshufb %ymm8,%ymm13,%ymm13 6862 vpshufb %ymm8,%ymm12,%ymm12 6863 vpaddd %ymm15,%ymm11,%ymm11 6864 vpaddd %ymm14,%ymm10,%ymm10 6865 addq 0+16(%rdi),%r10 6866 adcq 8+16(%rdi),%r11 6867 adcq $1,%r12 6868 vpaddd %ymm13,%ymm9,%ymm9 6869 vpaddd 0+128(%rbp),%ymm12,%ymm8 6870 vpxor %ymm11,%ymm7,%ymm7 6871 vpxor %ymm10,%ymm6,%ymm6 6872 vpxor %ymm9,%ymm5,%ymm5 6873 vpxor %ymm8,%ymm4,%ymm4 6874 vmovdqa %ymm8,0+128(%rbp) 6875 vpsrld $25,%ymm7,%ymm8 6876 movq 0+0+0(%rbp),%rdx 6877 movq %rdx,%r15 6878 mulxq %r10,%r13,%r14 6879 mulxq %r11,%rax,%rdx 6880 imulq %r12,%r15 6881 addq %rax,%r14 6882 adcq %rdx,%r15 6883 vpslld $32-25,%ymm7,%ymm7 6884 vpxor %ymm8,%ymm7,%ymm7 6885 vpsrld $25,%ymm6,%ymm8 6886 vpslld $32-25,%ymm6,%ymm6 6887 vpxor %ymm8,%ymm6,%ymm6 6888 vpsrld $25,%ymm5,%ymm8 6889 vpslld $32-25,%ymm5,%ymm5 6890 vpxor %ymm8,%ymm5,%ymm5 6891 vpsrld $25,%ymm4,%ymm8 6892 vpslld $32-25,%ymm4,%ymm4 6893 vpxor %ymm8,%ymm4,%ymm4 6894 vmovdqa 0+128(%rbp),%ymm8 6895 vpalignr $4,%ymm7,%ymm7,%ymm7 6896 vpalignr $8,%ymm11,%ymm11,%ymm11 6897 vpalignr $12,%ymm15,%ymm15,%ymm15 6898 vpalignr $4,%ymm6,%ymm6,%ymm6 6899 vpalignr $8,%ymm10,%ymm10,%ymm10 6900 vpalignr $12,%ymm14,%ymm14,%ymm14 6901 movq 8+0+0(%rbp),%rdx 6902 mulxq %r10,%r10,%rax 6903 addq %r10,%r14 6904 mulxq %r11,%r11,%r9 6905 adcq %r11,%r15 6906 adcq $0,%r9 6907 imulq %r12,%rdx 6908 vpalignr $4,%ymm5,%ymm5,%ymm5 6909 vpalignr $8,%ymm9,%ymm9,%ymm9 6910 vpalignr $12,%ymm13,%ymm13,%ymm13 6911 vpalignr $4,%ymm4,%ymm4,%ymm4 6912 vpalignr $8,%ymm8,%ymm8,%ymm8 6913 vpalignr $12,%ymm12,%ymm12,%ymm12 6914 vmovdqa %ymm8,0+128(%rbp) 6915 vmovdqa L$rol16(%rip),%ymm8 6916 vpaddd %ymm7,%ymm3,%ymm3 6917 vpaddd %ymm6,%ymm2,%ymm2 6918 vpaddd %ymm5,%ymm1,%ymm1 6919 vpaddd %ymm4,%ymm0,%ymm0 6920 vpxor %ymm3,%ymm15,%ymm15 6921 vpxor %ymm2,%ymm14,%ymm14 6922 vpxor %ymm1,%ymm13,%ymm13 6923 vpxor %ymm0,%ymm12,%ymm12 6924 vpshufb %ymm8,%ymm15,%ymm15 6925 vpshufb %ymm8,%ymm14,%ymm14 6926 addq %rax,%r15 6927 adcq %rdx,%r9 6928 vpshufb %ymm8,%ymm13,%ymm13 6929 vpshufb %ymm8,%ymm12,%ymm12 6930 vpaddd %ymm15,%ymm11,%ymm11 6931 vpaddd %ymm14,%ymm10,%ymm10 6932 vpaddd %ymm13,%ymm9,%ymm9 6933 vpaddd 0+128(%rbp),%ymm12,%ymm8 6934 vpxor %ymm11,%ymm7,%ymm7 6935 vpxor %ymm10,%ymm6,%ymm6 6936 vpxor %ymm9,%ymm5,%ymm5 6937 movq %r13,%r10 6938 movq %r14,%r11 6939 movq %r15,%r12 6940 andq $3,%r12 6941 movq %r15,%r13 6942 andq $-4,%r13 6943 movq %r9,%r14 6944 shrdq $2,%r9,%r15 6945 shrq $2,%r9 6946 addq %r13,%r15 6947 adcq %r14,%r9 6948 addq %r15,%r10 6949 adcq %r9,%r11 6950 adcq $0,%r12 6951 vpxor %ymm8,%ymm4,%ymm4 6952 vmovdqa %ymm8,0+128(%rbp) 6953 vpsrld $20,%ymm7,%ymm8 6954 vpslld $32-20,%ymm7,%ymm7 6955 vpxor %ymm8,%ymm7,%ymm7 6956 vpsrld $20,%ymm6,%ymm8 6957 vpslld $32-20,%ymm6,%ymm6 6958 vpxor %ymm8,%ymm6,%ymm6 6959 addq 0+32(%rdi),%r10 6960 adcq 8+32(%rdi),%r11 6961 adcq $1,%r12 6962 6963 leaq 48(%rdi),%rdi 6964 vpsrld $20,%ymm5,%ymm8 
vpslld $32-20,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $20,%ymm4,%ymm8
vpslld $32-20,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa L$rol8(%rip),%ymm8
vpaddd %ymm7,%ymm3,%ymm3
vpaddd %ymm6,%ymm2,%ymm2
vpaddd %ymm5,%ymm1,%ymm1
vpaddd %ymm4,%ymm0,%ymm0
vpxor %ymm3,%ymm15,%ymm15
vpxor %ymm2,%ymm14,%ymm14
vpxor %ymm1,%ymm13,%ymm13
vpxor %ymm0,%ymm12,%ymm12
vpshufb %ymm8,%ymm15,%ymm15
vpshufb %ymm8,%ymm14,%ymm14
vpshufb %ymm8,%ymm13,%ymm13
movq 0+0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
vpshufb %ymm8,%ymm12,%ymm12
vpaddd %ymm15,%ymm11,%ymm11
vpaddd %ymm14,%ymm10,%ymm10
vpaddd %ymm13,%ymm9,%ymm9
vpaddd 0+128(%rbp),%ymm12,%ymm8
vpxor %ymm11,%ymm7,%ymm7
vpxor %ymm10,%ymm6,%ymm6
vpxor %ymm9,%ymm5,%ymm5
movq 8+0+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
vpxor %ymm8,%ymm4,%ymm4
vmovdqa %ymm8,0+128(%rbp)
vpsrld $25,%ymm7,%ymm8
vpslld $32-25,%ymm7,%ymm7
vpxor %ymm8,%ymm7,%ymm7
vpsrld $25,%ymm6,%ymm8
vpslld $32-25,%ymm6,%ymm6
vpxor %ymm8,%ymm6,%ymm6
addq %rax,%r15
adcq %rdx,%r9
vpsrld $25,%ymm5,%ymm8
vpslld $32-25,%ymm5,%ymm5
vpxor %ymm8,%ymm5,%ymm5
vpsrld $25,%ymm4,%ymm8
vpslld $32-25,%ymm4,%ymm4
vpxor %ymm8,%ymm4,%ymm4
vmovdqa 0+128(%rbp),%ymm8
vpalignr $12,%ymm7,%ymm7,%ymm7
vpalignr $8,%ymm11,%ymm11,%ymm11
vpalignr $4,%ymm15,%ymm15,%ymm15
vpalignr $12,%ymm6,%ymm6,%ymm6
vpalignr $8,%ymm10,%ymm10,%ymm10
vpalignr $4,%ymm14,%ymm14,%ymm14
vpalignr $12,%ymm5,%ymm5,%ymm5
vpalignr $8,%ymm9,%ymm9,%ymm9
vpalignr $4,%ymm13,%ymm13,%ymm13
vpalignr $12,%ymm4,%ymm4,%ymm4
vpalignr $8,%ymm8,%ymm8,%ymm8
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
vpalignr $4,%ymm12,%ymm12,%ymm12

decq %rcx
jne L$seal_avx2_main_loop_rounds
vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3
vpaddd 0+64(%rbp),%ymm7,%ymm7
vpaddd 0+96(%rbp),%ymm11,%ymm11
vpaddd 0+256(%rbp),%ymm15,%ymm15
vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2
vpaddd 0+64(%rbp),%ymm6,%ymm6
vpaddd 0+96(%rbp),%ymm10,%ymm10
vpaddd 0+224(%rbp),%ymm14,%ymm14
vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1
vpaddd 0+64(%rbp),%ymm5,%ymm5
vpaddd 0+96(%rbp),%ymm9,%ymm9
vpaddd 0+192(%rbp),%ymm13,%ymm13
vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0
vpaddd 0+64(%rbp),%ymm4,%ymm4
vpaddd 0+96(%rbp),%ymm8,%ymm8
vpaddd 0+160(%rbp),%ymm12,%ymm12

vmovdqa %ymm0,0+128(%rbp)
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
addq 0+16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12

leaq 32(%rdi),%rdi
vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
vpxor 0+0(%rsi),%ymm0,%ymm0
vpxor 32+0(%rsi),%ymm3,%ymm3
vpxor 64+0(%rsi),%ymm7,%ymm7
vpxor 96+0(%rsi),%ymm11,%ymm11
vmovdqu %ymm0,0+0(%rdi)
vmovdqu %ymm3,32+0(%rdi)
vmovdqu %ymm7,64+0(%rdi)
vmovdqu %ymm11,96+0(%rdi)

vmovdqa 0+128(%rbp),%ymm0
vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
vpxor 0+128(%rsi),%ymm3,%ymm3
vpxor 32+128(%rsi),%ymm2,%ymm2
vpxor 64+128(%rsi),%ymm6,%ymm6
vpxor 96+128(%rsi),%ymm10,%ymm10
vmovdqu %ymm3,0+128(%rdi)
vmovdqu %ymm2,32+128(%rdi)
vmovdqu %ymm6,64+128(%rdi)
vmovdqu %ymm10,96+128(%rdi)
vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
vpxor 0+256(%rsi),%ymm3,%ymm3
vpxor 32+256(%rsi),%ymm1,%ymm1
vpxor 64+256(%rsi),%ymm5,%ymm5
vpxor 96+256(%rsi),%ymm9,%ymm9
vmovdqu %ymm3,0+256(%rdi)
vmovdqu %ymm1,32+256(%rdi)
vmovdqu %ymm5,64+256(%rdi)
vmovdqu %ymm9,96+256(%rdi)
vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
vpxor 0+384(%rsi),%ymm3,%ymm3
vpxor 32+384(%rsi),%ymm0,%ymm0
vpxor 64+384(%rsi),%ymm4,%ymm4
vpxor 96+384(%rsi),%ymm8,%ymm8
vmovdqu %ymm3,0+384(%rdi)
vmovdqu %ymm0,32+384(%rdi)
vmovdqu %ymm4,64+384(%rdi)
vmovdqu %ymm8,96+384(%rdi)

leaq 512(%rsi),%rsi
subq $512,%rbx
cmpq $512,%rbx
jg L$seal_avx2_main_loop

addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rdx
movq %rdx,%r15
mulxq %r10,%r13,%r14
mulxq %r11,%rax,%rdx
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rdx
mulxq %r10,%r10,%rax
addq %r10,%r14
mulxq %r11,%r11,%r9
adcq %r11,%r15
adcq $0,%r9
imulq %r12,%rdx
addq %rax,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
addq 0+16(%rdi),%r10
adcq 8+16(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rdx
movq
%rdx,%r15 7230 mulxq %r10,%r13,%r14 7231 mulxq %r11,%rax,%rdx 7232 imulq %r12,%r15 7233 addq %rax,%r14 7234 adcq %rdx,%r15 7235 movq 8+0+0(%rbp),%rdx 7236 mulxq %r10,%r10,%rax 7237 addq %r10,%r14 7238 mulxq %r11,%r11,%r9 7239 adcq %r11,%r15 7240 adcq $0,%r9 7241 imulq %r12,%rdx 7242 addq %rax,%r15 7243 adcq %rdx,%r9 7244 movq %r13,%r10 7245 movq %r14,%r11 7246 movq %r15,%r12 7247 andq $3,%r12 7248 movq %r15,%r13 7249 andq $-4,%r13 7250 movq %r9,%r14 7251 shrdq $2,%r9,%r15 7252 shrq $2,%r9 7253 addq %r13,%r15 7254 adcq %r14,%r9 7255 addq %r15,%r10 7256 adcq %r9,%r11 7257 adcq $0,%r12 7258 7259 leaq 32(%rdi),%rdi 7260 movq $10,%rcx 7261 xorq %r8,%r8 7262 7263 cmpq $384,%rbx 7264 ja L$seal_avx2_tail_512 7265 cmpq $256,%rbx 7266 ja L$seal_avx2_tail_384 7267 cmpq $128,%rbx 7268 ja L$seal_avx2_tail_256 7269 7270L$seal_avx2_tail_128: 7271 vmovdqa L$chacha20_consts(%rip),%ymm0 7272 vmovdqa 0+64(%rbp),%ymm4 7273 vmovdqa 0+96(%rbp),%ymm8 7274 vmovdqa L$avx2_inc(%rip),%ymm12 7275 vpaddd 0+160(%rbp),%ymm12,%ymm12 7276 vmovdqa %ymm12,0+160(%rbp) 7277 7278L$seal_avx2_tail_128_rounds_and_3xhash: 7279 addq 0+0(%rdi),%r10 7280 adcq 8+0(%rdi),%r11 7281 adcq $1,%r12 7282 movq 0+0+0(%rbp),%rdx 7283 movq %rdx,%r15 7284 mulxq %r10,%r13,%r14 7285 mulxq %r11,%rax,%rdx 7286 imulq %r12,%r15 7287 addq %rax,%r14 7288 adcq %rdx,%r15 7289 movq 8+0+0(%rbp),%rdx 7290 mulxq %r10,%r10,%rax 7291 addq %r10,%r14 7292 mulxq %r11,%r11,%r9 7293 adcq %r11,%r15 7294 adcq $0,%r9 7295 imulq %r12,%rdx 7296 addq %rax,%r15 7297 adcq %rdx,%r9 7298 movq %r13,%r10 7299 movq %r14,%r11 7300 movq %r15,%r12 7301 andq $3,%r12 7302 movq %r15,%r13 7303 andq $-4,%r13 7304 movq %r9,%r14 7305 shrdq $2,%r9,%r15 7306 shrq $2,%r9 7307 addq %r13,%r15 7308 adcq %r14,%r9 7309 addq %r15,%r10 7310 adcq %r9,%r11 7311 adcq $0,%r12 7312 7313 leaq 16(%rdi),%rdi 7314L$seal_avx2_tail_128_rounds_and_2xhash: 7315 vpaddd %ymm4,%ymm0,%ymm0 7316 vpxor %ymm0,%ymm12,%ymm12 7317 vpshufb L$rol16(%rip),%ymm12,%ymm12 7318 vpaddd %ymm12,%ymm8,%ymm8 7319 vpxor %ymm8,%ymm4,%ymm4 7320 vpsrld $20,%ymm4,%ymm3 7321 vpslld $12,%ymm4,%ymm4 7322 vpxor %ymm3,%ymm4,%ymm4 7323 vpaddd %ymm4,%ymm0,%ymm0 7324 vpxor %ymm0,%ymm12,%ymm12 7325 vpshufb L$rol8(%rip),%ymm12,%ymm12 7326 vpaddd %ymm12,%ymm8,%ymm8 7327 vpxor %ymm8,%ymm4,%ymm4 7328 vpslld $7,%ymm4,%ymm3 7329 vpsrld $25,%ymm4,%ymm4 7330 vpxor %ymm3,%ymm4,%ymm4 7331 vpalignr $12,%ymm12,%ymm12,%ymm12 7332 vpalignr $8,%ymm8,%ymm8,%ymm8 7333 vpalignr $4,%ymm4,%ymm4,%ymm4 7334 addq 0+0(%rdi),%r10 7335 adcq 8+0(%rdi),%r11 7336 adcq $1,%r12 7337 movq 0+0+0(%rbp),%rdx 7338 movq %rdx,%r15 7339 mulxq %r10,%r13,%r14 7340 mulxq %r11,%rax,%rdx 7341 imulq %r12,%r15 7342 addq %rax,%r14 7343 adcq %rdx,%r15 7344 movq 8+0+0(%rbp),%rdx 7345 mulxq %r10,%r10,%rax 7346 addq %r10,%r14 7347 mulxq %r11,%r11,%r9 7348 adcq %r11,%r15 7349 adcq $0,%r9 7350 imulq %r12,%rdx 7351 addq %rax,%r15 7352 adcq %rdx,%r9 7353 movq %r13,%r10 7354 movq %r14,%r11 7355 movq %r15,%r12 7356 andq $3,%r12 7357 movq %r15,%r13 7358 andq $-4,%r13 7359 movq %r9,%r14 7360 shrdq $2,%r9,%r15 7361 shrq $2,%r9 7362 addq %r13,%r15 7363 adcq %r14,%r9 7364 addq %r15,%r10 7365 adcq %r9,%r11 7366 adcq $0,%r12 7367 vpaddd %ymm4,%ymm0,%ymm0 7368 vpxor %ymm0,%ymm12,%ymm12 7369 vpshufb L$rol16(%rip),%ymm12,%ymm12 7370 vpaddd %ymm12,%ymm8,%ymm8 7371 vpxor %ymm8,%ymm4,%ymm4 7372 vpsrld $20,%ymm4,%ymm3 7373 vpslld $12,%ymm4,%ymm4 7374 vpxor %ymm3,%ymm4,%ymm4 7375 vpaddd %ymm4,%ymm0,%ymm0 7376 vpxor %ymm0,%ymm12,%ymm12 7377 vpshufb L$rol8(%rip),%ymm12,%ymm12 7378 vpaddd %ymm12,%ymm8,%ymm8 7379 vpxor 
%ymm8,%ymm4,%ymm4 7380 vpslld $7,%ymm4,%ymm3 7381 vpsrld $25,%ymm4,%ymm4 7382 vpxor %ymm3,%ymm4,%ymm4 7383 vpalignr $4,%ymm12,%ymm12,%ymm12 7384 vpalignr $8,%ymm8,%ymm8,%ymm8 7385 vpalignr $12,%ymm4,%ymm4,%ymm4 7386 addq 0+16(%rdi),%r10 7387 adcq 8+16(%rdi),%r11 7388 adcq $1,%r12 7389 movq 0+0+0(%rbp),%rdx 7390 movq %rdx,%r15 7391 mulxq %r10,%r13,%r14 7392 mulxq %r11,%rax,%rdx 7393 imulq %r12,%r15 7394 addq %rax,%r14 7395 adcq %rdx,%r15 7396 movq 8+0+0(%rbp),%rdx 7397 mulxq %r10,%r10,%rax 7398 addq %r10,%r14 7399 mulxq %r11,%r11,%r9 7400 adcq %r11,%r15 7401 adcq $0,%r9 7402 imulq %r12,%rdx 7403 addq %rax,%r15 7404 adcq %rdx,%r9 7405 movq %r13,%r10 7406 movq %r14,%r11 7407 movq %r15,%r12 7408 andq $3,%r12 7409 movq %r15,%r13 7410 andq $-4,%r13 7411 movq %r9,%r14 7412 shrdq $2,%r9,%r15 7413 shrq $2,%r9 7414 addq %r13,%r15 7415 adcq %r14,%r9 7416 addq %r15,%r10 7417 adcq %r9,%r11 7418 adcq $0,%r12 7419 7420 leaq 32(%rdi),%rdi 7421 decq %rcx 7422 jg L$seal_avx2_tail_128_rounds_and_3xhash 7423 decq %r8 7424 jge L$seal_avx2_tail_128_rounds_and_2xhash 7425 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7426 vpaddd 0+64(%rbp),%ymm4,%ymm4 7427 vpaddd 0+96(%rbp),%ymm8,%ymm8 7428 vpaddd 0+160(%rbp),%ymm12,%ymm12 7429 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7430 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7431 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7432 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7433 vmovdqa %ymm3,%ymm8 7434 7435 jmp L$seal_avx2_short_loop 7436 7437L$seal_avx2_tail_256: 7438 vmovdqa L$chacha20_consts(%rip),%ymm0 7439 vmovdqa 0+64(%rbp),%ymm4 7440 vmovdqa 0+96(%rbp),%ymm8 7441 vmovdqa %ymm0,%ymm1 7442 vmovdqa %ymm4,%ymm5 7443 vmovdqa %ymm8,%ymm9 7444 vmovdqa L$avx2_inc(%rip),%ymm12 7445 vpaddd 0+160(%rbp),%ymm12,%ymm13 7446 vpaddd %ymm13,%ymm12,%ymm12 7447 vmovdqa %ymm12,0+160(%rbp) 7448 vmovdqa %ymm13,0+192(%rbp) 7449 7450L$seal_avx2_tail_256_rounds_and_3xhash: 7451 addq 0+0(%rdi),%r10 7452 adcq 8+0(%rdi),%r11 7453 adcq $1,%r12 7454 movq 0+0+0(%rbp),%rax 7455 movq %rax,%r15 7456 mulq %r10 7457 movq %rax,%r13 7458 movq %rdx,%r14 7459 movq 0+0+0(%rbp),%rax 7460 mulq %r11 7461 imulq %r12,%r15 7462 addq %rax,%r14 7463 adcq %rdx,%r15 7464 movq 8+0+0(%rbp),%rax 7465 movq %rax,%r9 7466 mulq %r10 7467 addq %rax,%r14 7468 adcq $0,%rdx 7469 movq %rdx,%r10 7470 movq 8+0+0(%rbp),%rax 7471 mulq %r11 7472 addq %rax,%r15 7473 adcq $0,%rdx 7474 imulq %r12,%r9 7475 addq %r10,%r15 7476 adcq %rdx,%r9 7477 movq %r13,%r10 7478 movq %r14,%r11 7479 movq %r15,%r12 7480 andq $3,%r12 7481 movq %r15,%r13 7482 andq $-4,%r13 7483 movq %r9,%r14 7484 shrdq $2,%r9,%r15 7485 shrq $2,%r9 7486 addq %r13,%r15 7487 adcq %r14,%r9 7488 addq %r15,%r10 7489 adcq %r9,%r11 7490 adcq $0,%r12 7491 7492 leaq 16(%rdi),%rdi 7493L$seal_avx2_tail_256_rounds_and_2xhash: 7494 vpaddd %ymm4,%ymm0,%ymm0 7495 vpxor %ymm0,%ymm12,%ymm12 7496 vpshufb L$rol16(%rip),%ymm12,%ymm12 7497 vpaddd %ymm12,%ymm8,%ymm8 7498 vpxor %ymm8,%ymm4,%ymm4 7499 vpsrld $20,%ymm4,%ymm3 7500 vpslld $12,%ymm4,%ymm4 7501 vpxor %ymm3,%ymm4,%ymm4 7502 vpaddd %ymm4,%ymm0,%ymm0 7503 vpxor %ymm0,%ymm12,%ymm12 7504 vpshufb L$rol8(%rip),%ymm12,%ymm12 7505 vpaddd %ymm12,%ymm8,%ymm8 7506 vpxor %ymm8,%ymm4,%ymm4 7507 vpslld $7,%ymm4,%ymm3 7508 vpsrld $25,%ymm4,%ymm4 7509 vpxor %ymm3,%ymm4,%ymm4 7510 vpalignr $12,%ymm12,%ymm12,%ymm12 7511 vpalignr $8,%ymm8,%ymm8,%ymm8 7512 vpalignr $4,%ymm4,%ymm4,%ymm4 7513 vpaddd %ymm5,%ymm1,%ymm1 7514 vpxor %ymm1,%ymm13,%ymm13 7515 vpshufb L$rol16(%rip),%ymm13,%ymm13 7516 vpaddd %ymm13,%ymm9,%ymm9 7517 vpxor %ymm9,%ymm5,%ymm5 7518 vpsrld $20,%ymm5,%ymm3 7519 vpslld 
$12,%ymm5,%ymm5 7520 vpxor %ymm3,%ymm5,%ymm5 7521 vpaddd %ymm5,%ymm1,%ymm1 7522 vpxor %ymm1,%ymm13,%ymm13 7523 vpshufb L$rol8(%rip),%ymm13,%ymm13 7524 vpaddd %ymm13,%ymm9,%ymm9 7525 vpxor %ymm9,%ymm5,%ymm5 7526 vpslld $7,%ymm5,%ymm3 7527 vpsrld $25,%ymm5,%ymm5 7528 vpxor %ymm3,%ymm5,%ymm5 7529 vpalignr $12,%ymm13,%ymm13,%ymm13 7530 vpalignr $8,%ymm9,%ymm9,%ymm9 7531 vpalignr $4,%ymm5,%ymm5,%ymm5 7532 addq 0+0(%rdi),%r10 7533 adcq 8+0(%rdi),%r11 7534 adcq $1,%r12 7535 movq 0+0+0(%rbp),%rax 7536 movq %rax,%r15 7537 mulq %r10 7538 movq %rax,%r13 7539 movq %rdx,%r14 7540 movq 0+0+0(%rbp),%rax 7541 mulq %r11 7542 imulq %r12,%r15 7543 addq %rax,%r14 7544 adcq %rdx,%r15 7545 movq 8+0+0(%rbp),%rax 7546 movq %rax,%r9 7547 mulq %r10 7548 addq %rax,%r14 7549 adcq $0,%rdx 7550 movq %rdx,%r10 7551 movq 8+0+0(%rbp),%rax 7552 mulq %r11 7553 addq %rax,%r15 7554 adcq $0,%rdx 7555 imulq %r12,%r9 7556 addq %r10,%r15 7557 adcq %rdx,%r9 7558 movq %r13,%r10 7559 movq %r14,%r11 7560 movq %r15,%r12 7561 andq $3,%r12 7562 movq %r15,%r13 7563 andq $-4,%r13 7564 movq %r9,%r14 7565 shrdq $2,%r9,%r15 7566 shrq $2,%r9 7567 addq %r13,%r15 7568 adcq %r14,%r9 7569 addq %r15,%r10 7570 adcq %r9,%r11 7571 adcq $0,%r12 7572 vpaddd %ymm4,%ymm0,%ymm0 7573 vpxor %ymm0,%ymm12,%ymm12 7574 vpshufb L$rol16(%rip),%ymm12,%ymm12 7575 vpaddd %ymm12,%ymm8,%ymm8 7576 vpxor %ymm8,%ymm4,%ymm4 7577 vpsrld $20,%ymm4,%ymm3 7578 vpslld $12,%ymm4,%ymm4 7579 vpxor %ymm3,%ymm4,%ymm4 7580 vpaddd %ymm4,%ymm0,%ymm0 7581 vpxor %ymm0,%ymm12,%ymm12 7582 vpshufb L$rol8(%rip),%ymm12,%ymm12 7583 vpaddd %ymm12,%ymm8,%ymm8 7584 vpxor %ymm8,%ymm4,%ymm4 7585 vpslld $7,%ymm4,%ymm3 7586 vpsrld $25,%ymm4,%ymm4 7587 vpxor %ymm3,%ymm4,%ymm4 7588 vpalignr $4,%ymm12,%ymm12,%ymm12 7589 vpalignr $8,%ymm8,%ymm8,%ymm8 7590 vpalignr $12,%ymm4,%ymm4,%ymm4 7591 vpaddd %ymm5,%ymm1,%ymm1 7592 vpxor %ymm1,%ymm13,%ymm13 7593 vpshufb L$rol16(%rip),%ymm13,%ymm13 7594 vpaddd %ymm13,%ymm9,%ymm9 7595 vpxor %ymm9,%ymm5,%ymm5 7596 vpsrld $20,%ymm5,%ymm3 7597 vpslld $12,%ymm5,%ymm5 7598 vpxor %ymm3,%ymm5,%ymm5 7599 vpaddd %ymm5,%ymm1,%ymm1 7600 vpxor %ymm1,%ymm13,%ymm13 7601 vpshufb L$rol8(%rip),%ymm13,%ymm13 7602 vpaddd %ymm13,%ymm9,%ymm9 7603 vpxor %ymm9,%ymm5,%ymm5 7604 vpslld $7,%ymm5,%ymm3 7605 vpsrld $25,%ymm5,%ymm5 7606 vpxor %ymm3,%ymm5,%ymm5 7607 vpalignr $4,%ymm13,%ymm13,%ymm13 7608 vpalignr $8,%ymm9,%ymm9,%ymm9 7609 vpalignr $12,%ymm5,%ymm5,%ymm5 7610 addq 0+16(%rdi),%r10 7611 adcq 8+16(%rdi),%r11 7612 adcq $1,%r12 7613 movq 0+0+0(%rbp),%rax 7614 movq %rax,%r15 7615 mulq %r10 7616 movq %rax,%r13 7617 movq %rdx,%r14 7618 movq 0+0+0(%rbp),%rax 7619 mulq %r11 7620 imulq %r12,%r15 7621 addq %rax,%r14 7622 adcq %rdx,%r15 7623 movq 8+0+0(%rbp),%rax 7624 movq %rax,%r9 7625 mulq %r10 7626 addq %rax,%r14 7627 adcq $0,%rdx 7628 movq %rdx,%r10 7629 movq 8+0+0(%rbp),%rax 7630 mulq %r11 7631 addq %rax,%r15 7632 adcq $0,%rdx 7633 imulq %r12,%r9 7634 addq %r10,%r15 7635 adcq %rdx,%r9 7636 movq %r13,%r10 7637 movq %r14,%r11 7638 movq %r15,%r12 7639 andq $3,%r12 7640 movq %r15,%r13 7641 andq $-4,%r13 7642 movq %r9,%r14 7643 shrdq $2,%r9,%r15 7644 shrq $2,%r9 7645 addq %r13,%r15 7646 adcq %r14,%r9 7647 addq %r15,%r10 7648 adcq %r9,%r11 7649 adcq $0,%r12 7650 7651 leaq 32(%rdi),%rdi 7652 decq %rcx 7653 jg L$seal_avx2_tail_256_rounds_and_3xhash 7654 decq %r8 7655 jge L$seal_avx2_tail_256_rounds_and_2xhash 7656 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 7657 vpaddd 0+64(%rbp),%ymm5,%ymm5 7658 vpaddd 0+96(%rbp),%ymm9,%ymm9 7659 vpaddd 0+192(%rbp),%ymm13,%ymm13 7660 vpaddd 
L$chacha20_consts(%rip),%ymm0,%ymm0 7661 vpaddd 0+64(%rbp),%ymm4,%ymm4 7662 vpaddd 0+96(%rbp),%ymm8,%ymm8 7663 vpaddd 0+160(%rbp),%ymm12,%ymm12 7664 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7665 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7666 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7667 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7668 vpxor 0+0(%rsi),%ymm3,%ymm3 7669 vpxor 32+0(%rsi),%ymm1,%ymm1 7670 vpxor 64+0(%rsi),%ymm5,%ymm5 7671 vpxor 96+0(%rsi),%ymm9,%ymm9 7672 vmovdqu %ymm3,0+0(%rdi) 7673 vmovdqu %ymm1,32+0(%rdi) 7674 vmovdqu %ymm5,64+0(%rdi) 7675 vmovdqu %ymm9,96+0(%rdi) 7676 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7677 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7678 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7679 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7680 vmovdqa %ymm3,%ymm8 7681 7682 movq $128,%rcx 7683 leaq 128(%rsi),%rsi 7684 subq $128,%rbx 7685 jmp L$seal_avx2_short_hash_remainder 7686 7687L$seal_avx2_tail_384: 7688 vmovdqa L$chacha20_consts(%rip),%ymm0 7689 vmovdqa 0+64(%rbp),%ymm4 7690 vmovdqa 0+96(%rbp),%ymm8 7691 vmovdqa %ymm0,%ymm1 7692 vmovdqa %ymm4,%ymm5 7693 vmovdqa %ymm8,%ymm9 7694 vmovdqa %ymm0,%ymm2 7695 vmovdqa %ymm4,%ymm6 7696 vmovdqa %ymm8,%ymm10 7697 vmovdqa L$avx2_inc(%rip),%ymm12 7698 vpaddd 0+160(%rbp),%ymm12,%ymm14 7699 vpaddd %ymm14,%ymm12,%ymm13 7700 vpaddd %ymm13,%ymm12,%ymm12 7701 vmovdqa %ymm12,0+160(%rbp) 7702 vmovdqa %ymm13,0+192(%rbp) 7703 vmovdqa %ymm14,0+224(%rbp) 7704 7705L$seal_avx2_tail_384_rounds_and_3xhash: 7706 addq 0+0(%rdi),%r10 7707 adcq 8+0(%rdi),%r11 7708 adcq $1,%r12 7709 movq 0+0+0(%rbp),%rax 7710 movq %rax,%r15 7711 mulq %r10 7712 movq %rax,%r13 7713 movq %rdx,%r14 7714 movq 0+0+0(%rbp),%rax 7715 mulq %r11 7716 imulq %r12,%r15 7717 addq %rax,%r14 7718 adcq %rdx,%r15 7719 movq 8+0+0(%rbp),%rax 7720 movq %rax,%r9 7721 mulq %r10 7722 addq %rax,%r14 7723 adcq $0,%rdx 7724 movq %rdx,%r10 7725 movq 8+0+0(%rbp),%rax 7726 mulq %r11 7727 addq %rax,%r15 7728 adcq $0,%rdx 7729 imulq %r12,%r9 7730 addq %r10,%r15 7731 adcq %rdx,%r9 7732 movq %r13,%r10 7733 movq %r14,%r11 7734 movq %r15,%r12 7735 andq $3,%r12 7736 movq %r15,%r13 7737 andq $-4,%r13 7738 movq %r9,%r14 7739 shrdq $2,%r9,%r15 7740 shrq $2,%r9 7741 addq %r13,%r15 7742 adcq %r14,%r9 7743 addq %r15,%r10 7744 adcq %r9,%r11 7745 adcq $0,%r12 7746 7747 leaq 16(%rdi),%rdi 7748L$seal_avx2_tail_384_rounds_and_2xhash: 7749 vpaddd %ymm4,%ymm0,%ymm0 7750 vpxor %ymm0,%ymm12,%ymm12 7751 vpshufb L$rol16(%rip),%ymm12,%ymm12 7752 vpaddd %ymm12,%ymm8,%ymm8 7753 vpxor %ymm8,%ymm4,%ymm4 7754 vpsrld $20,%ymm4,%ymm3 7755 vpslld $12,%ymm4,%ymm4 7756 vpxor %ymm3,%ymm4,%ymm4 7757 vpaddd %ymm4,%ymm0,%ymm0 7758 vpxor %ymm0,%ymm12,%ymm12 7759 vpshufb L$rol8(%rip),%ymm12,%ymm12 7760 vpaddd %ymm12,%ymm8,%ymm8 7761 vpxor %ymm8,%ymm4,%ymm4 7762 vpslld $7,%ymm4,%ymm3 7763 vpsrld $25,%ymm4,%ymm4 7764 vpxor %ymm3,%ymm4,%ymm4 7765 vpalignr $12,%ymm12,%ymm12,%ymm12 7766 vpalignr $8,%ymm8,%ymm8,%ymm8 7767 vpalignr $4,%ymm4,%ymm4,%ymm4 7768 vpaddd %ymm5,%ymm1,%ymm1 7769 vpxor %ymm1,%ymm13,%ymm13 7770 vpshufb L$rol16(%rip),%ymm13,%ymm13 7771 vpaddd %ymm13,%ymm9,%ymm9 7772 vpxor %ymm9,%ymm5,%ymm5 7773 vpsrld $20,%ymm5,%ymm3 7774 vpslld $12,%ymm5,%ymm5 7775 vpxor %ymm3,%ymm5,%ymm5 7776 vpaddd %ymm5,%ymm1,%ymm1 7777 vpxor %ymm1,%ymm13,%ymm13 7778 vpshufb L$rol8(%rip),%ymm13,%ymm13 7779 vpaddd %ymm13,%ymm9,%ymm9 7780 vpxor %ymm9,%ymm5,%ymm5 7781 vpslld $7,%ymm5,%ymm3 7782 vpsrld $25,%ymm5,%ymm5 7783 vpxor %ymm3,%ymm5,%ymm5 7784 vpalignr $12,%ymm13,%ymm13,%ymm13 7785 vpalignr $8,%ymm9,%ymm9,%ymm9 7786 vpalignr $4,%ymm5,%ymm5,%ymm5 7787 addq 0+0(%rdi),%r10 7788 adcq 
8+0(%rdi),%r11 7789 adcq $1,%r12 7790 movq 0+0+0(%rbp),%rax 7791 movq %rax,%r15 7792 mulq %r10 7793 movq %rax,%r13 7794 movq %rdx,%r14 7795 movq 0+0+0(%rbp),%rax 7796 mulq %r11 7797 imulq %r12,%r15 7798 addq %rax,%r14 7799 adcq %rdx,%r15 7800 movq 8+0+0(%rbp),%rax 7801 movq %rax,%r9 7802 mulq %r10 7803 addq %rax,%r14 7804 adcq $0,%rdx 7805 movq %rdx,%r10 7806 movq 8+0+0(%rbp),%rax 7807 mulq %r11 7808 addq %rax,%r15 7809 adcq $0,%rdx 7810 imulq %r12,%r9 7811 addq %r10,%r15 7812 adcq %rdx,%r9 7813 movq %r13,%r10 7814 movq %r14,%r11 7815 movq %r15,%r12 7816 andq $3,%r12 7817 movq %r15,%r13 7818 andq $-4,%r13 7819 movq %r9,%r14 7820 shrdq $2,%r9,%r15 7821 shrq $2,%r9 7822 addq %r13,%r15 7823 adcq %r14,%r9 7824 addq %r15,%r10 7825 adcq %r9,%r11 7826 adcq $0,%r12 7827 vpaddd %ymm6,%ymm2,%ymm2 7828 vpxor %ymm2,%ymm14,%ymm14 7829 vpshufb L$rol16(%rip),%ymm14,%ymm14 7830 vpaddd %ymm14,%ymm10,%ymm10 7831 vpxor %ymm10,%ymm6,%ymm6 7832 vpsrld $20,%ymm6,%ymm3 7833 vpslld $12,%ymm6,%ymm6 7834 vpxor %ymm3,%ymm6,%ymm6 7835 vpaddd %ymm6,%ymm2,%ymm2 7836 vpxor %ymm2,%ymm14,%ymm14 7837 vpshufb L$rol8(%rip),%ymm14,%ymm14 7838 vpaddd %ymm14,%ymm10,%ymm10 7839 vpxor %ymm10,%ymm6,%ymm6 7840 vpslld $7,%ymm6,%ymm3 7841 vpsrld $25,%ymm6,%ymm6 7842 vpxor %ymm3,%ymm6,%ymm6 7843 vpalignr $12,%ymm14,%ymm14,%ymm14 7844 vpalignr $8,%ymm10,%ymm10,%ymm10 7845 vpalignr $4,%ymm6,%ymm6,%ymm6 7846 vpaddd %ymm4,%ymm0,%ymm0 7847 vpxor %ymm0,%ymm12,%ymm12 7848 vpshufb L$rol16(%rip),%ymm12,%ymm12 7849 vpaddd %ymm12,%ymm8,%ymm8 7850 vpxor %ymm8,%ymm4,%ymm4 7851 vpsrld $20,%ymm4,%ymm3 7852 vpslld $12,%ymm4,%ymm4 7853 vpxor %ymm3,%ymm4,%ymm4 7854 vpaddd %ymm4,%ymm0,%ymm0 7855 vpxor %ymm0,%ymm12,%ymm12 7856 vpshufb L$rol8(%rip),%ymm12,%ymm12 7857 vpaddd %ymm12,%ymm8,%ymm8 7858 vpxor %ymm8,%ymm4,%ymm4 7859 vpslld $7,%ymm4,%ymm3 7860 vpsrld $25,%ymm4,%ymm4 7861 vpxor %ymm3,%ymm4,%ymm4 7862 vpalignr $4,%ymm12,%ymm12,%ymm12 7863 vpalignr $8,%ymm8,%ymm8,%ymm8 7864 vpalignr $12,%ymm4,%ymm4,%ymm4 7865 addq 0+16(%rdi),%r10 7866 adcq 8+16(%rdi),%r11 7867 adcq $1,%r12 7868 movq 0+0+0(%rbp),%rax 7869 movq %rax,%r15 7870 mulq %r10 7871 movq %rax,%r13 7872 movq %rdx,%r14 7873 movq 0+0+0(%rbp),%rax 7874 mulq %r11 7875 imulq %r12,%r15 7876 addq %rax,%r14 7877 adcq %rdx,%r15 7878 movq 8+0+0(%rbp),%rax 7879 movq %rax,%r9 7880 mulq %r10 7881 addq %rax,%r14 7882 adcq $0,%rdx 7883 movq %rdx,%r10 7884 movq 8+0+0(%rbp),%rax 7885 mulq %r11 7886 addq %rax,%r15 7887 adcq $0,%rdx 7888 imulq %r12,%r9 7889 addq %r10,%r15 7890 adcq %rdx,%r9 7891 movq %r13,%r10 7892 movq %r14,%r11 7893 movq %r15,%r12 7894 andq $3,%r12 7895 movq %r15,%r13 7896 andq $-4,%r13 7897 movq %r9,%r14 7898 shrdq $2,%r9,%r15 7899 shrq $2,%r9 7900 addq %r13,%r15 7901 adcq %r14,%r9 7902 addq %r15,%r10 7903 adcq %r9,%r11 7904 adcq $0,%r12 7905 vpaddd %ymm5,%ymm1,%ymm1 7906 vpxor %ymm1,%ymm13,%ymm13 7907 vpshufb L$rol16(%rip),%ymm13,%ymm13 7908 vpaddd %ymm13,%ymm9,%ymm9 7909 vpxor %ymm9,%ymm5,%ymm5 7910 vpsrld $20,%ymm5,%ymm3 7911 vpslld $12,%ymm5,%ymm5 7912 vpxor %ymm3,%ymm5,%ymm5 7913 vpaddd %ymm5,%ymm1,%ymm1 7914 vpxor %ymm1,%ymm13,%ymm13 7915 vpshufb L$rol8(%rip),%ymm13,%ymm13 7916 vpaddd %ymm13,%ymm9,%ymm9 7917 vpxor %ymm9,%ymm5,%ymm5 7918 vpslld $7,%ymm5,%ymm3 7919 vpsrld $25,%ymm5,%ymm5 7920 vpxor %ymm3,%ymm5,%ymm5 7921 vpalignr $4,%ymm13,%ymm13,%ymm13 7922 vpalignr $8,%ymm9,%ymm9,%ymm9 7923 vpalignr $12,%ymm5,%ymm5,%ymm5 7924 vpaddd %ymm6,%ymm2,%ymm2 7925 vpxor %ymm2,%ymm14,%ymm14 7926 vpshufb L$rol16(%rip),%ymm14,%ymm14 7927 vpaddd %ymm14,%ymm10,%ymm10 7928 vpxor %ymm10,%ymm6,%ymm6 7929 
vpsrld $20,%ymm6,%ymm3 7930 vpslld $12,%ymm6,%ymm6 7931 vpxor %ymm3,%ymm6,%ymm6 7932 vpaddd %ymm6,%ymm2,%ymm2 7933 vpxor %ymm2,%ymm14,%ymm14 7934 vpshufb L$rol8(%rip),%ymm14,%ymm14 7935 vpaddd %ymm14,%ymm10,%ymm10 7936 vpxor %ymm10,%ymm6,%ymm6 7937 vpslld $7,%ymm6,%ymm3 7938 vpsrld $25,%ymm6,%ymm6 7939 vpxor %ymm3,%ymm6,%ymm6 7940 vpalignr $4,%ymm14,%ymm14,%ymm14 7941 vpalignr $8,%ymm10,%ymm10,%ymm10 7942 vpalignr $12,%ymm6,%ymm6,%ymm6 7943 7944 leaq 32(%rdi),%rdi 7945 decq %rcx 7946 jg L$seal_avx2_tail_384_rounds_and_3xhash 7947 decq %r8 7948 jge L$seal_avx2_tail_384_rounds_and_2xhash 7949 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 7950 vpaddd 0+64(%rbp),%ymm6,%ymm6 7951 vpaddd 0+96(%rbp),%ymm10,%ymm10 7952 vpaddd 0+224(%rbp),%ymm14,%ymm14 7953 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 7954 vpaddd 0+64(%rbp),%ymm5,%ymm5 7955 vpaddd 0+96(%rbp),%ymm9,%ymm9 7956 vpaddd 0+192(%rbp),%ymm13,%ymm13 7957 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7958 vpaddd 0+64(%rbp),%ymm4,%ymm4 7959 vpaddd 0+96(%rbp),%ymm8,%ymm8 7960 vpaddd 0+160(%rbp),%ymm12,%ymm12 7961 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7962 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7963 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7964 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7965 vpxor 0+0(%rsi),%ymm3,%ymm3 7966 vpxor 32+0(%rsi),%ymm2,%ymm2 7967 vpxor 64+0(%rsi),%ymm6,%ymm6 7968 vpxor 96+0(%rsi),%ymm10,%ymm10 7969 vmovdqu %ymm3,0+0(%rdi) 7970 vmovdqu %ymm2,32+0(%rdi) 7971 vmovdqu %ymm6,64+0(%rdi) 7972 vmovdqu %ymm10,96+0(%rdi) 7973 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7974 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7975 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7976 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7977 vpxor 0+128(%rsi),%ymm3,%ymm3 7978 vpxor 32+128(%rsi),%ymm1,%ymm1 7979 vpxor 64+128(%rsi),%ymm5,%ymm5 7980 vpxor 96+128(%rsi),%ymm9,%ymm9 7981 vmovdqu %ymm3,0+128(%rdi) 7982 vmovdqu %ymm1,32+128(%rdi) 7983 vmovdqu %ymm5,64+128(%rdi) 7984 vmovdqu %ymm9,96+128(%rdi) 7985 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7986 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7987 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7988 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7989 vmovdqa %ymm3,%ymm8 7990 7991 movq $256,%rcx 7992 leaq 256(%rsi),%rsi 7993 subq $256,%rbx 7994 jmp L$seal_avx2_short_hash_remainder 7995 7996L$seal_avx2_tail_512: 7997 vmovdqa L$chacha20_consts(%rip),%ymm0 7998 vmovdqa 0+64(%rbp),%ymm4 7999 vmovdqa 0+96(%rbp),%ymm8 8000 vmovdqa %ymm0,%ymm1 8001 vmovdqa %ymm4,%ymm5 8002 vmovdqa %ymm8,%ymm9 8003 vmovdqa %ymm0,%ymm2 8004 vmovdqa %ymm4,%ymm6 8005 vmovdqa %ymm8,%ymm10 8006 vmovdqa %ymm0,%ymm3 8007 vmovdqa %ymm4,%ymm7 8008 vmovdqa %ymm8,%ymm11 8009 vmovdqa L$avx2_inc(%rip),%ymm12 8010 vpaddd 0+160(%rbp),%ymm12,%ymm15 8011 vpaddd %ymm15,%ymm12,%ymm14 8012 vpaddd %ymm14,%ymm12,%ymm13 8013 vpaddd %ymm13,%ymm12,%ymm12 8014 vmovdqa %ymm15,0+256(%rbp) 8015 vmovdqa %ymm14,0+224(%rbp) 8016 vmovdqa %ymm13,0+192(%rbp) 8017 vmovdqa %ymm12,0+160(%rbp) 8018 8019L$seal_avx2_tail_512_rounds_and_3xhash: 8020 addq 0+0(%rdi),%r10 8021 adcq 8+0(%rdi),%r11 8022 adcq $1,%r12 8023 movq 0+0+0(%rbp),%rdx 8024 movq %rdx,%r15 8025 mulxq %r10,%r13,%r14 8026 mulxq %r11,%rax,%rdx 8027 imulq %r12,%r15 8028 addq %rax,%r14 8029 adcq %rdx,%r15 8030 movq 8+0+0(%rbp),%rdx 8031 mulxq %r10,%r10,%rax 8032 addq %r10,%r14 8033 mulxq %r11,%r11,%r9 8034 adcq %r11,%r15 8035 adcq $0,%r9 8036 imulq %r12,%rdx 8037 addq %rax,%r15 8038 adcq %rdx,%r9 8039 movq %r13,%r10 8040 movq %r14,%r11 8041 movq %r15,%r12 8042 andq $3,%r12 8043 movq %r15,%r13 8044 andq $-4,%r13 8045 movq %r9,%r14 8046 shrdq $2,%r9,%r15 8047 shrq $2,%r9 8048 addq %r13,%r15 8049 
adcq %r14,%r9 8050 addq %r15,%r10 8051 adcq %r9,%r11 8052 adcq $0,%r12 8053 8054 leaq 16(%rdi),%rdi 8055L$seal_avx2_tail_512_rounds_and_2xhash: 8056 vmovdqa %ymm8,0+128(%rbp) 8057 vmovdqa L$rol16(%rip),%ymm8 8058 vpaddd %ymm7,%ymm3,%ymm3 8059 vpaddd %ymm6,%ymm2,%ymm2 8060 vpaddd %ymm5,%ymm1,%ymm1 8061 vpaddd %ymm4,%ymm0,%ymm0 8062 vpxor %ymm3,%ymm15,%ymm15 8063 vpxor %ymm2,%ymm14,%ymm14 8064 vpxor %ymm1,%ymm13,%ymm13 8065 vpxor %ymm0,%ymm12,%ymm12 8066 vpshufb %ymm8,%ymm15,%ymm15 8067 vpshufb %ymm8,%ymm14,%ymm14 8068 vpshufb %ymm8,%ymm13,%ymm13 8069 vpshufb %ymm8,%ymm12,%ymm12 8070 vpaddd %ymm15,%ymm11,%ymm11 8071 vpaddd %ymm14,%ymm10,%ymm10 8072 vpaddd %ymm13,%ymm9,%ymm9 8073 vpaddd 0+128(%rbp),%ymm12,%ymm8 8074 vpxor %ymm11,%ymm7,%ymm7 8075 vpxor %ymm10,%ymm6,%ymm6 8076 addq 0+0(%rdi),%r10 8077 adcq 8+0(%rdi),%r11 8078 adcq $1,%r12 8079 vpxor %ymm9,%ymm5,%ymm5 8080 vpxor %ymm8,%ymm4,%ymm4 8081 vmovdqa %ymm8,0+128(%rbp) 8082 vpsrld $20,%ymm7,%ymm8 8083 vpslld $32-20,%ymm7,%ymm7 8084 vpxor %ymm8,%ymm7,%ymm7 8085 vpsrld $20,%ymm6,%ymm8 8086 vpslld $32-20,%ymm6,%ymm6 8087 vpxor %ymm8,%ymm6,%ymm6 8088 vpsrld $20,%ymm5,%ymm8 8089 vpslld $32-20,%ymm5,%ymm5 8090 vpxor %ymm8,%ymm5,%ymm5 8091 vpsrld $20,%ymm4,%ymm8 8092 vpslld $32-20,%ymm4,%ymm4 8093 vpxor %ymm8,%ymm4,%ymm4 8094 vmovdqa L$rol8(%rip),%ymm8 8095 vpaddd %ymm7,%ymm3,%ymm3 8096 vpaddd %ymm6,%ymm2,%ymm2 8097 vpaddd %ymm5,%ymm1,%ymm1 8098 vpaddd %ymm4,%ymm0,%ymm0 8099 movq 0+0+0(%rbp),%rdx 8100 movq %rdx,%r15 8101 mulxq %r10,%r13,%r14 8102 mulxq %r11,%rax,%rdx 8103 imulq %r12,%r15 8104 addq %rax,%r14 8105 adcq %rdx,%r15 8106 vpxor %ymm3,%ymm15,%ymm15 8107 vpxor %ymm2,%ymm14,%ymm14 8108 vpxor %ymm1,%ymm13,%ymm13 8109 vpxor %ymm0,%ymm12,%ymm12 8110 vpshufb %ymm8,%ymm15,%ymm15 8111 vpshufb %ymm8,%ymm14,%ymm14 8112 vpshufb %ymm8,%ymm13,%ymm13 8113 vpshufb %ymm8,%ymm12,%ymm12 8114 vpaddd %ymm15,%ymm11,%ymm11 8115 vpaddd %ymm14,%ymm10,%ymm10 8116 vpaddd %ymm13,%ymm9,%ymm9 8117 vpaddd 0+128(%rbp),%ymm12,%ymm8 8118 vpxor %ymm11,%ymm7,%ymm7 8119 vpxor %ymm10,%ymm6,%ymm6 8120 vpxor %ymm9,%ymm5,%ymm5 8121 vpxor %ymm8,%ymm4,%ymm4 8122 vmovdqa %ymm8,0+128(%rbp) 8123 vpsrld $25,%ymm7,%ymm8 8124 vpslld $32-25,%ymm7,%ymm7 8125 vpxor %ymm8,%ymm7,%ymm7 8126 movq 8+0+0(%rbp),%rdx 8127 mulxq %r10,%r10,%rax 8128 addq %r10,%r14 8129 mulxq %r11,%r11,%r9 8130 adcq %r11,%r15 8131 adcq $0,%r9 8132 imulq %r12,%rdx 8133 vpsrld $25,%ymm6,%ymm8 8134 vpslld $32-25,%ymm6,%ymm6 8135 vpxor %ymm8,%ymm6,%ymm6 8136 vpsrld $25,%ymm5,%ymm8 8137 vpslld $32-25,%ymm5,%ymm5 8138 vpxor %ymm8,%ymm5,%ymm5 8139 vpsrld $25,%ymm4,%ymm8 8140 vpslld $32-25,%ymm4,%ymm4 8141 vpxor %ymm8,%ymm4,%ymm4 8142 vmovdqa 0+128(%rbp),%ymm8 8143 vpalignr $4,%ymm7,%ymm7,%ymm7 8144 vpalignr $8,%ymm11,%ymm11,%ymm11 8145 vpalignr $12,%ymm15,%ymm15,%ymm15 8146 vpalignr $4,%ymm6,%ymm6,%ymm6 8147 vpalignr $8,%ymm10,%ymm10,%ymm10 8148 vpalignr $12,%ymm14,%ymm14,%ymm14 8149 vpalignr $4,%ymm5,%ymm5,%ymm5 8150 vpalignr $8,%ymm9,%ymm9,%ymm9 8151 vpalignr $12,%ymm13,%ymm13,%ymm13 8152 vpalignr $4,%ymm4,%ymm4,%ymm4 8153 addq %rax,%r15 8154 adcq %rdx,%r9 8155 vpalignr $8,%ymm8,%ymm8,%ymm8 8156 vpalignr $12,%ymm12,%ymm12,%ymm12 8157 vmovdqa %ymm8,0+128(%rbp) 8158 vmovdqa L$rol16(%rip),%ymm8 8159 vpaddd %ymm7,%ymm3,%ymm3 8160 vpaddd %ymm6,%ymm2,%ymm2 8161 vpaddd %ymm5,%ymm1,%ymm1 8162 vpaddd %ymm4,%ymm0,%ymm0 8163 vpxor %ymm3,%ymm15,%ymm15 8164 vpxor %ymm2,%ymm14,%ymm14 8165 vpxor %ymm1,%ymm13,%ymm13 8166 vpxor %ymm0,%ymm12,%ymm12 8167 vpshufb %ymm8,%ymm15,%ymm15 8168 vpshufb %ymm8,%ymm14,%ymm14 8169 vpshufb 
%ymm8,%ymm13,%ymm13 8170 vpshufb %ymm8,%ymm12,%ymm12 8171 vpaddd %ymm15,%ymm11,%ymm11 8172 vpaddd %ymm14,%ymm10,%ymm10 8173 vpaddd %ymm13,%ymm9,%ymm9 8174 vpaddd 0+128(%rbp),%ymm12,%ymm8 8175 movq %r13,%r10 8176 movq %r14,%r11 8177 movq %r15,%r12 8178 andq $3,%r12 8179 movq %r15,%r13 8180 andq $-4,%r13 8181 movq %r9,%r14 8182 shrdq $2,%r9,%r15 8183 shrq $2,%r9 8184 addq %r13,%r15 8185 adcq %r14,%r9 8186 addq %r15,%r10 8187 adcq %r9,%r11 8188 adcq $0,%r12 8189 vpxor %ymm11,%ymm7,%ymm7 8190 vpxor %ymm10,%ymm6,%ymm6 8191 vpxor %ymm9,%ymm5,%ymm5 8192 vpxor %ymm8,%ymm4,%ymm4 8193 vmovdqa %ymm8,0+128(%rbp) 8194 vpsrld $20,%ymm7,%ymm8 8195 vpslld $32-20,%ymm7,%ymm7 8196 vpxor %ymm8,%ymm7,%ymm7 8197 vpsrld $20,%ymm6,%ymm8 8198 vpslld $32-20,%ymm6,%ymm6 8199 vpxor %ymm8,%ymm6,%ymm6 8200 vpsrld $20,%ymm5,%ymm8 8201 vpslld $32-20,%ymm5,%ymm5 8202 vpxor %ymm8,%ymm5,%ymm5 8203 vpsrld $20,%ymm4,%ymm8 8204 vpslld $32-20,%ymm4,%ymm4 8205 vpxor %ymm8,%ymm4,%ymm4 8206 vmovdqa L$rol8(%rip),%ymm8 8207 vpaddd %ymm7,%ymm3,%ymm3 8208 vpaddd %ymm6,%ymm2,%ymm2 8209 addq 0+16(%rdi),%r10 8210 adcq 8+16(%rdi),%r11 8211 adcq $1,%r12 8212 vpaddd %ymm5,%ymm1,%ymm1 8213 vpaddd %ymm4,%ymm0,%ymm0 8214 vpxor %ymm3,%ymm15,%ymm15 8215 vpxor %ymm2,%ymm14,%ymm14 8216 vpxor %ymm1,%ymm13,%ymm13 8217 vpxor %ymm0,%ymm12,%ymm12 8218 vpshufb %ymm8,%ymm15,%ymm15 8219 vpshufb %ymm8,%ymm14,%ymm14 8220 vpshufb %ymm8,%ymm13,%ymm13 8221 vpshufb %ymm8,%ymm12,%ymm12 8222 vpaddd %ymm15,%ymm11,%ymm11 8223 vpaddd %ymm14,%ymm10,%ymm10 8224 vpaddd %ymm13,%ymm9,%ymm9 8225 vpaddd 0+128(%rbp),%ymm12,%ymm8 8226 vpxor %ymm11,%ymm7,%ymm7 8227 vpxor %ymm10,%ymm6,%ymm6 8228 vpxor %ymm9,%ymm5,%ymm5 8229 vpxor %ymm8,%ymm4,%ymm4 8230 vmovdqa %ymm8,0+128(%rbp) 8231 vpsrld $25,%ymm7,%ymm8 8232 movq 0+0+0(%rbp),%rdx 8233 movq %rdx,%r15 8234 mulxq %r10,%r13,%r14 8235 mulxq %r11,%rax,%rdx 8236 imulq %r12,%r15 8237 addq %rax,%r14 8238 adcq %rdx,%r15 8239 vpslld $32-25,%ymm7,%ymm7 8240 vpxor %ymm8,%ymm7,%ymm7 8241 vpsrld $25,%ymm6,%ymm8 8242 vpslld $32-25,%ymm6,%ymm6 8243 vpxor %ymm8,%ymm6,%ymm6 8244 vpsrld $25,%ymm5,%ymm8 8245 vpslld $32-25,%ymm5,%ymm5 8246 vpxor %ymm8,%ymm5,%ymm5 8247 vpsrld $25,%ymm4,%ymm8 8248 vpslld $32-25,%ymm4,%ymm4 8249 vpxor %ymm8,%ymm4,%ymm4 8250 vmovdqa 0+128(%rbp),%ymm8 8251 vpalignr $12,%ymm7,%ymm7,%ymm7 8252 vpalignr $8,%ymm11,%ymm11,%ymm11 8253 vpalignr $4,%ymm15,%ymm15,%ymm15 8254 vpalignr $12,%ymm6,%ymm6,%ymm6 8255 vpalignr $8,%ymm10,%ymm10,%ymm10 8256 vpalignr $4,%ymm14,%ymm14,%ymm14 8257 vpalignr $12,%ymm5,%ymm5,%ymm5 8258 vpalignr $8,%ymm9,%ymm9,%ymm9 8259 movq 8+0+0(%rbp),%rdx 8260 mulxq %r10,%r10,%rax 8261 addq %r10,%r14 8262 mulxq %r11,%r11,%r9 8263 adcq %r11,%r15 8264 adcq $0,%r9 8265 imulq %r12,%rdx 8266 vpalignr $4,%ymm13,%ymm13,%ymm13 8267 vpalignr $12,%ymm4,%ymm4,%ymm4 8268 vpalignr $8,%ymm8,%ymm8,%ymm8 8269 vpalignr $4,%ymm12,%ymm12,%ymm12 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 addq %rax,%r15 8287 adcq %rdx,%r9 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 movq %r13,%r10 8309 movq %r14,%r11 8310 movq %r15,%r12 8311 andq $3,%r12 8312 movq %r15,%r13 8313 andq $-4,%r13 8314 movq %r9,%r14 8315 shrdq $2,%r9,%r15 8316 shrq $2,%r9 8317 addq %r13,%r15 8318 adcq %r14,%r9 8319 addq %r15,%r10 8320 adcq %r9,%r11 8321 adcq $0,%r12 8322 8323 leaq 32(%rdi),%rdi 8324 decq %rcx 8325 jg L$seal_avx2_tail_512_rounds_and_3xhash 8326 decq %r8 8327 jge L$seal_avx2_tail_512_rounds_and_2xhash 8328 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 
8329 vpaddd 0+64(%rbp),%ymm7,%ymm7 8330 vpaddd 0+96(%rbp),%ymm11,%ymm11 8331 vpaddd 0+256(%rbp),%ymm15,%ymm15 8332 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 8333 vpaddd 0+64(%rbp),%ymm6,%ymm6 8334 vpaddd 0+96(%rbp),%ymm10,%ymm10 8335 vpaddd 0+224(%rbp),%ymm14,%ymm14 8336 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 8337 vpaddd 0+64(%rbp),%ymm5,%ymm5 8338 vpaddd 0+96(%rbp),%ymm9,%ymm9 8339 vpaddd 0+192(%rbp),%ymm13,%ymm13 8340 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 8341 vpaddd 0+64(%rbp),%ymm4,%ymm4 8342 vpaddd 0+96(%rbp),%ymm8,%ymm8 8343 vpaddd 0+160(%rbp),%ymm12,%ymm12 8344 8345 vmovdqa %ymm0,0+128(%rbp) 8346 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 8347 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 8348 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 8349 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 8350 vpxor 0+0(%rsi),%ymm0,%ymm0 8351 vpxor 32+0(%rsi),%ymm3,%ymm3 8352 vpxor 64+0(%rsi),%ymm7,%ymm7 8353 vpxor 96+0(%rsi),%ymm11,%ymm11 8354 vmovdqu %ymm0,0+0(%rdi) 8355 vmovdqu %ymm3,32+0(%rdi) 8356 vmovdqu %ymm7,64+0(%rdi) 8357 vmovdqu %ymm11,96+0(%rdi) 8358 8359 vmovdqa 0+128(%rbp),%ymm0 8360 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8361 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8362 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8363 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8364 vpxor 0+128(%rsi),%ymm3,%ymm3 8365 vpxor 32+128(%rsi),%ymm2,%ymm2 8366 vpxor 64+128(%rsi),%ymm6,%ymm6 8367 vpxor 96+128(%rsi),%ymm10,%ymm10 8368 vmovdqu %ymm3,0+128(%rdi) 8369 vmovdqu %ymm2,32+128(%rdi) 8370 vmovdqu %ymm6,64+128(%rdi) 8371 vmovdqu %ymm10,96+128(%rdi) 8372 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8373 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8374 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8375 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8376 vpxor 0+256(%rsi),%ymm3,%ymm3 8377 vpxor 32+256(%rsi),%ymm1,%ymm1 8378 vpxor 64+256(%rsi),%ymm5,%ymm5 8379 vpxor 96+256(%rsi),%ymm9,%ymm9 8380 vmovdqu %ymm3,0+256(%rdi) 8381 vmovdqu %ymm1,32+256(%rdi) 8382 vmovdqu %ymm5,64+256(%rdi) 8383 vmovdqu %ymm9,96+256(%rdi) 8384 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8385 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8386 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8387 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8388 vmovdqa %ymm3,%ymm8 8389 8390 movq $384,%rcx 8391 leaq 384(%rsi),%rsi 8392 subq $384,%rbx 8393 jmp L$seal_avx2_short_hash_remainder 8394 8395L$seal_avx2_320: 8396 vmovdqa %ymm0,%ymm1 8397 vmovdqa %ymm0,%ymm2 8398 vmovdqa %ymm4,%ymm5 8399 vmovdqa %ymm4,%ymm6 8400 vmovdqa %ymm8,%ymm9 8401 vmovdqa %ymm8,%ymm10 8402 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 8403 vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 8404 vmovdqa %ymm4,%ymm7 8405 vmovdqa %ymm8,%ymm11 8406 vmovdqa %ymm12,0+160(%rbp) 8407 vmovdqa %ymm13,0+192(%rbp) 8408 vmovdqa %ymm14,0+224(%rbp) 8409 movq $10,%r10 8410L$seal_avx2_320_rounds: 8411 vpaddd %ymm4,%ymm0,%ymm0 8412 vpxor %ymm0,%ymm12,%ymm12 8413 vpshufb L$rol16(%rip),%ymm12,%ymm12 8414 vpaddd %ymm12,%ymm8,%ymm8 8415 vpxor %ymm8,%ymm4,%ymm4 8416 vpsrld $20,%ymm4,%ymm3 8417 vpslld $12,%ymm4,%ymm4 8418 vpxor %ymm3,%ymm4,%ymm4 8419 vpaddd %ymm4,%ymm0,%ymm0 8420 vpxor %ymm0,%ymm12,%ymm12 8421 vpshufb L$rol8(%rip),%ymm12,%ymm12 8422 vpaddd %ymm12,%ymm8,%ymm8 8423 vpxor %ymm8,%ymm4,%ymm4 8424 vpslld $7,%ymm4,%ymm3 8425 vpsrld $25,%ymm4,%ymm4 8426 vpxor %ymm3,%ymm4,%ymm4 8427 vpalignr $12,%ymm12,%ymm12,%ymm12 8428 vpalignr $8,%ymm8,%ymm8,%ymm8 8429 vpalignr $4,%ymm4,%ymm4,%ymm4 8430 vpaddd %ymm5,%ymm1,%ymm1 8431 vpxor %ymm1,%ymm13,%ymm13 8432 vpshufb L$rol16(%rip),%ymm13,%ymm13 8433 vpaddd %ymm13,%ymm9,%ymm9 8434 vpxor %ymm9,%ymm5,%ymm5 8435 vpsrld $20,%ymm5,%ymm3 8436 vpslld $12,%ymm5,%ymm5 8437 vpxor %ymm3,%ymm5,%ymm5 8438 
vpaddd %ymm5,%ymm1,%ymm1 8439 vpxor %ymm1,%ymm13,%ymm13 8440 vpshufb L$rol8(%rip),%ymm13,%ymm13 8441 vpaddd %ymm13,%ymm9,%ymm9 8442 vpxor %ymm9,%ymm5,%ymm5 8443 vpslld $7,%ymm5,%ymm3 8444 vpsrld $25,%ymm5,%ymm5 8445 vpxor %ymm3,%ymm5,%ymm5 8446 vpalignr $12,%ymm13,%ymm13,%ymm13 8447 vpalignr $8,%ymm9,%ymm9,%ymm9 8448 vpalignr $4,%ymm5,%ymm5,%ymm5 8449 vpaddd %ymm6,%ymm2,%ymm2 8450 vpxor %ymm2,%ymm14,%ymm14 8451 vpshufb L$rol16(%rip),%ymm14,%ymm14 8452 vpaddd %ymm14,%ymm10,%ymm10 8453 vpxor %ymm10,%ymm6,%ymm6 8454 vpsrld $20,%ymm6,%ymm3 8455 vpslld $12,%ymm6,%ymm6 8456 vpxor %ymm3,%ymm6,%ymm6 8457 vpaddd %ymm6,%ymm2,%ymm2 8458 vpxor %ymm2,%ymm14,%ymm14 8459 vpshufb L$rol8(%rip),%ymm14,%ymm14 8460 vpaddd %ymm14,%ymm10,%ymm10 8461 vpxor %ymm10,%ymm6,%ymm6 8462 vpslld $7,%ymm6,%ymm3 8463 vpsrld $25,%ymm6,%ymm6 8464 vpxor %ymm3,%ymm6,%ymm6 8465 vpalignr $12,%ymm14,%ymm14,%ymm14 8466 vpalignr $8,%ymm10,%ymm10,%ymm10 8467 vpalignr $4,%ymm6,%ymm6,%ymm6 8468 vpaddd %ymm4,%ymm0,%ymm0 8469 vpxor %ymm0,%ymm12,%ymm12 8470 vpshufb L$rol16(%rip),%ymm12,%ymm12 8471 vpaddd %ymm12,%ymm8,%ymm8 8472 vpxor %ymm8,%ymm4,%ymm4 8473 vpsrld $20,%ymm4,%ymm3 8474 vpslld $12,%ymm4,%ymm4 8475 vpxor %ymm3,%ymm4,%ymm4 8476 vpaddd %ymm4,%ymm0,%ymm0 8477 vpxor %ymm0,%ymm12,%ymm12 8478 vpshufb L$rol8(%rip),%ymm12,%ymm12 8479 vpaddd %ymm12,%ymm8,%ymm8 8480 vpxor %ymm8,%ymm4,%ymm4 8481 vpslld $7,%ymm4,%ymm3 8482 vpsrld $25,%ymm4,%ymm4 8483 vpxor %ymm3,%ymm4,%ymm4 8484 vpalignr $4,%ymm12,%ymm12,%ymm12 8485 vpalignr $8,%ymm8,%ymm8,%ymm8 8486 vpalignr $12,%ymm4,%ymm4,%ymm4 8487 vpaddd %ymm5,%ymm1,%ymm1 8488 vpxor %ymm1,%ymm13,%ymm13 8489 vpshufb L$rol16(%rip),%ymm13,%ymm13 8490 vpaddd %ymm13,%ymm9,%ymm9 8491 vpxor %ymm9,%ymm5,%ymm5 8492 vpsrld $20,%ymm5,%ymm3 8493 vpslld $12,%ymm5,%ymm5 8494 vpxor %ymm3,%ymm5,%ymm5 8495 vpaddd %ymm5,%ymm1,%ymm1 8496 vpxor %ymm1,%ymm13,%ymm13 8497 vpshufb L$rol8(%rip),%ymm13,%ymm13 8498 vpaddd %ymm13,%ymm9,%ymm9 8499 vpxor %ymm9,%ymm5,%ymm5 8500 vpslld $7,%ymm5,%ymm3 8501 vpsrld $25,%ymm5,%ymm5 8502 vpxor %ymm3,%ymm5,%ymm5 8503 vpalignr $4,%ymm13,%ymm13,%ymm13 8504 vpalignr $8,%ymm9,%ymm9,%ymm9 8505 vpalignr $12,%ymm5,%ymm5,%ymm5 8506 vpaddd %ymm6,%ymm2,%ymm2 8507 vpxor %ymm2,%ymm14,%ymm14 8508 vpshufb L$rol16(%rip),%ymm14,%ymm14 8509 vpaddd %ymm14,%ymm10,%ymm10 8510 vpxor %ymm10,%ymm6,%ymm6 8511 vpsrld $20,%ymm6,%ymm3 8512 vpslld $12,%ymm6,%ymm6 8513 vpxor %ymm3,%ymm6,%ymm6 8514 vpaddd %ymm6,%ymm2,%ymm2 8515 vpxor %ymm2,%ymm14,%ymm14 8516 vpshufb L$rol8(%rip),%ymm14,%ymm14 8517 vpaddd %ymm14,%ymm10,%ymm10 8518 vpxor %ymm10,%ymm6,%ymm6 8519 vpslld $7,%ymm6,%ymm3 8520 vpsrld $25,%ymm6,%ymm6 8521 vpxor %ymm3,%ymm6,%ymm6 8522 vpalignr $4,%ymm14,%ymm14,%ymm14 8523 vpalignr $8,%ymm10,%ymm10,%ymm10 8524 vpalignr $12,%ymm6,%ymm6,%ymm6 8525 8526 decq %r10 8527 jne L$seal_avx2_320_rounds 8528 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 8529 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 8530 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 8531 vpaddd %ymm7,%ymm4,%ymm4 8532 vpaddd %ymm7,%ymm5,%ymm5 8533 vpaddd %ymm7,%ymm6,%ymm6 8534 vpaddd %ymm11,%ymm8,%ymm8 8535 vpaddd %ymm11,%ymm9,%ymm9 8536 vpaddd %ymm11,%ymm10,%ymm10 8537 vpaddd 0+160(%rbp),%ymm12,%ymm12 8538 vpaddd 0+192(%rbp),%ymm13,%ymm13 8539 vpaddd 0+224(%rbp),%ymm14,%ymm14 8540 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8541 8542 vpand L$clamp(%rip),%ymm3,%ymm3 8543 vmovdqa %ymm3,0+0(%rbp) 8544 8545 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8546 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8547 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8548 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8549 
vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8550 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8551 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8552 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8553 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8554 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8555 jmp L$seal_avx2_short 8556 8557L$seal_avx2_192: 8558 vmovdqa %ymm0,%ymm1 8559 vmovdqa %ymm0,%ymm2 8560 vmovdqa %ymm4,%ymm5 8561 vmovdqa %ymm4,%ymm6 8562 vmovdqa %ymm8,%ymm9 8563 vmovdqa %ymm8,%ymm10 8564 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 8565 vmovdqa %ymm12,%ymm11 8566 vmovdqa %ymm13,%ymm15 8567 movq $10,%r10 8568L$seal_avx2_192_rounds: 8569 vpaddd %ymm4,%ymm0,%ymm0 8570 vpxor %ymm0,%ymm12,%ymm12 8571 vpshufb L$rol16(%rip),%ymm12,%ymm12 8572 vpaddd %ymm12,%ymm8,%ymm8 8573 vpxor %ymm8,%ymm4,%ymm4 8574 vpsrld $20,%ymm4,%ymm3 8575 vpslld $12,%ymm4,%ymm4 8576 vpxor %ymm3,%ymm4,%ymm4 8577 vpaddd %ymm4,%ymm0,%ymm0 8578 vpxor %ymm0,%ymm12,%ymm12 8579 vpshufb L$rol8(%rip),%ymm12,%ymm12 8580 vpaddd %ymm12,%ymm8,%ymm8 8581 vpxor %ymm8,%ymm4,%ymm4 8582 vpslld $7,%ymm4,%ymm3 8583 vpsrld $25,%ymm4,%ymm4 8584 vpxor %ymm3,%ymm4,%ymm4 8585 vpalignr $12,%ymm12,%ymm12,%ymm12 8586 vpalignr $8,%ymm8,%ymm8,%ymm8 8587 vpalignr $4,%ymm4,%ymm4,%ymm4 8588 vpaddd %ymm5,%ymm1,%ymm1 8589 vpxor %ymm1,%ymm13,%ymm13 8590 vpshufb L$rol16(%rip),%ymm13,%ymm13 8591 vpaddd %ymm13,%ymm9,%ymm9 8592 vpxor %ymm9,%ymm5,%ymm5 8593 vpsrld $20,%ymm5,%ymm3 8594 vpslld $12,%ymm5,%ymm5 8595 vpxor %ymm3,%ymm5,%ymm5 8596 vpaddd %ymm5,%ymm1,%ymm1 8597 vpxor %ymm1,%ymm13,%ymm13 8598 vpshufb L$rol8(%rip),%ymm13,%ymm13 8599 vpaddd %ymm13,%ymm9,%ymm9 8600 vpxor %ymm9,%ymm5,%ymm5 8601 vpslld $7,%ymm5,%ymm3 8602 vpsrld $25,%ymm5,%ymm5 8603 vpxor %ymm3,%ymm5,%ymm5 8604 vpalignr $12,%ymm13,%ymm13,%ymm13 8605 vpalignr $8,%ymm9,%ymm9,%ymm9 8606 vpalignr $4,%ymm5,%ymm5,%ymm5 8607 vpaddd %ymm4,%ymm0,%ymm0 8608 vpxor %ymm0,%ymm12,%ymm12 8609 vpshufb L$rol16(%rip),%ymm12,%ymm12 8610 vpaddd %ymm12,%ymm8,%ymm8 8611 vpxor %ymm8,%ymm4,%ymm4 8612 vpsrld $20,%ymm4,%ymm3 8613 vpslld $12,%ymm4,%ymm4 8614 vpxor %ymm3,%ymm4,%ymm4 8615 vpaddd %ymm4,%ymm0,%ymm0 8616 vpxor %ymm0,%ymm12,%ymm12 8617 vpshufb L$rol8(%rip),%ymm12,%ymm12 8618 vpaddd %ymm12,%ymm8,%ymm8 8619 vpxor %ymm8,%ymm4,%ymm4 8620 vpslld $7,%ymm4,%ymm3 8621 vpsrld $25,%ymm4,%ymm4 8622 vpxor %ymm3,%ymm4,%ymm4 8623 vpalignr $4,%ymm12,%ymm12,%ymm12 8624 vpalignr $8,%ymm8,%ymm8,%ymm8 8625 vpalignr $12,%ymm4,%ymm4,%ymm4 8626 vpaddd %ymm5,%ymm1,%ymm1 8627 vpxor %ymm1,%ymm13,%ymm13 8628 vpshufb L$rol16(%rip),%ymm13,%ymm13 8629 vpaddd %ymm13,%ymm9,%ymm9 8630 vpxor %ymm9,%ymm5,%ymm5 8631 vpsrld $20,%ymm5,%ymm3 8632 vpslld $12,%ymm5,%ymm5 8633 vpxor %ymm3,%ymm5,%ymm5 8634 vpaddd %ymm5,%ymm1,%ymm1 8635 vpxor %ymm1,%ymm13,%ymm13 8636 vpshufb L$rol8(%rip),%ymm13,%ymm13 8637 vpaddd %ymm13,%ymm9,%ymm9 8638 vpxor %ymm9,%ymm5,%ymm5 8639 vpslld $7,%ymm5,%ymm3 8640 vpsrld $25,%ymm5,%ymm5 8641 vpxor %ymm3,%ymm5,%ymm5 8642 vpalignr $4,%ymm13,%ymm13,%ymm13 8643 vpalignr $8,%ymm9,%ymm9,%ymm9 8644 vpalignr $12,%ymm5,%ymm5,%ymm5 8645 8646 decq %r10 8647 jne L$seal_avx2_192_rounds 8648 vpaddd %ymm2,%ymm0,%ymm0 8649 vpaddd %ymm2,%ymm1,%ymm1 8650 vpaddd %ymm6,%ymm4,%ymm4 8651 vpaddd %ymm6,%ymm5,%ymm5 8652 vpaddd %ymm10,%ymm8,%ymm8 8653 vpaddd %ymm10,%ymm9,%ymm9 8654 vpaddd %ymm11,%ymm12,%ymm12 8655 vpaddd %ymm15,%ymm13,%ymm13 8656 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8657 8658 vpand L$clamp(%rip),%ymm3,%ymm3 8659 vmovdqa %ymm3,0+0(%rbp) 8660 8661 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8662 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8663 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8664 vperm2i128 
$0x02,%ymm9,%ymm13,%ymm12 8665 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8666 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8667L$seal_avx2_short: 8668 movq %r8,%r8 8669 call poly_hash_ad_internal 8670 xorq %rcx,%rcx 8671L$seal_avx2_short_hash_remainder: 8672 cmpq $16,%rcx 8673 jb L$seal_avx2_short_loop 8674 addq 0+0(%rdi),%r10 8675 adcq 8+0(%rdi),%r11 8676 adcq $1,%r12 8677 movq 0+0+0(%rbp),%rax 8678 movq %rax,%r15 8679 mulq %r10 8680 movq %rax,%r13 8681 movq %rdx,%r14 8682 movq 0+0+0(%rbp),%rax 8683 mulq %r11 8684 imulq %r12,%r15 8685 addq %rax,%r14 8686 adcq %rdx,%r15 8687 movq 8+0+0(%rbp),%rax 8688 movq %rax,%r9 8689 mulq %r10 8690 addq %rax,%r14 8691 adcq $0,%rdx 8692 movq %rdx,%r10 8693 movq 8+0+0(%rbp),%rax 8694 mulq %r11 8695 addq %rax,%r15 8696 adcq $0,%rdx 8697 imulq %r12,%r9 8698 addq %r10,%r15 8699 adcq %rdx,%r9 8700 movq %r13,%r10 8701 movq %r14,%r11 8702 movq %r15,%r12 8703 andq $3,%r12 8704 movq %r15,%r13 8705 andq $-4,%r13 8706 movq %r9,%r14 8707 shrdq $2,%r9,%r15 8708 shrq $2,%r9 8709 addq %r13,%r15 8710 adcq %r14,%r9 8711 addq %r15,%r10 8712 adcq %r9,%r11 8713 adcq $0,%r12 8714 8715 subq $16,%rcx 8716 addq $16,%rdi 8717 jmp L$seal_avx2_short_hash_remainder 8718L$seal_avx2_short_loop: 8719 cmpq $32,%rbx 8720 jb L$seal_avx2_short_tail 8721 subq $32,%rbx 8722 8723 vpxor (%rsi),%ymm0,%ymm0 8724 vmovdqu %ymm0,(%rdi) 8725 leaq 32(%rsi),%rsi 8726 8727 addq 0+0(%rdi),%r10 8728 adcq 8+0(%rdi),%r11 8729 adcq $1,%r12 8730 movq 0+0+0(%rbp),%rax 8731 movq %rax,%r15 8732 mulq %r10 8733 movq %rax,%r13 8734 movq %rdx,%r14 8735 movq 0+0+0(%rbp),%rax 8736 mulq %r11 8737 imulq %r12,%r15 8738 addq %rax,%r14 8739 adcq %rdx,%r15 8740 movq 8+0+0(%rbp),%rax 8741 movq %rax,%r9 8742 mulq %r10 8743 addq %rax,%r14 8744 adcq $0,%rdx 8745 movq %rdx,%r10 8746 movq 8+0+0(%rbp),%rax 8747 mulq %r11 8748 addq %rax,%r15 8749 adcq $0,%rdx 8750 imulq %r12,%r9 8751 addq %r10,%r15 8752 adcq %rdx,%r9 8753 movq %r13,%r10 8754 movq %r14,%r11 8755 movq %r15,%r12 8756 andq $3,%r12 8757 movq %r15,%r13 8758 andq $-4,%r13 8759 movq %r9,%r14 8760 shrdq $2,%r9,%r15 8761 shrq $2,%r9 8762 addq %r13,%r15 8763 adcq %r14,%r9 8764 addq %r15,%r10 8765 adcq %r9,%r11 8766 adcq $0,%r12 8767 addq 0+16(%rdi),%r10 8768 adcq 8+16(%rdi),%r11 8769 adcq $1,%r12 8770 movq 0+0+0(%rbp),%rax 8771 movq %rax,%r15 8772 mulq %r10 8773 movq %rax,%r13 8774 movq %rdx,%r14 8775 movq 0+0+0(%rbp),%rax 8776 mulq %r11 8777 imulq %r12,%r15 8778 addq %rax,%r14 8779 adcq %rdx,%r15 8780 movq 8+0+0(%rbp),%rax 8781 movq %rax,%r9 8782 mulq %r10 8783 addq %rax,%r14 8784 adcq $0,%rdx 8785 movq %rdx,%r10 8786 movq 8+0+0(%rbp),%rax 8787 mulq %r11 8788 addq %rax,%r15 8789 adcq $0,%rdx 8790 imulq %r12,%r9 8791 addq %r10,%r15 8792 adcq %rdx,%r9 8793 movq %r13,%r10 8794 movq %r14,%r11 8795 movq %r15,%r12 8796 andq $3,%r12 8797 movq %r15,%r13 8798 andq $-4,%r13 8799 movq %r9,%r14 8800 shrdq $2,%r9,%r15 8801 shrq $2,%r9 8802 addq %r13,%r15 8803 adcq %r14,%r9 8804 addq %r15,%r10 8805 adcq %r9,%r11 8806 adcq $0,%r12 8807 8808 leaq 32(%rdi),%rdi 8809 8810 vmovdqa %ymm4,%ymm0 8811 vmovdqa %ymm8,%ymm4 8812 vmovdqa %ymm12,%ymm8 8813 vmovdqa %ymm1,%ymm12 8814 vmovdqa %ymm5,%ymm1 8815 vmovdqa %ymm9,%ymm5 8816 vmovdqa %ymm13,%ymm9 8817 vmovdqa %ymm2,%ymm13 8818 vmovdqa %ymm6,%ymm2 8819 jmp L$seal_avx2_short_loop 8820L$seal_avx2_short_tail: 8821 cmpq $16,%rbx 8822 jb L$seal_avx2_exit 8823 subq $16,%rbx 8824 vpxor (%rsi),%xmm0,%xmm3 8825 vmovdqu %xmm3,(%rdi) 8826 leaq 16(%rsi),%rsi 8827 addq 0+0(%rdi),%r10 8828 adcq 8+0(%rdi),%r11 8829 adcq $1,%r12 8830 movq 0+0+0(%rbp),%rax 8831 movq %rax,%r15 
8832 mulq %r10 8833 movq %rax,%r13 8834 movq %rdx,%r14 8835 movq 0+0+0(%rbp),%rax 8836 mulq %r11 8837 imulq %r12,%r15 8838 addq %rax,%r14 8839 adcq %rdx,%r15 8840 movq 8+0+0(%rbp),%rax 8841 movq %rax,%r9 8842 mulq %r10 8843 addq %rax,%r14 8844 adcq $0,%rdx 8845 movq %rdx,%r10 8846 movq 8+0+0(%rbp),%rax 8847 mulq %r11 8848 addq %rax,%r15 8849 adcq $0,%rdx 8850 imulq %r12,%r9 8851 addq %r10,%r15 8852 adcq %rdx,%r9 8853 movq %r13,%r10 8854 movq %r14,%r11 8855 movq %r15,%r12 8856 andq $3,%r12 8857 movq %r15,%r13 8858 andq $-4,%r13 8859 movq %r9,%r14 8860 shrdq $2,%r9,%r15 8861 shrq $2,%r9 8862 addq %r13,%r15 8863 adcq %r14,%r9 8864 addq %r15,%r10 8865 adcq %r9,%r11 8866 adcq $0,%r12 8867 8868 leaq 16(%rdi),%rdi 8869 vextracti128 $1,%ymm0,%xmm0 8870L$seal_avx2_exit: 8871 vzeroupper 8872 jmp L$seal_sse_tail_16 8873 8874 8875#endif 8876