// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text

// -----------------------------------------------------------------------------
// ChaCha20_ctr32_nohw
//
// Syntax: AT&T (GNU as), 32-bit x86, ELF. Uses general-purpose registers only.
//
// Arguments are taken from the stack. Offsets below are relative to %esp at
// function entry (return address at 0(%esp)):
//    4(%esp)  destination pointer; receives (source XOR keystream)
//    8(%esp)  source pointer
//   12(%esp)  byte count
//   16(%esp)  pointer to 8 dwords, copied to 80..108(%esp)   (state words 4..11)
//   20(%esp)  pointer to 4 dwords, copied to 112..124(%esp)  (state words 12..15)
// NOTE(review): presumably the C prototype is
//   void ChaCha20_ctr32_nohw(uint8_t *out, const uint8_t *in, size_t in_len,
//                            const uint32_t key[8], const uint32_t counter[4]);
// confirm against the C caller.
//
// Frame after "subl $132,%esp":
//     0..60(%esp)   working state x0..x15 (also the keystream buffer for the tail)
//    80..108(%esp)  copy of the 8 dwords from argument 4
//   112..124(%esp)  copy of the 4 dwords from argument 5; dword 0 is the block
//                   counter and is incremented once per 64-byte block
//   128(%esp)       spill slot for the double-round counter
//   152/156/160(%esp)  the caller's argument slots for destination, source and
//                   byte count; they are overwritten with the advanced
//                   pointers / remaining count on every outer iteration
//
// %ebp, %ebx, %esi and %edi are pushed on entry and popped before ret.
//
// There is no zero-length guard: with a byte count of 0 the tail loop's
// "decl/jnz" would wrap, so the byte count must be non-zero.
// -----------------------------------------------------------------------------
.globl ChaCha20_ctr32_nohw
.hidden ChaCha20_ctr32_nohw
.type ChaCha20_ctr32_nohw,@function
.align 16
ChaCha20_ctr32_nohw:
.L_ChaCha20_ctr32_nohw_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        // Four pushes moved the arguments up by 16: 32(%esp) / 36(%esp) are
        // arguments 4 and 5.
        movl    32(%esp),%esi
        movl    36(%esp),%edi
        subl    $132,%esp
        // Copy the 8 dwords at argument 4 to 80..108(%esp).
        movl    (%esi),%eax
        movl    4(%esi),%ebx
        movl    8(%esi),%ecx
        movl    12(%esi),%edx
        movl    %eax,80(%esp)
        movl    %ebx,84(%esp)
        movl    %ecx,88(%esp)
        movl    %edx,92(%esp)
        movl    16(%esi),%eax
        movl    20(%esi),%ebx
        movl    24(%esi),%ecx
        movl    28(%esi),%edx
        movl    %eax,96(%esp)
        movl    %ebx,100(%esp)
        movl    %ecx,104(%esp)
        movl    %edx,108(%esp)
        // Copy the 4 dwords at argument 5 to 112..124(%esp). The block counter
        // is stored minus one because .L000entry adds one before every block.
        movl    (%edi),%eax
        movl    4(%edi),%ebx
        movl    8(%edi),%ecx
        movl    12(%edi),%edx
        subl    $1,%eax
        movl    %eax,112(%esp)
        movl    %ebx,116(%esp)
        movl    %ecx,120(%esp)
        movl    %edx,124(%esp)
        jmp     .L000entry
.align 16
.L001outer_loop:
        // Reached after a full 64-byte block: %ebx = advanced source pointer,
        // %eax = advanced destination pointer, %ecx = remaining byte count.
        movl    %ebx,156(%esp)
        movl    %eax,152(%esp)
        movl    %ecx,160(%esp)
.L000entry:
        // Build the working state for one block. x0..x3 are the four constants
        // below (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574). x0 lives in
        // %eax, x4 in %ebp, x8 in %ecx, x9 in %esi and x12 in %edx; the rest
        // are placed in their slots at 4*i(%esp).
        movl    $1634760805,%eax
        movl    $857760878,4(%esp)
        movl    $2036477234,8(%esp)
        movl    $1797285236,12(%esp)
        movl    84(%esp),%ebx
        movl    88(%esp),%ebp
        movl    104(%esp),%ecx
        movl    108(%esp),%esi
        movl    116(%esp),%edx
        movl    120(%esp),%edi
        movl    %ebx,20(%esp)
        movl    %ebp,24(%esp)
        movl    %ecx,40(%esp)
        movl    %esi,44(%esp)
        movl    %edx,52(%esp)
        movl    %edi,56(%esp)
        movl    92(%esp),%ebx
        movl    124(%esp),%edi
        movl    112(%esp),%edx
        movl    80(%esp),%ebp
        movl    96(%esp),%ecx
        movl    100(%esp),%esi
        // Advance the block counter and write it back for the next block.
        addl    $1,%edx
        movl    %ebx,28(%esp)
        movl    %edi,60(%esp)
        movl    %edx,112(%esp)
        // 10 iterations, each a column round followed by a diagonal round.
        movl    $10,%ebx
        jmp     .L002loop
.align 16
.L002loop:
        // Each quarter-round is add / xor / rotate by 16, 12, 8, 7. The loads
        // and stores for neighbouring quarter-rounds are interleaved with the
        // arithmetic, so statement order matters throughout this loop.
        // Column quarter-rounds, in order: (x0,x4,x8,x12), (x1,x5,x9,x13),
        // (x2,x6,x10,x14), (x3,x7,x11,x15).
        addl    %ebp,%eax
        // %ebx is needed as a temporary; park the round counter.
        movl    %ebx,128(%esp)
        movl    %ebp,%ebx
        xorl    %eax,%edx
        roll    $16,%edx
        addl    %edx,%ecx
        xorl    %ecx,%ebx
        movl    52(%esp),%edi
        roll    $12,%ebx
        movl    20(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,(%esp)
        roll    $8,%edx
        movl    4(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,48(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        movl    %ecx,32(%esp)
        roll    $16,%edi
        movl    %ebx,16(%esp)
        addl    %edi,%esi
        movl    40(%esp),%ecx
        xorl    %esi,%ebp
        movl    56(%esp),%edx
        roll    $12,%ebp
        movl    24(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,4(%esp)
        roll    $8,%edi
        movl    8(%esp),%eax
        addl    %edi,%esi
        movl    %edi,52(%esp)
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        movl    %esi,36(%esp)
        roll    $16,%edx
        movl    %ebp,20(%esp)
        addl    %edx,%ecx
        movl    44(%esp),%esi
        xorl    %ecx,%ebx
        movl    60(%esp),%edi
        roll    $12,%ebx
        movl    28(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,8(%esp)
        roll    $8,%edx
        movl    12(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,56(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        roll    $16,%edi
        movl    %ebx,24(%esp)
        addl    %edi,%esi
        xorl    %esi,%ebp
        roll    $12,%ebp
        // x5 is loaded here for the first diagonal quarter-round while the
        // last column quarter-round is still finishing.
        movl    20(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,12(%esp)
        roll    $8,%edi
        // Diagonal quarter-rounds, in order: (x0,x5,x10,x15), (x1,x6,x11,x12),
        // (x2,x7,x8,x13), (x3,x4,x9,x14).
        movl    (%esp),%eax
        addl    %edi,%esi
        movl    %edi,%edx
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        roll    $16,%edx
        movl    %ebp,28(%esp)
        addl    %edx,%ecx
        xorl    %ecx,%ebx
        movl    48(%esp),%edi
        roll    $12,%ebx
        movl    24(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,(%esp)
        roll    $8,%edx
        movl    4(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,60(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        movl    %ecx,40(%esp)
        roll    $16,%edi
        movl    %ebx,20(%esp)
        addl    %edi,%esi
        movl    32(%esp),%ecx
        xorl    %esi,%ebp
        movl    52(%esp),%edx
        roll    $12,%ebp
        movl    28(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,4(%esp)
        roll    $8,%edi
        movl    8(%esp),%eax
        addl    %edi,%esi
        movl    %edi,48(%esp)
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        movl    %esi,44(%esp)
        roll    $16,%edx
        movl    %ebp,24(%esp)
        addl    %edx,%ecx
        movl    36(%esp),%esi
        xorl    %ecx,%ebx
        movl    56(%esp),%edi
        roll    $12,%ebx
        movl    16(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,8(%esp)
        roll    $8,%edx
        movl    12(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,52(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        roll    $16,%edi
        movl    %ebx,28(%esp)
        addl    %edi,%esi
        xorl    %esi,%ebp
        // Reload x12 so it is in %edx at the top of the next iteration.
        movl    48(%esp),%edx
        roll    $12,%ebp
        // Bring the round counter back from its spill slot.
        movl    128(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,12(%esp)
        roll    $8,%edi
        movl    (%esp),%eax
        addl    %edi,%esi
        movl    %edi,56(%esp)
        xorl    %esi,%ebp
        roll    $7,%ebp
        decl    %ebx
        jnz     .L002loop
        // Rounds done. Live registers: %eax=x0, %ebp=x4, %ecx=x8, %esi=x9,
        // %edx=x12, %edi=x14. Add the input state back in; %ebx = byte count.
        movl    160(%esp),%ebx
        addl    $1634760805,%eax
        addl    80(%esp),%ebp
        addl    96(%esp),%ecx
        addl    100(%esp),%esi
        // Fewer than 64 bytes left: finish bytewise (unsigned compare).
        cmpl    $64,%ebx
        jb      .L003tail
        // Full block: %ebx = source pointer, %eax (after x0 is parked at
        // (%esp)) = destination pointer. Words are processed five at a time.
        movl    156(%esp),%ebx
        addl    112(%esp),%edx
        addl    120(%esp),%edi
        xorl    (%ebx),%eax
        xorl    16(%ebx),%ebp
        movl    %eax,(%esp)
        movl    152(%esp),%eax
        xorl    32(%ebx),%ecx
        xorl    36(%ebx),%esi
        xorl    48(%ebx),%edx
        xorl    56(%ebx),%edi
        movl    %ebp,16(%eax)
        movl    %ecx,32(%eax)
        movl    %esi,36(%eax)
        movl    %edx,48(%eax)
        movl    %edi,56(%eax)
        // Words 1, 2, 3, 5, 6.
        movl    4(%esp),%ebp
        movl    8(%esp),%ecx
        movl    12(%esp),%esi
        movl    20(%esp),%edx
        movl    24(%esp),%edi
        addl    $857760878,%ebp
        addl    $2036477234,%ecx
        addl    $1797285236,%esi
        addl    84(%esp),%edx
        addl    88(%esp),%edi
        xorl    4(%ebx),%ebp
        xorl    8(%ebx),%ecx
        xorl    12(%ebx),%esi
        xorl    20(%ebx),%edx
        xorl    24(%ebx),%edi
        movl    %ebp,4(%eax)
        movl    %ecx,8(%eax)
        movl    %esi,12(%eax)
        movl    %edx,20(%eax)
        movl    %edi,24(%eax)
        // Words 7, 10, 11, 13, 15.
        movl    28(%esp),%ebp
        movl    40(%esp),%ecx
        movl    44(%esp),%esi
        movl    52(%esp),%edx
        movl    60(%esp),%edi
        addl    92(%esp),%ebp
        addl    104(%esp),%ecx
        addl    108(%esp),%esi
        addl    116(%esp),%edx
        addl    124(%esp),%edi
        xorl    28(%ebx),%ebp
        xorl    40(%ebx),%ecx
        xorl    44(%ebx),%esi
        xorl    52(%ebx),%edx
        xorl    60(%ebx),%edi
        leal    64(%ebx),%ebx
        movl    %ebp,28(%eax)
        // Word 0 was parked at (%esp); it is written last.
        movl    (%esp),%ebp
        movl    %ecx,40(%eax)
        movl    160(%esp),%ecx
        movl    %esi,44(%eax)
        movl    %edx,52(%eax)
        movl    %edi,60(%eax)
        movl    %ebp,(%eax)
        leal    64(%eax),%eax
        subl    $64,%ecx
        jnz     .L001outer_loop
        jmp     .L004done
.L003tail:
        // Partial final block: materialise the whole 64-byte keystream block
        // at 0..60(%esp), then XOR it into the output one byte at a time.
        addl    112(%esp),%edx
        addl    120(%esp),%edi
        movl    %eax,(%esp)
        movl    %ebp,16(%esp)
        movl    %ecx,32(%esp)
        movl    %esi,36(%esp)
        movl    %edx,48(%esp)
        movl    %edi,56(%esp)
        movl    4(%esp),%ebp
        movl    8(%esp),%ecx
        movl    12(%esp),%esi
        movl    20(%esp),%edx
        movl    24(%esp),%edi
        addl    $857760878,%ebp
        addl    $2036477234,%ecx
        addl    $1797285236,%esi
        addl    84(%esp),%edx
        addl    88(%esp),%edi
        movl    %ebp,4(%esp)
        movl    %ecx,8(%esp)
        movl    %esi,12(%esp)
        movl    %edx,20(%esp)
        movl    %edi,24(%esp)
        movl    28(%esp),%ebp
        movl    40(%esp),%ecx
        movl    44(%esp),%esi
        movl    52(%esp),%edx
        movl    60(%esp),%edi
        addl    92(%esp),%ebp
        addl    104(%esp),%ecx
        addl    108(%esp),%esi
        addl    116(%esp),%edx
        addl    124(%esp),%edi
        movl    %ebp,28(%esp)
        // %ebp = source pointer, %ecx = destination pointer, %esi = byte index.
        movl    156(%esp),%ebp
        movl    %ecx,40(%esp)
        movl    152(%esp),%ecx
        movl    %esi,44(%esp)
        xorl    %esi,%esi
        movl    %edx,52(%esp)
        movl    %edi,60(%esp)
        xorl    %eax,%eax
        xorl    %edx,%edx
.L005tail_loop:
        // %ebx still holds the remaining byte count (1..63 on this path when
        // the byte count was non-zero).
        movb    (%esi,%ebp,1),%al
        movb    (%esp,%esi,1),%dl
        leal    1(%esi),%esi
        xorb    %dl,%al
        // %esi was already incremented, hence the -1 displacement.
        movb    %al,-1(%ecx,%esi,1)
        decl    %ebx
        jnz     .L005tail_loop
.L004done:
        addl    $132,%esp
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.size ChaCha20_ctr32_nohw,.-.L_ChaCha20_ctr32_nohw_begin

// -----------------------------------------------------------------------------
// ChaCha20_ctr32_ssse3
//
// Syntax: AT&T (GNU as), 32-bit x86, ELF. Uses SSE2 plus PSHUFB (SSSE3).
//
// Takes the same five stack arguments, in the same order, as
// ChaCha20_ctr32_nohw (destination, source, byte count, pointer to 8 dwords,
// pointer to 4 dwords).
//
// Inputs of 256 bytes or more are processed four blocks at a time; the
// remainder (or a short input) goes through the one-block path .L0061x and
// the bytewise tail.
//
// Frame: %esp is lowered by 524 and aligned down to 64 bytes.
//   512(%esp)  the pre-adjustment %esp, reloaded at .L009done
//   516(%esp)  saved pointer from argument 4   (four-block path only)
//   520(%esp)  saved pointer from argument 5   (four-block path only)
//   %ebp = %esp+384: -128..112(%ebp) hold the 16 input state words, each
//                    broadcast to four lanes; 64(%ebp) holds the four
//                    per-lane block counters
//   %ebx = %esp+128: -128..112(%ebx) hold the 16 working state rows
//   0..63(%esp)      input state / keystream buffer for the one-block path
// %eax points at .Lssse3_data (computed PC-relative via call/pop).
//
// %ebp, %ebx, %esi and %edi are pushed on entry and popped before ret.
//
// There is no zero-length guard: with a byte count of 0 the tail loop's
// "decl/jnz" would wrap, so the byte count must be non-zero.
// -----------------------------------------------------------------------------
.globl ChaCha20_ctr32_ssse3
.hidden ChaCha20_ctr32_ssse3
.type ChaCha20_ctr32_ssse3,@function
.align 16
ChaCha20_ctr32_ssse3:
.L_ChaCha20_ctr32_ssse3_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        // Position-independent address of .Lpic_point in %eax. The pop
        // rebalances the stack, so the arguments start at 20(%esp).
        call    .Lpic_point
.Lpic_point:
        popl    %eax
        movl    20(%esp),%edi
        movl    24(%esp),%esi
        movl    28(%esp),%ecx
        movl    32(%esp),%edx
        movl    36(%esp),%ebx
        movl    %esp,%ebp
        subl    $524,%esp
        andl    $-64,%esp
        movl    %ebp,512(%esp)
        leal    .Lssse3_data-.Lpic_point(%eax),%eax
        // %xmm3 = the 4 dwords at argument 5 (state words 12..15).
        movdqu  (%ebx),%xmm3
        cmpl    $256,%ecx
        jb      .L0061x
        // ---- Four-block path setup ----
        movl    %edx,516(%esp)
        movl    %ebx,520(%esp)
        // Bias the count by -256 so "subl $256 / jnc" below detects when fewer
        // than 256 bytes remain.
        subl    $256,%ecx
        leal    384(%esp),%ebp
        movdqu  (%edx),%xmm7
        pshufd  $0,%xmm3,%xmm0
        pshufd  $85,%xmm3,%xmm1
        pshufd  $170,%xmm3,%xmm2
        pshufd  $255,%xmm3,%xmm3
        // Lane counters = counter + {0,1,2,3}, pre-decremented by {4,4,4,4}
        // because the outer loop adds {4,4,4,4} before every batch.
        paddd   48(%eax),%xmm0
        pshufd  $0,%xmm7,%xmm4
        pshufd  $85,%xmm7,%xmm5
        psubd   64(%eax),%xmm0
        pshufd  $170,%xmm7,%xmm6
        pshufd  $255,%xmm7,%xmm7
        movdqa  %xmm0,64(%ebp)
        movdqa  %xmm1,80(%ebp)
        movdqa  %xmm2,96(%ebp)
        movdqa  %xmm3,112(%ebp)
        movdqu  16(%edx),%xmm3
        movdqa  %xmm4,-64(%ebp)
        movdqa  %xmm5,-48(%ebp)
        movdqa  %xmm6,-32(%ebp)
        movdqa  %xmm7,-16(%ebp)
        // The four constants (state words 0..3), broadcast below.
        movdqa  32(%eax),%xmm7
        leal    128(%esp),%ebx
        pshufd  $0,%xmm3,%xmm0
        pshufd  $85,%xmm3,%xmm1
        pshufd  $170,%xmm3,%xmm2
        pshufd  $255,%xmm3,%xmm3
        pshufd  $0,%xmm7,%xmm4
        pshufd  $85,%xmm7,%xmm5
        pshufd  $170,%xmm7,%xmm6
        pshufd  $255,%xmm7,%xmm7
        movdqa  %xmm0,(%ebp)
        movdqa  %xmm1,16(%ebp)
        movdqa  %xmm2,32(%ebp)
        movdqa  %xmm3,48(%ebp)
        movdqa  %xmm4,-128(%ebp)
        movdqa  %xmm5,-112(%ebp)
        movdqa  %xmm6,-96(%ebp)
        movdqa  %xmm7,-80(%ebp)
        // Bias both data pointers by +128 so that the four blocks of a batch
        // sit at displacements -128, -64, 0 and 64.
        leal    128(%esi),%esi
        leal    128(%edi),%edi
        jmp     .L007outer_loop
.align 16
.L007outer_loop:
        // Copy the input state to the working area at %ebx. Rows 0, 4, 8, 9
        // and 12 are kept in registers (%xmm0, %xmm3, %xmm4, %xmm5, %xmm6)
        // instead of being stored.
        movdqa  -112(%ebp),%xmm1
        movdqa  -96(%ebp),%xmm2
        movdqa  -80(%ebp),%xmm3
        movdqa  -48(%ebp),%xmm5
        movdqa  -32(%ebp),%xmm6
        movdqa  -16(%ebp),%xmm7
        movdqa  %xmm1,-112(%ebx)
        movdqa  %xmm2,-96(%ebx)
        movdqa  %xmm3,-80(%ebx)
        movdqa  %xmm5,-48(%ebx)
        movdqa  %xmm6,-32(%ebx)
        movdqa  %xmm7,-16(%ebx)
        movdqa  32(%ebp),%xmm2
        movdqa  48(%ebp),%xmm3
        movdqa  64(%ebp),%xmm4
        movdqa  80(%ebp),%xmm5
        movdqa  96(%ebp),%xmm6
        movdqa  112(%ebp),%xmm7
        // Advance all four lane counters by 4 for this batch.
        paddd   64(%eax),%xmm4
        movdqa  %xmm2,32(%ebx)
        movdqa  %xmm3,48(%ebx)
        movdqa  %xmm4,64(%ebx)
        movdqa  %xmm5,80(%ebx)
        movdqa  %xmm6,96(%ebx)
        movdqa  %xmm7,112(%ebx)
        movdqa  %xmm4,64(%ebp)
        movdqa  -128(%ebp),%xmm0
        movdqa  %xmm4,%xmm6
        movdqa  -64(%ebp),%xmm3
        movdqa  (%ebp),%xmm4
        movdqa  16(%ebp),%xmm5
        movl    $10,%edx
        nop
.align 16
.L008loop:
        // One iteration = column round + diagonal round on four blocks at
        // once. Rotations by 16 and 8 use PSHUFB with the masks at (%eax) and
        // 16(%eax); rotations by 12 and 7 use shift-left / shift-right / OR.
        // Loads and stores for neighbouring quarter-rounds are interleaved,
        // so statement order matters.
        // Column quarter-rounds: rows (0,4,8,12), (1,5,9,13), (2,6,10,14),
        // (3,7,11,15).
        paddd   %xmm3,%xmm0
        movdqa  %xmm3,%xmm2
        pxor    %xmm0,%xmm6
        pshufb  (%eax),%xmm6
        paddd   %xmm6,%xmm4
        pxor    %xmm4,%xmm2
        movdqa  -48(%ebx),%xmm3
        movdqa  %xmm2,%xmm1
        pslld   $12,%xmm2
        psrld   $20,%xmm1
        por     %xmm1,%xmm2
        movdqa  -112(%ebx),%xmm1
        paddd   %xmm2,%xmm0
        movdqa  80(%ebx),%xmm7
        pxor    %xmm0,%xmm6
        movdqa  %xmm0,-128(%ebx)
        pshufb  16(%eax),%xmm6
        paddd   %xmm6,%xmm4
        movdqa  %xmm6,64(%ebx)
        pxor    %xmm4,%xmm2
        paddd   %xmm3,%xmm1
        movdqa  %xmm2,%xmm0
        pslld   $7,%xmm2
        psrld   $25,%xmm0
        pxor    %xmm1,%xmm7
        por     %xmm0,%xmm2
        movdqa  %xmm4,(%ebx)
        pshufb  (%eax),%xmm7
        movdqa  %xmm2,-64(%ebx)
        paddd   %xmm7,%xmm5
        movdqa  32(%ebx),%xmm4
        pxor    %xmm5,%xmm3
        movdqa  -32(%ebx),%xmm2
        movdqa  %xmm3,%xmm0
        pslld   $12,%xmm3
        psrld   $20,%xmm0
        por     %xmm0,%xmm3
        movdqa  -96(%ebx),%xmm0
        paddd   %xmm3,%xmm1
        movdqa  96(%ebx),%xmm6
        pxor    %xmm1,%xmm7
        movdqa  %xmm1,-112(%ebx)
        pshufb  16(%eax),%xmm7
        paddd   %xmm7,%xmm5
        movdqa  %xmm7,80(%ebx)
        pxor    %xmm5,%xmm3
        paddd   %xmm2,%xmm0
        movdqa  %xmm3,%xmm1
        pslld   $7,%xmm3
        psrld   $25,%xmm1
        pxor    %xmm0,%xmm6
        por     %xmm1,%xmm3
        movdqa  %xmm5,16(%ebx)
        pshufb  (%eax),%xmm6
        movdqa  %xmm3,-48(%ebx)
        paddd   %xmm6,%xmm4
        movdqa  48(%ebx),%xmm5
        pxor    %xmm4,%xmm2
        movdqa  -16(%ebx),%xmm3
        movdqa  %xmm2,%xmm1
        pslld   $12,%xmm2
        psrld   $20,%xmm1
        por     %xmm1,%xmm2
        movdqa  -80(%ebx),%xmm1
        paddd   %xmm2,%xmm0
        movdqa  112(%ebx),%xmm7
        pxor    %xmm0,%xmm6
        movdqa  %xmm0,-96(%ebx)
        pshufb  16(%eax),%xmm6
        paddd   %xmm6,%xmm4
        movdqa  %xmm6,96(%ebx)
        pxor    %xmm4,%xmm2
        paddd   %xmm3,%xmm1
        movdqa  %xmm2,%xmm0
        pslld   $7,%xmm2
        psrld   $25,%xmm0
        pxor    %xmm1,%xmm7
        por     %xmm0,%xmm2
        pshufb  (%eax),%xmm7
        movdqa  %xmm2,-32(%ebx)
        paddd   %xmm7,%xmm5
        pxor    %xmm5,%xmm3
        // Row 5 is loaded here for the first diagonal quarter-round while the
        // last column quarter-round is still finishing.
        movdqa  -48(%ebx),%xmm2
        movdqa  %xmm3,%xmm0
        pslld   $12,%xmm3
        psrld   $20,%xmm0
        por     %xmm0,%xmm3
        // Diagonal quarter-rounds: rows (0,5,10,15), (1,6,11,12),
        // (2,7,8,13), (3,4,9,14).
        movdqa  -128(%ebx),%xmm0
        paddd   %xmm3,%xmm1
        pxor    %xmm1,%xmm7
        movdqa  %xmm1,-80(%ebx)
        pshufb  16(%eax),%xmm7
        paddd   %xmm7,%xmm5
        movdqa  %xmm7,%xmm6
        pxor    %xmm5,%xmm3
        paddd   %xmm2,%xmm0
        movdqa  %xmm3,%xmm1
        pslld   $7,%xmm3
        psrld   $25,%xmm1
        pxor    %xmm0,%xmm6
        por     %xmm1,%xmm3
        pshufb  (%eax),%xmm6
        movdqa  %xmm3,-16(%ebx)
        paddd   %xmm6,%xmm4
        pxor    %xmm4,%xmm2
        movdqa  -32(%ebx),%xmm3
        movdqa  %xmm2,%xmm1
        pslld   $12,%xmm2
        psrld   $20,%xmm1
        por     %xmm1,%xmm2
        movdqa  -112(%ebx),%xmm1
        paddd   %xmm2,%xmm0
        movdqa  64(%ebx),%xmm7
        pxor    %xmm0,%xmm6
        movdqa  %xmm0,-128(%ebx)
        pshufb  16(%eax),%xmm6
        paddd   %xmm6,%xmm4
        movdqa  %xmm6,112(%ebx)
        pxor    %xmm4,%xmm2
        paddd   %xmm3,%xmm1
        movdqa  %xmm2,%xmm0
        pslld   $7,%xmm2
        psrld   $25,%xmm0
        pxor    %xmm1,%xmm7
        por     %xmm0,%xmm2
        movdqa  %xmm4,32(%ebx)
        pshufb  (%eax),%xmm7
        movdqa  %xmm2,-48(%ebx)
        paddd   %xmm7,%xmm5
        movdqa  (%ebx),%xmm4
        pxor    %xmm5,%xmm3
        movdqa  -16(%ebx),%xmm2
        movdqa  %xmm3,%xmm0
        pslld   $12,%xmm3
        psrld   $20,%xmm0
        por     %xmm0,%xmm3
        movdqa  -96(%ebx),%xmm0
        paddd   %xmm3,%xmm1
        movdqa  80(%ebx),%xmm6
        pxor    %xmm1,%xmm7
        movdqa  %xmm1,-112(%ebx)
        pshufb  16(%eax),%xmm7
        paddd   %xmm7,%xmm5
        movdqa  %xmm7,64(%ebx)
        pxor    %xmm5,%xmm3
        paddd   %xmm2,%xmm0
        movdqa  %xmm3,%xmm1
        pslld   $7,%xmm3
        psrld   $25,%xmm1
        pxor    %xmm0,%xmm6
        por     %xmm1,%xmm3
        movdqa  %xmm5,48(%ebx)
        pshufb  (%eax),%xmm6
        movdqa  %xmm3,-32(%ebx)
        paddd   %xmm6,%xmm4
        movdqa  16(%ebx),%xmm5
        pxor    %xmm4,%xmm2
        movdqa  -64(%ebx),%xmm3
        movdqa  %xmm2,%xmm1
        pslld   $12,%xmm2
        psrld   $20,%xmm1
        por     %xmm1,%xmm2
        movdqa  -80(%ebx),%xmm1
        paddd   %xmm2,%xmm0
        movdqa  96(%ebx),%xmm7
        pxor    %xmm0,%xmm6
        movdqa  %xmm0,-96(%ebx)
        pshufb  16(%eax),%xmm6
        paddd   %xmm6,%xmm4
        movdqa  %xmm6,80(%ebx)
        pxor    %xmm4,%xmm2
        paddd   %xmm3,%xmm1
        movdqa  %xmm2,%xmm0
        pslld   $7,%xmm2
        psrld   $25,%xmm0
        pxor    %xmm1,%xmm7
        por     %xmm0,%xmm2
        pshufb  (%eax),%xmm7
        movdqa  %xmm2,-16(%ebx)
        paddd   %xmm7,%xmm5
        pxor    %xmm5,%xmm3
        movdqa  %xmm3,%xmm0
        pslld   $12,%xmm3
        psrld   $20,%xmm0
        por     %xmm0,%xmm3
        // Rows 0 and 12 are reloaded so they are in %xmm0 / %xmm6 at the top
        // of the next iteration.
        movdqa  -128(%ebx),%xmm0
        paddd   %xmm3,%xmm1
        movdqa  64(%ebx),%xmm6
        pxor    %xmm1,%xmm7
        movdqa  %xmm1,-80(%ebx)
        pshufb  16(%eax),%xmm7
        paddd   %xmm7,%xmm5
        movdqa  %xmm7,96(%ebx)
        pxor    %xmm5,%xmm3
        movdqa  %xmm3,%xmm1
        pslld   $7,%xmm3
        psrld   $25,%xmm1
        por     %xmm1,%xmm3
        decl    %edx
        jnz     .L008loop
        // Flush the rows that were still live in registers (4, 8, 9, 12, 14).
        movdqa  %xmm3,-64(%ebx)
        movdqa  %xmm4,(%ebx)
        movdqa  %xmm5,16(%ebx)
        movdqa  %xmm6,64(%ebx)
        movdqa  %xmm7,96(%ebx)
        // Output stage, four rows at a time: add the input state, transpose
        // the 4x4 dword matrix (punpck*) so each register holds 16 contiguous
        // keystream bytes of one block, XOR with the source and store.
        // Rows 0..3 -> bytes 0..15 of each of the four blocks.
        movdqa  -112(%ebx),%xmm1
        movdqa  -96(%ebx),%xmm2
        movdqa  -80(%ebx),%xmm3
        paddd   -128(%ebp),%xmm0
        paddd   -112(%ebp),%xmm1
        paddd   -96(%ebp),%xmm2
        paddd   -80(%ebp),%xmm3
        movdqa  %xmm0,%xmm6
        punpckldq %xmm1,%xmm0
        movdqa  %xmm2,%xmm7
        punpckldq %xmm3,%xmm2
        punpckhdq %xmm1,%xmm6
        punpckhdq %xmm3,%xmm7
        movdqa  %xmm0,%xmm1
        punpcklqdq %xmm2,%xmm0
        movdqa  %xmm6,%xmm3
        punpcklqdq %xmm7,%xmm6
        punpckhqdq %xmm2,%xmm1
        punpckhqdq %xmm7,%xmm3
        movdqu  -128(%esi),%xmm4
        movdqu  -64(%esi),%xmm5
        movdqu  (%esi),%xmm2
        movdqu  64(%esi),%xmm7
        leal    16(%esi),%esi
        pxor    %xmm0,%xmm4
        movdqa  -64(%ebx),%xmm0
        pxor    %xmm1,%xmm5
        movdqa  -48(%ebx),%xmm1
        pxor    %xmm2,%xmm6
        movdqa  -32(%ebx),%xmm2
        pxor    %xmm3,%xmm7
        movdqa  -16(%ebx),%xmm3
        movdqu  %xmm4,-128(%edi)
        movdqu  %xmm5,-64(%edi)
        movdqu  %xmm6,(%edi)
        movdqu  %xmm7,64(%edi)
        leal    16(%edi),%edi
        // Rows 4..7 -> bytes 16..31 of each block.
        paddd   -64(%ebp),%xmm0
        paddd   -48(%ebp),%xmm1
        paddd   -32(%ebp),%xmm2
        paddd   -16(%ebp),%xmm3
        movdqa  %xmm0,%xmm6
        punpckldq %xmm1,%xmm0
        movdqa  %xmm2,%xmm7
        punpckldq %xmm3,%xmm2
        punpckhdq %xmm1,%xmm6
        punpckhdq %xmm3,%xmm7
        movdqa  %xmm0,%xmm1
        punpcklqdq %xmm2,%xmm0
        movdqa  %xmm6,%xmm3
        punpcklqdq %xmm7,%xmm6
        punpckhqdq %xmm2,%xmm1
        punpckhqdq %xmm7,%xmm3
        movdqu  -128(%esi),%xmm4
        movdqu  -64(%esi),%xmm5
        movdqu  (%esi),%xmm2
        movdqu  64(%esi),%xmm7
        leal    16(%esi),%esi
        pxor    %xmm0,%xmm4
        movdqa  (%ebx),%xmm0
        pxor    %xmm1,%xmm5
        movdqa  16(%ebx),%xmm1
        pxor    %xmm2,%xmm6
        movdqa  32(%ebx),%xmm2
        pxor    %xmm3,%xmm7
        movdqa  48(%ebx),%xmm3
        movdqu  %xmm4,-128(%edi)
        movdqu  %xmm5,-64(%edi)
        movdqu  %xmm6,(%edi)
        movdqu  %xmm7,64(%edi)
        leal    16(%edi),%edi
        // Rows 8..11 -> bytes 32..47 of each block.
        paddd   (%ebp),%xmm0
        paddd   16(%ebp),%xmm1
        paddd   32(%ebp),%xmm2
        paddd   48(%ebp),%xmm3
        movdqa  %xmm0,%xmm6
        punpckldq %xmm1,%xmm0
        movdqa  %xmm2,%xmm7
        punpckldq %xmm3,%xmm2
        punpckhdq %xmm1,%xmm6
        punpckhdq %xmm3,%xmm7
        movdqa  %xmm0,%xmm1
        punpcklqdq %xmm2,%xmm0
        movdqa  %xmm6,%xmm3
        punpcklqdq %xmm7,%xmm6
        punpckhqdq %xmm2,%xmm1
        punpckhqdq %xmm7,%xmm3
        movdqu  -128(%esi),%xmm4
        movdqu  -64(%esi),%xmm5
        movdqu  (%esi),%xmm2
        movdqu  64(%esi),%xmm7
        leal    16(%esi),%esi
        pxor    %xmm0,%xmm4
        movdqa  64(%ebx),%xmm0
        pxor    %xmm1,%xmm5
        movdqa  80(%ebx),%xmm1
        pxor    %xmm2,%xmm6
        movdqa  96(%ebx),%xmm2
        pxor    %xmm3,%xmm7
        movdqa  112(%ebx),%xmm3
        movdqu  %xmm4,-128(%edi)
        movdqu  %xmm5,-64(%edi)
        movdqu  %xmm6,(%edi)
        movdqu  %xmm7,64(%edi)
        leal    16(%edi),%edi
        // Rows 12..15 -> bytes 48..63 of each block.
        paddd   64(%ebp),%xmm0
        paddd   80(%ebp),%xmm1
        paddd   96(%ebp),%xmm2
        paddd   112(%ebp),%xmm3
        movdqa  %xmm0,%xmm6
        punpckldq %xmm1,%xmm0
        movdqa  %xmm2,%xmm7
        punpckldq %xmm3,%xmm2
        punpckhdq %xmm1,%xmm6
        punpckhdq %xmm3,%xmm7
        movdqa  %xmm0,%xmm1
        punpcklqdq %xmm2,%xmm0
        movdqa  %xmm6,%xmm3
        punpcklqdq %xmm7,%xmm6
        punpckhqdq %xmm2,%xmm1
        punpckhqdq %xmm7,%xmm3
        movdqu  -128(%esi),%xmm4
        movdqu  -64(%esi),%xmm5
        movdqu  (%esi),%xmm2
        movdqu  64(%esi),%xmm7
        // 3*16 + 208 = 256: both pointers end up one full batch further on.
        leal    208(%esi),%esi
        pxor    %xmm0,%xmm4
        pxor    %xmm1,%xmm5
        pxor    %xmm2,%xmm6
        pxor    %xmm3,%xmm7
        movdqu  %xmm4,-128(%edi)
        movdqu  %xmm5,-64(%edi)
        movdqu  %xmm6,(%edi)
        movdqu  %xmm7,64(%edi)
        leal    208(%edi),%edi
        subl    $256,%ecx
        jnc     .L007outer_loop
        // Fewer than 256 bytes remain: undo the bias and stop if nothing is
        // left.
        addl    $256,%ecx
        jz      .L009done
        // Hand over to the one-block path: remove the +128 pointer bias,
        // reload the saved argument pointers, and rebuild state words 12..15
        // as (lane-0 counter + 4, original dwords 1..3).
        movl    520(%esp),%ebx
        leal    -128(%esi),%esi
        movl    516(%esp),%edx
        leal    -128(%edi),%edi
        movd    64(%ebp),%xmm2
        movdqu  (%ebx),%xmm3
        paddd   96(%eax),%xmm2
        pand    112(%eax),%xmm3
        por     %xmm2,%xmm3
.L0061x:
        // ---- One-block path ----
        // %xmm0..%xmm3 = the four state rows; %xmm6 / %xmm7 = the PSHUFB
        // masks for rotate-by-16 / rotate-by-8. The input state is kept at
        // 0..63(%esp).
        movdqa  32(%eax),%xmm0
        movdqu  (%edx),%xmm1
        movdqu  16(%edx),%xmm2
        movdqa  (%eax),%xmm6
        movdqa  16(%eax),%xmm7
        // This dword at 48(%esp) is overwritten by the movdqa to 48(%esp)
        // three instructions below.
        movl    %ebp,48(%esp)
        movdqa  %xmm0,(%esp)
        movdqa  %xmm1,16(%esp)
        movdqa  %xmm2,32(%esp)
        movdqa  %xmm3,48(%esp)
        movl    $10,%edx
        jmp     .L010loop1x
.align 16
.L011outer1x:
        // Next block: reload the input state and add {1,0,0,0} to row 3
        // (block counter + 1).
        movdqa  80(%eax),%xmm3
        movdqa  (%esp),%xmm0
        movdqa  16(%esp),%xmm1
        movdqa  32(%esp),%xmm2
        paddd   48(%esp),%xmm3
        movl    $10,%edx
        movdqa  %xmm3,48(%esp)
        jmp     .L010loop1x
.align 16
.L010loop1x:
        // Column round on whole rows.
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
        // pshufb %xmm6,%xmm3 (rotate each dword left by 16)
.byte 102,15,56,0,222
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $20,%xmm1
        pslld   $12,%xmm4
        por     %xmm4,%xmm1
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
        // pshufb %xmm7,%xmm3 (rotate each dword left by 8)
.byte 102,15,56,0,223
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $25,%xmm1
        pslld   $7,%xmm4
        por     %xmm4,%xmm1
        // Rotate rows 1..3 by one, two and three dwords so the diagonals line
        // up as columns.
        pshufd  $78,%xmm2,%xmm2
        pshufd  $57,%xmm1,%xmm1
        pshufd  $147,%xmm3,%xmm3
        nop
        // Diagonal round (same arithmetic on the rotated rows).
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
        // pshufb %xmm6,%xmm3 (rotate each dword left by 16)
.byte 102,15,56,0,222
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $20,%xmm1
        pslld   $12,%xmm4
        por     %xmm4,%xmm1
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
        // pshufb %xmm7,%xmm3 (rotate each dword left by 8)
.byte 102,15,56,0,223
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $25,%xmm1
        pslld   $7,%xmm4
        por     %xmm4,%xmm1
        // Undo the row rotation.
        pshufd  $78,%xmm2,%xmm2
        pshufd  $147,%xmm1,%xmm1
        pshufd  $57,%xmm3,%xmm3
        decl    %edx
        jnz     .L010loop1x
        // Add the input state to obtain the keystream block.
        paddd   (%esp),%xmm0
        paddd   16(%esp),%xmm1
        paddd   32(%esp),%xmm2
        paddd   48(%esp),%xmm3
        // Fewer than 64 bytes left: finish bytewise (unsigned compare).
        cmpl    $64,%ecx
        jb      .L012tail
        movdqu  (%esi),%xmm4
        movdqu  16(%esi),%xmm5
        pxor    %xmm4,%xmm0
        movdqu  32(%esi),%xmm4
        pxor    %xmm5,%xmm1
        movdqu  48(%esi),%xmm5
        pxor    %xmm4,%xmm2
        pxor    %xmm5,%xmm3
        leal    64(%esi),%esi
        movdqu  %xmm0,(%edi)
        movdqu  %xmm1,16(%edi)
        movdqu  %xmm2,32(%edi)
        movdqu  %xmm3,48(%edi)
        leal    64(%edi),%edi
        subl    $64,%ecx
        jnz     .L011outer1x
        jmp     .L009done
.L012tail:
        // Partial final block: spill the keystream to 0..63(%esp) and XOR it
        // into the output one byte at a time. %ebp = byte index, %ecx =
        // remaining byte count.
        movdqa  %xmm0,(%esp)
        movdqa  %xmm1,16(%esp)
        movdqa  %xmm2,32(%esp)
        movdqa  %xmm3,48(%esp)
        xorl    %eax,%eax
        xorl    %edx,%edx
        xorl    %ebp,%ebp
.L013tail_loop:
        movb    (%esp,%ebp,1),%al
        movb    (%esi,%ebp,1),%dl
        leal    1(%ebp),%ebp
        xorb    %dl,%al
        // %ebp was already incremented, hence the -1 displacement.
        movb    %al,-1(%edi,%ebp,1)
        decl    %ecx
        jnz     .L013tail_loop
.L009done:
        // Restore the %esp value saved before the frame was aligned.
        movl    512(%esp),%esp
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.size ChaCha20_ctr32_ssse3,.-.L_ChaCha20_ctr32_ssse3_begin

// Constants for ChaCha20_ctr32_ssse3, addressed relative to %eax:
//     0  PSHUFB mask: rotate each dword left by 16
//    16  PSHUFB mask: rotate each dword left by 8
//    32  state words 0..3 (the bytes "expand 32-byte k" as little-endian dwords)
//    48  {0,1,2,3}     per-lane counter offsets
//    64  {4,4,4,4}     per-batch counter increment (four-block path)
//    80  {1,0,0,0}     per-block counter increment (one-block path)
//    96  {4,0,0,0}     counter adjustment when leaving the four-block path
//   112  {0,-1,-1,-1}  mask that clears dword 0 and keeps dwords 1..3
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
// NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)