; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Syntax : NASM (Intel operand order: op dst,src).
; Target : 32-bit x86; only 32-bit general registers and xmm0-xmm7 are used.
; Calling: both entry points read five dword arguments from the stack, save
;          and restore ebp/ebx/esi/edi, and return with a plain `ret`.
;
; Argument roles as used by the code (argument index 0 is the first dword
; above the return address):
;   arg0  destination pointer (bytes are written here)
;   arg1  source pointer (bytes are read and XORed with the keystream)
;   arg2  byte count
;   arg3  pointer to 8 dwords that fill state words 4..11
;   arg4  pointer to 4 dwords that fill state words 12..15; the first dword
;         is advanced by one for every 64-byte block
;
; NOTE(review): neither entry point checks for a zero byte count. With a
; count of 0 the byte-wise tail loops would be entered with a zero counter
; and `dec`/`jnz` would wrap. Presumably the caller guarantees count > 0 -
; confirm against the C-side wrapper.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__, win32
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section	.text	code align=64
%else
section	.text	code
%endif

;-----------------------------------------------------------------------
; _ChaCha20_ctr32_nohw - scalar (integer-register only) implementation.
;
; Stack frame after the four pushes and `sub esp,132`:
;   [  0.. 60+esp]  working state words x0..x15 (4 bytes each). Inside the
;                   round loop several words live only in registers.
;   [ 80..108+esp]  copy of the 8 dwords from arg3
;   [112..124+esp]  copy of the 4 dwords from arg4; [112+esp] is the
;                   per-block counter
;   [128+esp]       spill slot for the round-loop counter
;   [152+esp]       arg0 (destination), advanced by 64 per block
;   [156+esp]       arg1 (source), advanced by 64 per block
;   [160+esp]       arg2 (bytes remaining)
;-----------------------------------------------------------------------
global	_ChaCha20_ctr32_nohw
align	16
_ChaCha20_ctr32_nohw:
L$_ChaCha20_ctr32_nohw_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	esi,DWORD [32+esp]
	mov	edi,DWORD [36+esp]
	sub	esp,132
	; Copy the 8 dwords at arg3 into the frame.
	mov	eax,DWORD [esi]
	mov	ebx,DWORD [4+esi]
	mov	ecx,DWORD [8+esi]
	mov	edx,DWORD [12+esi]
	mov	DWORD [80+esp],eax
	mov	DWORD [84+esp],ebx
	mov	DWORD [88+esp],ecx
	mov	DWORD [92+esp],edx
	mov	eax,DWORD [16+esi]
	mov	ebx,DWORD [20+esi]
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [28+esi]
	mov	DWORD [96+esp],eax
	mov	DWORD [100+esp],ebx
	mov	DWORD [104+esp],ecx
	mov	DWORD [108+esp],edx
	; Copy the 4 dwords at arg4. The first one is stored minus one because
	; L$000entry adds one before every block, including the first.
	mov	eax,DWORD [edi]
	mov	ebx,DWORD [4+edi]
	mov	ecx,DWORD [8+edi]
	mov	edx,DWORD [12+edi]
	sub	eax,1
	mov	DWORD [112+esp],eax
	mov	DWORD [116+esp],ebx
	mov	DWORD [120+esp],ecx
	mov	DWORD [124+esp],edx
	jmp	NEAR L$000entry
align	16
L$001outer_loop:
	; Reached after a full 64-byte block: write the advanced source and
	; destination pointers and the remaining count back to the arg slots.
	mov	DWORD [156+esp],ebx
	mov	DWORD [152+esp],eax
	mov	DWORD [160+esp],ecx
L$000entry:
	; Build the initial state for this block. x0..x3 are the four
	; immediates below (the bytes of the ASCII string "expand 32-byte k").
	; x0, x4, x8, x9 and x12 start out in eax, ebp, ecx, esi and edx.
	mov	eax,1634760805
	mov	DWORD [4+esp],857760878
	mov	DWORD [8+esp],2036477234
	mov	DWORD [12+esp],1797285236
	mov	ebx,DWORD [84+esp]
	mov	ebp,DWORD [88+esp]
	mov	ecx,DWORD [104+esp]
	mov	esi,DWORD [108+esp]
	mov	edx,DWORD [116+esp]
	mov	edi,DWORD [120+esp]
	mov	DWORD [20+esp],ebx
	mov	DWORD [24+esp],ebp
	mov	DWORD [40+esp],ecx
	mov	DWORD [44+esp],esi
	mov	DWORD [52+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebx,DWORD [92+esp]
	mov	edi,DWORD [124+esp]
	mov	edx,DWORD [112+esp]
	mov	ebp,DWORD [80+esp]
	mov	ecx,DWORD [96+esp]
	mov	esi,DWORD [100+esp]
	add	edx,1
	mov	DWORD [28+esp],ebx
	mov	DWORD [60+esp],edi
	mov	DWORD [112+esp],edx
	mov	ebx,10
	jmp	NEAR L$002loop
align	16
L$002loop:
	; One iteration = eight quarter-rounds (add / xor / rotate by 16, 12, 8
	; and 7), first on the state columns and then on the diagonals, with the
	; loads and stores for the next quarter-round interleaved. ebx carries
	; the iteration counter (10 iterations) and is spilled to [128+esp]
	; because ebx is also needed as a working register.
	add	eax,ebp
	mov	DWORD [128+esp],ebx
	mov	ebx,ebp
	xor	edx,eax
	rol	edx,16
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [52+esp]
	rol	ebx,12
	mov	ebp,DWORD [20+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [48+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [32+esp],ecx
	rol	edi,16
	mov	DWORD [16+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [40+esp]
	xor	ebp,esi
	mov	edx,DWORD [56+esp]
	rol	ebp,12
	mov	ebx,DWORD [24+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [52+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [36+esp],esi
	rol	edx,16
	mov	DWORD [20+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [44+esp]
	xor	ebx,ecx
	mov	edi,DWORD [60+esp]
	rol	ebx,12
	mov	ebp,DWORD [28+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [56+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [24+esp],ebx
	add	esi,edi
	xor	ebp,esi
	rol	ebp,12
	mov	ebx,DWORD [20+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	edx,edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	rol	edx,16
	mov	DWORD [28+esp],ebp
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [48+esp]
	rol	ebx,12
	mov	ebp,DWORD [24+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [60+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [40+esp],ecx
	rol	edi,16
	mov	DWORD [20+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [32+esp]
	xor	ebp,esi
	mov	edx,DWORD [52+esp]
	rol	ebp,12
	mov	ebx,DWORD [28+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [48+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [44+esp],esi
	rol	edx,16
	mov	DWORD [24+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [36+esp]
	xor	ebx,ecx
	mov	edi,DWORD [56+esp]
	rol	ebx,12
	mov	ebp,DWORD [16+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [52+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [28+esp],ebx
	add	esi,edi
	xor	ebp,esi
	mov	edx,DWORD [48+esp]
	rol	ebp,12
	mov	ebx,DWORD [128+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	DWORD [56+esp],edi
	xor	ebp,esi
	rol	ebp,7
	dec	ebx
	jnz	NEAR L$002loop
	; Rounds finished. Registers hold x0 (eax), x4 (ebp), x8 (ecx),
	; x9 (esi), x12 (edx) and x14 (edi); add the initial state back in.
	mov	ebx,DWORD [160+esp]
	add	eax,1634760805
	add	ebp,DWORD [80+esp]
	add	ecx,DWORD [96+esp]
	add	esi,DWORD [100+esp]
	cmp	ebx,64
	jb	NEAR L$003tail
	; Full block: XOR the keystream with 64 source bytes, five or six words
	; at a time, and store to the destination. ebx = source, eax = dest.
	mov	ebx,DWORD [156+esp]
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	xor	eax,DWORD [ebx]
	xor	ebp,DWORD [16+ebx]
	mov	DWORD [esp],eax
	mov	eax,DWORD [152+esp]
	xor	ecx,DWORD [32+ebx]
	xor	esi,DWORD [36+ebx]
	xor	edx,DWORD [48+ebx]
	xor	edi,DWORD [56+ebx]
	mov	DWORD [16+eax],ebp
	mov	DWORD [32+eax],ecx
	mov	DWORD [36+eax],esi
	mov	DWORD [48+eax],edx
	mov	DWORD [56+eax],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	xor	ebp,DWORD [4+ebx]
	xor	ecx,DWORD [8+ebx]
	xor	esi,DWORD [12+ebx]
	xor	edx,DWORD [20+ebx]
	xor	edi,DWORD [24+ebx]
	mov	DWORD [4+eax],ebp
	mov	DWORD [8+eax],ecx
	mov	DWORD [12+eax],esi
	mov	DWORD [20+eax],edx
	mov	DWORD [24+eax],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	xor	ebp,DWORD [28+ebx]
	xor	ecx,DWORD [40+ebx]
	xor	esi,DWORD [44+ebx]
	xor	edx,DWORD [52+ebx]
	xor	edi,DWORD [60+ebx]
	lea	ebx,[64+ebx]
	mov	DWORD [28+eax],ebp
	mov	ebp,DWORD [esp]
	mov	DWORD [40+eax],ecx
	mov	ecx,DWORD [160+esp]
	mov	DWORD [44+eax],esi
	mov	DWORD [52+eax],edx
	mov	DWORD [60+eax],edi
	mov	DWORD [eax],ebp
	lea	eax,[64+eax]
	sub	ecx,64
	jnz	NEAR L$001outer_loop
	jmp	NEAR L$004done
L$003tail:
	; Fewer than 64 bytes remain (ebx = remaining count). Finish the
	; feed-forward additions and lay the whole 64-byte keystream block out
	; at [0..63+esp] so it can be consumed one byte at a time.
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	mov	DWORD [esp],eax
	mov	DWORD [16+esp],ebp
	mov	DWORD [32+esp],ecx
	mov	DWORD [36+esp],esi
	mov	DWORD [48+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	mov	DWORD [4+esp],ebp
	mov	DWORD [8+esp],ecx
	mov	DWORD [12+esp],esi
	mov	DWORD [20+esp],edx
	mov	DWORD [24+esp],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	mov	DWORD [28+esp],ebp
	mov	ebp,DWORD [156+esp]
	mov	DWORD [40+esp],ecx
	mov	ecx,DWORD [152+esp]
	mov	DWORD [44+esp],esi
	xor	esi,esi
	mov	DWORD [52+esp],edx
	mov	DWORD [60+esp],edi
	xor	eax,eax
	xor	edx,edx
L$005tail_loop:
	; esi = byte index, ebp = source, ecx = destination, ebx = bytes left.
	; The store uses [esi+ecx-1] because esi was already incremented.
	mov	al,BYTE [ebp*1+esi]
	mov	dl,BYTE [esi*1+esp]
	lea	esi,[1+esi]
	xor	al,dl
	mov	BYTE [esi*1+ecx-1],al
	dec	ebx
	jnz	NEAR L$005tail_loop
L$004done:
	add	esp,132
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

;-----------------------------------------------------------------------
; _ChaCha20_ctr32_ssse3 - SSE2/SSSE3 implementation (uses pshufb).
;
; Registers after the prologue:
;   edi = arg0 (destination)   esi = arg1 (source)   ecx = arg2 (count)
;   edx = arg3 (8 dwords)      ebx = arg4 (4 dwords)
;   eax = address of L$ssse3_data, obtained position-independently with
;         the call/pop pair at L$pic_point
;
; Stack frame (esp is aligned down to 64 bytes):
;   [512+esp]  caller's esp, restored at L$009done
;   [516+esp]  saved arg3 pointer (4-block path only)
;   [520+esp]  saved arg4 pointer (4-block path only)
;   4-block path: ebx = esp+128 addresses the working state and
;                 ebp = esp+384 the initial state, each as sixteen 16-byte
;                 slots at offsets -128..+112 (slot i holds state word i
;                 for four blocks, one block per 32-bit lane).
;   1-block path: [0..63+esp] holds the four initial state rows.
;
; Inputs of 256 bytes or more go through the 4-block loop; any remainder
; and all shorter inputs go through the 1-block loop at L$0061x.
;-----------------------------------------------------------------------
global	_ChaCha20_ctr32_ssse3
align	16
_ChaCha20_ctr32_ssse3:
L$_ChaCha20_ctr32_ssse3_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	call	L$pic_point
L$pic_point:
	pop	eax
	mov	edi,DWORD [20+esp]
	mov	esi,DWORD [24+esp]
	mov	ecx,DWORD [28+esp]
	mov	edx,DWORD [32+esp]
	mov	ebx,DWORD [36+esp]
	mov	ebp,esp
	sub	esp,524
	and	esp,-64
	mov	DWORD [512+esp],ebp
	lea	eax,[(L$ssse3_data-L$pic_point)+eax]
	movdqu	xmm3,[ebx]
	cmp	ecx,256
	jb	NEAR L$0061x
	; ---- 4-block path setup ----
	mov	DWORD [516+esp],edx
	mov	DWORD [520+esp],ebx
	sub	ecx,256
	lea	ebp,[384+esp]
	movdqu	xmm7,[edx]
	; Broadcast each of the four arg4 dwords across a register. The lane
	; counters become ctr+{0,1,2,3} minus 4, because the outer loop adds 4
	; before every group of four blocks, including the first.
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	paddd	xmm0,[48+eax]
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	psubd	xmm0,[64+eax]
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[64+ebp],xmm0
	movdqa	[80+ebp],xmm1
	movdqa	[96+ebp],xmm2
	movdqa	[112+ebp],xmm3
	movdqu	xmm3,[16+edx]
	movdqa	[ebp-64],xmm4
	movdqa	[ebp-48],xmm5
	movdqa	[ebp-32],xmm6
	movdqa	[ebp-16],xmm7
	movdqa	xmm7,[32+eax]
	lea	ebx,[128+esp]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[ebp],xmm0
	movdqa	[16+ebp],xmm1
	movdqa	[32+ebp],xmm2
	movdqa	[48+ebp],xmm3
	movdqa	[ebp-128],xmm4
	movdqa	[ebp-112],xmm5
	movdqa	[ebp-96],xmm6
	movdqa	[ebp-80],xmm7
	; Bias both pointers by 128 so the output stage can reach all four
	; 64-byte blocks with displacements -128, -64, 0 and +64.
	lea	esi,[128+esi]
	lea	edi,[128+edi]
	jmp	NEAR L$007outer_loop
align	16
L$007outer_loop:
	; Copy the initial state (ebp) into the working state (ebx), advancing
	; the lane counters by 4. Words 0, 4, 8, 9 and 12 are kept in
	; xmm0, xmm3, xmm4, xmm5 and xmm6 instead of being stored.
	movdqa	xmm1,[ebp-112]
	movdqa	xmm2,[ebp-96]
	movdqa	xmm3,[ebp-80]
	movdqa	xmm5,[ebp-48]
	movdqa	xmm6,[ebp-32]
	movdqa	xmm7,[ebp-16]
	movdqa	[ebx-112],xmm1
	movdqa	[ebx-96],xmm2
	movdqa	[ebx-80],xmm3
	movdqa	[ebx-48],xmm5
	movdqa	[ebx-32],xmm6
	movdqa	[ebx-16],xmm7
	movdqa	xmm2,[32+ebp]
	movdqa	xmm3,[48+ebp]
	movdqa	xmm4,[64+ebp]
	movdqa	xmm5,[80+ebp]
	movdqa	xmm6,[96+ebp]
	movdqa	xmm7,[112+ebp]
	paddd	xmm4,[64+eax]
	movdqa	[32+ebx],xmm2
	movdqa	[48+ebx],xmm3
	movdqa	[64+ebx],xmm4
	movdqa	[80+ebx],xmm5
	movdqa	[96+ebx],xmm6
	movdqa	[112+ebx],xmm7
	movdqa	[64+ebp],xmm4
	movdqa	xmm0,[ebp-128]
	movdqa	xmm6,xmm4
	movdqa	xmm3,[ebp-64]
	movdqa	xmm4,[ebp]
	movdqa	xmm5,[16+ebp]
	mov	edx,10
	nop
align	16
L$008loop:
	; One iteration = eight quarter-rounds on four blocks at once.
	; Rotations by 16 and 8 use pshufb with the masks at [eax] and
	; [16+eax]; rotations by 12 and 7 use a pslld/psrld/por triple.
	paddd	xmm0,xmm3
	movdqa	xmm2,xmm3
	pxor	xmm6,xmm0
	pshufb	xmm6,[eax]
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-48]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[80+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[64+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-64],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[32+ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-32]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[96+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[80+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[16+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-48],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[48+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-16]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[112+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[96+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-32],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-48]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	xmm6,xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	pshufb	xmm6,[eax]
	movdqa	[ebx-16],xmm3
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-32]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[64+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[112+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[32+ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-48],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-16]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[80+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[64+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[48+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-32],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[16+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-64]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[96+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[80+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-16],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	movdqa	xmm6,[64+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[96+ebx],xmm7
	pxor	xmm3,xmm5
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	por	xmm3,xmm1
	dec	edx
	jnz	NEAR L$008loop
	; Flush the state words that were still only in registers.
	movdqa	[ebx-64],xmm3
	movdqa	[ebx],xmm4
	movdqa	[16+ebx],xmm5
	movdqa	[64+ebx],xmm6
	movdqa	[96+ebx],xmm7
	; Output stage, repeated four times (state words 0-3, 4-7, 8-11,
	; 12-15): add the initial state, transpose the 4x4 dword matrix with
	; punpck* so each register holds 16 consecutive bytes of one block,
	; XOR with the source and store. esi/edi step by 16 between groups.
	movdqa	xmm1,[ebx-112]
	movdqa	xmm2,[ebx-96]
	movdqa	xmm3,[ebx-80]
	paddd	xmm0,[ebp-128]
	paddd	xmm1,[ebp-112]
	paddd	xmm2,[ebp-96]
	paddd	xmm3,[ebp-80]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx-64]
	pxor	xmm5,xmm1
	movdqa	xmm1,[ebx-48]
	pxor	xmm6,xmm2
	movdqa	xmm2,[ebx-32]
	pxor	xmm7,xmm3
	movdqa	xmm3,[ebx-16]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp-64]
	paddd	xmm1,[ebp-48]
	paddd	xmm2,[ebp-32]
	paddd	xmm3,[ebp-16]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[16+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[32+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[48+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp]
	paddd	xmm1,[16+ebp]
	paddd	xmm2,[32+ebp]
	paddd	xmm3,[48+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[64+ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[80+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[96+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[112+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[64+ebp]
	paddd	xmm1,[80+ebp]
	paddd	xmm2,[96+ebp]
	paddd	xmm3,[112+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	; 3*16 + 208 = 256: net pointer advance per outer iteration.
	lea	esi,[208+esi]
	pxor	xmm4,xmm0
	pxor	xmm5,xmm1
	pxor	xmm6,xmm2
	pxor	xmm7,xmm3
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[208+edi]
	sub	ecx,256
	jnc	NEAR L$007outer_loop
	; Fewer than 256 bytes remain. Undo the count and pointer biases and
	; rebuild a single-block counter row in xmm3: lane 0 is the last lane-0
	; counter plus 4, lanes 1..3 are reloaded from the original arg4 data.
	add	ecx,256
	jz	NEAR L$009done
	mov	ebx,DWORD [520+esp]
	lea	esi,[esi-128]
	mov	edx,DWORD [516+esp]
	lea	edi,[edi-128]
	movd	xmm2,DWORD [64+ebp]
	movdqu	xmm3,[ebx]
	paddd	xmm2,[96+eax]
	pand	xmm3,[112+eax]
	por	xmm3,xmm2
L$0061x:
	; ---- 1-block path ----
	; xmm0..xmm3 = state rows 0..3; xmm6/xmm7 = rotate-by-16 / rotate-by-8
	; byte-shuffle masks. The initial rows are kept at [0..63+esp].
	movdqa	xmm0,[32+eax]
	movdqu	xmm1,[edx]
	movdqu	xmm2,[16+edx]
	movdqa	xmm6,[eax]
	movdqa	xmm7,[16+eax]
	; NOTE(review): this dword store is overwritten by the 16-byte store to
	; [48+esp] four instructions below, so it appears to be dead.
	mov	DWORD [48+esp],ebp
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	mov	edx,10
	jmp	NEAR L$010loop1x
align	16
L$011outer1x:
	; Next block: reload rows 0..2 and add {1,0,0,0} to the counter row.
	movdqa	xmm3,[80+eax]
	movdqa	xmm0,[esp]
	movdqa	xmm1,[16+esp]
	movdqa	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	mov	edx,10
	movdqa	[48+esp],xmm3
	jmp	NEAR L$010loop1x
align	16
L$010loop1x:
	; Column round on whole rows, then pshufd rotates rows 1..3 so the
	; diagonals line up as columns, a second round, and pshufd rotates the
	; rows back. The db sequences are register-register pshufb encodings.
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222		; pshufb xmm3,xmm6
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223		; pshufb xmm3,xmm7
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222		; pshufb xmm3,xmm6
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223		; pshufb xmm3,xmm7
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	edx
	jnz	NEAR L$010loop1x
	; Add the initial rows back in to form the keystream block.
	paddd	xmm0,[esp]
	paddd	xmm1,[16+esp]
	paddd	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	cmp	ecx,64
	jb	NEAR L$012tail
	movdqu	xmm4,[esi]
	movdqu	xmm5,[16+esi]
	pxor	xmm0,xmm4
	movdqu	xmm4,[32+esi]
	pxor	xmm1,xmm5
	movdqu	xmm5,[48+esi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5
	lea	esi,[64+esi]
	movdqu	[edi],xmm0
	movdqu	[16+edi],xmm1
	movdqu	[32+edi],xmm2
	movdqu	[48+edi],xmm3
	lea	edi,[64+edi]
	sub	ecx,64
	jnz	NEAR L$011outer1x
	jmp	NEAR L$009done
L$012tail:
	; Fewer than 64 bytes remain: spill the keystream block to [0..63+esp]
	; and XOR it into the output one byte at a time (ecx = bytes left,
	; ebp = byte index).
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	xor	eax,eax
	xor	edx,edx
	xor	ebp,ebp
L$013tail_loop:
	mov	al,BYTE [ebp*1+esp]
	mov	dl,BYTE [ebp*1+esi]
	lea	ebp,[1+ebp]
	xor	al,dl
	mov	BYTE [ebp*1+edi-1],al
	dec	ecx
	jnz	NEAR L$013tail_loop
L$009done:
	mov	esp,DWORD [512+esp]
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

; Constant table addressed through eax in _ChaCha20_ctr32_ssse3.
;   +0    pshufb mask: rotate each dword left by 16
;   +16   pshufb mask: rotate each dword left by 8
;   +32   state row 0 (the same four constants the scalar code uses)
;   +48   {0,1,2,3}     per-lane counter offsets for the 4-block path
;   +64   {4,4,4,4}     counter step per 4-block iteration
;   +80   {1,0,0,0}     counter step per single block
;   +96   {4,0,0,0}     counter fix-up when leaving the 4-block path
;   +112  {0,-1,-1,-1}  mask that keeps lanes 1..3 and clears lane 0
align	64
L$ssse3_data:
db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
dd	1634760805,857760878,2036477234,1797285236
dd	0,1,2,3
dd	4,4,4,4
dd	1,0,0,0
dd	4,0,0,0
dd	0,-1,-1,-1
align	64
; ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db	114,103,62,0
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif