; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Syntax: NASM, Intel operand order. Target: x86-64, Win64 object format.
; For any other output format only the one-instruction stub in the else
; branch at the bottom of the file is assembled.
%ifidn __OUTPUT_FORMAT__, win64
default rel
; The size keywords and the CET marker are defined empty, so they expand to
; nothing wherever they appear below.
%define XMMWORD
%define YMMWORD
%define ZMMWORD
%define _CET_ENDBR

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section .text code align=64

;-----------------------------------------------------------------------
; bn_mul_mont_nohw
; One-word-per-iteration multiply-and-reduce loop. (The banner bytes after
; the last routine spell "Montgomery Multiplication for x86_64".)
; ABI:  Microsoft x64 on entry. The prologue stores rdi/rsi in the caller's
;       home slots and moves the six arguments into
;       rdi, rsi, rdx, rcx, r8, r9 for the body.
; Body register roles, as used below:
;   rdi  result pointer (written by $L$sub and $L$copy)
;   rsi  first operand, read word by word
;   r12  second operand (copy of rdx); one word per outer pass goes to rbx
;   rcx  vector multiplied by rbp in every pass and subtracted at the end
;        (presumably the modulus - confirm against the Perl source)
;   r8   qword loaded through the 5th argument; rbp = r8 * low word
;        (presumably the n0 constant - confirm)
;   r9   word count, zero-extended from its low 32 bits
;   rsp  scratch vector of r9+1 qwords; [8+r9*8+rsp] holds the entry rsp
; Out:  rax = 1. rbx, rbp, r12-r15, rdi and rsi are restored.
;-----------------------------------------------------------------------
global bn_mul_mont_nohw

ALIGN 16
bn_mul_mont_nohw:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_bn_mul_mont_nohw:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

_CET_ENDBR
    mov r9d,r9d                     ; zero-extend the 32-bit count into r9
    mov rax,rsp                     ; rax = entry rsp, saved in the frame below
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

    ; r10 = rsp - 8*r9 - 16, rounded down to a multiple of 1024.
    neg r9
    mov r11,rsp
    lea r10,[((-16))+r9*8+rsp]
    neg r9
    and r10,-1024

    ; Move rsp down to r10 in steps of at most 4096 bytes, reading one
    ; qword at each step so every page in between is touched in order.
    sub r11,r10
    and r11,-4096
    lea rsp,[r11*1+r10]
    mov r11,QWORD[rsp]
    cmp rsp,r10
    ja NEAR $L$mul_page_walk
    jmp NEAR $L$mul_page_walk_done

ALIGN 16
$L$mul_page_walk:
    lea rsp,[((-4096))+rsp]
    mov r11,QWORD[rsp]
    cmp rsp,r10
    ja NEAR $L$mul_page_walk
$L$mul_page_walk_done:

    mov QWORD[8+r9*8+rsp],rax       ; remember the entry rsp above the scratch vector

$L$mul_body:
    mov r12,rdx
    mov r8,QWORD[r8]
    mov rbx,QWORD[r12]
    mov rax,QWORD[rsi]

    xor r14,r14                     ; r14 = outer index
    xor r15,r15                     ; r15 = inner index

    mov rbp,r8
    mul rbx
    mov r10,rax
    mov rax,QWORD[rcx]

    imul rbp,r10                    ; rbp = r8 * low product word (mod 2^64)
    mov r11,rdx

    mul rbp
    add r10,rax
    mov rax,QWORD[8+rsi]
    adc rdx,0
    mov r13,rdx

    lea r15,[1+r15]
    jmp NEAR $L$1st_enter

    ; First pass: each iteration adds one rbx*[rsi] product and one
    ; rbp*[rcx] product and stores the sum one slot behind the index.
ALIGN 16
$L$1st:
    add r13,rax
    mov rax,QWORD[r15*8+rsi]
    adc rdx,0
    add r13,r11
    mov r11,r10
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],r13
    mov r13,rdx

$L$1st_enter:
    mul rbx
    add r11,rax
    mov rax,QWORD[r15*8+rcx]
    adc rdx,0
    lea r15,[1+r15]
    mov r10,rdx

    mul rbp
    cmp r15,r9
    jne NEAR $L$1st

    add r13,rax
    mov rax,QWORD[rsi]
    adc rdx,0
    add r13,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],r13
    mov r13,rdx
    mov r11,r10

    xor rdx,rdx
    add r13,r11
    adc rdx,0
    mov QWORD[((-8))+r9*8+rsp],r13
    mov QWORD[r9*8+rsp],rdx         ; top carry word of the scratch vector

    lea r14,[1+r14]
    jmp NEAR $L$outer

    ; Remaining passes: same shape as the first pass, but the previous
    ; contents of the scratch vector are added in as well.
ALIGN 16
$L$outer:
    mov rbx,QWORD[r14*8+r12]
    xor r15,r15
    mov rbp,r8
    mov r10,QWORD[rsp]
    mul rbx
    add r10,rax
    mov rax,QWORD[rcx]
    adc rdx,0

    imul rbp,r10
    mov r11,rdx

    mul rbp
    add r10,rax
    mov rax,QWORD[8+rsi]
    adc rdx,0
    mov r10,QWORD[8+rsp]
    mov r13,rdx

    lea r15,[1+r15]
    jmp NEAR $L$inner_enter

ALIGN 16
$L$inner:
    add r13,rax
    mov rax,QWORD[r15*8+rsi]
    adc rdx,0
    add r13,r10
    mov r10,QWORD[r15*8+rsp]
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],r13
    mov r13,rdx

$L$inner_enter:
    mul rbx
    add r11,rax
    mov rax,QWORD[r15*8+rcx]
    adc rdx,0
    add r10,r11
    mov r11,rdx
    adc r11,0
    lea r15,[1+r15]

    mul rbp
    cmp r15,r9
    jne NEAR $L$inner

    add r13,rax
    mov rax,QWORD[rsi]
    adc rdx,0
    add r13,r10
    mov r10,QWORD[r15*8+rsp]
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],r13
    mov r13,rdx

    xor rdx,rdx
    add r13,r11
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-8))+r9*8+rsp],r13
    mov QWORD[r9*8+rsp],rdx

    lea r14,[1+r14]
    cmp r14,r9
    jb NEAR $L$outer

    ; [rdi] = scratch - [rcx], word by word. The xor clears CF for the
    ; first sbb; lea, mov and dec leave CF alone, so the borrow carries
    ; from one iteration to the next.
    xor r14,r14
    mov rax,QWORD[rsp]
    mov r15,r9

ALIGN 16
$L$sub: sbb rax,QWORD[r14*8+rcx]
    mov QWORD[r14*8+rdi],rax
    mov rax,QWORD[8+r14*8+rsp]
    lea r14,[1+r14]
    dec r15
    jnz NEAR $L$sub

    ; rax = top scratch word minus the final borrow, used as a select mask
    ; (expected to be 0 or all-ones - confirm); rbx = its complement.
    sbb rax,0
    mov rbx,-1
    xor rbx,rax
    xor r14,r14
    mov r15,r9

    ; Branch-free select: [rdi] = ([rdi] & rbx) | (scratch & rax).
    ; Each scratch word is overwritten with r9 after it has been read.
$L$copy:
    mov rcx,QWORD[r14*8+rdi]
    mov rdx,QWORD[r14*8+rsp]
    and rcx,rbx
    and rdx,rax
    mov QWORD[r14*8+rsp],r9
    or rdx,rcx
    mov QWORD[r14*8+rdi],rdx
    lea r14,[1+r14]
    sub r15,1
    jnz NEAR $L$copy

    mov rsi,QWORD[8+r9*8+rsp]       ; rsi = entry rsp saved before $L$mul_body

    mov rax,1
    mov r15,QWORD[((-48))+rsi]      ; reload the six pushed registers
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbp,QWORD[((-16))+rsi]
    mov rbx,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$mul_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    ret

$L$SEH_end_bn_mul_mont_nohw:

;-----------------------------------------------------------------------
; bn_mul4x_mont
; Same argument shuffle and register roles as bn_mul_mont_nohw, but the
; loops handle four words per iteration and rdi doubles as an accumulator.
; Frame: scratch vector at [rsp]; [8+r9*8+rsp] = entry rsp;
;        [16+r9*8+rsp] = result pointer (saved because rdi is reused).
; NOTE(review): $L$sub4x runs (r9-4)/4 iterations in a dec/jnz loop, so the
; routine appears to need r9 to be a multiple of 4 and at least 8 -
; confirm that callers guarantee this.
; Out:  rax = 1. rbx, rbp, r12-r15, rdi and rsi are restored.
;-----------------------------------------------------------------------
global bn_mul4x_mont

ALIGN 16
bn_mul4x_mont:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_bn_mul4x_mont:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

_CET_ENDBR
    mov r9d,r9d                     ; zero-extend the 32-bit count into r9
    mov rax,rsp                     ; rax = entry rsp
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

    ; r10 = rsp - 8*r9 - 32, rounded down to a multiple of 1024.
    neg r9
    mov r11,rsp
    lea r10,[((-32))+r9*8+rsp]
    neg r9
    and r10,-1024

    ; Step rsp down to r10 at most 4096 bytes at a time, reading each step.
    sub r11,r10
    and r11,-4096
    lea rsp,[r11*1+r10]
    mov r11,QWORD[rsp]
    cmp rsp,r10
    ja NEAR $L$mul4x_page_walk
    jmp NEAR $L$mul4x_page_walk_done

$L$mul4x_page_walk:
    lea rsp,[((-4096))+rsp]
    mov r11,QWORD[rsp]
    cmp rsp,r10
    ja NEAR $L$mul4x_page_walk
$L$mul4x_page_walk_done:

    mov QWORD[8+r9*8+rsp],rax       ; save the entry rsp

$L$mul4x_body:
    mov QWORD[16+r9*8+rsp],rdi      ; save the result pointer; rdi is reused below
    mov r12,rdx
    mov r8,QWORD[r8]
    mov rbx,QWORD[r12]
    mov rax,QWORD[rsi]

    xor r14,r14                     ; r14 = outer index
    xor r15,r15                     ; r15 = inner index

    mov rbp,r8
    mul rbx
    mov r10,rax
    mov rax,QWORD[rcx]

    imul rbp,r10                    ; rbp = r8 * low product word (mod 2^64)
    mov r11,rdx

    mul rbp
    add r10,rax
    mov rax,QWORD[8+rsi]
    adc rdx,0
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[8+rcx]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[16+rsi]
    adc rdx,0
    add rdi,r11
    lea r15,[4+r15]
    adc rdx,0
    mov QWORD[rsp],rdi
    mov r13,rdx
    jmp NEAR $L$1st4x

    ; First pass, four words per iteration.
ALIGN 16
$L$1st4x:
    mul rbx
    add r10,rax
    mov rax,QWORD[((-16))+r15*8+rcx]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[((-8))+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-24))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[((-8))+r15*8+rcx]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[r15*8+rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],rdi
    mov r13,rdx

    mul rbx
    add r10,rax
    mov rax,QWORD[r15*8+rcx]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[8+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-8))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[8+r15*8+rcx]
    adc rdx,0
    lea r15,[4+r15]
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[((-16))+r15*8+rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-32))+r15*8+rsp],rdi
    mov r13,rdx
    cmp r15,r9
    jb NEAR $L$1st4x

    ; Tail of the first pass: last two words plus the top carry word.
    mul rbx
    add r10,rax
    mov rax,QWORD[((-16))+r15*8+rcx]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[((-8))+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-24))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[((-8))+r15*8+rcx]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],rdi
    mov r13,rdx

    xor rdi,rdi
    add r13,r10
    adc rdi,0
    mov QWORD[((-8))+r15*8+rsp],r13
    mov QWORD[r15*8+rsp],rdi

    lea r14,[1+r14]

    ; Remaining passes: as above, with the scratch vector added in.
ALIGN 4
$L$outer4x:
    mov rbx,QWORD[r14*8+r12]
    xor r15,r15
    mov r10,QWORD[rsp]
    mov rbp,r8
    mul rbx
    add r10,rax
    mov rax,QWORD[rcx]
    adc rdx,0

    imul rbp,r10
    mov r11,rdx

    mul rbp
    add r10,rax
    mov rax,QWORD[8+rsi]
    adc rdx,0
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[8+rcx]
    adc rdx,0
    add r11,QWORD[8+rsp]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[16+rsi]
    adc rdx,0
    add rdi,r11
    lea r15,[4+r15]
    adc rdx,0
    mov QWORD[rsp],rdi
    mov r13,rdx
    jmp NEAR $L$inner4x

ALIGN 16
$L$inner4x:
    mul rbx
    add r10,rax
    mov rax,QWORD[((-16))+r15*8+rcx]
    adc rdx,0
    add r10,QWORD[((-16))+r15*8+rsp]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[((-8))+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-24))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[((-8))+r15*8+rcx]
    adc rdx,0
    add r11,QWORD[((-8))+r15*8+rsp]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[r15*8+rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],rdi
    mov r13,rdx

    mul rbx
    add r10,rax
    mov rax,QWORD[r15*8+rcx]
    adc rdx,0
    add r10,QWORD[r15*8+rsp]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[8+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-8))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[8+r15*8+rcx]
    adc rdx,0
    add r11,QWORD[8+r15*8+rsp]
    adc rdx,0
    lea r15,[4+r15]
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[((-16))+r15*8+rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-32))+r15*8+rsp],rdi
    mov r13,rdx
    cmp r15,r9
    jb NEAR $L$inner4x

    ; Tail of an outer pass.
    mul rbx
    add r10,rax
    mov rax,QWORD[((-16))+r15*8+rcx]
    adc rdx,0
    add r10,QWORD[((-16))+r15*8+rsp]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[((-8))+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-24))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[((-8))+r15*8+rcx]
    adc rdx,0
    add r11,QWORD[((-8))+r15*8+rsp]
    adc rdx,0
    lea r14,[1+r14]                 ; advance the outer index
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],rdi
    mov r13,rdx

    xor rdi,rdi
    add r13,r10
    adc rdi,0
    add r13,QWORD[r9*8+rsp]         ; fold in the previous top carry word
    adc rdi,0
    mov QWORD[((-8))+r15*8+rsp],r13
    mov QWORD[r15*8+rsp],rdi

    cmp r14,r9
    jb NEAR $L$outer4x

    ; [rdi] = scratch - [rcx], four words per iteration. The borrow lives
    ; in CF across the loop; mov, lea and dec do not modify CF.
    mov rdi,QWORD[16+r9*8+rsp]      ; reload the result pointer
    lea r15,[((-4))+r9]
    mov rax,QWORD[rsp]
    mov rdx,QWORD[8+rsp]
    shr r15,2                       ; r15 = (r9 - 4) / 4 loop iterations
    lea rsi,[rsp]
    xor r14,r14

    sub rax,QWORD[rcx]
    mov rbx,QWORD[16+rsi]
    mov rbp,QWORD[24+rsi]
    sbb rdx,QWORD[8+rcx]

$L$sub4x:
    mov QWORD[r14*8+rdi],rax
    mov QWORD[8+r14*8+rdi],rdx
    sbb rbx,QWORD[16+r14*8+rcx]
    mov rax,QWORD[32+r14*8+rsi]
    mov rdx,QWORD[40+r14*8+rsi]
    sbb rbp,QWORD[24+r14*8+rcx]
    mov QWORD[16+r14*8+rdi],rbx
    mov QWORD[24+r14*8+rdi],rbp
    sbb rax,QWORD[32+r14*8+rcx]
    mov rbx,QWORD[48+r14*8+rsi]
    mov rbp,QWORD[56+r14*8+rsi]
    sbb rdx,QWORD[40+r14*8+rcx]
    lea r14,[4+r14]
    dec r15
    jnz NEAR $L$sub4x

    mov QWORD[r14*8+rdi],rax
    mov rax,QWORD[32+r14*8+rsi]     ; rax = top carry word of the scratch vector
    sbb rbx,QWORD[16+r14*8+rcx]
    mov QWORD[8+r14*8+rdi],rdx
    sbb rbp,QWORD[24+r14*8+rcx]
    mov QWORD[16+r14*8+rdi],rbx

    sbb rax,0                       ; select mask (expected 0 or all-ones - confirm)
    mov QWORD[24+r14*8+rdi],rbp
    pxor xmm0,xmm0
DB 102,72,15,110,224                ; movq xmm4,rax
    pcmpeqd xmm5,xmm5
    pshufd xmm4,xmm4,0              ; xmm4 = low dword of the mask in all four lanes
    mov r15,r9
    pxor xmm5,xmm4                  ; xmm5 = complement of xmm4
    shr r15,2                       ; r15 = r9 / 4 iterations of 32 bytes
    xor eax,eax                     ; rax = byte offset

    jmp NEAR $L$copy4x

    ; Branch-free select: [rdi] = (scratch & xmm4) | ([rdi] & xmm5).
    ; The scratch vector is overwritten with zeros as it is read.
ALIGN 16
$L$copy4x:
    movdqa xmm1,XMMWORD[rax*1+rsp]
    movdqu xmm2,XMMWORD[rax*1+rdi]
    pand xmm1,xmm4
    pand xmm2,xmm5
    movdqa xmm3,XMMWORD[16+rax*1+rsp]
    movdqa XMMWORD[rax*1+rsp],xmm0
    por xmm1,xmm2
    movdqu xmm2,XMMWORD[16+rax*1+rdi]
    movdqu XMMWORD[rax*1+rdi],xmm1
    pand xmm3,xmm4
    pand xmm2,xmm5
    movdqa XMMWORD[16+rax*1+rsp],xmm0
    por xmm3,xmm2
    movdqu XMMWORD[16+rax*1+rdi],xmm3
    lea rax,[32+rax]
    dec r15
    jnz NEAR $L$copy4x
    mov rsi,QWORD[8+r9*8+rsp]       ; rsi = entry rsp saved before $L$mul4x_body

    mov rax,1
    mov r15,QWORD[((-48))+rsi]      ; reload the six pushed registers
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbp,QWORD[((-16))+rsi]
    mov rbx,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$mul4x_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    ret

$L$SEH_end_bn_mul4x_mont:
EXTERN bn_sqrx8x_internal
EXTERN bn_sqr8x_internal

;-----------------------------------------------------------------------
; bn_sqr8x_mont
; Wrapper that builds a stack frame, calls one of two external squaring
; kernels, then performs the final subtract and masked copy itself.
; After the argument shuffle:
;   rdi  result pointer          rsi  input operand
;   rdx  selector: non-zero calls bn_sqrx8x_internal, zero calls
;        bn_sqr8x_internal
;   rcx  pointer stashed in xmm2 for the kernel (presumably the modulus -
;        confirm)
;   r8   pointer; the qword it addresses is stored at [32+rsp]
;   r9   word count, zero-extended from its low 32 bits
; Frame: [32+rsp] = qword loaded through r8, [40+rsp] = entry rsp.
; The register state expected back from the kernels (rbx/rbp/rcx/r8/r9/rdi)
; is defined by those external routines and is not visible in this file.
; Out:  rax = 1. rbx, rbp, r12-r15, rdi and rsi are restored.
;-----------------------------------------------------------------------
global bn_sqr8x_mont

ALIGN 32
bn_sqr8x_mont:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_bn_sqr8x_mont:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

_CET_ENDBR
    mov r9d,r9d                     ; zero-extend the 32-bit count into r9
    mov rax,rsp                     ; rax = entry rsp
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

$L$sqr8x_prologue:

    mov r10d,r9d
    shl r9d,3                       ; r9 = count * 8 (bytes)
    shl r10,3+2                     ; r10 = count * 32
    neg r9

    ; Choose the frame base rbp from the distance between the candidate
    ; frame and the input pointer, taken modulo 4096.
    lea r11,[((-64))+r9*2+rsp]
    mov rbp,rsp
    mov r8,QWORD[r8]
    sub r11,rsi
    and r11,4095
    cmp r10,r11
    jb NEAR $L$sqr8x_sp_alt
    sub rbp,r11
    lea rbp,[((-64))+r9*2+rbp]
    jmp NEAR $L$sqr8x_sp_done

ALIGN 32
$L$sqr8x_sp_alt:
    lea r10,[((4096-64))+r9*2]
    lea rbp,[((-64))+r9*2+rbp]
    sub r11,r10
    mov r10,0                       ; mov, not xor: CF from the sub feeds cmovc
    cmovc r11,r10
    sub rbp,r11
$L$sqr8x_sp_done:
    and rbp,-64                     ; frame base is 64-byte aligned
    ; Step rsp down to rbp at most 4096 bytes at a time, reading each step.
    mov r11,rsp
    sub r11,rbp
    and r11,-4096
    lea rsp,[rbp*1+r11]
    mov r10,QWORD[rsp]
    cmp rsp,rbp
    ja NEAR $L$sqr8x_page_walk
    jmp NEAR $L$sqr8x_page_walk_done

ALIGN 16
$L$sqr8x_page_walk:
    lea rsp,[((-4096))+rsp]
    mov r10,QWORD[rsp]
    cmp rsp,rbp
    ja NEAR $L$sqr8x_page_walk
$L$sqr8x_page_walk_done:

    mov r10,r9                      ; r10 = -(count * 8)
    neg r9                          ; r9  =   count * 8

    mov QWORD[32+rsp],r8
    mov QWORD[40+rsp],rax           ; save the entry rsp

$L$sqr8x_body:

DB 102,72,15,110,209                ; movq xmm2,rcx
    pxor xmm0,xmm0
DB 102,72,15,110,207                ; movq xmm1,rdi
DB 102,73,15,110,218                ; movq xmm3,r10
    test rdx,rdx
    jz NEAR $L$sqr8x_nox

    call bn_sqrx8x_internal

    lea rbx,[rcx*1+r8]
    mov r9,rcx
    mov rdx,rcx
DB 102,72,15,126,207                ; movq rdi,xmm1 (result pointer back)
    sar rcx,3+2
    jmp NEAR $L$sqr8x_sub

ALIGN 32
$L$sqr8x_nox:
    call bn_sqr8x_internal

    lea rbx,[r9*1+rdi]
    mov rcx,r9
    mov rdx,r9
DB 102,72,15,126,207                ; movq rdi,xmm1 (result pointer back)
    sar rcx,3+2
    jmp NEAR $L$sqr8x_sub

    ; [rdi] = [rbx] - [rbp], four words per iteration; rcx counts up to
    ; zero. The borrow is carried in CF (mov, lea and inc do not touch CF).
ALIGN 32
$L$sqr8x_sub:
    mov r12,QWORD[rbx]
    mov r13,QWORD[8+rbx]
    mov r14,QWORD[16+rbx]
    mov r15,QWORD[24+rbx]
    lea rbx,[32+rbx]
    sbb r12,QWORD[rbp]
    sbb r13,QWORD[8+rbp]
    sbb r14,QWORD[16+rbp]
    sbb r15,QWORD[24+rbp]
    lea rbp,[32+rbp]
    mov QWORD[rdi],r12
    mov QWORD[8+rdi],r13
    mov QWORD[16+rdi],r14
    mov QWORD[24+rdi],r15
    lea rdi,[32+rdi]
    inc rcx
    jnz NEAR $L$sqr8x_sub

    sbb rax,0                       ; fold the final borrow into the select mask
    lea rbx,[r9*1+rbx]              ; rewind both pointers by r9 (negative here)
    lea rdi,[r9*1+rdi]

DB 102,72,15,110,200                ; movq xmm1,rax
    pxor xmm0,xmm0
    pshufd xmm1,xmm1,0              ; xmm1 = low dword of the mask in all lanes
    mov rsi,QWORD[40+rsp]           ; rsi = saved entry rsp

    jmp NEAR $L$sqr8x_cond_copy

    ; Branch-free select: [rdi] = ([rbx] & xmm1) | ([rdi] & (xmm1 == 0)).
    ; The scratch words at rbx and at rbx+rdx are zeroed as the loop goes.
ALIGN 32
$L$sqr8x_cond_copy:
    movdqa xmm2,XMMWORD[rbx]
    movdqa xmm3,XMMWORD[16+rbx]
    lea rbx,[32+rbx]
    movdqu xmm4,XMMWORD[rdi]
    movdqu xmm5,XMMWORD[16+rdi]
    lea rdi,[32+rdi]
    movdqa XMMWORD[(-32)+rbx],xmm0
    movdqa XMMWORD[(-16)+rbx],xmm0
    movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0
    movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0
    pcmpeqd xmm0,xmm1               ; xmm0 (was zero) = lanes where xmm1 == 0
    pand xmm2,xmm1
    pand xmm3,xmm1
    pand xmm4,xmm0
    pand xmm5,xmm0
    pxor xmm0,xmm0
    por xmm4,xmm2
    por xmm5,xmm3
    movdqu XMMWORD[(-32)+rdi],xmm4
    movdqu XMMWORD[(-16)+rdi],xmm5
    add r9,32
    jnz NEAR $L$sqr8x_cond_copy

    mov rax,1
    mov r15,QWORD[((-48))+rsi]      ; reload the six pushed registers
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbp,QWORD[((-16))+rsi]
    mov rbx,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$sqr8x_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    ret

$L$SEH_end_bn_sqr8x_mont:

;-----------------------------------------------------------------------
; bn_mulx4x_mont
; Four-words-per-iteration variant built on mulx/adcx/adox, so it needs a
; CPU with BMI2 and ADX.
; Same argument shuffle as the routines above. r9 is scaled to bytes with a
; 32-bit shl, which also zero-extends it.
; Frame (64-byte header, scratch vector from [64+rsp]):
;   [rsp]     length in bytes (r9 * 8)
;   [8+rsp]   running pointer into the rdx operand
;   [16+rsp]  end of the rdx operand (rdx + length)
;   [24+rsp]  qword loaded through r8 (multiplied into the low word)
;   [32+rsp]  result pointer
;   [40+rsp]  entry rsp
;   [48+rsp]  inner loop count (length / 32 - 1)
; Out:  rax = 1. rbx, rbp, r12-r15, rdi and rsi are restored.
;-----------------------------------------------------------------------
global bn_mulx4x_mont

ALIGN 32
bn_mulx4x_mont:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_bn_mulx4x_mont:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

_CET_ENDBR
    mov rax,rsp                     ; rax = entry rsp
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

$L$mulx4x_prologue:

    shl r9d,3                       ; r9 = count * 8; 32-bit op zero-extends
    xor r10,r10
    sub r10,r9                      ; r10 = -length
    mov r8,QWORD[r8]
    lea rbp,[((-72))+r10*1+rsp]
    and rbp,-128                    ; frame base is 128-byte aligned
    ; Step rsp down to rbp at most 4096 bytes at a time, reading each step.
    mov r11,rsp
    sub r11,rbp
    and r11,-4096
    lea rsp,[rbp*1+r11]
    mov r10,QWORD[rsp]
    cmp rsp,rbp
    ja NEAR $L$mulx4x_page_walk
    jmp NEAR $L$mulx4x_page_walk_done

ALIGN 16
$L$mulx4x_page_walk:
    lea rsp,[((-4096))+rsp]
    mov r10,QWORD[rsp]
    cmp rsp,rbp
    ja NEAR $L$mulx4x_page_walk
$L$mulx4x_page_walk_done:

    lea r10,[r9*1+rdx]              ; r10 = end of the rdx operand

    ; Fill in the frame header described in the routine comment.
    mov QWORD[rsp],r9
    shr r9,5
    mov QWORD[16+rsp],r10
    sub r9,1
    mov QWORD[24+rsp],r8
    mov QWORD[32+rsp],rdi
    mov QWORD[40+rsp],rax

    mov QWORD[48+rsp],r9
    jmp NEAR $L$mulx4x_body

ALIGN 32
$L$mulx4x_body:
    lea rdi,[8+rdx]
    mov rdx,QWORD[rdx]              ; mulx takes its implicit multiplicand from rdx
    lea rbx,[((64+32))+rsp]
    mov r9,rdx                      ; r9 keeps the current word of the rdx operand

    mulx rax,r8,QWORD[rsi]
    mulx r14,r11,QWORD[8+rsi]
    add r11,rax
    mov QWORD[8+rsp],rdi
    mulx r13,r12,QWORD[16+rsi]
    adc r12,r14
    adc r13,0

    mov rdi,r8
    imul r8,QWORD[24+rsp]           ; r8 = low word * saved constant (mod 2^64)
    xor rbp,rbp                     ; rbp = 0; also clears CF and OF

    mulx r14,rax,QWORD[24+rsi]
    mov rdx,r8
    lea rsi,[32+rsi]
    adcx r13,rax
    adcx r14,rbp

    mulx r10,rax,QWORD[rcx]
    adcx rdi,rax
    adox r10,r11
    mulx r11,rax,QWORD[8+rcx]
    adcx r10,rax
    adox r11,r12
    DB 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00   ; mulx r12,rax,QWORD[16+rcx] with a 32-bit displacement
    mov rdi,QWORD[48+rsp]           ; rdi = inner loop count
    mov QWORD[((-32))+rbx],r10
    adcx r11,rax
    adox r12,r13
    mulx r15,rax,QWORD[24+rcx]
    mov rdx,r9
    mov QWORD[((-24))+rbx],r11
    adcx r12,rax
    adox r15,rbp
    lea rcx,[32+rcx]
    mov QWORD[((-16))+rbx],r12

    jmp NEAR $L$mulx4x_1st

    ; First pass. Two independent carry chains run in parallel: adcx uses
    ; CF and adox uses OF. mulx, mov and lea modify neither flag.
ALIGN 32
$L$mulx4x_1st:
    adcx r15,rbp
    mulx rax,r10,QWORD[rsi]
    adcx r10,r14
    mulx r14,r11,QWORD[8+rsi]
    adcx r11,rax
    mulx rax,r12,QWORD[16+rsi]
    adcx r12,r14
    mulx r14,r13,QWORD[24+rsi]
    DB 0x67,0x67                    ; two address-size prefixes on the next mov (presumably padding - confirm)
    mov rdx,r8
    adcx r13,rax
    adcx r14,rbp
    lea rsi,[32+rsi]
    lea rbx,[32+rbx]

    adox r10,r15
    mulx r15,rax,QWORD[rcx]
    adcx r10,rax
    adox r11,r15
    mulx r15,rax,QWORD[8+rcx]
    adcx r11,rax
    adox r12,r15
    mulx r15,rax,QWORD[16+rcx]
    mov QWORD[((-40))+rbx],r10
    adcx r12,rax
    mov QWORD[((-32))+rbx],r11
    adox r13,r15
    mulx r15,rax,QWORD[24+rcx]
    mov rdx,r9
    mov QWORD[((-24))+rbx],r12
    adcx r13,rax
    adox r15,rbp
    lea rcx,[32+rcx]
    mov QWORD[((-16))+rbx],r13

    dec rdi
    jnz NEAR $L$mulx4x_1st

    mov rax,QWORD[rsp]              ; rax = length in bytes
    mov rdi,QWORD[8+rsp]            ; rdi = next word of the rdx operand
    adc r15,rbp
    add r14,r15
    sbb r15,r15                     ; r15 = 0 or -1 from the final carry
    mov QWORD[((-8))+rbx],r14
    jmp NEAR $L$mulx4x_outer

    ; Remaining passes: rewind rsi and rcx by the length and add the
    ; scratch vector into the new products.
ALIGN 32
$L$mulx4x_outer:
    mov rdx,QWORD[rdi]
    lea rdi,[8+rdi]
    sub rsi,rax
    mov QWORD[rbx],r15              ; store the top carry word
    lea rbx,[((64+32))+rsp]
    sub rcx,rax

    mulx r11,r8,QWORD[rsi]
    xor ebp,ebp                     ; rbp = 0; clears CF and OF for both chains
    mov r9,rdx
    mulx r12,r14,QWORD[8+rsi]
    adox r8,QWORD[((-32))+rbx]
    adcx r11,r14
    mulx r13,r15,QWORD[16+rsi]
    adox r11,QWORD[((-24))+rbx]
    adcx r12,r15
    adox r12,QWORD[((-16))+rbx]
    adcx r13,rbp
    adox r13,rbp

    mov QWORD[8+rsp],rdi
    mov r15,r8
    imul r8,QWORD[24+rsp]
    xor ebp,ebp

    mulx r14,rax,QWORD[24+rsi]
    mov rdx,r8
    adcx r13,rax
    adox r13,QWORD[((-8))+rbx]
    adcx r14,rbp
    lea rsi,[32+rsi]
    adox r14,rbp

    mulx r10,rax,QWORD[rcx]
    adcx r15,rax
    adox r10,r11
    mulx r11,rax,QWORD[8+rcx]
    adcx r10,rax
    adox r11,r12
    mulx r12,rax,QWORD[16+rcx]
    mov QWORD[((-32))+rbx],r10
    adcx r11,rax
    adox r12,r13
    mulx r15,rax,QWORD[24+rcx]
    mov rdx,r9
    mov QWORD[((-24))+rbx],r11
    lea rcx,[32+rcx]
    adcx r12,rax
    adox r15,rbp
    mov rdi,QWORD[48+rsp]           ; rdi = inner loop count
    mov QWORD[((-16))+rbx],r12

    jmp NEAR $L$mulx4x_inner

ALIGN 32
$L$mulx4x_inner:
    mulx rax,r10,QWORD[rsi]
    adcx r15,rbp
    adox r10,r14
    mulx r14,r11,QWORD[8+rsi]
    adcx r10,QWORD[rbx]
    adox r11,rax
    mulx rax,r12,QWORD[16+rsi]
    adcx r11,QWORD[8+rbx]
    adox r12,r14
    mulx r14,r13,QWORD[24+rsi]
    mov rdx,r8
    adcx r12,QWORD[16+rbx]
    adox r13,rax
    adcx r13,QWORD[24+rbx]
    adox r14,rbp
    lea rsi,[32+rsi]
    lea rbx,[32+rbx]
    adcx r14,rbp

    adox r10,r15
    mulx r15,rax,QWORD[rcx]
    adcx r10,rax
    adox r11,r15
    mulx r15,rax,QWORD[8+rcx]
    adcx r11,rax
    adox r12,r15
    mulx r15,rax,QWORD[16+rcx]
    mov QWORD[((-40))+rbx],r10
    adcx r12,rax
    adox r13,r15
    mulx r15,rax,QWORD[24+rcx]
    mov rdx,r9
    mov QWORD[((-32))+rbx],r11
    mov QWORD[((-24))+rbx],r12
    adcx r13,rax
    adox r15,rbp
    lea rcx,[32+rcx]
    mov QWORD[((-16))+rbx],r13

    dec rdi
    jnz NEAR $L$mulx4x_inner

    mov rax,QWORD[rsp]              ; rax = length in bytes
    mov rdi,QWORD[8+rsp]
    adc r15,rbp
    sub rbp,QWORD[rbx]              ; rbp was 0: sets CF when the saved top word is non-zero
    adc r14,r15
    sbb r15,r15
    mov QWORD[((-8))+rbx],r14

    cmp rdi,QWORD[16+rsp]           ; all words of the rdx operand consumed?
    jne NEAR $L$mulx4x_outer

    lea rbx,[64+rsp]
    sub rcx,rax
    neg r15                         ; -1/0 becomes 1/0; CF ends up 0 when r15 was 0
    mov rdx,rax
    shr rax,3+2                     ; rax = length / 32 iterations
    mov rdi,QWORD[32+rsp]           ; rdi = result pointer
    jmp NEAR $L$mulx4x_sub

    ; [rdi] = scratch - [rcx], four words per iteration; the borrow is
    ; carried in CF (mov, lea and dec do not modify CF).
ALIGN 32
$L$mulx4x_sub:
    mov r11,QWORD[rbx]
    mov r12,QWORD[8+rbx]
    mov r13,QWORD[16+rbx]
    mov r14,QWORD[24+rbx]
    lea rbx,[32+rbx]
    sbb r11,QWORD[rcx]
    sbb r12,QWORD[8+rcx]
    sbb r13,QWORD[16+rcx]
    sbb r14,QWORD[24+rcx]
    lea rcx,[32+rcx]
    mov QWORD[rdi],r11
    mov QWORD[8+rdi],r12
    mov QWORD[16+rdi],r13
    mov QWORD[24+rdi],r14
    lea rdi,[32+rdi]
    dec rax
    jnz NEAR $L$mulx4x_sub

    sbb r15,0                       ; fold the final borrow into the select mask
    lea rbx,[64+rsp]
    sub rdi,rdx                     ; rewind the result pointer by the length

DB 102,73,15,110,207                ; movq xmm1,r15
    pxor xmm0,xmm0
    pshufd xmm1,xmm1,0              ; xmm1 = low dword of the mask in all lanes
    mov rsi,QWORD[40+rsp]           ; rsi = saved entry rsp

    jmp NEAR $L$mulx4x_cond_copy

    ; Branch-free select: [rdi] = (scratch & xmm1) | ([rdi] & (xmm1 == 0)).
    ; The scratch vector is zeroed as it is read.
ALIGN 32
$L$mulx4x_cond_copy:
    movdqa xmm2,XMMWORD[rbx]
    movdqa xmm3,XMMWORD[16+rbx]
    lea rbx,[32+rbx]
    movdqu xmm4,XMMWORD[rdi]
    movdqu xmm5,XMMWORD[16+rdi]
    lea rdi,[32+rdi]
    movdqa XMMWORD[(-32)+rbx],xmm0
    movdqa XMMWORD[(-16)+rbx],xmm0
    pcmpeqd xmm0,xmm1               ; xmm0 (was zero) = lanes where xmm1 == 0
    pand xmm2,xmm1
    pand xmm3,xmm1
    pand xmm4,xmm0
    pand xmm5,xmm0
    pxor xmm0,xmm0
    por xmm4,xmm2
    por xmm5,xmm3
    movdqu XMMWORD[(-32)+rdi],xmm4
    movdqu XMMWORD[(-16)+rdi],xmm5
    sub rdx,32
    jnz NEAR $L$mulx4x_cond_copy

    mov QWORD[rbx],rdx              ; rdx is 0 here: clear the top carry word too

    mov rax,1
    mov r15,QWORD[((-48))+rsi]      ; reload the six pushed registers
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbp,QWORD[((-16))+rsi]
    mov rbx,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$mulx4x_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    ret

$L$SEH_end_bn_mulx4x_mont:
    ; NUL-terminated ASCII banner:
    ; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
    DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
    DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
    DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
    DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
    DB 115,108,46,111,114,103,62,0
ALIGN 16
EXTERN __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; mul_handler
; Exception handler named in the .xdata records of bn_mul_mont_nohw and
; bn_mul4x_mont.
; NOTE(review): r8 and r9 are presumably the CONTEXT and DISPATCHER_CONTEXT
; pointers of the Win64 language-specific handler signature. The offsets
; used (120 = Rax, 152 = Rsp, 192 = R9, 248 = Rip; 8 = ImageBase,
; 56 = HandlerData) match that layout - confirm against winnt.h.
; HandlerData holds two image-relative addresses: body start and epilogue.
;   Rip <  body      : rax = context Rax (the entry rsp copy), nothing pushed
;                      registers to recover yet -> common tail
;   Rip >= epilogue  : rax = context Rsp -> common tail
;   otherwise        : rax = entry rsp reloaded from [Rsp + 8 + R9*8]
;                      -> recover the six pushed registers
; Out:  eax = 1.
;-----------------------------------------------------------------------
ALIGN 16
mul_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64

    mov rax,QWORD[120+r8]
    mov rbx,QWORD[248+r8]

    mov rsi,QWORD[8+r9]
    mov r11,QWORD[56+r9]

    mov r10d,DWORD[r11]
    lea r10,[r10*1+rsi]             ; absolute address of the body label
    cmp rbx,r10
    jb NEAR $L$common_seh_tail

    mov rax,QWORD[152+r8]

    mov r10d,DWORD[4+r11]
    lea r10,[r10*1+rsi]             ; absolute address of the epilogue label
    cmp rbx,r10
    jae NEAR $L$common_seh_tail

    mov r10,QWORD[192+r8]
    mov rax,QWORD[8+r10*8+rax]      ; entry rsp stored by the routine's prologue

    jmp NEAR $L$common_pop_regs

;-----------------------------------------------------------------------
; sqr_handler
; Exception handler named in the .xdata records of bn_sqr8x_mont and
; bn_mulx4x_mont. Same register conventions as mul_handler (see the
; review note there). HandlerData holds three image-relative addresses:
; prologue end, body start and epilogue.
;   Rip <  prologue  : rax = context Rax -> common tail
;   Rip <  body      : rax = context Rax -> recover the pushed registers
;   Rip >= epilogue  : rax = context Rsp -> common tail
;   otherwise        : rax = entry rsp reloaded from [Rsp + 40]
; Falls through into the shared tail below.
;-----------------------------------------------------------------------
ALIGN 16
sqr_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64

    mov rax,QWORD[120+r8]
    mov rbx,QWORD[248+r8]

    mov rsi,QWORD[8+r9]
    mov r11,QWORD[56+r9]

    mov r10d,DWORD[r11]
    lea r10,[r10*1+rsi]             ; absolute address of the prologue label
    cmp rbx,r10
    jb NEAR $L$common_seh_tail

    mov r10d,DWORD[4+r11]
    lea r10,[r10*1+rsi]             ; absolute address of the body label
    cmp rbx,r10
    jb NEAR $L$common_pop_regs

    mov rax,QWORD[152+r8]

    mov r10d,DWORD[8+r11]
    lea r10,[r10*1+rsi]             ; absolute address of the epilogue label
    cmp rbx,r10
    jae NEAR $L$common_seh_tail

    mov rax,QWORD[40+rax]           ; entry rsp stored at [40+rsp] by the routine

    ; rax = the routine's entry rsp: copy the six registers pushed below it
    ; into the context record.
$L$common_pop_regs:
    mov rbx,QWORD[((-8))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov r12,QWORD[((-24))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r15,QWORD[((-48))+rax]
    mov QWORD[144+r8],rbx
    mov QWORD[160+r8],rbp
    mov QWORD[216+r8],r12
    mov QWORD[224+r8],r13
    mov QWORD[232+r8],r14
    mov QWORD[240+r8],r15

    ; Recover rdi/rsi from the home slots written by the WIN64 prologue and
    ; record rax as the context's stack pointer.
$L$common_seh_tail:
    mov rdi,QWORD[8+rax]
    mov rsi,QWORD[16+rax]
    mov QWORD[152+r8],rax
    mov QWORD[168+r8],rsi
    mov QWORD[176+r8],rdi

    ; Copy 154 qwords from the context at r8 to the buffer at [40+r9].
    mov rdi,QWORD[40+r9]
    mov rsi,r8
    mov ecx,154
    DD 0xa548f3fc                   ; bytes fc f3 48 a5 = cld / rep movsq

    ; Call RtlVirtualUnwind with rcx = 0 and the remaining arguments taken
    ; from the dispatcher record; four of them go on the stack above the
    ; 32-byte home area.
    mov rsi,r9
    xor rcx,rcx
    mov rdx,QWORD[8+rsi]
    mov r8,QWORD[rsi]
    mov r9,QWORD[16+rsi]
    mov r10,QWORD[40+rsi]
    lea r11,[56+rsi]
    lea r12,[24+rsi]
    mov QWORD[32+rsp],r10
    mov QWORD[40+rsp],r11
    mov QWORD[48+rsp],r12
    mov QWORD[56+rsp],rcx
    call QWORD[__imp_RtlVirtualUnwind]

    mov eax,1
    add rsp,64
    popfq
    pop r15
    pop r14
    pop r13
    pop r12
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    ret

; Function table: one (begin, end, unwind-info) triple of image-relative
; addresses per routine.
section .pdata rdata align=4
ALIGN 4
    DD $L$SEH_begin_bn_mul_mont_nohw wrt ..imagebase
    DD $L$SEH_end_bn_mul_mont_nohw wrt ..imagebase
    DD $L$SEH_info_bn_mul_mont_nohw wrt ..imagebase

    DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase
    DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase
    DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase

    DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
    DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase
    DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase

    DD $L$SEH_begin_bn_mulx4x_mont wrt ..imagebase
    DD $L$SEH_end_bn_mulx4x_mont wrt ..imagebase
    DD $L$SEH_info_bn_mulx4x_mont wrt ..imagebase

; Unwind records: a 4-byte header, the handler address, then the label
; addresses the handler reads as HandlerData (two for mul_handler, three
; for sqr_handler).
; NOTE(review): header byte 9 is presumably UNWIND_INFO version 1 with the
; exception-handler flag and no unwind codes - confirm against the x64
; exception handling documentation.
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_bn_mul_mont_nohw:
    DB 9,0,0,0
    DD mul_handler wrt ..imagebase
    DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_bn_mul4x_mont:
    DB 9,0,0,0
    DD mul_handler wrt ..imagebase
    DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
$L$SEH_info_bn_sqr8x_mont:
    DB 9,0,0,0
    DD sqr_handler wrt ..imagebase
    DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
ALIGN 8
$L$SEH_info_bn_mulx4x_mont:
    DB 9,0,0,0
    DD sqr_handler wrt ..imagebase
    DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
ALIGN 8
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif