// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text

// ---------------------------------------------------------------------------
// Review annotations (comments only; no instruction or directive was changed).
//
// Syntax: AT&T, Mach-O symbol naming (leading underscore, L$ local labels).
// The identification string at the end of the file decodes to
// "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
//
// Registers read on entry by the routines in this file:
//   %rdi  rp   destination words
//   %rsi  ap   first operand words
//   %rdx  bp   second operand words (_bn_sqr8x_mont only tests it for zero)
//   %rcx  np   words multiplied by the per-row factor and subtracted at the end
//   %r8        pointer; only the word at (%r8) is loaded (called n0 below)
//   %r9d  num  word count; the upper half of %r9 is discarded
// The names rp/ap/bp/np/n0/num are reviewer shorthand.
// TODO(review): confirm them against the C prototype.
//
// Every routine returns 1 in %rax and reloads %rbx, %rbp, %r12-%r15 from the
// six slots pushed in its prologue.
// ---------------------------------------------------------------------------

// _bn_mul_mont_nohw: one word per inner-loop step, using mulq/imulq only.
// Stack frame (tp = scratch accumulator):
//   0(%rsp) .. (%rsp,num,8)   tp[0..num]
//   8(%rsp,num,8)             %rsp value from function entry
// %r14 = outer index i into bp, %r15 = inner index j into ap/np.
.globl	_bn_mul_mont_nohw
.private_extern _bn_mul_mont_nohw

.p2align	4
_bn_mul_mont_nohw:

_CET_ENDBR
	movl	%r9d,%r9d		// zero-extend num
	movq	%rsp,%rax		// keep entry %rsp for the epilogue

	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	// %r10 = target %rsp: room for num+2 words, rounded down to 1024 bytes.
	negq	%r9
	movq	%rsp,%r11
	leaq	-16(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	// Move %rsp to the target plus a whole number of 4096-byte pages, then
	// step down one page at a time, loading from each page, until the
	// target is reached.
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	L$mul_page_walk
	jmp	L$mul_page_walk_done

.p2align	4
L$mul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	L$mul_page_walk
L$mul_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)	// save entry %rsp above tp[num]

L$mul_body:
	movq	%rdx,%r12		// %r12 = bp (mulq clobbers %rdx)
	movq	(%r8),%r8		// %r8 = n0 word
	movq	(%r12),%rbx		// %rbx = bp[0]
	movq	(%rsi),%rax		// %rax = ap[0]

	xorq	%r14,%r14		// i = 0
	xorq	%r15,%r15		// j = 0

	movq	%r8,%rbp
	mulq	%rbx			// ap[0]*bp[0]
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp		// %rbp = low word of product * n0
	movq	%rdx,%r11

	mulq	%rbp			// np[0]*%rbp
	addq	%rax,%r10		// low word discarded, only the carry is kept
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	L$1st_enter

	// First row: tp[] = ap[]*bp[0] + np[]*%rbp, stored shifted down one word.
.p2align	4
L$1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

L$1st_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	cmpq	%r9,%r15
	jne	L$1st

	addq	%rax,%r13
	movq	(%rsi),%rax		// reload ap[0] for the next row
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	// tp[num-1]
	movq	%rdx,(%rsp,%r9,8)	// tp[num] = top carry

	leaq	1(%r14),%r14		// i = 1
	jmp	L$outer

	// Remaining rows: same as the first row, plus the previous tp[] contents.
.p2align	4
L$outer:
	movq	(%r12,%r14,8),%rbx	// %rbx = bp[i]
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10		// tp[0]
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		// per-row factor from the new low word
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10		// tp[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	L$inner_enter

.p2align	4
L$inner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

L$inner_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp
	cmpq	%r9,%r15
	jne	L$inner

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10	// tp[num] from the previous row
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	L$outer

	// rp[] = tp[] - np[]. The xorq clears CF for the first sbbq; the loop
	// bookkeeping (movq/leaq/decq) leaves CF untouched between iterations.
	xorq	%r14,%r14
	movq	(%rsp),%rax
	movq	%r9,%r15

.p2align	4
L$sub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)
	movq	8(%rsp,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	L$sub

	// %rax holds tp[num] here; fold the final borrow into it and use it as
	// a select mask, with %rbx as its complement.
	sbbq	$0,%rax
	movq	$-1,%rbx
	xorq	%rax,%rbx
	xorq	%r14,%r14
	movq	%r9,%r15

	// Branch-free select: rp[i] = (rp[i] & %rbx) | (tp[i] & %rax).
	// Each tp[i] is overwritten with the value of %r9 after it is read.
L$copy:
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r9,(%rsp,%r14,8)
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	L$copy

	movq	8(%rsp,%r9,8),%rsi	// %rsi = entry %rsp

	movq	$1,%rax			// return value
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp

L$mul_epilogue:
	ret


// _bn_mul4x_mont: same computation shape as _bn_mul_mont_nohw with the inner
// loops unrolled four words per iteration (%r15 advances by 4).
// Stack frame:
//   0(%rsp) ..                tp[]
//   8(%rsp,num,8)             %rsp value from function entry
//   16(%rsp,num,8)            rp (%rdi is reused as an accumulator)
// NOTE(review): the do-while loops count (num-4)>>2 and num>>2 iterations, so
// this presumably requires num to be a multiple of 4 and greater than 4.
// Confirm at the call sites.
.globl	_bn_mul4x_mont
.private_extern _bn_mul4x_mont

.p2align	4
_bn_mul4x_mont:

_CET_ENDBR
	movl	%r9d,%r9d		// zero-extend num
	movq	%rsp,%rax		// keep entry %rsp for the epilogue

	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	// %r10 = target %rsp: room for num+4 words, rounded down to 1024 bytes.
	negq	%r9
	movq	%rsp,%r11
	leaq	-32(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	// Page-by-page descent to the target, loading from each page.
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	L$mul4x_page_walk
	jmp	L$mul4x_page_walk_done

L$mul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	L$mul4x_page_walk
L$mul4x_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)	// save entry %rsp

L$mul4x_body:
	movq	%rdi,16(%rsp,%r9,8)	// save rp
	movq	%rdx,%r12		// %r12 = bp
	movq	(%r8),%r8		// %r8 = n0 word
	movq	(%r12),%rbx		// %rbx = bp[0]
	movq	(%rsi),%rax		// %rax = ap[0]

	xorq	%r14,%r14		// i = 0
	xorq	%r15,%r15		// j = 0

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp		// per-row factor
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	L$1st4x

	// First row, four words per iteration.
.p2align	4
L$1st4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	L$1st4x

	// Tail of the first row (last two words and the top carry).
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		// reload ap[0] for the next row
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)	// top carry

	leaq	1(%r14),%r14		// i = 1
.p2align	2
L$outer4x:
	movq	(%r12,%r14,8),%rbx	// %rbx = bp[i]
	xorq	%r15,%r15
	movq	(%rsp),%r10		// tp[0]
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		// per-row factor
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	L$inner4x

	// Later rows: as L$1st4x, additionally accumulating the previous tp[].
.p2align	4
L$inner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	L$inner4x

	// Tail of the row.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14		// i++
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		// reload ap[0] for the next row
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13	// add the previous top carry
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)

	cmpq	%r9,%r14
	jb	L$outer4x

	// rp[] = tp[] - np[], four words per iteration. The initial subq starts
	// the borrow chain; the loop body only uses sbbq plus flag-preserving
	// moves and leaq, and decq does not modify CF.
	movq	16(%rsp,%r9,8),%rdi	// restore rp
	leaq	-4(%r9),%r15
	movq	0(%rsp),%rax
	movq	8(%rsp),%rdx
	shrq	$2,%r15			// %r15 = (num-4)/4 iterations
	leaq	(%rsp),%rsi		// %rsi = tp
	xorq	%r14,%r14

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx

L$sub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14
	decq	%r15
	jnz	L$sub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax	// word above the last four (top carry)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	sbbq	$0,%rax			// fold the final borrow into the top word
	movq	%rbp,24(%rdi,%r14,8)
	// Build the select masks: %xmm4 = low dword of %rax broadcast to all
	// lanes, %xmm5 = its bitwise complement, %xmm0 = zero.
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,224		// encodes movq %rax,%xmm4
	pcmpeqd	%xmm5,%xmm5
	pshufd	$0,%xmm4,%xmm4
	movq	%r9,%r15
	pxor	%xmm4,%xmm5
	shrq	$2,%r15			// num/4 iterations of 32 bytes
	xorl	%eax,%eax		// byte offset

	jmp	L$copy4x

	// Branch-free select: rp = (tp & %xmm4) | (rp & %xmm5); tp is zeroed
	// as it is consumed.
.p2align	4
L$copy4x:
	movdqa	(%rsp,%rax,1),%xmm1
	movdqu	(%rdi,%rax,1),%xmm2
	pand	%xmm4,%xmm1
	pand	%xmm5,%xmm2
	movdqa	16(%rsp,%rax,1),%xmm3
	movdqa	%xmm0,(%rsp,%rax,1)
	por	%xmm2,%xmm1
	movdqu	16(%rdi,%rax,1),%xmm2
	movdqu	%xmm1,(%rdi,%rax,1)
	pand	%xmm4,%xmm3
	pand	%xmm5,%xmm2
	movdqa	%xmm0,16(%rsp,%rax,1)
	por	%xmm2,%xmm3
	movdqu	%xmm3,16(%rdi,%rax,1)
	leaq	32(%rax),%rax
	decq	%r15
	jnz	L$copy4x
	movq	8(%rsp,%r9,8),%rsi	// %rsi = entry %rsp

	movq	$1,%rax			// return value
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp

L$mul4x_epilogue:
	ret


// _bn_sqr8x_mont: frame setup, a call into one of two squaring kernels that
// are defined outside this file, then the final subtraction and masked copy.
// %rdx selects the kernel: non-zero calls _bn_sqrx8x_internal, zero calls
// _bn_sqr8x_internal.
// Stack frame slots written here:
//   32(%rsp)   n0 word
//   40(%rsp)   %rsp value from function entry
// NOTE(review): the register state after either call (%rax, %rbp, %r8, %r9,
// %rcx as used below) is produced by the callee and cannot be checked from
// this file.
.globl	_bn_sqr8x_mont
.private_extern _bn_sqr8x_mont

.p2align	5
_bn_sqr8x_mont:

_CET_ENDBR
	movl	%r9d,%r9d		// zero-extend num
	movq	%rsp,%rax		// keep entry %rsp for the epilogue

	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

L$sqr8x_prologue:

	movl	%r9d,%r10d
	shll	$3,%r9d			// %r9 = num*8 (bytes)
	shlq	$3+2,%r10		// %r10 = num*32
	negq	%r9			// %r9 = -num*8

	// Choose the frame base in %rbp: 2*num words plus 64 bytes below %rsp,
	// shifted according to the low 12 bits of its distance from ap.
	// NOTE(review): presumably this avoids page-offset aliasing between ap
	// and the frame. Confirm against the generator script.
	leaq	-64(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	movq	(%r8),%r8		// %r8 = n0 word
	subq	%rsi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	L$sqr8x_sp_alt
	subq	%r11,%rbp
	leaq	-64(%rbp,%r9,2),%rbp
	jmp	L$sqr8x_sp_done

.p2align	5
L$sqr8x_sp_alt:
	leaq	4096-64(,%r9,2),%r10
	leaq	-64(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11		// clamp to 0 if the subtraction borrowed
	subq	%r11,%rbp
L$sqr8x_sp_done:
	andq	$-64,%rbp		// 64-byte align the frame base
	// Page-by-page descent to %rbp, loading from each page.
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	L$sqr8x_page_walk
	jmp	L$sqr8x_page_walk_done

.p2align	4
L$sqr8x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	L$sqr8x_page_walk
L$sqr8x_page_walk_done:

	movq	%r9,%r10		// %r10 = -num*8
	negq	%r9			// %r9 = num*8

	movq	%r8,32(%rsp)		// n0 word
	movq	%rax,40(%rsp)		// entry %rsp

L$sqr8x_body:

.byte	102,72,15,110,209		// encodes movq %rcx,%xmm2 (np)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207		// encodes movq %rdi,%xmm1 (rp)
.byte	102,73,15,110,218		// encodes movq %r10,%xmm3 (-num*8)
	testq	%rdx,%rdx
	jz	L$sqr8x_nox

	call	_bn_sqrx8x_internal

	leaq	(%r8,%rcx,1),%rbx
	movq	%rcx,%r9
	movq	%rcx,%rdx
.byte	102,72,15,126,207		// encodes movq %xmm1,%rdi (restore rp)
	sarq	$3+2,%rcx		// loop count in units of 32 bytes
	jmp	L$sqr8x_sub

.p2align	5
L$sqr8x_nox:
	call	_bn_sqr8x_internal

	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
	movq	%r9,%rdx
.byte	102,72,15,126,207		// encodes movq %xmm1,%rdi (restore rp)
	sarq	$3+2,%rcx		// loop count in units of 32 bytes
	jmp	L$sqr8x_sub

	// rp[] = (%rbx)[] - (%rbp)[], four words per iteration; %rcx counts up
	// to zero. leaq, movq and incq leave CF untouched, so the borrow chain
	// carries across iterations.
.p2align	5
L$sqr8x_sub:
	movq	0(%rbx),%r12
	movq	8(%rbx),%r13
	movq	16(%rbx),%r14
	movq	24(%rbx),%r15
	leaq	32(%rbx),%rbx
	sbbq	0(%rbp),%r12
	sbbq	8(%rbp),%r13
	sbbq	16(%rbp),%r14
	sbbq	24(%rbp),%r15
	leaq	32(%rbp),%rbp
	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi
	incq	%rcx
	jnz	L$sqr8x_sub

	sbbq	$0,%rax			// fold the final borrow into %rax
	leaq	(%rbx,%r9,1),%rbx	// rewind source pointer by adding %r9
	leaq	(%rdi,%r9,1),%rdi	// rewind rp by adding %r9

.byte	102,72,15,110,200		// encodes movq %rax,%xmm1
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1		// broadcast low dword as select mask
	movq	40(%rsp),%rsi		// %rsi = entry %rsp

	jmp	L$sqr8x_cond_copy

	// Branch-free select, 32 bytes per iteration:
	//   rp = (src & %xmm1) | (rp & (%xmm1 == 0 ? all-ones : 0))
	// The source words, and the words %rdx bytes away from them, are zeroed.
.p2align	5
L$sqr8x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	movdqa	%xmm0,-32(%rbx,%rdx,1)
	movdqa	%xmm0,-16(%rbx,%rdx,1)
	pcmpeqd	%xmm1,%xmm0		// %xmm0 = lanes where the mask is zero
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0		// back to zero for the next iteration
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	addq	$32,%r9
	jnz	L$sqr8x_cond_copy

	movq	$1,%rax			// return value
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp

L$sqr8x_epilogue:
	ret


// _bn_mulx4x_mont: four words per step using mulx with the adcx/adox dual
// carry chains.
// Stack frame:
//   0(%rsp)    num*8 (bytes)
//   8(%rsp)    pointer to the next bp word
//   16(%rsp)   bp + num*8 (end of bp, outer-loop terminator)
//   24(%rsp)   n0 word
//   32(%rsp)   rp
//   40(%rsp)   %rsp value from function entry
//   48(%rsp)   inner loop count, (num*8 >> 5) - 1
//   64(%rsp)   tp[]
// NOTE(review): the count arithmetic presumably requires num to be a multiple
// of 4 and greater than 4. Confirm at the call sites.
.globl	_bn_mulx4x_mont
.private_extern _bn_mulx4x_mont

.p2align	5
_bn_mulx4x_mont:

_CET_ENDBR
	movq	%rsp,%rax		// keep entry %rsp for the epilogue

	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

L$mulx4x_prologue:

	shll	$3,%r9d			// %r9 = num*8, upper half zeroed
	xorq	%r10,%r10
	subq	%r9,%r10		// %r10 = -num*8
	movq	(%r8),%r8		// %r8 = n0 word
	leaq	-72(%rsp,%r10,1),%rbp
	andq	$-128,%rbp		// 128-byte align the frame base
	// Page-by-page descent to %rbp, loading from each page.
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	L$mulx4x_page_walk
	jmp	L$mulx4x_page_walk_done

.p2align	4
L$mulx4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	L$mulx4x_page_walk
L$mulx4x_page_walk_done:

	leaq	(%rdx,%r9,1),%r10	// %r10 = end of bp

	movq	%r9,0(%rsp)
	shrq	$5,%r9
	movq	%r10,16(%rsp)
	subq	$1,%r9
	movq	%r8,24(%rsp)
	movq	%rdi,32(%rsp)
	movq	%rax,40(%rsp)

	movq	%r9,48(%rsp)
	jmp	L$mulx4x_body

.p2align	5
L$mulx4x_body:
	leaq	8(%rdx),%rdi		// %rdi = &bp[1]
	movq	(%rdx),%rdx		// %rdx = bp[0] (implicit mulx operand)
	leaq	64+32(%rsp),%rbx	// %rbx = tp write pointer
	movq	%rdx,%r9		// keep bp[0] to swap back into %rdx

	mulxq	0(%rsi),%r8,%rax
	mulxq	8(%rsi),%r11,%r14
	addq	%rax,%r11
	movq	%rdi,8(%rsp)
	mulxq	16(%rsi),%r12,%r13
	adcq	%r14,%r12
	adcq	$0,%r13

	movq	%r8,%rdi
	imulq	24(%rsp),%r8		// %r8 = low word * n0 (per-row factor)
	xorq	%rbp,%rbp		// %rbp = 0, also clears CF and OF

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx		// switch the implicit operand to the factor
	leaq	32(%rsi),%rsi
	adcxq	%rax,%r13
	adcxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%rdi
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
.byte	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00	// encodes mulxq 16(%rcx),%rax,%r12 with a 32-bit displacement
	movq	48(%rsp),%rdi		// inner loop count
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		// back to bp[0]
	movq	%r11,-24(%rbx)
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r12,-16(%rbx)

	jmp	L$mulx4x_1st

	// First row, four words per iteration. %rdx alternates between the bp
	// word (kept in %r9) and the per-row factor (kept in %r8).
.p2align	5
L$mulx4x_1st:
	adcxq	%rbp,%r15
	mulxq	0(%rsi),%r10,%rax
	adcxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
.byte	0x67,0x67			// two 0x67 prefix bytes on the next instruction (presumably padding - confirm)
	movq	%r8,%rdx
	adcxq	%rax,%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-32(%rbx)
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	L$mulx4x_1st

	movq	0(%rsp),%rax		// %rax = num*8
	movq	8(%rsp),%rdi		// %rdi = next bp pointer
	adcq	%rbp,%r15
	addq	%r15,%r14
	sbbq	%r15,%r15		// %r15 = 0 or -1 from the top carry
	movq	%r14,-8(%rbx)
	jmp	L$mulx4x_outer

.p2align	5
L$mulx4x_outer:
	movq	(%rdi),%rdx		// %rdx = bp[i]
	leaq	8(%rdi),%rdi
	subq	%rax,%rsi		// rewind ap
	movq	%r15,(%rbx)		// store top carry word
	leaq	64+32(%rsp),%rbx	// rewind tp pointer
	subq	%rax,%rcx		// rewind np

	mulxq	0(%rsi),%r8,%r11
	xorl	%ebp,%ebp		// %rbp = 0, clears CF and OF
	movq	%rdx,%r9
	mulxq	8(%rsi),%r14,%r12
	adoxq	-32(%rbx),%r8
	adcxq	%r14,%r11
	mulxq	16(%rsi),%r15,%r13
	adoxq	-24(%rbx),%r11
	adcxq	%r15,%r12
	adoxq	-16(%rbx),%r12
	adcxq	%rbp,%r13
	adoxq	%rbp,%r13

	movq	%rdi,8(%rsp)
	movq	%r8,%r15
	imulq	24(%rsp),%r8		// per-row factor
	xorl	%ebp,%ebp		// clears CF and OF again

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx
	adcxq	%rax,%r13
	adoxq	-8(%rbx),%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	adoxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-24(%rbx)
	leaq	32(%rcx),%rcx
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	movq	48(%rsp),%rdi		// inner loop count
	movq	%r12,-16(%rbx)

	jmp	L$mulx4x_inner

	// Later rows: as L$mulx4x_1st, additionally accumulating tp[] via adcx.
.p2align	5
L$mulx4x_inner:
	mulxq	0(%rsi),%r10,%rax
	adcxq	%rbp,%r15
	adoxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	0(%rbx),%r10
	adoxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	8(%rbx),%r11
	adoxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
	movq	%r8,%rdx
	adcxq	16(%rbx),%r12
	adoxq	%rax,%r13
	adcxq	24(%rbx),%r13
	adoxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx
	adcxq	%rbp,%r14

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-32(%rbx)
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	L$mulx4x_inner

	movq	0(%rsp),%rax		// %rax = num*8
	movq	8(%rsp),%rdi		// %rdi = next bp pointer
	adcq	%rbp,%r15
	subq	0(%rbx),%rbp		// 0 - previous top word: CF set iff it was non-zero
	adcq	%r15,%r14
	sbbq	%r15,%r15		// %r15 = 0 or -1 from the top carry
	movq	%r14,-8(%rbx)

	cmpq	16(%rsp),%rdi		// reached end of bp?
	jne	L$mulx4x_outer

	leaq	64(%rsp),%rbx		// %rbx = tp
	subq	%rax,%rcx		// rewind np
	negq	%r15			// top carry as 0 or 1
	movq	%rax,%rdx		// %rdx = num*8
	shrq	$3+2,%rax		// %rax = num/4 iterations
	movq	32(%rsp),%rdi		// %rdi = rp
	jmp	L$mulx4x_sub

	// rp[] = tp[] - np[], four words per iteration. leaq, movq and decq
	// leave CF untouched, so the borrow chain carries across iterations.
.p2align	5
L$mulx4x_sub:
	movq	0(%rbx),%r11
	movq	8(%rbx),%r12
	movq	16(%rbx),%r13
	movq	24(%rbx),%r14
	leaq	32(%rbx),%rbx
	sbbq	0(%rcx),%r11
	sbbq	8(%rcx),%r12
	sbbq	16(%rcx),%r13
	sbbq	24(%rcx),%r14
	leaq	32(%rcx),%rcx
	movq	%r11,0(%rdi)
	movq	%r12,8(%rdi)
	movq	%r13,16(%rdi)
	movq	%r14,24(%rdi)
	leaq	32(%rdi),%rdi
	decq	%rax
	jnz	L$mulx4x_sub

	sbbq	$0,%r15			// top carry minus final borrow = select mask
	leaq	64(%rsp),%rbx		// rewind tp
	subq	%rdx,%rdi		// rewind rp

.byte	102,73,15,110,207		// encodes movq %r15,%xmm1
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1		// broadcast low dword as select mask
	movq	40(%rsp),%rsi		// %rsi = entry %rsp

	jmp	L$mulx4x_cond_copy

	// Branch-free select, 32 bytes per iteration:
	//   rp = (tp & %xmm1) | (rp & (%xmm1 == 0 ? all-ones : 0))
	// tp is zeroed as it is consumed.
.p2align	5
L$mulx4x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	pcmpeqd	%xmm1,%xmm0		// %xmm0 = lanes where the mask is zero
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0		// back to zero for the next iteration
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	subq	$32,%rdx
	jnz	L$mulx4x_cond_copy

	movq	%rdx,(%rbx)		// %rdx is 0 here: clear the word after tp[num-1]

	movq	$1,%rax			// return value
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp

L$mulx4x_epilogue:
	ret


// NUL-terminated ASCII identification string:
// "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	4
#endif