// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

.section	.rodata
.align	64
.Lpoly:
.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

.LOne:
.long	1,1,1,1,1,1,1,1
.LTwo:
.long	2,2,2,2,2,2,2,2
.LThree:
.long	3,3,3,3,3,3,3,3
.LONE_mont:
.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe

.Lord:
.quad	0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad	0xccd1c8aaee00bc4f
.text

.globl	ecp_nistz256_neg
.hidden ecp_nistz256_neg
.type	ecp_nistz256_neg,@function
.align	32
ecp_nistz256_neg:
.cfi_startproc
_CET_ENDBR
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-16
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-24
.Lneg_body:

	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r13,%r13

	subq	0(%rsi),%r8
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r8,%rax
	sbbq	24(%rsi),%r11
	leaq	.Lpoly(%rip),%rsi
	movq	%r9,%rdx
	sbbq	$0,%r13

	addq	0(%rsi),%r8
	movq	%r10,%rcx
	adcq	8(%rsi),%r9
	adcq	16(%rsi),%r10
	movq	%r11,%r12
	adcq	24(%rsi),%r11
	testq	%r13,%r13

	cmovzq	%rax,%r8
	cmovzq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovzq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovzq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	movq	0(%rsp),%r13
.cfi_restore	%r13
	movq	8(%rsp),%r12
.cfi_restore	%r12
	leaq	16(%rsp),%rsp
.cfi_adjust_cfa_offset	-16
.Lneg_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_neg,.-ecp_nistz256_neg

.globl	ecp_nistz256_ord_mul_mont
.hidden ecp_nistz256_ord_mul_mont
.type	ecp_nistz256_ord_mul_mont,@function
.align	32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_mul_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mul_body:

	movq	0(%rdx),%rax
	movq	%rdx,%rbx
	leaq	.Lord(%rip),%r14
	movq	.LordK(%rip),%r15

	movq	%rax,%rcx
	mulq	0(%rsi)
	movq	%rax,%r8
	movq	%rcx,%rax
	movq	%rdx,%r9

	mulq	8(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	16(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r8,%r13
	imulq	%r15,%r8

	movq	%rdx,%r11
	mulq	24(%rsi)
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12

	mulq	0(%r14)
	movq	%r8,%rbp
	addq	%rax,%r13
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%r8

	mulq	8(%r14)
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	%rdx,%r10
	movq	%rbp,%rdx
	adcq	$0,%r8

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	8(%rbx),%rax
	sbbq	%rdx,%rbp
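
// Fold the borrow and carry from the first reduction step into the
// accumulator, then run the multiply-and-accumulate pass for b[1].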
	addq	%r8,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13

	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r9,%rcx
	imulq	%r15,%r9

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	xorq	%r8,%r8
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8

	mulq	0(%r14)
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	%r9,%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%r9

	mulq	8(%r14)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	%rdx,%r11
	movq	%rbp,%rdx
	adcq	$0,%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r12
	movq	16(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r9,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8

	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r10,%rcx
	imulq	%r15,%r10

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	xorq	%r9,%r9
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9

	mulq	0(%r14)
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	%r10,%rax
	adcq	%rdx,%rcx

	subq	%r10,%r12
	sbbq	$0,%r10

	mulq	8(%r14)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	%rdx,%r12
	movq	%rbp,%rdx
	adcq	$0,%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r13
	movq	24(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r10,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9

	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r11,%rcx
	imulq	%r15,%r11

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r8
	adcq	$0,%rdx
	xorq	%r10,%r10
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10

	mulq	0(%r14)
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	%r11,%rax
	adcq	%rdx,%rcx

	subq	%r11,%r13
	sbbq	$0,%r11

	mulq	8(%r14)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	%rdx,%r13
	movq	%rbp,%rdx
	adcq	$0,%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	sbbq	%rdx,%rbp

	addq	%r11,%r8
	adcq	%rbp,%r9
	adcq	$0,%r10

	movq	%r12,%rsi
	subq	0(%r14),%r12
	movq	%r13,%r11
	sbbq	8(%r14),%r13
	movq	%r8,%rcx
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rsi,%r12
	cmovcq	%r11,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mul_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont

.globl	ecp_nistz256_ord_sqr_mont
.hidden ecp_nistz256_ord_sqr_mont
.type	ecp_nistz256_ord_sqr_mont,@function
.align	32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_sqr_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqr_body:

	movq	0(%rsi),%r8
	movq	8(%rsi),%rax
	movq	16(%rsi),%r14
	movq	24(%rsi),%r15
	leaq	.Lord(%rip),%rsi
	movq	%rdx,%rbx
	jmp	.Loop_ord_sqr

.align	32
.Loop_ord_sqr:

	movq	%rax,%rbp
	mulq	%r8
	movq	%rax,%r9
.byte	102,72,15,110,205
	movq	%r14,%rax
	movq	%rdx,%r10

	mulq	%r8
	addq	%rax,%r10
	movq	%r15,%rax
.byte	102,73,15,110,214
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r8
	addq	%rax,%r11
	movq	%r15,%rax
.byte	102,73,15,110,223
	adcq	$0,%rdx
	movq	%rdx,%r12

	mulq	%r14
	movq	%rax,%r13
	movq	%r14,%rax
	movq	%rdx,%r14

	mulq	%rbp
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r15

	mulq	%rbp
	addq	%rax,%r12
	adcq	$0,%rdx

	addq	%r15,%r12
	adcq	%rdx,%r13
	adcq	$0,%r14

	xorq	%r15,%r15
	movq	%r8,%rax
	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	mulq	%rax
	movq	%rax,%r8
.byte	102,72,15,126,200
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r9
	adcq	%rax,%r10
.byte	102,72,15,126,208
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r11
	adcq	%rax,%r12
.byte	102,72,15,126,216
	adcq	$0,%rdx
	movq	%rdx,%rbp

	movq	%r8,%rcx
	imulq	32(%rsi),%r8

	mulq	%rax
	addq	%rbp,%r13
	adcq	%rax,%r14
	movq	0(%rsi),%rax
	adcq	%rdx,%r15

	mulq	%r8
	movq	%r8,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%rbp

	mulq	%r8
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%r8,%rax
	adcq	%rdx,%r10
	movq	%r8,%rdx
	adcq	$0,%rbp

	movq	%r9,%rcx
	imulq	32(%rsi),%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	0(%rsi),%rax
	sbbq	%rdx,%r8

	addq	%rbp,%r11
	adcq	$0,%r8

	mulq	%r9
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%rbp

	mulq	%r9
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%r9,%rax
	adcq	%rdx,%r11
	movq	%r9,%rdx
	adcq	$0,%rbp

	movq	%r10,%rcx
	imulq	32(%rsi),%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	movq	0(%rsi),%rax
	sbbq	%rdx,%r9

	addq	%rbp,%r8
	adcq	$0,%r9

	mulq	%r10
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r10,%r8
	sbbq	$0,%rbp

	mulq	%r10
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%r10,%rax
	adcq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%rbp

	movq	%r11,%rcx
	imulq	32(%rsi),%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r9
	movq	0(%rsi),%rax
	sbbq	%rdx,%r10

	addq	%rbp,%r9
	adcq	$0,%r10

	mulq	%r11
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r11,%r9
	sbbq	$0,%rbp

	mulq	%r11
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	movq	%r11,%rdx
	adcq	$0,%rbp

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r10
	sbbq	%rdx,%r11

	addq	%rbp,%r10
	adcq	$0,%r11

	xorq	%rdx,%rdx
	addq	%r12,%r8
	adcq	%r13,%r9
	movq	%r8,%r12
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%rax
	adcq	$0,%rdx

	subq	0(%rsi),%r8
	movq	%r10,%r14
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r15
	sbbq	24(%rsi),%r11
	sbbq	$0,%rdx

	cmovcq	%r12,%r8
	cmovncq	%r9,%rax
	cmovncq	%r10,%r14
	cmovncq	%r11,%r15

	decq	%rbx
	jnz	.Loop_ord_sqr

	movq	%r8,0(%rdi)
	movq	%rax,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r14,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r15,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqr_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont

.type	ecp_nistz256_ord_mul_montx,@function
.align	32
ecp_nistz256_ord_mul_montx:
.cfi_startproc
.Lecp_nistz256_ord_mul_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mulx_body:

	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi
	leaq	.Lord-128(%rip),%r14
	movq	.LordK(%rip),%r15

	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	mulxq	%r11,%rbp,%r11
	addq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	mulxq	%r15,%rdx,%rax
	adcq	%rbp,%r10
	adcq	%rcx,%r11
	adcq	$0,%r12

	xorq	%r13,%r13
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	24+128(%r14),%rcx,%rbp
	movq	8(%rbx),%rdx
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12
	adcxq	%r8,%r12
	adoxq	%r8,%r13
	adcq	$0,%r13

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
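
// The low accumulator word is now in %rdx; the mulxq below derives the next
// Montgomery multiplier as acc * .LordK mod 2^64.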
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8

	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%r14),%rcx,%rbp
	movq	16(%rbx),%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcxq	%r9,%r13
	adoxq	%r9,%r8
	adcq	$0,%r8

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9

	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%r14),%rcx,%rbp
	movq	24(%rbx),%rdx
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8
	adcxq	%r10,%r8
	adoxq	%r10,%r9
	adcq	$0,%r9

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10

	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%r14),%rcx,%rbp
	leaq	128(%r14),%r14
	movq	%r12,%rbx
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	movq	%r13,%rdx
	adcxq	%r11,%r9
	adoxq	%r11,%r10
	adcq	$0,%r10

	movq	%r8,%rcx
	subq	0(%r14),%r12
	sbbq	8(%r14),%r13
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mulx_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx

.type	ecp_nistz256_ord_sqr_montx,@function
.align	32
ecp_nistz256_ord_sqr_montx:
.cfi_startproc
.Lecp_nistz256_ord_sqr_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqrx_body:

	movq	%rdx,%rbx
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	.Lord(%rip),%rsi
	jmp	.Loop_ord_sqrx

.align	32
.Loop_ord_sqrx:
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	movq	%rdx,%rax
.byte	102,73,15,110,206
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	addq	%rcx,%r10
.byte	102,73,15,110,215
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13

	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13

	mulxq	%r8,%rcx,%r14
	movq	%rax,%rdx
.byte	102,73,15,110,216
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14

	mulxq	%rdx,%r8,%rbp
.byte	102,72,15,126,202
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
.byte	102,72,15,126,210
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
	mulxq	%rdx,%rcx,%rbp
.byte	0x67
.byte	102,72,15,126,218
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	adoxq	%rbp,%r13
	mulxq	%rdx,%rcx,%rax
	adoxq	%rcx,%r14
	adoxq	%rax,%r15

	movq	%r8,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	xorq	%rax,%rax
	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	adcxq	%rax,%r8

	movq	%r9,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	adoxq	%rax,%r9

	movq	%r10,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	adcxq	%rax,%r10

	movq	%r11,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	adoxq	%rax,%r11

	addq	%r8,%r12
	adcq	%r13,%r9
	movq	%r12,%rdx
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%r14
	adcq	$0,%rax

	subq	0(%rsi),%r12
	movq	%r10,%r15
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r8
	sbbq	24(%rsi),%r11
	sbbq	$0,%rax

	cmovncq	%r12,%rdx
	cmovncq	%r9,%r14
	cmovncq	%r10,%r15
	cmovncq	%r11,%r8

	decq	%rbx
	jnz	.Loop_ord_sqrx

	movq	%rdx,0(%rdi)
	movq	%r14,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r15,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r8,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqrx_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx

.globl	ecp_nistz256_mul_mont
.hidden ecp_nistz256_mul_mont
.type	ecp_nistz256_mul_mont,@function
.align	32
ecp_nistz256_mul_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
.Lmul_mont:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lmul_body:
	cmpl	$0x80100,%ecx
	je	.Lmul_montx
	movq	%rdx,%rbx
	movq	0(%rdx),%rax
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12

	call	__ecp_nistz256_mul_montq
	jmp	.Lmul_mont_done

.align	32
.Lmul_montx:
	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_mul_montx
.Lmul_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lmul_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont

.type	__ecp_nistz256_mul_montq,@function
.align	32
__ecp_nistz256_mul_montq:
.cfi_startproc

	movq	%rax,%rbp
	mulq	%r9
	movq	.Lpoly+8(%rip),%r14
	movq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%r9

	mulq	%r10
	movq	.Lpoly+24(%rip),%r15
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%r11
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r12
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	xorq	%r13,%r13
	movq	%rdx,%r12

	movq	%r8,%rbp
	shlq	$32,%r8
	mulq	%r15
	shrq	$32,%rbp
	addq	%r8,%r9
	adcq	%rbp,%r10
	adcq	%rax,%r11
	movq	8(%rbx),%rax
	adcq	%rdx,%r12
	adcq	$0,%r13
	xorq	%r8,%r8

	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8

	movq	%r9,%rbp
	shlq	$32,%r9
	mulq	%r15
	shrq	$32,%rbp
	addq	%r9,%r10
	adcq	%rbp,%r11
	adcq	%rax,%r12
	movq	16(%rbx),%rax
	adcq	%rdx,%r13
	adcq	$0,%r8
	xorq	%r9,%r9

	movq	%rax,%rbp
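
// Third multiply-and-accumulate pass of __ecp_nistz256_mul_montq:
// b[2] (copied to %rax/%rbp) times each word of a[].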
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9

	movq	%r10,%rbp
	shlq	$32,%r10
	mulq	%r15
	shrq	$32,%rbp
	addq	%r10,%r11
	adcq	%rbp,%r12
	adcq	%rax,%r13
	movq	24(%rbx),%rax
	adcq	%rdx,%r8
	adcq	$0,%r9
	xorq	%r10,%r10

	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10

	movq	%r11,%rbp
	shlq	$32,%r11
	mulq	%r15
	shrq	$32,%rbp
	addq	%r11,%r12
	adcq	%rbp,%r13
	movq	%r12,%rcx
	adcq	%rax,%r8
	adcq	%rdx,%r9
	movq	%r13,%rbp
	adcq	$0,%r10

	subq	$-1,%r12
	movq	%r8,%rbx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rdx
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rcx,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rbx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rdx,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq

.globl	ecp_nistz256_sqr_mont
.hidden ecp_nistz256_sqr_mont
.type	ecp_nistz256_sqr_mont,@function
.align	32
ecp_nistz256_sqr_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsqr_body:
	cmpl	$0x80100,%ecx
	je	.Lsqr_montx
	movq	0(%rsi),%rax
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8

	call	__ecp_nistz256_sqr_montq
	jmp	.Lsqr_mont_done

.align	32
.Lsqr_montx:
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_sqr_montx
.Lsqr_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lsqr_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont

.type	__ecp_nistz256_sqr_montq,@function
.align	32
__ecp_nistz256_sqr_montq:
.cfi_startproc
	movq	%rax,%r13
	mulq	%r14
	movq	%rax,%r9
	movq	%r15,%rax
	movq	%rdx,%r10

	mulq	%r13
	addq	%rax,%r10
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r13
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12

	mulq	%r14
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%r14
	addq	%rax,%r12
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbp,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r15
	xorq	%r15,%r15
	addq	%rax,%r13
	movq	0(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	mulq	%rax
	movq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r9
	adcq	%rax,%r10
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r11
	adcq	%rax,%r12
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r13
	adcq	%rax,%r14
	movq	%r8,%rax
	adcq	%rdx,%r15

	movq	.Lpoly+8(%rip),%rsi
	movq	.Lpoly+24(%rip),%rbp

	movq	%r8,%rcx
	shlq	$32,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r8,%r9
	adcq	%rcx,%r10
	adcq	%rax,%r11
	movq	%r9,%rax
	adcq	$0,%rdx

	movq	%r9,%rcx
	shlq	$32,%r9
	movq	%rdx,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r9,%r10
	adcq	%rcx,%r11
	adcq	%rax,%r8
	movq	%r10,%rax
	adcq	$0,%rdx

	movq	%r10,%rcx
	shlq	$32,%r10
	movq	%rdx,%r9
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r10,%r11
	adcq	%rcx,%r8
	adcq	%rax,%r9
	movq	%r11,%rax
	adcq	$0,%rdx

	movq	%r11,%rcx
	shlq	$32,%r11
	movq	%rdx,%r10
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r11,%r8
	adcq	%rcx,%r9
	adcq	%rax,%r10
	adcq	$0,%rdx
	xorq	%r11,%r11

	addq	%r8,%r12
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%rdx,%r15
	movq	%r13,%r9
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%rcx
	sbbq	%rbp,%r15
	sbbq	$0,%r11

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%rcx,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
.type	__ecp_nistz256_mul_montx,@function
.align	32
__ecp_nistz256_mul_montx:
.cfi_startproc

	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	movq	$32,%r14
	xorq	%r13,%r13
	mulxq	%r11,%rbp,%r11
	movq	.Lpoly+24(%rip),%r15
	adcq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	adcq	%rbp,%r10
	shlxq	%r14,%r8,%rbp
	adcq	%rcx,%r11
	shrxq	%r14,%r8,%rcx
	adcq	$0,%r12

	addq	%rbp,%r9
	adcq	%rcx,%r10

	mulxq	%r15,%rcx,%rbp
	movq	8(%rbx),%rdx
	adcq	%rcx,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13
	xorq	%r8,%r8

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	adcxq	%rcx,%r12
	shlxq	%r14,%r9,%rcx
	adoxq	%rbp,%r13
	shrxq	%r14,%r9,%rbp

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8

	addq	%rcx,%r10
	adcq	%rbp,%r11

	mulxq	%r15,%rcx,%rbp
	movq	16(%rbx),%rdx
	adcq	%rcx,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8
	xorq	%r9,%r9

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	adcxq	%rcx,%r13
	shlxq	%r14,%r10,%rcx
	adoxq	%rbp,%r8
	shrxq	%r14,%r10,%rbp

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9

	addq	%rcx,%r11
	adcq	%rbp,%r12

	mulxq	%r15,%rcx,%rbp
	movq	24(%rbx),%rdx
	adcq	%rcx,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9
	xorq	%r10,%r10

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	adcxq	%rcx,%r8
	shlxq	%r14,%r11,%rcx
	adoxq	%rbp,%r9
	shrxq	%r14,%r11,%rbp

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10

	addq	%rcx,%r12
	adcq	%rbp,%r13

	mulxq	%r15,%rcx,%rbp
	movq	%r12,%rbx
	movq	.Lpoly+8(%rip),%r14
	adcq	%rcx,%r8
	movq	%r13,%rdx
	adcq	%rbp,%r9
	adcq	$0,%r10

	xorl	%eax,%eax
	movq	%r8,%rcx
	sbbq	$-1,%r12
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rbp
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rbp,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx

.type	__ecp_nistz256_sqr_montx,@function
.align	32
__ecp_nistz256_sqr_montx:
.cfi_startproc
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	xorl	%eax,%eax
	adcq	%rcx,%r10
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13

	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13

	mulxq	%r8,%rcx,%r14
	movq	0+128(%rsi),%rdx
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14

	mulxq	%rdx,%r8,%rbp
	movq	8+128(%rsi),%rdx
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
	movq	16+128(%rsi),%rdx
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
.byte	0x67
	mulxq	%rdx,%rcx,%rbp
	movq	24+128(%rsi),%rdx
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	movq	$32,%rsi
	adoxq	%rbp,%r13
.byte	0x67,0x67
	mulxq	%rdx,%rcx,%rax
	movq	.Lpoly+24(%rip),%rdx
	adoxq	%rcx,%r14
	shlxq	%rsi,%r8,%rcx
	adoxq	%rax,%r15
	shrxq	%rsi,%r8,%rax
	movq	%rdx,%rbp

	addq	%rcx,%r9
	adcq	%rax,%r10

	mulxq	%r8,%rcx,%r8
	adcq	%rcx,%r11
	shlxq	%rsi,%r9,%rcx
	adcq	$0,%r8
	shrxq	%rsi,%r9,%rax

	addq	%rcx,%r10
	adcq	%rax,%r11

	mulxq	%r9,%rcx,%r9
	adcq	%rcx,%r8
	shlxq	%rsi,%r10,%rcx
	adcq	$0,%r9
	shrxq	%rsi,%r10,%rax

	addq	%rcx,%r11
	adcq	%rax,%r8

	mulxq	%r10,%rcx,%r10
	adcq	%rcx,%r9
	shlxq	%rsi,%r11,%rcx
	adcq	$0,%r10
	shrxq	%rsi,%r11,%rax

	addq	%rcx,%r8
	adcq	%rax,%r9

	mulxq	%r11,%rcx,%r11
	adcq	%rcx,%r10
	adcq	$0,%r11

	xorq	%rdx,%rdx
	addq	%r8,%r12
	movq	.Lpoly+8(%rip),%rsi
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%r11,%r15
	movq	%r13,%r9
	adcq	$0,%rdx

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%r11
	sbbq	%rbp,%r15
	sbbq	$0,%rdx

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%r11,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx

.globl	ecp_nistz256_select_w5
.hidden ecp_nistz256_select_w5
.type	ecp_nistz256_select_w5,@function
.align	32
ecp_nistz256_select_w5:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w5
	movdqa	.LOne(%rip),%xmm0
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7

	movdqa	%xmm0,%xmm8
	pshufd	$0,%xmm1,%xmm1

	movq	$16,%rax
.Lselect_loop_sse_w5:

	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	pcmpeqd	%xmm1,%xmm15

	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	movdqa	64(%rsi),%xmm13
	movdqa	80(%rsi),%xmm14
	leaq	96(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	pand	%xmm15,%xmm13
	por	%xmm12,%xmm5
	pand	%xmm15,%xmm14
	por	%xmm13,%xmm6
	por	%xmm14,%xmm7

	decq	%rax
	jnz	.Lselect_loop_sse_w5

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	movdqu	%xmm6,64(%rdi)
	movdqu	%xmm7,80(%rdi)
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w5:
.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5

.globl	ecp_nistz256_select_w7
.hidden ecp_nistz256_select_w7
.type	ecp_nistz256_select_w7,@function
.align	32
ecp_nistz256_select_w7:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w7
	movdqa	.LOne(%rip),%xmm8
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5

	movdqa	%xmm8,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	$64,%rax

.Lselect_loop_sse_w7:
	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	pcmpeqd	%xmm1,%xmm15
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	leaq	64(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	prefetcht0	255(%rsi)
	por	%xmm12,%xmm5

	decq	%rax
	jnz	.Lselect_loop_sse_w7

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w7:
.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7

.type	ecp_nistz256_avx2_select_w5,@function
.align	32
ecp_nistz256_avx2_select_w5:
.cfi_startproc
.Lavx2_select_w5:
	vzeroupper
	vmovdqa	.LTwo(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3
	vpxor	%ymm4,%ymm4,%ymm4

	vmovdqa	.LOne(%rip),%ymm5
	vmovdqa	.LTwo(%rip),%ymm10

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$8,%rax
.Lselect_loop_avx2_w5:

	vmovdqa	0(%rsi),%ymm6
	vmovdqa	32(%rsi),%ymm7
	vmovdqa	64(%rsi),%ymm8

	vmovdqa	96(%rsi),%ymm11
	vmovdqa	128(%rsi),%ymm12
	vmovdqa	160(%rsi),%ymm13

	vpcmpeqd	%ymm1,%ymm5,%ymm9
	vpcmpeqd	%ymm1,%ymm10,%ymm14

	vpaddd	%ymm0,%ymm5,%ymm5
	vpaddd	%ymm0,%ymm10,%ymm10
	leaq	192(%rsi),%rsi

	vpand	%ymm9,%ymm6,%ymm6
	vpand	%ymm9,%ymm7,%ymm7
	vpand	%ymm9,%ymm8,%ymm8
	vpand	%ymm14,%ymm11,%ymm11
	vpand	%ymm14,%ymm12,%ymm12
	vpand	%ymm14,%ymm13,%ymm13

	vpxor	%ymm6,%ymm2,%ymm2
	vpxor	%ymm7,%ymm3,%ymm3
	vpxor	%ymm8,%ymm4,%ymm4
	vpxor	%ymm11,%ymm2,%ymm2
	vpxor	%ymm12,%ymm3,%ymm3
	vpxor	%ymm13,%ymm4,%ymm4

	decq	%rax
	jnz	.Lselect_loop_avx2_w5

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vmovdqu	%ymm4,64(%rdi)
	vzeroupper
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w5:
.size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5

.type	ecp_nistz256_avx2_select_w7,@function
.align	32
ecp_nistz256_avx2_select_w7:
.cfi_startproc
.Lavx2_select_w7:
_CET_ENDBR
	vzeroupper
	vmovdqa	.LThree(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3

	vmovdqa	.LOne(%rip),%ymm4
	vmovdqa	.LTwo(%rip),%ymm8
	vmovdqa	.LThree(%rip),%ymm12

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$21,%rax
.Lselect_loop_avx2_w7:

	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vmovdqa	64(%rsi),%ymm9
	vmovdqa	96(%rsi),%ymm10

	vmovdqa	128(%rsi),%ymm13
	vmovdqa	160(%rsi),%ymm14

	vpcmpeqd	%ymm1,%ymm4,%ymm7
	vpcmpeqd	%ymm1,%ymm8,%ymm11
	vpcmpeqd	%ymm1,%ymm12,%ymm15

	vpaddd	%ymm0,%ymm4,%ymm4
	vpaddd	%ymm0,%ymm8,%ymm8
	vpaddd	%ymm0,%ymm12,%ymm12
	leaq	192(%rsi),%rsi

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm11,%ymm9,%ymm9
	vpand	%ymm11,%ymm10,%ymm10
	vpand	%ymm15,%ymm13,%ymm13
	vpand	%ymm15,%ymm14,%ymm14

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm13,%ymm2,%ymm2
	vpxor	%ymm14,%ymm3,%ymm3

	decq	%rax
	jnz	.Lselect_loop_avx2_w7

	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vpcmpeqd	%ymm1,%ymm4,%ymm7

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vzeroupper
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w7:
.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
.type	__ecp_nistz256_add_toq,@function
.align	32
__ecp_nistz256_add_toq:
.cfi_startproc
	xorq	%r11,%r11
	addq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq

.type	__ecp_nistz256_sub_fromq,@function
.align	32
__ecp_nistz256_sub_fromq:
.cfi_startproc
	subq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	%r11,%r11

	addq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9
	testq	%r11,%r11

	cmovzq	%rax,%r12
	cmovzq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovzq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovzq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq

.type	__ecp_nistz256_subq,@function
.align	32
__ecp_nistz256_subq:
.cfi_startproc
	subq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	%r11,%r11

	addq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10
	testq	%r11,%r11

	cmovnzq	%rax,%r12
	cmovnzq	%rbp,%r13
	cmovnzq	%rcx,%r8
	cmovnzq	%r10,%r9

	ret
.cfi_endproc
.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq

.type	__ecp_nistz256_mul_by_2q,@function
.align	32
__ecp_nistz256_mul_by_2q:
.cfi_startproc
	xorq	%r11,%r11
	addq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
.globl	ecp_nistz256_point_double
.hidden ecp_nistz256_point_double
.type	ecp_nistz256_point_double,@function
.align	32
ecp_nistz256_point_double:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_doublex
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$160+8,%rsp
.cfi_adjust_cfa_offset	32*5+8
.Lpoint_doubleq_body:

.Lpoint_double_shortcutq:
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199
.byte	102,73,15,110,202
.byte	102,73,15,110,211

	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-0(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	32(%rbx),%rax
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-0(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215
	call	__ecp_nistz256_mul_montq
	call	__ecp_nistz256_mul_by_2q

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207
	call	__ecp_nistz256_sqr_montq
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	movq	64(%rsp),%rax
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	0+32(%rsp),%rax
	movq	8+32(%rsp),%r14
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199
	call	__ecp_nistz256_sqr_montq

	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subq

	movq	32(%rsp),%rax
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-0(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

.byte	102,72,15,126,203
.byte	102,72,15,126,207
	call	__ecp_nistz256_sub_fromq

	leaq	160+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_doubleq_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
.globl	ecp_nistz256_point_add
.hidden ecp_nistz256_point_add
.type	ecp_nistz256_point_add,@function
.align	32
ecp_nistz256_point_add:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_addx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$576+8,%rsp
.cfi_adjust_cfa_offset	32*18+8
.Lpoint_addq_body:

	movdqu	0(%rsi),%xmm0
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	%rsi,%rbx
	movq	%rdx,%rsi
	movdqa	%xmm0,384(%rsp)
	movdqa	%xmm1,384+16(%rsp)
	movdqa	%xmm2,416(%rsp)
	movdqa	%xmm3,416+16(%rsp)
	movdqa	%xmm4,448(%rsp)
	movdqa	%xmm5,448+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rsi),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rsi),%xmm3
	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,480(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,480+16(%rsp)
	movdqu	64(%rsi),%xmm0
	movdqu	80(%rsi),%xmm1
	movdqa	%xmm2,512(%rsp)
	movdqa	%xmm3,512+16(%rsp)
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm0,%xmm1
.byte	102,72,15,110,199

	leaq	64-0(%rsi),%rsi
	movq	%rax,544+0(%rsp)
	movq	%r14,544+8(%rsp)
	movq	%r15,544+16(%rsp)
	movq	%r8,544+24(%rsp)
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm1,%xmm4
	por	%xmm1,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4
	movq	64+0(%rbx),%rax
	movq	64+8(%rbx),%r14
	movq	64+16(%rbx),%r15
	movq	64+24(%rbx),%r8
.byte	102,72,15,110,203

	leaq	64-0(%rbx),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	544(%rsp),%rax
	leaq	544(%rsp),%rbx
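
// Multiply Z2 (saved at 544(%rsp)) by Z2^2 (at 96(%rsp)); the product
// Z2^3 is written to 224(%rsp) and feeds the S1 term below.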
2752 movq 0+96(%rsp),%r9 2753 movq 8+96(%rsp),%r10 2754 leaq 0+96(%rsp),%rsi 2755 movq 16+96(%rsp),%r11 2756 movq 24+96(%rsp),%r12 2757 leaq 224(%rsp),%rdi 2758 call __ecp_nistz256_mul_montq 2759 2760 movq 448(%rsp),%rax 2761 leaq 448(%rsp),%rbx 2762 movq 0+32(%rsp),%r9 2763 movq 8+32(%rsp),%r10 2764 leaq 0+32(%rsp),%rsi 2765 movq 16+32(%rsp),%r11 2766 movq 24+32(%rsp),%r12 2767 leaq 256(%rsp),%rdi 2768 call __ecp_nistz256_mul_montq 2769 2770 movq 416(%rsp),%rax 2771 leaq 416(%rsp),%rbx 2772 movq 0+224(%rsp),%r9 2773 movq 8+224(%rsp),%r10 2774 leaq 0+224(%rsp),%rsi 2775 movq 16+224(%rsp),%r11 2776 movq 24+224(%rsp),%r12 2777 leaq 224(%rsp),%rdi 2778 call __ecp_nistz256_mul_montq 2779 2780 movq 512(%rsp),%rax 2781 leaq 512(%rsp),%rbx 2782 movq 0+256(%rsp),%r9 2783 movq 8+256(%rsp),%r10 2784 leaq 0+256(%rsp),%rsi 2785 movq 16+256(%rsp),%r11 2786 movq 24+256(%rsp),%r12 2787 leaq 256(%rsp),%rdi 2788 call __ecp_nistz256_mul_montq 2789 2790 leaq 224(%rsp),%rbx 2791 leaq 64(%rsp),%rdi 2792 call __ecp_nistz256_sub_fromq 2793 2794 orq %r13,%r12 2795 movdqa %xmm4,%xmm2 2796 orq %r8,%r12 2797 orq %r9,%r12 2798 por %xmm5,%xmm2 2799.byte 102,73,15,110,220 2800 2801 movq 384(%rsp),%rax 2802 leaq 384(%rsp),%rbx 2803 movq 0+96(%rsp),%r9 2804 movq 8+96(%rsp),%r10 2805 leaq 0+96(%rsp),%rsi 2806 movq 16+96(%rsp),%r11 2807 movq 24+96(%rsp),%r12 2808 leaq 160(%rsp),%rdi 2809 call __ecp_nistz256_mul_montq 2810 2811 movq 480(%rsp),%rax 2812 leaq 480(%rsp),%rbx 2813 movq 0+32(%rsp),%r9 2814 movq 8+32(%rsp),%r10 2815 leaq 0+32(%rsp),%rsi 2816 movq 16+32(%rsp),%r11 2817 movq 24+32(%rsp),%r12 2818 leaq 192(%rsp),%rdi 2819 call __ecp_nistz256_mul_montq 2820 2821 leaq 160(%rsp),%rbx 2822 leaq 0(%rsp),%rdi 2823 call __ecp_nistz256_sub_fromq 2824 2825 orq %r13,%r12 2826 orq %r8,%r12 2827 orq %r9,%r12 2828 2829.byte 102,73,15,126,208 2830.byte 102,73,15,126,217 2831 orq %r8,%r12 2832.byte 0x3e 2833 jnz .Ladd_proceedq 2834 2835 2836 2837 testq %r9,%r9 2838 jz .Ladd_doubleq 2839 2840 2841 2842 2843 2844 2845.byte 102,72,15,126,199 2846 pxor %xmm0,%xmm0 2847 movdqu %xmm0,0(%rdi) 2848 movdqu %xmm0,16(%rdi) 2849 movdqu %xmm0,32(%rdi) 2850 movdqu %xmm0,48(%rdi) 2851 movdqu %xmm0,64(%rdi) 2852 movdqu %xmm0,80(%rdi) 2853 jmp .Ladd_doneq 2854 2855.align 32 2856.Ladd_doubleq: 2857.byte 102,72,15,126,206 2858.byte 102,72,15,126,199 2859 addq $416,%rsp 2860.cfi_adjust_cfa_offset -416 2861 jmp .Lpoint_double_shortcutq 2862.cfi_adjust_cfa_offset 416 2863 2864.align 32 2865.Ladd_proceedq: 2866 movq 0+64(%rsp),%rax 2867 movq 8+64(%rsp),%r14 2868 leaq 0+64(%rsp),%rsi 2869 movq 16+64(%rsp),%r15 2870 movq 24+64(%rsp),%r8 2871 leaq 96(%rsp),%rdi 2872 call __ecp_nistz256_sqr_montq 2873 2874 movq 448(%rsp),%rax 2875 leaq 448(%rsp),%rbx 2876 movq 0+0(%rsp),%r9 2877 movq 8+0(%rsp),%r10 2878 leaq 0+0(%rsp),%rsi 2879 movq 16+0(%rsp),%r11 2880 movq 24+0(%rsp),%r12 2881 leaq 352(%rsp),%rdi 2882 call __ecp_nistz256_mul_montq 2883 2884 movq 0+0(%rsp),%rax 2885 movq 8+0(%rsp),%r14 2886 leaq 0+0(%rsp),%rsi 2887 movq 16+0(%rsp),%r15 2888 movq 24+0(%rsp),%r8 2889 leaq 32(%rsp),%rdi 2890 call __ecp_nistz256_sqr_montq 2891 2892 movq 544(%rsp),%rax 2893 leaq 544(%rsp),%rbx 2894 movq 0+352(%rsp),%r9 2895 movq 8+352(%rsp),%r10 2896 leaq 0+352(%rsp),%rsi 2897 movq 16+352(%rsp),%r11 2898 movq 24+352(%rsp),%r12 2899 leaq 352(%rsp),%rdi 2900 call __ecp_nistz256_mul_montq 2901 2902 movq 0(%rsp),%rax 2903 leaq 0(%rsp),%rbx 2904 movq 0+32(%rsp),%r9 2905 movq 8+32(%rsp),%r10 2906 leaq 0+32(%rsp),%rsi 2907 movq 16+32(%rsp),%r11 2908 movq 24+32(%rsp),%r12 2909 leaq 
128(%rsp),%rdi 2910 call __ecp_nistz256_mul_montq 2911 2912 movq 160(%rsp),%rax 2913 leaq 160(%rsp),%rbx 2914 movq 0+32(%rsp),%r9 2915 movq 8+32(%rsp),%r10 2916 leaq 0+32(%rsp),%rsi 2917 movq 16+32(%rsp),%r11 2918 movq 24+32(%rsp),%r12 2919 leaq 192(%rsp),%rdi 2920 call __ecp_nistz256_mul_montq 2921 2922 2923 2924 2925 xorq %r11,%r11 2926 addq %r12,%r12 2927 leaq 96(%rsp),%rsi 2928 adcq %r13,%r13 2929 movq %r12,%rax 2930 adcq %r8,%r8 2931 adcq %r9,%r9 2932 movq %r13,%rbp 2933 adcq $0,%r11 2934 2935 subq $-1,%r12 2936 movq %r8,%rcx 2937 sbbq %r14,%r13 2938 sbbq $0,%r8 2939 movq %r9,%r10 2940 sbbq %r15,%r9 2941 sbbq $0,%r11 2942 2943 cmovcq %rax,%r12 2944 movq 0(%rsi),%rax 2945 cmovcq %rbp,%r13 2946 movq 8(%rsi),%rbp 2947 cmovcq %rcx,%r8 2948 movq 16(%rsi),%rcx 2949 cmovcq %r10,%r9 2950 movq 24(%rsi),%r10 2951 2952 call __ecp_nistz256_subq 2953 2954 leaq 128(%rsp),%rbx 2955 leaq 288(%rsp),%rdi 2956 call __ecp_nistz256_sub_fromq 2957 2958 movq 192+0(%rsp),%rax 2959 movq 192+8(%rsp),%rbp 2960 movq 192+16(%rsp),%rcx 2961 movq 192+24(%rsp),%r10 2962 leaq 320(%rsp),%rdi 2963 2964 call __ecp_nistz256_subq 2965 2966 movq %r12,0(%rdi) 2967 movq %r13,8(%rdi) 2968 movq %r8,16(%rdi) 2969 movq %r9,24(%rdi) 2970 movq 128(%rsp),%rax 2971 leaq 128(%rsp),%rbx 2972 movq 0+224(%rsp),%r9 2973 movq 8+224(%rsp),%r10 2974 leaq 0+224(%rsp),%rsi 2975 movq 16+224(%rsp),%r11 2976 movq 24+224(%rsp),%r12 2977 leaq 256(%rsp),%rdi 2978 call __ecp_nistz256_mul_montq 2979 2980 movq 320(%rsp),%rax 2981 leaq 320(%rsp),%rbx 2982 movq 0+64(%rsp),%r9 2983 movq 8+64(%rsp),%r10 2984 leaq 0+64(%rsp),%rsi 2985 movq 16+64(%rsp),%r11 2986 movq 24+64(%rsp),%r12 2987 leaq 320(%rsp),%rdi 2988 call __ecp_nistz256_mul_montq 2989 2990 leaq 256(%rsp),%rbx 2991 leaq 320(%rsp),%rdi 2992 call __ecp_nistz256_sub_fromq 2993 2994.byte 102,72,15,126,199 2995 2996 movdqa %xmm5,%xmm0 2997 movdqa %xmm5,%xmm1 2998 pandn 352(%rsp),%xmm0 2999 movdqa %xmm5,%xmm2 3000 pandn 352+16(%rsp),%xmm1 3001 movdqa %xmm5,%xmm3 3002 pand 544(%rsp),%xmm2 3003 pand 544+16(%rsp),%xmm3 3004 por %xmm0,%xmm2 3005 por %xmm1,%xmm3 3006 3007 movdqa %xmm4,%xmm0 3008 movdqa %xmm4,%xmm1 3009 pandn %xmm2,%xmm0 3010 movdqa %xmm4,%xmm2 3011 pandn %xmm3,%xmm1 3012 movdqa %xmm4,%xmm3 3013 pand 448(%rsp),%xmm2 3014 pand 448+16(%rsp),%xmm3 3015 por %xmm0,%xmm2 3016 por %xmm1,%xmm3 3017 movdqu %xmm2,64(%rdi) 3018 movdqu %xmm3,80(%rdi) 3019 3020 movdqa %xmm5,%xmm0 3021 movdqa %xmm5,%xmm1 3022 pandn 288(%rsp),%xmm0 3023 movdqa %xmm5,%xmm2 3024 pandn 288+16(%rsp),%xmm1 3025 movdqa %xmm5,%xmm3 3026 pand 480(%rsp),%xmm2 3027 pand 480+16(%rsp),%xmm3 3028 por %xmm0,%xmm2 3029 por %xmm1,%xmm3 3030 3031 movdqa %xmm4,%xmm0 3032 movdqa %xmm4,%xmm1 3033 pandn %xmm2,%xmm0 3034 movdqa %xmm4,%xmm2 3035 pandn %xmm3,%xmm1 3036 movdqa %xmm4,%xmm3 3037 pand 384(%rsp),%xmm2 3038 pand 384+16(%rsp),%xmm3 3039 por %xmm0,%xmm2 3040 por %xmm1,%xmm3 3041 movdqu %xmm2,0(%rdi) 3042 movdqu %xmm3,16(%rdi) 3043 3044 movdqa %xmm5,%xmm0 3045 movdqa %xmm5,%xmm1 3046 pandn 320(%rsp),%xmm0 3047 movdqa %xmm5,%xmm2 3048 pandn 320+16(%rsp),%xmm1 3049 movdqa %xmm5,%xmm3 3050 pand 512(%rsp),%xmm2 3051 pand 512+16(%rsp),%xmm3 3052 por %xmm0,%xmm2 3053 por %xmm1,%xmm3 3054 3055 movdqa %xmm4,%xmm0 3056 movdqa %xmm4,%xmm1 3057 pandn %xmm2,%xmm0 3058 movdqa %xmm4,%xmm2 3059 pandn %xmm3,%xmm1 3060 movdqa %xmm4,%xmm3 3061 pand 416(%rsp),%xmm2 3062 pand 416+16(%rsp),%xmm3 3063 por %xmm0,%xmm2 3064 por %xmm1,%xmm3 3065 movdqu %xmm2,32(%rdi) 3066 movdqu %xmm3,48(%rdi) 3067 3068.Ladd_doneq: 3069 leaq 576+56(%rsp),%rsi 3070.cfi_def_cfa 
.Ladd_doneq:
	leaq 576+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_addq_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
.globl ecp_nistz256_point_add_affine
.hidden ecp_nistz256_point_add_affine
.type ecp_nistz256_point_add_affine,@function
.align 32
ecp_nistz256_point_add_affine:
.cfi_startproc
_CET_ENDBR
	leaq OPENSSL_ia32cap_P(%rip),%rcx
	movq 8(%rcx),%rcx
	andl $0x80100,%ecx
	cmpl $0x80100,%ecx
	je .Lpoint_add_affinex
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $480+8,%rsp
.cfi_adjust_cfa_offset 32*15+8
.Ladd_affineq_body:

	movdqu 0(%rsi),%xmm0
	movq %rdx,%rbx
	movdqu 16(%rsi),%xmm1
	movdqu 32(%rsi),%xmm2
	movdqu 48(%rsi),%xmm3
	movdqu 64(%rsi),%xmm4
	movdqu 80(%rsi),%xmm5
	movq 64+0(%rsi),%rax
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	movdqa %xmm0,320(%rsp)
	movdqa %xmm1,320+16(%rsp)
	movdqa %xmm2,352(%rsp)
	movdqa %xmm3,352+16(%rsp)
	movdqa %xmm4,384(%rsp)
	movdqa %xmm5,384+16(%rsp)
	por %xmm4,%xmm5

	movdqu 0(%rbx),%xmm0
	pshufd $0xb1,%xmm5,%xmm3
	movdqu 16(%rbx),%xmm1
	movdqu 32(%rbx),%xmm2
	por %xmm3,%xmm5
	movdqu 48(%rbx),%xmm3
	movdqa %xmm0,416(%rsp)
	pshufd $0x1e,%xmm5,%xmm4
	movdqa %xmm1,416+16(%rsp)
	por %xmm0,%xmm1
.byte 102,72,15,110,199
	movdqa %xmm2,448(%rsp)
	movdqa %xmm3,448+16(%rsp)
	por %xmm2,%xmm3
	por %xmm4,%xmm5
	pxor %xmm4,%xmm4
	por %xmm1,%xmm3

	leaq 64-0(%rsi),%rsi
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_sqr_montq

	pcmpeqd %xmm4,%xmm5
	pshufd $0xb1,%xmm3,%xmm4
	movq 0(%rbx),%rax

	movq %r12,%r9
	por %xmm3,%xmm4
	pshufd $0,%xmm5,%xmm5
	pshufd $0x1e,%xmm4,%xmm3
	movq %r13,%r10
	por %xmm3,%xmm4
	pxor %xmm3,%xmm3
	movq %r14,%r11
	pcmpeqd %xmm3,%xmm4
	pshufd $0,%xmm4,%xmm4

	leaq 32-0(%rsp),%rsi
	movq %r15,%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	leaq 320(%rsp),%rbx
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

	movq 384(%rsp),%rax
	leaq 384(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq 0+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	movq 384(%rsp),%rax
	leaq 384(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq 0+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 288(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	movq 448(%rsp),%rax
	leaq 448(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq 0+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montq
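// Editorial note: because the second input here is affine, Z2 = 1 and
// the Z2^2/Z2^3 products of the general addition are simply absent from
// this path; the subtraction below appears to form R = S2 - S1 from the
// S2 just computed.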
	leaq 352(%rsp),%rbx
	leaq 96(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

	movq 0+64(%rsp),%rax
	movq 8+64(%rsp),%r14
	leaq 0+64(%rsp),%rsi
	movq 16+64(%rsp),%r15
	movq 24+64(%rsp),%r8
	leaq 128(%rsp),%rdi
	call __ecp_nistz256_sqr_montq

	movq 0+96(%rsp),%rax
	movq 8+96(%rsp),%r14
	leaq 0+96(%rsp),%rsi
	movq 16+96(%rsp),%r15
	movq 24+96(%rsp),%r8
	leaq 192(%rsp),%rdi
	call __ecp_nistz256_sqr_montq

	movq 128(%rsp),%rax
	leaq 128(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq 0+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 160(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	movq 320(%rsp),%rax
	leaq 320(%rsp),%rbx
	movq 0+128(%rsp),%r9
	movq 8+128(%rsp),%r10
	leaq 0+128(%rsp),%rsi
	movq 16+128(%rsp),%r11
	movq 24+128(%rsp),%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	xorq %r11,%r11
	addq %r12,%r12
	leaq 192(%rsp),%rsi
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	subq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	movq 0(%rsi),%rax
	cmovcq %rbp,%r13
	movq 8(%rsi),%rbp
	cmovcq %rcx,%r8
	movq 16(%rsi),%rcx
	cmovcq %r10,%r9
	movq 24(%rsi),%r10

	call __ecp_nistz256_subq

	leaq 160(%rsp),%rbx
	leaq 224(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

	movq 0+0(%rsp),%rax
	movq 0+8(%rsp),%rbp
	movq 0+16(%rsp),%rcx
	movq 0+24(%rsp),%r10
	leaq 64(%rsp),%rdi

	call __ecp_nistz256_subq

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)
	movq 352(%rsp),%rax
	leaq 352(%rsp),%rbx
	movq 0+160(%rsp),%r9
	movq 8+160(%rsp),%r10
	leaq 0+160(%rsp),%rsi
	movq 16+160(%rsp),%r11
	movq 24+160(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	movq 96(%rsp),%rax
	leaq 96(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq 0+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	leaq 32(%rsp),%rbx
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

.byte 102,72,15,126,199

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 288(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 288+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand .LONE_mont(%rip),%xmm2
	pand .LONE_mont+16(%rip),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 384(%rsp),%xmm2
	pand 384+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,64(%rdi)
	movdqu %xmm3,80(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 224(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 224+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 416(%rsp),%xmm2
	pand 416+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 320(%rsp),%xmm2
	pand 320+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,0(%rdi)
	movdqu %xmm3,16(%rdi)
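// Editorial note: in the selection above, the Z coordinate falls back
// to .LONE_mont (the constant 1 in Montgomery form) rather than to a
// stored Z2, consistent with the implicit Z2 = 1 of an affine second
// input.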
	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 256(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 256+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 448(%rsp),%xmm2
	pand 448+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 352(%rsp),%xmm2
	pand 352+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,32(%rdi)
	movdqu %xmm3,48(%rdi)

	leaq 480+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ladd_affineq_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
.type __ecp_nistz256_add_tox,@function
.align 32
__ecp_nistz256_add_tox:
.cfi_startproc
	xorq %r11,%r11
	adcq 0(%rbx),%r12
	adcq 8(%rbx),%r13
	movq %r12,%rax
	adcq 16(%rbx),%r8
	adcq 24(%rbx),%r9
	movq %r13,%rbp
	adcq $0,%r11

	xorq %r10,%r10
	sbbq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	cmovcq %rbp,%r13
	movq %r12,0(%rdi)
	cmovcq %rcx,%r8
	movq %r13,8(%rdi)
	cmovcq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	ret
.cfi_endproc
.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox

.type __ecp_nistz256_sub_fromx,@function
.align 32
__ecp_nistz256_sub_fromx:
.cfi_startproc
	xorq %r11,%r11
	sbbq 0(%rbx),%r12
	sbbq 8(%rbx),%r13
	movq %r12,%rax
	sbbq 16(%rbx),%r8
	sbbq 24(%rbx),%r9
	movq %r13,%rbp
	sbbq $0,%r11

	xorq %r10,%r10
	adcq $-1,%r12
	movq %r8,%rcx
	adcq %r14,%r13
	adcq $0,%r8
	movq %r9,%r10
	adcq %r15,%r9

	btq $0,%r11
	cmovncq %rax,%r12
	cmovncq %rbp,%r13
	movq %r12,0(%rdi)
	cmovncq %rcx,%r8
	movq %r13,8(%rdi)
	cmovncq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	ret
.cfi_endproc
.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx

.type __ecp_nistz256_subx,@function
.align 32
__ecp_nistz256_subx:
.cfi_startproc
	xorq %r11,%r11
	sbbq %r12,%rax
	sbbq %r13,%rbp
	movq %rax,%r12
	sbbq %r8,%rcx
	sbbq %r9,%r10
	movq %rbp,%r13
	sbbq $0,%r11

	xorq %r9,%r9
	adcq $-1,%rax
	movq %rcx,%r8
	adcq %r14,%rbp
	adcq $0,%rcx
	movq %r10,%r9
	adcq %r15,%r10

	btq $0,%r11
	cmovcq %rax,%r12
	cmovcq %rbp,%r13
	cmovcq %rcx,%r8
	cmovcq %r10,%r9

	ret
.cfi_endproc
.size __ecp_nistz256_subx,.-__ecp_nistz256_subx
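// Editorial note: __ecp_nistz256_subx, like its *q counterpart, takes
// the minuend in %rax,%rbp,%rcx,%r10 and the subtrahend in
// %r12,%r13,%r8,%r9, and leaves the difference, with the modulus added
// back when the subtraction borrowed, in %r12,%r13,%r8,%r9.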
.type __ecp_nistz256_mul_by_2x,@function
.align 32
__ecp_nistz256_mul_by_2x:
.cfi_startproc
	xorq %r11,%r11
	adcq %r12,%r12
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	xorq %r10,%r10
	sbbq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	cmovcq %rbp,%r13
	movq %r12,0(%rdi)
	cmovcq %rcx,%r8
	movq %r13,8(%rdi)
	cmovcq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	ret
.cfi_endproc
.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
.type ecp_nistz256_point_doublex,@function
.align 32
ecp_nistz256_point_doublex:
.cfi_startproc
.Lpoint_doublex:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $160+8,%rsp
.cfi_adjust_cfa_offset 32*5+8
.Lpoint_doublex_body:

.Lpoint_double_shortcutx:
	movdqu 0(%rsi),%xmm0
	movq %rsi,%rbx
	movdqu 16(%rsi),%xmm1
	movq 32+0(%rsi),%r12
	movq 32+8(%rsi),%r13
	movq 32+16(%rsi),%r8
	movq 32+24(%rsi),%r9
	movq .Lpoly+8(%rip),%r14
	movq .Lpoly+24(%rip),%r15
	movdqa %xmm0,96(%rsp)
	movdqa %xmm1,96+16(%rsp)
	leaq 32(%rdi),%r10
	leaq 64(%rdi),%r11
.byte 102,72,15,110,199
.byte 102,73,15,110,202
.byte 102,73,15,110,211

	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_by_2x

	movq 64+0(%rsi),%rdx
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	leaq 64-128(%rsi),%rsi
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 0+0(%rsp),%rdx
	movq 8+0(%rsp),%r14
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r15
	movq 24+0(%rsp),%r8
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 32(%rbx),%rdx
	movq 64+0(%rbx),%r9
	movq 64+8(%rbx),%r10
	movq 64+16(%rbx),%r11
	movq 64+24(%rbx),%r12
	leaq 64-128(%rbx),%rsi
	leaq 32(%rbx),%rbx
.byte 102,72,15,126,215
	call __ecp_nistz256_mul_montx
	call __ecp_nistz256_mul_by_2x

	movq 96+0(%rsp),%r12
	movq 96+8(%rsp),%r13
	leaq 64(%rsp),%rbx
	movq 96+16(%rsp),%r8
	movq 96+24(%rsp),%r9
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_add_tox

	movq 96+0(%rsp),%r12
	movq 96+8(%rsp),%r13
	leaq 64(%rsp),%rbx
	movq 96+16(%rsp),%r8
	movq 96+24(%rsp),%r9
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 0+0(%rsp),%rdx
	movq 8+0(%rsp),%r14
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r15
	movq 24+0(%rsp),%r8
.byte 102,72,15,126,207
	call __ecp_nistz256_sqr_montx
	xorq %r9,%r9
	movq %r12,%rax
	addq $-1,%r12
	movq %r13,%r10
	adcq %rsi,%r13
	movq %r14,%rcx
	adcq $0,%r14
	movq %r15,%r8
	adcq %rbp,%r15
	adcq $0,%r9
	xorq %rsi,%rsi
	testq $1,%rax

	cmovzq %rax,%r12
	cmovzq %r10,%r13
	cmovzq %rcx,%r14
	cmovzq %r8,%r15
	cmovzq %rsi,%r9

	movq %r13,%rax
	shrq $1,%r12
	shlq $63,%rax
	movq %r14,%r10
	shrq $1,%r13
	orq %rax,%r12
	shlq $63,%r10
	movq %r15,%rcx
	shrq $1,%r14
	orq %r10,%r13
	shlq $63,%rcx
	movq %r12,0(%rdi)
	shrq $1,%r15
	movq %r13,8(%rdi)
	shlq $63,%r9
	orq %rcx,%r14
	orq %r9,%r15
	movq %r14,16(%rdi)
	movq %r15,24(%rdi)
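// Editorial note: the sequence above appears to be a branch-free
// halving mod p: value+p is computed unconditionally (addq $-1 adds
// p[0] = 2^64-1; the other modulus words arrive in registers), testq $1
// with cmovzq keeps the original when it was already even, and the
// shrq/shlq pairs then shift the 256-bit result right one bit across
// the four limbs.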
	movq 64(%rsp),%rdx
	leaq 64(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 128(%rsp),%rdi
	call __ecp_nistz256_mul_by_2x

	leaq 32(%rsp),%rbx
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_add_tox

	movq 96(%rsp),%rdx
	leaq 96(%rsp),%rbx
	movq 0+0(%rsp),%r9
	movq 8+0(%rsp),%r10
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r11
	movq 24+0(%rsp),%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 128(%rsp),%rdi
	call __ecp_nistz256_mul_by_2x

	movq 0+32(%rsp),%rdx
	movq 8+32(%rsp),%r14
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r15
	movq 24+32(%rsp),%r8
.byte 102,72,15,126,199
	call __ecp_nistz256_sqr_montx

	leaq 128(%rsp),%rbx
	movq %r14,%r8
	movq %r15,%r9
	movq %rsi,%r14
	movq %rbp,%r15
	call __ecp_nistz256_sub_fromx

	movq 0+0(%rsp),%rax
	movq 0+8(%rsp),%rbp
	movq 0+16(%rsp),%rcx
	movq 0+24(%rsp),%r10
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_subx

	movq 32(%rsp),%rdx
	leaq 32(%rsp),%rbx
	movq %r12,%r14
	xorl %ecx,%ecx
	movq %r12,0+0(%rsp)
	movq %r13,%r10
	movq %r13,0+8(%rsp)
	cmovzq %r8,%r11
	movq %r8,0+16(%rsp)
	leaq 0-128(%rsp),%rsi
	cmovzq %r9,%r12
	movq %r9,0+24(%rsp)
	movq %r14,%r9
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montx

.byte 102,72,15,126,203
.byte 102,72,15,126,207
	call __ecp_nistz256_sub_fromx

	leaq 160+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_doublex_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
.type ecp_nistz256_point_addx,@function
.align 32
ecp_nistz256_point_addx:
.cfi_startproc
.Lpoint_addx:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $576+8,%rsp
.cfi_adjust_cfa_offset 32*18+8
.Lpoint_addx_body:

	movdqu 0(%rsi),%xmm0
	movdqu 16(%rsi),%xmm1
	movdqu 32(%rsi),%xmm2
	movdqu 48(%rsi),%xmm3
	movdqu 64(%rsi),%xmm4
	movdqu 80(%rsi),%xmm5
	movq %rsi,%rbx
	movq %rdx,%rsi
	movdqa %xmm0,384(%rsp)
	movdqa %xmm1,384+16(%rsp)
	movdqa %xmm2,416(%rsp)
	movdqa %xmm3,416+16(%rsp)
	movdqa %xmm4,448(%rsp)
	movdqa %xmm5,448+16(%rsp)
	por %xmm4,%xmm5

	movdqu 0(%rsi),%xmm0
	pshufd $0xb1,%xmm5,%xmm3
	movdqu 16(%rsi),%xmm1
	movdqu 32(%rsi),%xmm2
	por %xmm3,%xmm5
	movdqu 48(%rsi),%xmm3
	movq 64+0(%rsi),%rdx
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	movdqa %xmm0,480(%rsp)
	pshufd $0x1e,%xmm5,%xmm4
	movdqa %xmm1,480+16(%rsp)
	movdqu 64(%rsi),%xmm0
	movdqu 80(%rsi),%xmm1
	movdqa %xmm2,512(%rsp)
	movdqa %xmm3,512+16(%rsp)
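// Editorial note: the mulx/adx (*x) routines take their source pointer
// pre-biased by -128 (hence leaq 64-128(%rsi),%rsi below and the
// -128+NN(%rsp) forms throughout), so limb accesses stay within the
// signed 8-bit displacement range and encode more compactly.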
	por %xmm4,%xmm5
	pxor %xmm4,%xmm4
	por %xmm0,%xmm1
.byte 102,72,15,110,199

	leaq 64-128(%rsi),%rsi
	movq %rdx,544+0(%rsp)
	movq %r14,544+8(%rsp)
	movq %r15,544+16(%rsp)
	movq %r8,544+24(%rsp)
	leaq 96(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	pcmpeqd %xmm4,%xmm5
	pshufd $0xb1,%xmm1,%xmm4
	por %xmm1,%xmm4
	pshufd $0,%xmm5,%xmm5
	pshufd $0x1e,%xmm4,%xmm3
	por %xmm3,%xmm4
	pxor %xmm3,%xmm3
	pcmpeqd %xmm3,%xmm4
	pshufd $0,%xmm4,%xmm4
	movq 64+0(%rbx),%rdx
	movq 64+8(%rbx),%r14
	movq 64+16(%rbx),%r15
	movq 64+24(%rbx),%r8
.byte 102,72,15,110,203

	leaq 64-128(%rbx),%rsi
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 544(%rsp),%rdx
	leaq 544(%rsp),%rbx
	movq 0+96(%rsp),%r9
	movq 8+96(%rsp),%r10
	leaq -128+96(%rsp),%rsi
	movq 16+96(%rsp),%r11
	movq 24+96(%rsp),%r12
	leaq 224(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 448(%rsp),%rdx
	leaq 448(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 416(%rsp),%rdx
	leaq 416(%rsp),%rbx
	movq 0+224(%rsp),%r9
	movq 8+224(%rsp),%r10
	leaq -128+224(%rsp),%rsi
	movq 16+224(%rsp),%r11
	movq 24+224(%rsp),%r12
	leaq 224(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 512(%rsp),%rdx
	leaq 512(%rsp),%rbx
	movq 0+256(%rsp),%r9
	movq 8+256(%rsp),%r10
	leaq -128+256(%rsp),%rsi
	movq 16+256(%rsp),%r11
	movq 24+256(%rsp),%r12
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 224(%rsp),%rbx
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	orq %r13,%r12
	movdqa %xmm4,%xmm2
	orq %r8,%r12
	orq %r9,%r12
	por %xmm5,%xmm2
.byte 102,73,15,110,220

	movq 384(%rsp),%rdx
	leaq 384(%rsp),%rbx
	movq 0+96(%rsp),%r9
	movq 8+96(%rsp),%r10
	leaq -128+96(%rsp),%rsi
	movq 16+96(%rsp),%r11
	movq 24+96(%rsp),%r12
	leaq 160(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 480(%rsp),%rdx
	leaq 480(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 192(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 160(%rsp),%rbx
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	orq %r13,%r12
	orq %r8,%r12
	orq %r9,%r12

.byte 102,73,15,126,208
.byte 102,73,15,126,217
	orq %r8,%r12
.byte 0x3e
	jnz .Ladd_proceedx

	testq %r9,%r9
	jz .Ladd_doublex

.byte 102,72,15,126,199
	pxor %xmm0,%xmm0
	movdqu %xmm0,0(%rdi)
	movdqu %xmm0,16(%rdi)
	movdqu %xmm0,32(%rdi)
	movdqu %xmm0,48(%rdi)
	movdqu %xmm0,64(%rdi)
	movdqu %xmm0,80(%rdi)
	jmp .Ladd_donex

.align 32
.Ladd_doublex:
.byte 102,72,15,126,206
.byte 102,72,15,126,199
	addq $416,%rsp
.cfi_adjust_cfa_offset -416
	jmp .Lpoint_double_shortcutx
.cfi_adjust_cfa_offset 416

.align 32
.Ladd_proceedx:
	movq 0+64(%rsp),%rdx
	movq 8+64(%rsp),%r14
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r15
	movq 24+64(%rsp),%r8
	leaq 96(%rsp),%rdi
	call __ecp_nistz256_sqr_montx
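// Editorial note: control appears to reach .Ladd_proceedx when H != 0
// or an input is the point at infinity; H == 0 with R == 0 took the
// .Ladd_doublex shortcut above (equal points, re-entering the doubling
// code), while H == 0 with R != 0 stored the point at infinity.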
	movq 448(%rsp),%rdx
	leaq 448(%rsp),%rbx
	movq 0+0(%rsp),%r9
	movq 8+0(%rsp),%r10
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r11
	movq 24+0(%rsp),%r12
	leaq 352(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 0+0(%rsp),%rdx
	movq 8+0(%rsp),%r14
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r15
	movq 24+0(%rsp),%r8
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 544(%rsp),%rdx
	leaq 544(%rsp),%rbx
	movq 0+352(%rsp),%r9
	movq 8+352(%rsp),%r10
	leaq -128+352(%rsp),%rsi
	movq 16+352(%rsp),%r11
	movq 24+352(%rsp),%r12
	leaq 352(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 0(%rsp),%rdx
	leaq 0(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 128(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 160(%rsp),%rdx
	leaq 160(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 192(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	xorq %r11,%r11
	addq %r12,%r12
	leaq 96(%rsp),%rsi
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	subq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	movq 0(%rsi),%rax
	cmovcq %rbp,%r13
	movq 8(%rsi),%rbp
	cmovcq %rcx,%r8
	movq 16(%rsi),%rcx
	cmovcq %r10,%r9
	movq 24(%rsi),%r10

	call __ecp_nistz256_subx

	leaq 128(%rsp),%rbx
	leaq 288(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 192+0(%rsp),%rax
	movq 192+8(%rsp),%rbp
	movq 192+16(%rsp),%rcx
	movq 192+24(%rsp),%r10
	leaq 320(%rsp),%rdi

	call __ecp_nistz256_subx

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)
	movq 128(%rsp),%rdx
	leaq 128(%rsp),%rbx
	movq 0+224(%rsp),%r9
	movq 8+224(%rsp),%r10
	leaq -128+224(%rsp),%rsi
	movq 16+224(%rsp),%r11
	movq 24+224(%rsp),%r12
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 320(%rsp),%rdx
	leaq 320(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 320(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 256(%rsp),%rbx
	leaq 320(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

.byte 102,72,15,126,199

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 352(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 352+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 544(%rsp),%xmm2
	pand 544+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 448(%rsp),%xmm2
	pand 448+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,64(%rdi)
	movdqu %xmm3,80(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 288(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 288+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 480(%rsp),%xmm2
	pand 480+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 384(%rsp),%xmm2
	pand 384+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,0(%rdi)
	movdqu %xmm3,16(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 320(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 320+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 512(%rsp),%xmm2
	pand 512+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 416(%rsp),%xmm2
	pand 416+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,32(%rdi)
	movdqu %xmm3,48(%rdi)

.Ladd_donex:
	leaq 576+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_addx_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
.type ecp_nistz256_point_add_affinex,@function
.align 32
ecp_nistz256_point_add_affinex:
.cfi_startproc
.Lpoint_add_affinex:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $480+8,%rsp
.cfi_adjust_cfa_offset 32*15+8
.Ladd_affinex_body:

	movdqu 0(%rsi),%xmm0
	movq %rdx,%rbx
	movdqu 16(%rsi),%xmm1
	movdqu 32(%rsi),%xmm2
	movdqu 48(%rsi),%xmm3
	movdqu 64(%rsi),%xmm4
	movdqu 80(%rsi),%xmm5
	movq 64+0(%rsi),%rdx
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	movdqa %xmm0,320(%rsp)
	movdqa %xmm1,320+16(%rsp)
	movdqa %xmm2,352(%rsp)
	movdqa %xmm3,352+16(%rsp)
	movdqa %xmm4,384(%rsp)
	movdqa %xmm5,384+16(%rsp)
	por %xmm4,%xmm5

	movdqu 0(%rbx),%xmm0
	pshufd $0xb1,%xmm5,%xmm3
	movdqu 16(%rbx),%xmm1
	movdqu 32(%rbx),%xmm2
	por %xmm3,%xmm5
	movdqu 48(%rbx),%xmm3
	movdqa %xmm0,416(%rsp)
	pshufd $0x1e,%xmm5,%xmm4
	movdqa %xmm1,416+16(%rsp)
	por %xmm0,%xmm1
.byte 102,72,15,110,199
	movdqa %xmm2,448(%rsp)
	movdqa %xmm3,448+16(%rsp)
	por %xmm2,%xmm3
	por %xmm4,%xmm5
	pxor %xmm4,%xmm4
	por %xmm1,%xmm3

	leaq 64-128(%rsi),%rsi
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	pcmpeqd %xmm4,%xmm5
	pshufd $0xb1,%xmm3,%xmm4
	movq 0(%rbx),%rdx

	movq %r12,%r9
	por %xmm3,%xmm4
	pshufd $0,%xmm5,%xmm5
	pshufd $0x1e,%xmm4,%xmm3
	movq %r13,%r10
	por %xmm3,%xmm4
	pxor %xmm3,%xmm3
	movq %r14,%r11
	pcmpeqd %xmm3,%xmm4
	pshufd $0,%xmm4,%xmm4

	leaq 32-128(%rsp),%rsi
	movq %r15,%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 320(%rsp),%rbx
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromx
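// Editorial note: from here on this is the mulx/adx twin of
// ecp_nistz256_point_add_affine above; the algebra and stack layout
// appear identical, with only the multiply/square kernels and the
// -128-biased source pointers differing.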
	movq 384(%rsp),%rdx
	leaq 384(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 384(%rsp),%rdx
	leaq 384(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 288(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 448(%rsp),%rdx
	leaq 448(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 352(%rsp),%rbx
	leaq 96(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 0+64(%rsp),%rdx
	movq 8+64(%rsp),%r14
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r15
	movq 24+64(%rsp),%r8
	leaq 128(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 0+96(%rsp),%rdx
	movq 8+96(%rsp),%r14
	leaq -128+96(%rsp),%rsi
	movq 16+96(%rsp),%r15
	movq 24+96(%rsp),%r8
	leaq 192(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 128(%rsp),%rdx
	leaq 128(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 160(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 320(%rsp),%rdx
	leaq 320(%rsp),%rbx
	movq 0+128(%rsp),%r9
	movq 8+128(%rsp),%r10
	leaq -128+128(%rsp),%rsi
	movq 16+128(%rsp),%r11
	movq 24+128(%rsp),%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	xorq %r11,%r11
	addq %r12,%r12
	leaq 192(%rsp),%rsi
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	subq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	movq 0(%rsi),%rax
	cmovcq %rbp,%r13
	movq 8(%rsi),%rbp
	cmovcq %rcx,%r8
	movq 16(%rsi),%rcx
	cmovcq %r10,%r9
	movq 24(%rsi),%r10

	call __ecp_nistz256_subx

	leaq 160(%rsp),%rbx
	leaq 224(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 0+0(%rsp),%rax
	movq 0+8(%rsp),%rbp
	movq 0+16(%rsp),%rcx
	movq 0+24(%rsp),%r10
	leaq 64(%rsp),%rdi

	call __ecp_nistz256_subx

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)
	movq 352(%rsp),%rdx
	leaq 352(%rsp),%rbx
	movq 0+160(%rsp),%r9
	movq 8+160(%rsp),%r10
	leaq -128+160(%rsp),%rsi
	movq 16+160(%rsp),%r11
	movq 24+160(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 96(%rsp),%rdx
	leaq 96(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 32(%rsp),%rbx
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

.byte 102,72,15,126,199

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 288(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 288+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand .LONE_mont(%rip),%xmm2
	pand .LONE_mont+16(%rip),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 384(%rsp),%xmm2
	pand 384+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,64(%rdi)
	movdqu %xmm3,80(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 224(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 224+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 416(%rsp),%xmm2
	pand 416+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 320(%rsp),%xmm2
	pand 320+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,0(%rdi)
	movdqu %xmm3,16(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 256(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 256+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 448(%rsp),%xmm2
	pand 448+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 352(%rsp),%xmm2
	pand 352+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,32(%rdi)
	movdqu %xmm3,48(%rdi)

	leaq 480+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ladd_affinex_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
#endif