// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
.extern	OPENSSL_ia32cap_P
.hidden	OPENSSL_ia32cap_P


.section	.rodata
.align	64
.Lpoly:
.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

.LOne:
.long	1,1,1,1,1,1,1,1
.LTwo:
.long	2,2,2,2,2,2,2,2
.LThree:
.long	3,3,3,3,3,3,3,3
.LONE_mont:
.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe


.Lord:
.quad	0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad	0xccd1c8aaee00bc4f
.text


// ecp_nistz256_neg: res = -a mod p, where p is the P-256 field prime (.Lpoly).
.globl	ecp_nistz256_neg
.hidden	ecp_nistz256_neg
.type	ecp_nistz256_neg,@function
.align	32
ecp_nistz256_neg:
.cfi_startproc
_CET_ENDBR
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-16
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-24
.Lneg_body:

	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r13,%r13

	subq	0(%rsi),%r8
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r8,%rax
	sbbq	24(%rsi),%r11
	leaq	.Lpoly(%rip),%rsi
	movq	%r9,%rdx
	sbbq	$0,%r13

	addq	0(%rsi),%r8
	movq	%r10,%rcx
	adcq	8(%rsi),%r9
	adcq	16(%rsi),%r10
	movq	%r11,%r12
	adcq	24(%rsi),%r11
	testq	%r13,%r13

	cmovzq	%rax,%r8
	cmovzq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovzq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovzq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	movq	0(%rsp),%r13
.cfi_restore	%r13
	movq	8(%rsp),%r12
.cfi_restore	%r12
	leaq	16(%rsp),%rsp
.cfi_adjust_cfa_offset	-16
.Lneg_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_neg,.-ecp_nistz256_neg


// ecp_nistz256_ord_mul_mont: Montgomery multiplication modulo the P-256
// group order .Lord (res = a * b * 2^-256 mod ord); .LordK is the
// corresponding Montgomery constant. Dispatches to the mulx/adcx/adox
// variant when ADX and BMI2 are available.
.globl	ecp_nistz256_ord_mul_mont
.hidden	ecp_nistz256_ord_mul_mont
.type	ecp_nistz256_ord_mul_mont,@function
.align	32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_mul_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mul_body:

	movq	0(%rdx),%rax
	movq	%rdx,%rbx
	leaq	.Lord(%rip),%r14
	movq	.LordK(%rip),%r15

	movq	%rax,%rcx
	mulq	0(%rsi)
	movq	%rax,%r8
	movq	%rcx,%rax
	movq	%rdx,%r9

	mulq	8(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	16(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r8,%r13
	imulq	%r15,%r8

	movq	%rdx,%r11
	mulq	24(%rsi)
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12

	mulq	0(%r14)
	movq	%r8,%rbp
	addq	%rax,%r13
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%r8

	mulq	8(%r14)
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	%rdx,%r10
	movq	%rbp,%rdx
	adcq	$0,%r8

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	8(%rbx),%rax
	sbbq	%rdx,%rbp
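	// Fold the carries from the first reduction step below, then repeat
	// the same multiply-and-reduce pattern for the remaining limbs of b.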

	addq	%r8,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13

	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r9,%rcx
	imulq	%r15,%r9

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	xorq	%r8,%r8
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8

	mulq	0(%r14)
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	%r9,%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%r9

	mulq	8(%r14)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	%rdx,%r11
	movq	%rbp,%rdx
	adcq	$0,%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r12
	movq	16(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r9,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8

	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r10,%rcx
	imulq	%r15,%r10

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	xorq	%r9,%r9
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9

	mulq	0(%r14)
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	%r10,%rax
	adcq	%rdx,%rcx

	subq	%r10,%r12
	sbbq	$0,%r10

	mulq	8(%r14)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	%rdx,%r12
	movq	%rbp,%rdx
	adcq	$0,%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r13
	movq	24(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r10,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9

	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r11,%rcx
	imulq	%r15,%r11

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r8
	adcq	$0,%rdx
	xorq	%r10,%r10
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10

	mulq	0(%r14)
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	%r11,%rax
	adcq	%rdx,%rcx

	subq	%r11,%r13
	sbbq	$0,%r11

	mulq	8(%r14)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	%rdx,%r13
	movq	%rbp,%rdx
	adcq	$0,%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	sbbq	%rdx,%rbp

	addq	%r11,%r8
	adcq	%rbp,%r9
	adcq	$0,%r10

	movq	%r12,%rsi
	subq	0(%r14),%r12
	movq	%r13,%r11
	sbbq	8(%r14),%r13
	movq	%r8,%rcx
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rsi,%r12
	cmovcq	%r11,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mul_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont


// ecp_nistz256_ord_sqr_mont: rep consecutive Montgomery squarings modulo
// the group order (the repeat count arrives in %rdx).
.globl	ecp_nistz256_ord_sqr_mont
.hidden	ecp_nistz256_ord_sqr_mont
.type	ecp_nistz256_ord_sqr_mont,@function
.align	32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_sqr_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqr_body:

	movq	0(%rsi),%r8
	movq	8(%rsi),%rax
	movq	16(%rsi),%r14
	movq	24(%rsi),%r15
	leaq	.Lord(%rip),%rsi
	movq	%rdx,%rbx
	jmp	.Loop_ord_sqr

.align	32
.Loop_ord_sqr:

	movq	%rax,%rbp
	mulq	%r8
	movq	%rax,%r9
.byte	102,72,15,110,205
	movq	%r14,%rax
	movq	%rdx,%r10

	mulq	%r8
	addq	%rax,%r10
	movq	%r15,%rax
.byte	102,73,15,110,214
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r8
	addq	%rax,%r11
	movq	%r15,%rax
.byte	102,73,15,110,223
	adcq	$0,%rdx
	movq	%rdx,%r12

	mulq	%r14
	movq	%rax,%r13
	movq	%r14,%rax
	movq	%rdx,%r14

	mulq	%rbp
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r15

	mulq	%rbp
	addq	%rax,%r12
	adcq	$0,%rdx

	addq	%r15,%r12
	adcq	%rdx,%r13
	adcq	$0,%r14

	xorq	%r15,%r15
	movq	%r8,%rax
	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	mulq	%rax
	movq	%rax,%r8
.byte	102,72,15,126,200
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r9
	adcq	%rax,%r10
.byte	102,72,15,126,208
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r11
	adcq	%rax,%r12
.byte	102,72,15,126,216
	adcq	$0,%rdx
	movq	%rdx,%rbp

	movq	%r8,%rcx
	imulq	32(%rsi),%r8

	mulq	%rax
	addq	%rbp,%r13
	adcq	%rax,%r14
	movq	0(%rsi),%rax
	adcq	%rdx,%r15

	mulq	%r8
	movq	%r8,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%rbp

	mulq	%r8
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%r8,%rax
	adcq	%rdx,%r10
	movq	%r8,%rdx
	adcq	$0,%rbp

	movq	%r9,%rcx
	imulq	32(%rsi),%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	0(%rsi),%rax
	sbbq	%rdx,%r8

	addq	%rbp,%r11
	adcq	$0,%r8

	mulq	%r9
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%rbp

	mulq	%r9
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%r9,%rax
	adcq	%rdx,%r11
	movq	%r9,%rdx
	adcq	$0,%rbp

	movq	%r10,%rcx
	imulq	32(%rsi),%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	movq	0(%rsi),%rax
	sbbq	%rdx,%r9

	addq	%rbp,%r8
	adcq	$0,%r9

	mulq	%r10
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r10,%r8
	sbbq	$0,%rbp

	mulq	%r10
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%r10,%rax
	adcq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%rbp

	movq	%r11,%rcx
	imulq	32(%rsi),%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r9
	movq	0(%rsi),%rax
	sbbq	%rdx,%r10

	addq	%rbp,%r9
	adcq	$0,%r10

	mulq	%r11
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r11,%r9
	sbbq	$0,%rbp

	mulq	%r11
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	movq	%r11,%rdx
	adcq	$0,%rbp

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r10
	sbbq	%rdx,%r11

	addq	%rbp,%r10
	adcq	$0,%r11

	xorq	%rdx,%rdx
	addq	%r12,%r8
	adcq	%r13,%r9
	movq	%r8,%r12
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%rax
	adcq	$0,%rdx

	subq	0(%rsi),%r8
	movq	%r10,%r14
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r15
	sbbq	24(%rsi),%r11
	sbbq	$0,%rdx

	cmovcq	%r12,%r8
	cmovncq	%r9,%rax
	cmovncq	%r10,%r14
	cmovncq	%r11,%r15

	decq	%rbx
	jnz	.Loop_ord_sqr

	movq	%r8,0(%rdi)
	movq	%rax,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r14,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r15,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqr_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont

// ADX/BMI2 (mulx/adcx/adox) code path for ord_mul_mont.
.type	ecp_nistz256_ord_mul_montx,@function
.align	32
ecp_nistz256_ord_mul_montx:
.cfi_startproc
.Lecp_nistz256_ord_mul_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mulx_body:

	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi
	leaq	.Lord-128(%rip),%r14
	movq	.LordK(%rip),%r15

	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	mulxq	%r11,%rbp,%r11
	addq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	mulxq	%r15,%rdx,%rax
	adcq	%rbp,%r10
	adcq	%rcx,%r11
	adcq	$0,%r12

	xorq	%r13,%r13
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	24+128(%r14),%rcx,%rbp
	movq	8(%rbx),%rdx
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12
	adcxq	%r8,%r12
	adoxq	%r8,%r13
	adcq	$0,%r13

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
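	// %rdx <- acc0 * .LordK mod 2^64: the Montgomery multiplier for the
	// next reduction step (the high half, left in %rax, is unused).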
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8

	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%r14),%rcx,%rbp
	movq	16(%rbx),%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcxq	%r9,%r13
	adoxq	%r9,%r8
	adcq	$0,%r8

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9

	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%r14),%rcx,%rbp
	movq	24(%rbx),%rdx
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8
	adcxq	%r10,%r8
	adoxq	%r10,%r9
	adcq	$0,%r9

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10

	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%r14),%rcx,%rbp
	leaq	128(%r14),%r14
	movq	%r12,%rbx
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	movq	%r13,%rdx
	adcxq	%r11,%r9
	adoxq	%r11,%r10
	adcq	$0,%r10

	movq	%r8,%rcx
	subq	0(%r14),%r12
	sbbq	8(%r14),%r13
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mulx_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx

// ADX/BMI2 code path for ord_sqr_mont.
.type	ecp_nistz256_ord_sqr_montx,@function
.align	32
ecp_nistz256_ord_sqr_montx:
.cfi_startproc
.Lecp_nistz256_ord_sqr_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqrx_body:

	movq	%rdx,%rbx
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	.Lord(%rip),%rsi
	jmp	.Loop_ord_sqrx

.align	32
.Loop_ord_sqrx:
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	movq	%rdx,%rax
.byte	102,73,15,110,206
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	addq	%rcx,%r10
.byte	102,73,15,110,215
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13

	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13

	mulxq	%r8,%rcx,%r14
	movq	%rax,%rdx
.byte	102,73,15,110,216
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14

	mulxq	%rdx,%r8,%rbp
.byte	102,72,15,126,202
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
.byte	102,72,15,126,210
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
	mulxq	%rdx,%rcx,%rbp
.byte	0x67
.byte	102,72,15,126,218
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	adoxq	%rbp,%r13
	mulxq	%rdx,%rcx,%rax
	adoxq	%rcx,%r14
	adoxq	%rax,%r15

	movq	%r8,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	xorq	%rax,%rax
	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	adcxq	%rax,%r8

	movq	%r9,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	adoxq	%rax,%r9

	movq	%r10,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	adcxq	%rax,%r10

	movq	%r11,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	adoxq	%rax,%r11

	addq	%r8,%r12
	adcq	%r13,%r9
	movq	%r12,%rdx
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%r14
	adcq	$0,%rax

	subq	0(%rsi),%r12
	movq	%r10,%r15
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r8
	sbbq	24(%rsi),%r11
	sbbq	$0,%rax

	cmovncq	%r12,%rdx
	cmovncq	%r9,%r14
	cmovncq	%r10,%r15
	cmovncq	%r11,%r8

	decq	%rbx
	jnz	.Loop_ord_sqrx

	movq	%rdx,0(%rdi)
	movq	%r14,8(%rdi)
	pxor	%xmm1,%xmm1
	movq	%r15,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r8,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqrx_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx


// ecp_nistz256_mul_mont: res = a * b * 2^-256 mod p (Montgomery
// multiplication in the P-256 field).
.globl	ecp_nistz256_mul_mont
.hidden	ecp_nistz256_mul_mont
.type	ecp_nistz256_mul_mont,@function
.align	32
ecp_nistz256_mul_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
.Lmul_mont:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lmul_body:
	cmpl	$0x80100,%ecx
	je	.Lmul_montx
	movq	%rdx,%rbx
	movq	0(%rdx),%rax
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12

	call	__ecp_nistz256_mul_montq
	jmp	.Lmul_mont_done

.align	32
.Lmul_montx:
	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_mul_montx
.Lmul_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lmul_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont

.type	__ecp_nistz256_mul_montq,@function
.align	32
__ecp_nistz256_mul_montq:
.cfi_startproc

	movq	%rax,%rbp
	mulq	%r9
	movq	.Lpoly+8(%rip),%r14
	movq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%r9

	mulq	%r10
	movq	.Lpoly+24(%rip),%r15
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%r11
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r12
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	xorq	%r13,%r13
	movq	%rdx,%r12

	movq	%r8,%rbp
	shlq	$32,%r8
	mulq	%r15
	shrq	$32,%rbp
	addq	%r8,%r9
	adcq	%rbp,%r10
	adcq	%rax,%r11
	movq	8(%rbx),%rax
	adcq	%rdx,%r12
	adcq	$0,%r13
	xorq	%r8,%r8

	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8

	movq	%r9,%rbp
	shlq	$32,%r9
	mulq	%r15
	shrq	$32,%rbp
	addq	%r9,%r10
	adcq	%rbp,%r11
	adcq	%rax,%r12
	movq	16(%rbx),%rax
	adcq	%rdx,%r13
	adcq	$0,%r8
	xorq	%r9,%r9

	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9

	movq	%r10,%rbp
	shlq	$32,%r10
	mulq	%r15
	shrq	$32,%rbp
	addq	%r10,%r11
	adcq	%rbp,%r12
	adcq	%rax,%r13
	movq	24(%rbx),%rax
	adcq	%rdx,%r8
	adcq	$0,%r9
	xorq	%r10,%r10

	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10

	movq	%r11,%rbp
	shlq	$32,%r11
	mulq	%r15
	shrq	$32,%rbp
	addq	%r11,%r12
	adcq	%rbp,%r13
	movq	%r12,%rcx
	adcq	%rax,%r8
	adcq	%rdx,%r9
	movq	%r13,%rbp
	adcq	$0,%r10

	subq	$-1,%r12
	movq	%r8,%rbx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rdx
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rcx,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rbx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rdx,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq


// ecp_nistz256_sqr_mont: res = a * a * 2^-256 mod p (Montgomery squaring
// in the P-256 field).
.globl	ecp_nistz256_sqr_mont
.hidden	ecp_nistz256_sqr_mont
.type	ecp_nistz256_sqr_mont,@function
.align	32
ecp_nistz256_sqr_mont:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsqr_body:
	cmpl	$0x80100,%ecx
	je	.Lsqr_montx
	movq	0(%rsi),%rax
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8

	call	__ecp_nistz256_sqr_montq
	jmp	.Lsqr_mont_done

.align	32
.Lsqr_montx:
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_sqr_montx
.Lsqr_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lsqr_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont

.type	__ecp_nistz256_sqr_montq,@function
.align	32
__ecp_nistz256_sqr_montq:
.cfi_startproc
	movq	%rax,%r13
	mulq	%r14
	movq	%rax,%r9
	movq	%r15,%rax
	movq	%rdx,%r10

	mulq	%r13
	addq	%rax,%r10
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r13
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12

	mulq	%r14
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%r14
	addq	%rax,%r12
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbp,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r15
	xorq	%r15,%r15
	addq	%rax,%r13
	movq	0(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	mulq	%rax
	movq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r9
	adcq	%rax,%r10
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r11
	adcq	%rax,%r12
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r13
	adcq	%rax,%r14
	movq	%r8,%rax
	adcq	%rdx,%r15

	movq	.Lpoly+8(%rip),%rsi
	movq	.Lpoly+24(%rip),%rbp

	movq	%r8,%rcx
	shlq	$32,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r8,%r9
	adcq	%rcx,%r10
	adcq	%rax,%r11
	movq	%r9,%rax
	adcq	$0,%rdx

	movq	%r9,%rcx
	shlq	$32,%r9
	movq	%rdx,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r9,%r10
	adcq	%rcx,%r11
	adcq	%rax,%r8
	movq	%r10,%rax
	adcq	$0,%rdx

	movq	%r10,%rcx
	shlq	$32,%r10
	movq	%rdx,%r9
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r10,%r11
	adcq	%rcx,%r8
	adcq	%rax,%r9
	movq	%r11,%rax
	adcq	$0,%rdx

	movq	%r11,%rcx
	shlq	$32,%r11
	movq	%rdx,%r10
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r11,%r8
	adcq	%rcx,%r9
	adcq	%rax,%r10
	adcq	$0,%rdx
	xorq	%r11,%r11

	addq	%r8,%r12
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%rdx,%r15
	movq	%r13,%r9
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%rcx
	sbbq	%rbp,%r15
	sbbq	$0,%r11

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%rcx,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
.type	__ecp_nistz256_mul_montx,@function
.align	32
__ecp_nistz256_mul_montx:
.cfi_startproc

	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	movq	$32,%r14
	xorq	%r13,%r13
	mulxq	%r11,%rbp,%r11
	movq	.Lpoly+24(%rip),%r15
	adcq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	adcq	%rbp,%r10
	shlxq	%r14,%r8,%rbp
	adcq	%rcx,%r11
	shrxq	%r14,%r8,%rcx
	adcq	$0,%r12

	addq	%rbp,%r9
	adcq	%rcx,%r10

	mulxq	%r15,%rcx,%rbp
	movq	8(%rbx),%rdx
	adcq	%rcx,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13
	xorq	%r8,%r8

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	adcxq	%rcx,%r12
	shlxq	%r14,%r9,%rcx
	adoxq	%rbp,%r13
	shrxq	%r14,%r9,%rbp

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8

	addq	%rcx,%r10
	adcq	%rbp,%r11

	mulxq	%r15,%rcx,%rbp
	movq	16(%rbx),%rdx
	adcq	%rcx,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8
	xorq	%r9,%r9

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	adcxq	%rcx,%r13
	shlxq	%r14,%r10,%rcx
	adoxq	%rbp,%r8
	shrxq	%r14,%r10,%rbp

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9

	addq	%rcx,%r11
	adcq	%rbp,%r12

	mulxq	%r15,%rcx,%rbp
	movq	24(%rbx),%rdx
	adcq	%rcx,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9
	xorq	%r10,%r10

	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	adcxq	%rcx,%r8
	shlxq	%r14,%r11,%rcx
	adoxq	%rbp,%r9
	shrxq	%r14,%r11,%rbp

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10

	addq	%rcx,%r12
	adcq	%rbp,%r13

	mulxq	%r15,%rcx,%rbp
	movq	%r12,%rbx
	movq	.Lpoly+8(%rip),%r14
	adcq	%rcx,%r8
	movq	%r13,%rdx
	adcq	%rbp,%r9
	adcq	$0,%r10

	xorl	%eax,%eax
	movq	%r8,%rcx
	sbbq	$-1,%r12
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rbp
	sbbq	%r15,%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rbp,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx

.type	__ecp_nistz256_sqr_montx,@function
.align	32
__ecp_nistz256_sqr_montx:
.cfi_startproc
	mulxq	%r14,%r9,%r10
	mulxq	%r15,%rcx,%r11
	xorl	%eax,%eax
	adcq	%rcx,%r10
	mulxq	%r8,%rbp,%r12
	movq	%r14,%rdx
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13

	mulxq	%r15,%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13

	mulxq	%r8,%rcx,%r14
	movq	0+128(%rsi),%rdx
	xorq	%r15,%r15
	adcxq	%r9,%r9
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14

	mulxq	%rdx,%r8,%rbp
	movq	8+128(%rsi),%rdx
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax
	movq	16+128(%rsi),%rdx
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
.byte	0x67
	mulxq	%rdx,%rcx,%rbp
	movq	24+128(%rsi),%rdx
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	movq	$32,%rsi
	adoxq	%rbp,%r13
.byte	0x67,0x67
	mulxq	%rdx,%rcx,%rax
	movq	.Lpoly+24(%rip),%rdx
	adoxq	%rcx,%r14
	shlxq	%rsi,%r8,%rcx
	adoxq	%rax,%r15
	shrxq	%rsi,%r8,%rax
	movq	%rdx,%rbp

	addq	%rcx,%r9
	adcq	%rax,%r10

	mulxq	%r8,%rcx,%r8
	adcq	%rcx,%r11
	shlxq	%rsi,%r9,%rcx
	adcq	$0,%r8
	shrxq	%rsi,%r9,%rax

	addq	%rcx,%r10
	adcq	%rax,%r11

	mulxq	%r9,%rcx,%r9
	adcq	%rcx,%r8
	shlxq	%rsi,%r10,%rcx
	adcq	$0,%r9
	shrxq	%rsi,%r10,%rax

	addq	%rcx,%r11
	adcq	%rax,%r8

	mulxq	%r10,%rcx,%r10
	adcq	%rcx,%r9
	shlxq	%rsi,%r11,%rcx
	adcq	$0,%r10
	shrxq	%rsi,%r11,%rax

	addq	%rcx,%r8
	adcq	%rax,%r9

	mulxq	%r11,%rcx,%r11
	adcq	%rcx,%r10
	adcq	$0,%r11

	xorq	%rdx,%rdx
	addq	%r8,%r12
	movq	.Lpoly+8(%rip),%rsi
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%r11,%r15
	movq	%r13,%r9
	adcq	$0,%rdx

	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%r11
	sbbq	%rbp,%r15
	sbbq	$0,%rdx

	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%r11,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx


// ecp_nistz256_select_w5: constant-time copy of one entry from a table of
// 16 Jacobian points; every entry is read and masked so the memory access
// pattern is independent of the index in %edx.
.globl	ecp_nistz256_select_w5
.hidden	ecp_nistz256_select_w5
.type	ecp_nistz256_select_w5,@function
.align	32
ecp_nistz256_select_w5:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w5
	movdqa	.LOne(%rip),%xmm0
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7

	movdqa	%xmm0,%xmm8
	pshufd	$0,%xmm1,%xmm1

	movq	$16,%rax
.Lselect_loop_sse_w5:

	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	pcmpeqd	%xmm1,%xmm15

	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	movdqa	64(%rsi),%xmm13
	movdqa	80(%rsi),%xmm14
	leaq	96(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	pand	%xmm15,%xmm13
	por	%xmm12,%xmm5
	pand	%xmm15,%xmm14
	por	%xmm13,%xmm6
	por	%xmm14,%xmm7

	decq	%rax
	jnz	.Lselect_loop_sse_w5

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	movdqu	%xmm6,64(%rdi)
	movdqu	%xmm7,80(%rdi)
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w5:
.size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5


// ecp_nistz256_select_w7: constant-time lookup among 64 affine points.
.globl	ecp_nistz256_select_w7
.hidden	ecp_nistz256_select_w7
.type	ecp_nistz256_select_w7,@function
.align	32
ecp_nistz256_select_w7:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	.Lavx2_select_w7
	movdqa	.LOne(%rip),%xmm8
	movd	%edx,%xmm1

	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5

	movdqa	%xmm8,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	$64,%rax

.Lselect_loop_sse_w7:
	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	pcmpeqd	%xmm1,%xmm15
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	leaq	64(%rsi),%rsi

	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	prefetcht0	255(%rsi)
	por	%xmm12,%xmm5

	decq	%rax
	jnz	.Lselect_loop_sse_w7

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w7:
.size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7

.type	ecp_nistz256_avx2_select_w5,@function
.align	32
ecp_nistz256_avx2_select_w5:
.cfi_startproc
.Lavx2_select_w5:
	vzeroupper
	vmovdqa	.LTwo(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3
	vpxor	%ymm4,%ymm4,%ymm4

	vmovdqa	.LOne(%rip),%ymm5
	vmovdqa	.LTwo(%rip),%ymm10

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$8,%rax
.Lselect_loop_avx2_w5:

	vmovdqa	0(%rsi),%ymm6
	vmovdqa	32(%rsi),%ymm7
	vmovdqa	64(%rsi),%ymm8

	vmovdqa	96(%rsi),%ymm11
	vmovdqa	128(%rsi),%ymm12
	vmovdqa	160(%rsi),%ymm13

	vpcmpeqd	%ymm1,%ymm5,%ymm9
	vpcmpeqd	%ymm1,%ymm10,%ymm14

	vpaddd	%ymm0,%ymm5,%ymm5
	vpaddd	%ymm0,%ymm10,%ymm10
	leaq	192(%rsi),%rsi

	vpand	%ymm9,%ymm6,%ymm6
	vpand	%ymm9,%ymm7,%ymm7
	vpand	%ymm9,%ymm8,%ymm8
	vpand	%ymm14,%ymm11,%ymm11
	vpand	%ymm14,%ymm12,%ymm12
	vpand	%ymm14,%ymm13,%ymm13

	vpxor	%ymm6,%ymm2,%ymm2
	vpxor	%ymm7,%ymm3,%ymm3
	vpxor	%ymm8,%ymm4,%ymm4
	vpxor	%ymm11,%ymm2,%ymm2
	vpxor	%ymm12,%ymm3,%ymm3
	vpxor	%ymm13,%ymm4,%ymm4

	decq	%rax
	jnz	.Lselect_loop_avx2_w5

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vmovdqu	%ymm4,64(%rdi)
	vzeroupper
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w5:
.size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5


.globl	ecp_nistz256_avx2_select_w7
.hidden	ecp_nistz256_avx2_select_w7
.type	ecp_nistz256_avx2_select_w7,@function
.align	32
ecp_nistz256_avx2_select_w7:
.cfi_startproc
.Lavx2_select_w7:
_CET_ENDBR
	vzeroupper
	vmovdqa	.LThree(%rip),%ymm0

	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3

	vmovdqa	.LOne(%rip),%ymm4
	vmovdqa	.LTwo(%rip),%ymm8
	vmovdqa	.LThree(%rip),%ymm12

	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$21,%rax
.Lselect_loop_avx2_w7:

	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vmovdqa	64(%rsi),%ymm9
	vmovdqa	96(%rsi),%ymm10

	vmovdqa	128(%rsi),%ymm13
	vmovdqa	160(%rsi),%ymm14

	vpcmpeqd	%ymm1,%ymm4,%ymm7
	vpcmpeqd	%ymm1,%ymm8,%ymm11
	vpcmpeqd	%ymm1,%ymm12,%ymm15

	vpaddd	%ymm0,%ymm4,%ymm4
	vpaddd	%ymm0,%ymm8,%ymm8
	vpaddd	%ymm0,%ymm12,%ymm12
	leaq	192(%rsi),%rsi

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm11,%ymm9,%ymm9
	vpand	%ymm11,%ymm10,%ymm10
	vpand	%ymm15,%ymm13,%ymm13
	vpand	%ymm15,%ymm14,%ymm14

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm13,%ymm2,%ymm2
	vpxor	%ymm14,%ymm3,%ymm3

	decq	%rax
	jnz	.Lselect_loop_avx2_w7

	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vpcmpeqd	%ymm1,%ymm4,%ymm7

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vzeroupper
	ret
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w7:
.size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
// Helper: add the 256-bit value at (%rbx) to %r12,%r13,%r8,%r9 modulo p
// and store the result at (%rdi).
.type	__ecp_nistz256_add_toq,@function
.align	32
__ecp_nistz256_add_toq:
.cfi_startproc
	xorq	%r11,%r11
	addq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq

// Helper: subtract the value at (%rbx) from %r12,%r13,%r8,%r9 modulo p.
.type	__ecp_nistz256_sub_fromq,@function
.align	32
__ecp_nistz256_sub_fromq:
.cfi_startproc
	subq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	%r11,%r11

	addq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9
	testq	%r11,%r11

	cmovzq	%rax,%r12
	cmovzq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovzq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovzq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq

// Helper: subtract %r12,%r13,%r8,%r9 from the value held in
// %rax,%rbp,%rcx,%r10 modulo p; the result stays in registers.
.type	__ecp_nistz256_subq,@function
.align	32
__ecp_nistz256_subq:
.cfi_startproc
	subq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	%r11,%r11

	addq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10
	testq	%r11,%r11

	cmovnzq	%rax,%r12
	cmovnzq	%rbp,%r13
	cmovnzq	%rcx,%r8
	cmovnzq	%r10,%r9

	ret
.cfi_endproc
.size	__ecp_nistz256_subq,.-__ecp_nistz256_subq

// Helper: double %r12,%r13,%r8,%r9 modulo p and store at (%rdi).
.type	__ecp_nistz256_mul_by_2q,@function
.align	32
__ecp_nistz256_mul_by_2q:
.cfi_startproc
	xorq	%r11,%r11
	addq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	ret
.cfi_endproc
.size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
// ecp_nistz256_point_double: r = 2*a for a point in Jacobian coordinates.
.globl	ecp_nistz256_point_double
.hidden	ecp_nistz256_point_double
.type	ecp_nistz256_point_double,@function
.align	32
ecp_nistz256_point_double:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_doublex
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$160+8,%rsp
.cfi_adjust_cfa_offset	32*5+8
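	// The 160-byte frame holds five 32-byte field-element temporaries at
	// 0, 32, 64, 96 and 128(%rsp).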
.Lpoint_doubleq_body:

.Lpoint_double_shortcutq:
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	.Lpoly+8(%rip),%r14
	movq	.Lpoly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199
.byte	102,73,15,110,202
.byte	102,73,15,110,211

	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-0(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	32(%rbx),%rax
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-0(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215
	call	__ecp_nistz256_mul_montq
	call	__ecp_nistz256_mul_by_2q

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207
	call	__ecp_nistz256_sqr_montq
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	movq	64(%rsp),%rax
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	movq	0+32(%rsp),%rax
	movq	8+32(%rsp),%r14
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199
	call	__ecp_nistz256_sqr_montq

	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromq
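	// The remaining steps below assemble the output coordinates from the
	// stack temporaries via the shared sub/mul helpers.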
	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subq

	movq	32(%rsp),%rax
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-0(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

.byte	102,72,15,126,203
.byte	102,72,15,126,207
	call	__ecp_nistz256_sub_fromq

	leaq	160+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpoint_doubleq_epilogue:
	ret
.cfi_endproc
.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
// ecp_nistz256_point_add: full Jacobian point addition, with branches for
// the degenerate cases (equal inputs fall through to doubling, a point at
// infinity zeroes the output).
.globl	ecp_nistz256_point_add
.hidden	ecp_nistz256_point_add
.type	ecp_nistz256_point_add,@function
.align	32
ecp_nistz256_point_add:
.cfi_startproc
_CET_ENDBR
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lpoint_addx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$576+8,%rsp
.cfi_adjust_cfa_offset	32*18+8
.Lpoint_addq_body:

	movdqu	0(%rsi),%xmm0
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	%rsi,%rbx
	movq	%rdx,%rsi
	movdqa	%xmm0,384(%rsp)
	movdqa	%xmm1,384+16(%rsp)
	movdqa	%xmm2,416(%rsp)
	movdqa	%xmm3,416+16(%rsp)
	movdqa	%xmm4,448(%rsp)
	movdqa	%xmm5,448+16(%rsp)
	por	%xmm4,%xmm5

	movdqu	0(%rsi),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rsi),%xmm3
	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,480(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,480+16(%rsp)
	movdqu	64(%rsi),%xmm0
	movdqu	80(%rsi),%xmm1
	movdqa	%xmm2,512(%rsp)
	movdqa	%xmm3,512+16(%rsp)
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm0,%xmm1
.byte	102,72,15,110,199

	leaq	64-0(%rsi),%rsi
	movq	%rax,544+0(%rsp)
	movq	%r14,544+8(%rsp)
	movq	%r15,544+16(%rsp)
	movq	%r8,544+24(%rsp)
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm1,%xmm4
	por	%xmm1,%xmm4
	pshufd	$0,%xmm5,%xmm5
	pshufd	$0x1e,%xmm4,%xmm3
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4
	movq	64+0(%rbx),%rax
	movq	64+8(%rbx),%r14
	movq	64+16(%rbx),%r15
	movq	64+24(%rbx),%r8
.byte	102,72,15,110,203

	leaq	64-0(%rbx),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	544(%rsp),%rax
	leaq	544(%rsp),%rbx
	movq	0+96(%rsp),%r9
	movq	8+96(%rsp),%r10
	leaq	0+96(%rsp),%rsi
	movq	16+96(%rsp),%r11
	movq	24+96(%rsp),%r12
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	416(%rsp),%rax
	leaq	416(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	0+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	512(%rsp),%rax
	leaq	512(%rsp),%rbx
	movq	0+256(%rsp),%r9
	movq	8+256(%rsp),%r10
	leaq	0+256(%rsp),%rsi
	movq	16+256(%rsp),%r11
	movq	24+256(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	224(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	orq	%r13,%r12
	movdqa	%xmm4,%xmm2
	orq	%r8,%r12
	orq	%r9,%r12
	por	%xmm5,%xmm2
.byte	102,73,15,110,220

	movq	384(%rsp),%rax
	leaq	384(%rsp),%rbx
	movq	0+96(%rsp),%r9
	movq	8+96(%rsp),%r10
	leaq	0+96(%rsp),%rsi
	movq	16+96(%rsp),%r11
	movq	24+96(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	480(%rsp),%rax
	leaq	480(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	160(%rsp),%rbx
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	orq	%r13,%r12
	orq	%r8,%r12
	orq	%r9,%r12

.byte	102,73,15,126,208
.byte	102,73,15,126,217
	orq	%r8,%r12
.byte	0x3e
	jnz	.Ladd_proceedq

	testq	%r9,%r9
	jz	.Ladd_doubleq

.byte	102,72,15,126,199
	pxor	%xmm0,%xmm0
	movdqu	%xmm0,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	movdqu	%xmm0,32(%rdi)
	movdqu	%xmm0,48(%rdi)
	movdqu	%xmm0,64(%rdi)
	movdqu	%xmm0,80(%rdi)
	jmp	.Ladd_doneq

.align	32
.Ladd_doubleq:
.byte	102,72,15,126,206
.byte	102,72,15,126,199
	addq	$416,%rsp
.cfi_adjust_cfa_offset	-416
	jmp	.Lpoint_double_shortcutq
.cfi_adjust_cfa_offset	416

.align	32
.Ladd_proceedq:
	movq	0+64(%rsp),%rax
	movq	8+64(%rsp),%r14
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	448(%rsp),%rax
	leaq	448(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	movq	544(%rsp),%rax
	leaq	544(%rsp),%rbx
	movq	0+352(%rsp),%r9
	movq	8+352(%rsp),%r10
	leaq	0+352(%rsp),%rsi
	movq	16+352(%rsp),%r11
	movq	24+352(%rsp),%r12
	leaq	352(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	0(%rsp),%rax
	leaq	0(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	160(%rsp),%rax
	leaq	160(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	96(%rsp),%rsi
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12
	movq	0(%rsi),%rax
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	call	__ecp_nistz256_subq

	leaq	128(%rsp),%rbx
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	movq	192+0(%rsp),%rax
	movq	192+8(%rsp),%rbp
	movq	192+16(%rsp),%rcx
	movq	192+24(%rsp),%r10
	leaq	320(%rsp),%rdi

	call	__ecp_nistz256_subq

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	movq	128(%rsp),%rax
	leaq	128(%rsp),%rbx
	movq	0+224(%rsp),%r9
	movq	8+224(%rsp),%r10
	leaq	0+224(%rsp),%rsi
	movq	16+224(%rsp),%r11
	movq	24+224(%rsp),%r12
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	movq	320(%rsp),%rax
	leaq	320(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	0+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	leaq	256(%rsp),%rbx
	leaq	320(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

.byte	102,72,15,126,199

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	352(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	352+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	544(%rsp),%xmm2
	pand	544+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	448(%rsp),%xmm2
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	480(%rsp),%xmm2
	pand	480+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	320(%rsp),%xmm0
	movdqa	%xmm5,%xmm2
	pandn	320+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	512(%rsp),%xmm2
	pand	512+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	416(%rsp),%xmm2
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

.Ladd_doneq:
	leaq 576+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_addq_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
.globl ecp_nistz256_point_add_affine
.hidden ecp_nistz256_point_add_affine
.type ecp_nistz256_point_add_affine,@function
.align 32
ecp_nistz256_point_add_affine:
.cfi_startproc
_CET_ENDBR
	leaq OPENSSL_ia32cap_P(%rip),%rcx
	movq 8(%rcx),%rcx
	andl $0x80100,%ecx
	cmpl $0x80100,%ecx
	je .Lpoint_add_affinex
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $480+8,%rsp
.cfi_adjust_cfa_offset 32*15+8
.Ladd_affineq_body:

	movdqu 0(%rsi),%xmm0
	movq %rdx,%rbx
	movdqu 16(%rsi),%xmm1
	movdqu 32(%rsi),%xmm2
	movdqu 48(%rsi),%xmm3
	movdqu 64(%rsi),%xmm4
	movdqu 80(%rsi),%xmm5
	movq 64+0(%rsi),%rax
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	movdqa %xmm0,320(%rsp)
	movdqa %xmm1,320+16(%rsp)
	movdqa %xmm2,352(%rsp)
	movdqa %xmm3,352+16(%rsp)
	movdqa %xmm4,384(%rsp)
	movdqa %xmm5,384+16(%rsp)
	por %xmm4,%xmm5

	movdqu 0(%rbx),%xmm0
	pshufd $0xb1,%xmm5,%xmm3
	movdqu 16(%rbx),%xmm1
	movdqu 32(%rbx),%xmm2
	por %xmm3,%xmm5
	movdqu 48(%rbx),%xmm3
	movdqa %xmm0,416(%rsp)
	pshufd $0x1e,%xmm5,%xmm4
	movdqa %xmm1,416+16(%rsp)
	por %xmm0,%xmm1
.byte 102,72,15,110,199
	movdqa %xmm2,448(%rsp)
	movdqa %xmm3,448+16(%rsp)
	por %xmm2,%xmm3
	por %xmm4,%xmm5
	pxor %xmm4,%xmm4
	por %xmm1,%xmm3

	leaq 64-0(%rsi),%rsi
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_sqr_montq

	pcmpeqd %xmm4,%xmm5
	pshufd $0xb1,%xmm3,%xmm4
	movq 0(%rbx),%rax

	movq %r12,%r9
	por %xmm3,%xmm4
	pshufd $0,%xmm5,%xmm5
	pshufd $0x1e,%xmm4,%xmm3
	movq %r13,%r10
	por %xmm3,%xmm4
	pxor %xmm3,%xmm3
	movq %r14,%r11
	pcmpeqd %xmm3,%xmm4
	pshufd $0,%xmm4,%xmm4

	leaq 32-0(%rsp),%rsi
	movq %r15,%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	leaq 320(%rsp),%rbx
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

	movq 384(%rsp),%rax
	leaq 384(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq 0+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	movq 384(%rsp),%rax
	leaq 384(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq 0+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 288(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	movq 448(%rsp),%rax
	leaq 448(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq 0+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	leaq 352(%rsp),%rbx
	leaq 96(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

	movq 0+64(%rsp),%rax
	movq 8+64(%rsp),%r14
	leaq 0+64(%rsp),%rsi
	movq 16+64(%rsp),%r15
	movq 24+64(%rsp),%r8
	leaq 128(%rsp),%rdi
	call __ecp_nistz256_sqr_montq

	movq 0+96(%rsp),%rax
	movq 8+96(%rsp),%r14
	leaq 0+96(%rsp),%rsi
	movq 16+96(%rsp),%r15
	movq 24+96(%rsp),%r8
	leaq 192(%rsp),%rdi
	call __ecp_nistz256_sqr_montq

	movq 128(%rsp),%rax
	leaq 128(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq 0+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 160(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	movq 320(%rsp),%rax
	leaq 320(%rsp),%rbx
	movq 0+128(%rsp),%r9
	movq 8+128(%rsp),%r10
	leaq 0+128(%rsp),%rsi
	movq 16+128(%rsp),%r11
	movq 24+128(%rsp),%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montq




	xorq %r11,%r11
	addq %r12,%r12
	leaq 192(%rsp),%rsi
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	subq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	movq 0(%rsi),%rax
	cmovcq %rbp,%r13
	movq 8(%rsi),%rbp
	cmovcq %rcx,%r8
	movq 16(%rsi),%rcx
	cmovcq %r10,%r9
	movq 24(%rsi),%r10

	call __ecp_nistz256_subq

	leaq 160(%rsp),%rbx
	leaq 224(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

	movq 0+0(%rsp),%rax
	movq 0+8(%rsp),%rbp
	movq 0+16(%rsp),%rcx
	movq 0+24(%rsp),%r10
	leaq 64(%rsp),%rdi

	call __ecp_nistz256_subq

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)
	movq 352(%rsp),%rax
	leaq 352(%rsp),%rbx
	movq 0+160(%rsp),%r9
	movq 8+160(%rsp),%r10
	leaq 0+160(%rsp),%rsi
	movq 16+160(%rsp),%r11
	movq 24+160(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	movq 96(%rsp),%rax
	leaq 96(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq 0+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	leaq 32(%rsp),%rbx
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

.byte 102,72,15,126,199

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 288(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 288+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand .LONE_mont(%rip),%xmm2
	pand .LONE_mont+16(%rip),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 384(%rsp),%xmm2
	pand 384+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,64(%rdi)
	movdqu %xmm3,80(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 224(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 224+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 416(%rsp),%xmm2
	pand 416+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 320(%rsp),%xmm2
	pand 320+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,0(%rdi)
	movdqu %xmm3,16(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 256(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 256+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 448(%rsp),%xmm2
	pand 448+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 352(%rsp),%xmm2
	pand 352+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,32(%rdi)
	movdqu %xmm3,48(%rdi)

	leaq 480+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ladd_affineq_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
.type __ecp_nistz256_add_tox,@function
.align 32
__ecp_nistz256_add_tox:
.cfi_startproc
	xorq %r11,%r11
	adcq 0(%rbx),%r12
	adcq 8(%rbx),%r13
	movq %r12,%rax
	adcq 16(%rbx),%r8
	adcq 24(%rbx),%r9
	movq %r13,%rbp
	adcq $0,%r11

	xorq %r10,%r10
	sbbq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	cmovcq %rbp,%r13
	movq %r12,0(%rdi)
	cmovcq %rcx,%r8
	movq %r13,8(%rdi)
	cmovcq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	ret
.cfi_endproc
.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox

.type __ecp_nistz256_sub_fromx,@function
.align 32
__ecp_nistz256_sub_fromx:
.cfi_startproc
	xorq %r11,%r11
	sbbq 0(%rbx),%r12
	sbbq 8(%rbx),%r13
	movq %r12,%rax
	sbbq 16(%rbx),%r8
	sbbq 24(%rbx),%r9
	movq %r13,%rbp
	sbbq $0,%r11

	xorq %r10,%r10
	adcq $-1,%r12
	movq %r8,%rcx
	adcq %r14,%r13
	adcq $0,%r8
	movq %r9,%r10
	adcq %r15,%r9

	btq $0,%r11
	cmovncq %rax,%r12
	cmovncq %rbp,%r13
	movq %r12,0(%rdi)
	cmovncq %rcx,%r8
	movq %r13,8(%rdi)
	cmovncq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	ret
.cfi_endproc
.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx

.type __ecp_nistz256_subx,@function
.align 32
__ecp_nistz256_subx:
.cfi_startproc
	xorq %r11,%r11
	sbbq %r12,%rax
	sbbq %r13,%rbp
	movq %rax,%r12
	sbbq %r8,%rcx
	sbbq %r9,%r10
	movq %rbp,%r13
	sbbq $0,%r11

	xorq %r9,%r9
	adcq $-1,%rax
	movq %rcx,%r8
	adcq %r14,%rbp
	adcq $0,%rcx
	movq %r10,%r9
	adcq %r15,%r10

	btq $0,%r11
	cmovcq %rax,%r12
	cmovcq %rbp,%r13
	cmovcq %rcx,%r8
	cmovcq %r10,%r9

	ret
.cfi_endproc
.size __ecp_nistz256_subx,.-__ecp_nistz256_subx

.type __ecp_nistz256_mul_by_2x,@function
.align 32
__ecp_nistz256_mul_by_2x:
.cfi_startproc
	xorq %r11,%r11
	adcq %r12,%r12
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	xorq %r10,%r10
	sbbq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	cmovcq %rbp,%r13
	movq %r12,0(%rdi)
	cmovcq %rcx,%r8
	movq %r13,8(%rdi)
	cmovcq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	ret
.cfi_endproc
.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
.type ecp_nistz256_point_doublex,@function
.align 32
ecp_nistz256_point_doublex:
.cfi_startproc
.Lpoint_doublex:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $160+8,%rsp
.cfi_adjust_cfa_offset 32*5+8
.Lpoint_doublex_body:

.Lpoint_double_shortcutx:
	movdqu 0(%rsi),%xmm0
	movq %rsi,%rbx
	movdqu 16(%rsi),%xmm1
	movq 32+0(%rsi),%r12
	movq 32+8(%rsi),%r13
	movq 32+16(%rsi),%r8
	movq 32+24(%rsi),%r9
	movq .Lpoly+8(%rip),%r14
	movq .Lpoly+24(%rip),%r15
	movdqa %xmm0,96(%rsp)
	movdqa %xmm1,96+16(%rsp)
	leaq 32(%rdi),%r10
	leaq 64(%rdi),%r11
.byte 102,72,15,110,199
.byte 102,73,15,110,202
.byte 102,73,15,110,211

	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_by_2x

	movq 64+0(%rsi),%rdx
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	leaq 64-128(%rsi),%rsi
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 0+0(%rsp),%rdx
	movq 8+0(%rsp),%r14
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r15
	movq 24+0(%rsp),%r8
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 32(%rbx),%rdx
	movq 64+0(%rbx),%r9
	movq 64+8(%rbx),%r10
	movq 64+16(%rbx),%r11
	movq 64+24(%rbx),%r12
	leaq 64-128(%rbx),%rsi
	leaq 32(%rbx),%rbx
.byte 102,72,15,126,215
	call __ecp_nistz256_mul_montx
	call __ecp_nistz256_mul_by_2x

	movq 96+0(%rsp),%r12
	movq 96+8(%rsp),%r13
	leaq 64(%rsp),%rbx
	movq 96+16(%rsp),%r8
	movq 96+24(%rsp),%r9
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_add_tox

	movq 96+0(%rsp),%r12
	movq 96+8(%rsp),%r13
	leaq 64(%rsp),%rbx
	movq 96+16(%rsp),%r8
	movq 96+24(%rsp),%r9
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 0+0(%rsp),%rdx
	movq 8+0(%rsp),%r14
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r15
	movq 24+0(%rsp),%r8
.byte 102,72,15,126,207
	call __ecp_nistz256_sqr_montx
	xorq %r9,%r9
	movq %r12,%rax
	addq $-1,%r12
	movq %r13,%r10
	adcq %rsi,%r13
	movq %r14,%rcx
	adcq $0,%r14
	movq %r15,%r8
	adcq %rbp,%r15
	adcq $0,%r9
	xorq %rsi,%rsi
	testq $1,%rax

	cmovzq %rax,%r12
	cmovzq %r10,%r13
	cmovzq %rcx,%r14
	cmovzq %r8,%r15
	cmovzq %rsi,%r9

	movq %r13,%rax
	shrq $1,%r12
	shlq $63,%rax
	movq %r14,%r10
	shrq $1,%r13
	orq %rax,%r12
	shlq $63,%r10
	movq %r15,%rcx
	shrq $1,%r14
	orq %r10,%r13
	shlq $63,%rcx
	movq %r12,0(%rdi)
	shrq $1,%r15
	movq %r13,8(%rdi)
	shlq $63,%r9
	orq %rcx,%r14
	orq %r9,%r15
	movq %r14,16(%rdi)
	movq %r15,24(%rdi)
	movq 64(%rsp),%rdx
	leaq 64(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 128(%rsp),%rdi
	call __ecp_nistz256_mul_by_2x

	leaq 32(%rsp),%rbx
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_add_tox

	movq 96(%rsp),%rdx
	leaq 96(%rsp),%rbx
	movq 0+0(%rsp),%r9
	movq 8+0(%rsp),%r10
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r11
	movq 24+0(%rsp),%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 128(%rsp),%rdi
	call __ecp_nistz256_mul_by_2x

	movq 0+32(%rsp),%rdx
	movq 8+32(%rsp),%r14
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r15
	movq 24+32(%rsp),%r8
.byte 102,72,15,126,199
	call __ecp_nistz256_sqr_montx

	leaq 128(%rsp),%rbx
	movq %r14,%r8
	movq %r15,%r9
	movq %rsi,%r14
	movq %rbp,%r15
	call __ecp_nistz256_sub_fromx

	movq 0+0(%rsp),%rax
	movq 0+8(%rsp),%rbp
	movq 0+16(%rsp),%rcx
	movq 0+24(%rsp),%r10
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_subx

	movq 32(%rsp),%rdx
	leaq 32(%rsp),%rbx
	movq %r12,%r14
	xorl %ecx,%ecx
	movq %r12,0+0(%rsp)
	movq %r13,%r10
	movq %r13,0+8(%rsp)
	cmovzq %r8,%r11
	movq %r8,0+16(%rsp)
	leaq 0-128(%rsp),%rsi
	cmovzq %r9,%r12
	movq %r9,0+24(%rsp)
	movq %r14,%r9
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montx

.byte 102,72,15,126,203
.byte 102,72,15,126,207
	call __ecp_nistz256_sub_fromx

	leaq 160+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_doublex_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
.type ecp_nistz256_point_addx,@function
.align 32
ecp_nistz256_point_addx:
.cfi_startproc
.Lpoint_addx:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $576+8,%rsp
.cfi_adjust_cfa_offset 32*18+8
.Lpoint_addx_body:

	movdqu 0(%rsi),%xmm0
	movdqu 16(%rsi),%xmm1
	movdqu 32(%rsi),%xmm2
	movdqu 48(%rsi),%xmm3
	movdqu 64(%rsi),%xmm4
	movdqu 80(%rsi),%xmm5
	movq %rsi,%rbx
	movq %rdx,%rsi
	movdqa %xmm0,384(%rsp)
	movdqa %xmm1,384+16(%rsp)
	movdqa %xmm2,416(%rsp)
	movdqa %xmm3,416+16(%rsp)
	movdqa %xmm4,448(%rsp)
	movdqa %xmm5,448+16(%rsp)
	por %xmm4,%xmm5

	movdqu 0(%rsi),%xmm0
	pshufd $0xb1,%xmm5,%xmm3
	movdqu 16(%rsi),%xmm1
	movdqu 32(%rsi),%xmm2
	por %xmm3,%xmm5
	movdqu 48(%rsi),%xmm3
	movq 64+0(%rsi),%rdx
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	movdqa %xmm0,480(%rsp)
	pshufd $0x1e,%xmm5,%xmm4
	movdqa %xmm1,480+16(%rsp)
	movdqu 64(%rsi),%xmm0
	movdqu 80(%rsi),%xmm1
	movdqa %xmm2,512(%rsp)
	movdqa %xmm3,512+16(%rsp)
	por %xmm4,%xmm5
	pxor %xmm4,%xmm4
	por %xmm0,%xmm1
.byte 102,72,15,110,199

	leaq 64-128(%rsi),%rsi
	movq %rdx,544+0(%rsp)
	movq %r14,544+8(%rsp)
	movq %r15,544+16(%rsp)
	movq %r8,544+24(%rsp)
	leaq 96(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	pcmpeqd %xmm4,%xmm5
	pshufd $0xb1,%xmm1,%xmm4
	por %xmm1,%xmm4
	pshufd $0,%xmm5,%xmm5
	pshufd $0x1e,%xmm4,%xmm3
	por %xmm3,%xmm4
	pxor %xmm3,%xmm3
	pcmpeqd %xmm3,%xmm4
	pshufd $0,%xmm4,%xmm4
	movq 64+0(%rbx),%rdx
	movq 64+8(%rbx),%r14
	movq 64+16(%rbx),%r15
	movq 64+24(%rbx),%r8
.byte 102,72,15,110,203

	leaq 64-128(%rbx),%rsi
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 544(%rsp),%rdx
	leaq 544(%rsp),%rbx
	movq 0+96(%rsp),%r9
	movq 8+96(%rsp),%r10
	leaq -128+96(%rsp),%rsi
	movq 16+96(%rsp),%r11
	movq 24+96(%rsp),%r12
	leaq 224(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 448(%rsp),%rdx
	leaq 448(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 416(%rsp),%rdx
	leaq 416(%rsp),%rbx
	movq 0+224(%rsp),%r9
	movq 8+224(%rsp),%r10
	leaq -128+224(%rsp),%rsi
	movq 16+224(%rsp),%r11
	movq 24+224(%rsp),%r12
	leaq 224(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 512(%rsp),%rdx
	leaq 512(%rsp),%rbx
	movq 0+256(%rsp),%r9
	movq 8+256(%rsp),%r10
	leaq -128+256(%rsp),%rsi
	movq 16+256(%rsp),%r11
	movq 24+256(%rsp),%r12
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 224(%rsp),%rbx
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	orq %r13,%r12
	movdqa %xmm4,%xmm2
	orq %r8,%r12
	orq %r9,%r12
	por %xmm5,%xmm2
.byte 102,73,15,110,220

	movq 384(%rsp),%rdx
	leaq 384(%rsp),%rbx
	movq 0+96(%rsp),%r9
	movq 8+96(%rsp),%r10
	leaq -128+96(%rsp),%rsi
	movq 16+96(%rsp),%r11
	movq 24+96(%rsp),%r12
	leaq 160(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 480(%rsp),%rdx
	leaq 480(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 192(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 160(%rsp),%rbx
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	orq %r13,%r12
	orq %r8,%r12
	orq %r9,%r12

.byte 102,73,15,126,208
.byte 102,73,15,126,217
	orq %r8,%r12
.byte 0x3e
	jnz .Ladd_proceedx



	testq %r9,%r9
	jz .Ladd_doublex






.byte 102,72,15,126,199
	pxor %xmm0,%xmm0
	movdqu %xmm0,0(%rdi)
	movdqu %xmm0,16(%rdi)
	movdqu %xmm0,32(%rdi)
	movdqu %xmm0,48(%rdi)
	movdqu %xmm0,64(%rdi)
	movdqu %xmm0,80(%rdi)
	jmp .Ladd_donex

.align 32
.Ladd_doublex:
.byte 102,72,15,126,206
.byte 102,72,15,126,199
	addq $416,%rsp
.cfi_adjust_cfa_offset -416
	jmp .Lpoint_double_shortcutx
.cfi_adjust_cfa_offset 416

.align 32
.Ladd_proceedx:
	movq 0+64(%rsp),%rdx
	movq 8+64(%rsp),%r14
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r15
	movq 24+64(%rsp),%r8
	leaq 96(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 448(%rsp),%rdx
	leaq 448(%rsp),%rbx
	movq 0+0(%rsp),%r9
	movq 8+0(%rsp),%r10
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r11
	movq 24+0(%rsp),%r12
	leaq 352(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 0+0(%rsp),%rdx
	movq 8+0(%rsp),%r14
	leaq -128+0(%rsp),%rsi
	movq 16+0(%rsp),%r15
	movq 24+0(%rsp),%r8
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 544(%rsp),%rdx
	leaq 544(%rsp),%rbx
	movq 0+352(%rsp),%r9
	movq 8+352(%rsp),%r10
	leaq -128+352(%rsp),%rsi
	movq 16+352(%rsp),%r11
	movq 24+352(%rsp),%r12
	leaq 352(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 0(%rsp),%rdx
	leaq 0(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 128(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 160(%rsp),%rdx
	leaq 160(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 192(%rsp),%rdi
	call __ecp_nistz256_mul_montx




	xorq %r11,%r11
	addq %r12,%r12
	leaq 96(%rsp),%rsi
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	subq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	movq 0(%rsi),%rax
	cmovcq %rbp,%r13
	movq 8(%rsi),%rbp
	cmovcq %rcx,%r8
	movq 16(%rsi),%rcx
	cmovcq %r10,%r9
	movq 24(%rsi),%r10

	call __ecp_nistz256_subx

	leaq 128(%rsp),%rbx
	leaq 288(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 192+0(%rsp),%rax
	movq 192+8(%rsp),%rbp
	movq 192+16(%rsp),%rcx
	movq 192+24(%rsp),%r10
	leaq 320(%rsp),%rdi

	call __ecp_nistz256_subx

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)
	movq 128(%rsp),%rdx
	leaq 128(%rsp),%rbx
	movq 0+224(%rsp),%r9
	movq 8+224(%rsp),%r10
	leaq -128+224(%rsp),%rsi
	movq 16+224(%rsp),%r11
	movq 24+224(%rsp),%r12
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 320(%rsp),%rdx
	leaq 320(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 320(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 256(%rsp),%rbx
	leaq 320(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

.byte 102,72,15,126,199

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 352(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 352+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 544(%rsp),%xmm2
	pand 544+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 448(%rsp),%xmm2
	pand 448+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,64(%rdi)
	movdqu %xmm3,80(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 288(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 288+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 480(%rsp),%xmm2
	pand 480+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 384(%rsp),%xmm2
	pand 384+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,0(%rdi)
	movdqu %xmm3,16(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 320(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 320+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 512(%rsp),%xmm2
	pand 512+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 416(%rsp),%xmm2
	pand 416+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,32(%rdi)
	movdqu %xmm3,48(%rdi)

.Ladd_donex:
	leaq 576+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_addx_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
.type ecp_nistz256_point_add_affinex,@function
.align 32
ecp_nistz256_point_add_affinex:
.cfi_startproc
.Lpoint_add_affinex:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $480+8,%rsp
.cfi_adjust_cfa_offset 32*15+8
.Ladd_affinex_body:

	movdqu 0(%rsi),%xmm0
	movq %rdx,%rbx
	movdqu 16(%rsi),%xmm1
	movdqu 32(%rsi),%xmm2
	movdqu 48(%rsi),%xmm3
	movdqu 64(%rsi),%xmm4
	movdqu 80(%rsi),%xmm5
	movq 64+0(%rsi),%rdx
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	movdqa %xmm0,320(%rsp)
	movdqa %xmm1,320+16(%rsp)
	movdqa %xmm2,352(%rsp)
	movdqa %xmm3,352+16(%rsp)
	movdqa %xmm4,384(%rsp)
	movdqa %xmm5,384+16(%rsp)
	por %xmm4,%xmm5

	movdqu 0(%rbx),%xmm0
	pshufd $0xb1,%xmm5,%xmm3
	movdqu 16(%rbx),%xmm1
	movdqu 32(%rbx),%xmm2
	por %xmm3,%xmm5
	movdqu 48(%rbx),%xmm3
	movdqa %xmm0,416(%rsp)
	pshufd $0x1e,%xmm5,%xmm4
	movdqa %xmm1,416+16(%rsp)
	por %xmm0,%xmm1
.byte 102,72,15,110,199
	movdqa %xmm2,448(%rsp)
	movdqa %xmm3,448+16(%rsp)
	por %xmm2,%xmm3
	por %xmm4,%xmm5
	pxor %xmm4,%xmm4
	por %xmm1,%xmm3

	leaq 64-128(%rsi),%rsi
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	pcmpeqd %xmm4,%xmm5
	pshufd $0xb1,%xmm3,%xmm4
	movq 0(%rbx),%rdx

	movq %r12,%r9
	por %xmm3,%xmm4
	pshufd $0,%xmm5,%xmm5
	pshufd $0x1e,%xmm4,%xmm3
	movq %r13,%r10
	por %xmm3,%xmm4
	pxor %xmm3,%xmm3
	movq %r14,%r11
	pcmpeqd %xmm3,%xmm4
	pshufd $0,%xmm4,%xmm4

	leaq 32-128(%rsp),%rsi
	movq %r15,%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 320(%rsp),%rbx
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 384(%rsp),%rdx
	leaq 384(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 384(%rsp),%rdx
	leaq 384(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 288(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 448(%rsp),%rdx
	leaq 448(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq -128+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 352(%rsp),%rbx
	leaq 96(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 0+64(%rsp),%rdx
	movq 8+64(%rsp),%r14
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r15
	movq 24+64(%rsp),%r8
	leaq 128(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 0+96(%rsp),%rdx
	movq 8+96(%rsp),%r14
	leaq -128+96(%rsp),%rsi
	movq 16+96(%rsp),%r15
	movq 24+96(%rsp),%r8
	leaq 192(%rsp),%rdi
	call __ecp_nistz256_sqr_montx

	movq 128(%rsp),%rdx
	leaq 128(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 160(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 320(%rsp),%rdx
	leaq 320(%rsp),%rbx
	movq 0+128(%rsp),%r9
	movq 8+128(%rsp),%r10
	leaq -128+128(%rsp),%rsi
	movq 16+128(%rsp),%r11
	movq 24+128(%rsp),%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montx




	xorq %r11,%r11
	addq %r12,%r12
	leaq 192(%rsp),%rsi
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	subq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	movq 0(%rsi),%rax
	cmovcq %rbp,%r13
	movq 8(%rsi),%rbp
	cmovcq %rcx,%r8
	movq 16(%rsi),%rcx
	cmovcq %r10,%r9
	movq 24(%rsi),%r10

	call __ecp_nistz256_subx

	leaq 160(%rsp),%rbx
	leaq 224(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

	movq 0+0(%rsp),%rax
	movq 0+8(%rsp),%rbp
	movq 0+16(%rsp),%rcx
	movq 0+24(%rsp),%r10
	leaq 64(%rsp),%rdi

	call __ecp_nistz256_subx

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)
	movq 352(%rsp),%rdx
	leaq 352(%rsp),%rbx
	movq 0+160(%rsp),%r9
	movq 8+160(%rsp),%r10
	leaq -128+160(%rsp),%rsi
	movq 16+160(%rsp),%r11
	movq 24+160(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	movq 96(%rsp),%rdx
	leaq 96(%rsp),%rbx
	movq 0+64(%rsp),%r9
	movq 8+64(%rsp),%r10
	leaq -128+64(%rsp),%rsi
	movq 16+64(%rsp),%r11
	movq 24+64(%rsp),%r12
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_mul_montx

	leaq 32(%rsp),%rbx
	leaq 256(%rsp),%rdi
	call __ecp_nistz256_sub_fromx

.byte 102,72,15,126,199

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 288(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 288+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand .LONE_mont(%rip),%xmm2
	pand .LONE_mont+16(%rip),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 384(%rsp),%xmm2
	pand 384+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,64(%rdi)
	movdqu %xmm3,80(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 224(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 224+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 416(%rsp),%xmm2
	pand 416+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 320(%rsp),%xmm2
	pand 320+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,0(%rdi)
	movdqu %xmm3,16(%rdi)

	movdqa %xmm5,%xmm0
	movdqa %xmm5,%xmm1
	pandn 256(%rsp),%xmm0
	movdqa %xmm5,%xmm2
	pandn 256+16(%rsp),%xmm1
	movdqa %xmm5,%xmm3
	pand 448(%rsp),%xmm2
	pand 448+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3

	movdqa %xmm4,%xmm0
	movdqa %xmm4,%xmm1
	pandn %xmm2,%xmm0
	movdqa %xmm4,%xmm2
	pandn %xmm3,%xmm1
	movdqa %xmm4,%xmm3
	pand 352(%rsp),%xmm2
	pand 352+16(%rsp),%xmm3
	por %xmm0,%xmm2
	por %xmm1,%xmm3
	movdqu %xmm2,32(%rdi)
	movdqu %xmm3,48(%rdi)

	leaq 480+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ladd_affinex_epilogue:
	ret
.cfi_endproc
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
#endif