// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text

.globl	bn_mul_mont_nohw
.hidden	bn_mul_mont_nohw
.type	bn_mul_mont_nohw,@function
.align	16
bn_mul_mont_nohw:
.cfi_startproc
_CET_ENDBR
	movl	%r9d,%r9d
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	negq	%r9
	movq	%rsp,%r11
	leaq	-16(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.align	16
.Lmul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
	movq	%rdx,%r12
	movq	(%r8),%r8
	movq	(%r12),%rbx
	movq	(%rsi),%rax

	xorq	%r14,%r14
	xorq	%r15,%r15

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.L1st

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	jmp	.Louter
.align	16
.Louter:
	movq	(%r12,%r14,8),%rbx
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter

	xorq	%r14,%r14
	movq	(%rsp),%rax
	movq	%r9,%r15

.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)
	movq	8(%rsp,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax
	movq	$-1,%rbx
	xorq	%rax,%rbx
	xorq	%r14,%r14
	movq	%r9,%r15

.Lcopy:
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r9,(%rsp,%r14,8)
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	ret
.cfi_endproc
.size	bn_mul_mont_nohw,.-bn_mul_mont_nohw
.globl	bn_mul4x_mont
.hidden	bn_mul4x_mont
.type	bn_mul4x_mont,@function
.align	16
bn_mul4x_mont:
.cfi_startproc
_CET_ENDBR
	movl	%r9d,%r9d
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	negq	%r9
	movq	%rsp,%r11
	leaq	-32(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
	movq	%rdi,16(%rsp,%r9,8)
	movq	%rdx,%r12
	movq	(%r8),%r8
	movq	(%r12),%rbx
	movq	(%rsi),%rax

	xorq	%r14,%r14
	xorq	%r15,%r15

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	.L1st4x
.align	16
.L1st4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.L1st4x

	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)

	leaq	1(%r14),%r14
.align	4
.Louter4x:
	movq	(%r12,%r14,8),%rbx
	xorq	%r15,%r15
	movq	(%rsp),%r10
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	.Linner4x
.align	16
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.Linner4x

	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)

	cmpq	%r9,%r14
	jb	.Louter4x
	movq	16(%rsp,%r9,8),%rdi
	leaq	-4(%r9),%r15
	movq	0(%rsp),%rax
	movq	8(%rsp),%rdx
	shrq	$2,%r15
	leaq	(%rsp),%rsi
	xorq	%r14,%r14

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx

.Lsub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14
	decq	%r15
	jnz	.Lsub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	sbbq	$0,%rax
	movq	%rbp,24(%rdi,%r14,8)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,224
	pcmpeqd	%xmm5,%xmm5
	pshufd	$0,%xmm4,%xmm4
	movq	%r9,%r15
	pxor	%xmm4,%xmm5
	shrq	$2,%r15
	xorl	%eax,%eax

	jmp	.Lcopy4x
.align	16
.Lcopy4x:
	movdqa	(%rsp,%rax,1),%xmm1
	movdqu	(%rdi,%rax,1),%xmm2
	pand	%xmm4,%xmm1
	pand	%xmm5,%xmm2
	movdqa	16(%rsp,%rax,1),%xmm3
	movdqa	%xmm0,(%rsp,%rax,1)
	por	%xmm2,%xmm1
	movdqu	16(%rdi,%rax,1),%xmm2
	movdqu	%xmm1,(%rdi,%rax,1)
	pand	%xmm4,%xmm3
	pand	%xmm5,%xmm2
	movdqa	%xmm0,16(%rsp,%rax,1)
	por	%xmm2,%xmm3
	movdqu	%xmm3,16(%rdi,%rax,1)
	leaq	32(%rax),%rax
	decq	%r15
	jnz	.Lcopy4x
	movq	8(%rsp,%r9,8),%rsi
.cfi_def_cfa	%rsi, 8
	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	ret
.cfi_endproc
.size	bn_mul4x_mont,.-bn_mul4x_mont
.extern	bn_sqrx8x_internal
.hidden	bn_sqrx8x_internal
.extern	bn_sqr8x_internal
.hidden	bn_sqr8x_internal

.globl	bn_sqr8x_mont
.hidden	bn_sqr8x_mont
.type	bn_sqr8x_mont,@function
.align	32
bn_sqr8x_mont:
.cfi_startproc
_CET_ENDBR
	movl	%r9d,%r9d
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lsqr8x_prologue:

	movl	%r9d,%r10d
	shll	$3,%r9d
	shlq	$3+2,%r10
	negq	%r9

	leaq	-64(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	movq	(%r8),%r8
	subq	%rsi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lsqr8x_sp_alt
	subq	%r11,%rbp
	leaq	-64(%rbp,%r9,2),%rbp
	jmp	.Lsqr8x_sp_done

.align	32
.Lsqr8x_sp_alt:
	leaq	4096-64(,%r9,2),%r10
	leaq	-64(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lsqr8x_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
	jmp	.Lsqr8x_page_walk_done

.align	16
.Lsqr8x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
.Lsqr8x_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:

.byte	102,72,15,110,209
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207
.byte	102,73,15,110,218
	testq	%rdx,%rdx
	jz	.Lsqr8x_nox

	call	bn_sqrx8x_internal

	leaq	(%r8,%rcx,1),%rbx
	movq	%rcx,%r9
	movq	%rcx,%rdx
.byte	102,72,15,126,207
	sarq	$3+2,%rcx
	jmp	.Lsqr8x_sub

.align	32
.Lsqr8x_nox:
	call	bn_sqr8x_internal

	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
	movq	%r9,%rdx
.byte	102,72,15,126,207
	sarq	$3+2,%rcx
	jmp	.Lsqr8x_sub

.align	32
.Lsqr8x_sub:
	movq	0(%rbx),%r12
	movq	8(%rbx),%r13
	movq	16(%rbx),%r14
	movq	24(%rbx),%r15
	leaq	32(%rbx),%rbx
	sbbq	0(%rbp),%r12
	sbbq	8(%rbp),%r13
	sbbq	16(%rbp),%r14
	sbbq	24(%rbp),%r15
	leaq	32(%rbp),%rbp
	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi
	incq	%rcx
	jnz	.Lsqr8x_sub

	sbbq	$0,%rax
	leaq	(%rbx,%r9,1),%rbx
	leaq	(%rdi,%r9,1),%rdi

.byte	102,72,15,110,200
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	jmp	.Lsqr8x_cond_copy

.align	32
.Lsqr8x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	movdqa	%xmm0,-32(%rbx,%rdx,1)
	movdqa	%xmm0,-16(%rbx,%rdx,1)
	pcmpeqd	%xmm1,%xmm0
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	addq	$32,%r9
	jnz	.Lsqr8x_cond_copy

	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr8x_epilogue:
	ret
.cfi_endproc
.size	bn_sqr8x_mont,.-bn_sqr8x_mont
.globl	bn_mulx4x_mont
.hidden	bn_mulx4x_mont
.type	bn_mulx4x_mont,@function
.align	32
bn_mulx4x_mont:
.cfi_startproc
_CET_ENDBR
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmulx4x_prologue:

	shll	$3,%r9d
	xorq	%r10,%r10
	subq	%r9,%r10
	movq	(%r8),%r8
	leaq	-72(%rsp,%r10,1),%rbp
	andq	$-128,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
	jmp	.Lmulx4x_page_walk_done

.align	16
.Lmulx4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

	leaq	(%rdx,%r9,1),%r10

	movq	%r9,0(%rsp)
	shrq	$5,%r9
	movq	%r10,16(%rsp)
	subq	$1,%r9
	movq	%r8,24(%rsp)
	movq	%rdi,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
	movq	%r9,48(%rsp)
	jmp	.Lmulx4x_body

.align	32
.Lmulx4x_body:
	leaq	8(%rdx),%rdi
	movq	(%rdx),%rdx
	leaq	64+32(%rsp),%rbx
	movq	%rdx,%r9

	mulxq	0(%rsi),%r8,%rax
	mulxq	8(%rsi),%r11,%r14
	addq	%rax,%r11
	movq	%rdi,8(%rsp)
	mulxq	16(%rsi),%r12,%r13
	adcq	%r14,%r12
	adcq	$0,%r13

	movq	%r8,%rdi
	imulq	24(%rsp),%r8
	xorq	%rbp,%rbp

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx
	leaq	32(%rsi),%rsi
	adcxq	%rax,%r13
	adcxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%rdi
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
.byte	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
	movq	48(%rsp),%rdi
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-24(%rbx)
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_1st

.align	32
.Lmulx4x_1st:
	adcxq	%rbp,%r15
	mulxq	0(%rsi),%r10,%rax
	adcxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
.byte	0x67,0x67
	movq	%r8,%rdx
	adcxq	%rax,%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-32(%rbx)
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_1st

	movq	0(%rsp),%rax
	movq	8(%rsp),%rdi
	adcq	%rbp,%r15
	addq	%r15,%r14
	sbbq	%r15,%r15
	movq	%r14,-8(%rbx)
	jmp	.Lmulx4x_outer

.align	32
.Lmulx4x_outer:
	movq	(%rdi),%rdx
	leaq	8(%rdi),%rdi
	subq	%rax,%rsi
	movq	%r15,(%rbx)
	leaq	64+32(%rsp),%rbx
	subq	%rax,%rcx

	mulxq	0(%rsi),%r8,%r11
	xorl	%ebp,%ebp
	movq	%rdx,%r9
	mulxq	8(%rsi),%r14,%r12
	adoxq	-32(%rbx),%r8
	adcxq	%r14,%r11
	mulxq	16(%rsi),%r15,%r13
	adoxq	-24(%rbx),%r11
	adcxq	%r15,%r12
	adoxq	-16(%rbx),%r12
	adcxq	%rbp,%r13
	adoxq	%rbp,%r13

	movq	%rdi,8(%rsp)
	movq	%r8,%r15
	imulq	24(%rsp),%r8
	xorl	%ebp,%ebp

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx
	adcxq	%rax,%r13
	adoxq	-8(%rbx),%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	adoxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-24(%rbx)
	leaq	32(%rcx),%rcx
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	movq	48(%rsp),%rdi
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_inner

.align	32
.Lmulx4x_inner:
	mulxq	0(%rsi),%r10,%rax
	adcxq	%rbp,%r15
	adoxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	0(%rbx),%r10
	adoxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	8(%rbx),%r11
	adoxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
	movq	%r8,%rdx
	adcxq	16(%rbx),%r12
	adoxq	%rax,%r13
	adcxq	24(%rbx),%r13
	adoxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx
	adcxq	%rbp,%r14

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-32(%rbx)
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_inner

	movq	0(%rsp),%rax
	movq	8(%rsp),%rdi
	adcq	%rbp,%r15
	subq	0(%rbx),%rbp
	adcq	%r15,%r14
	sbbq	%r15,%r15
	movq	%r14,-8(%rbx)

	cmpq	16(%rsp),%rdi
	jne	.Lmulx4x_outer

	leaq	64(%rsp),%rbx
	subq	%rax,%rcx
	negq	%r15
	movq	%rax,%rdx
	shrq	$3+2,%rax
	movq	32(%rsp),%rdi
	jmp	.Lmulx4x_sub

.align	32
.Lmulx4x_sub:
	movq	0(%rbx),%r11
	movq	8(%rbx),%r12
	movq	16(%rbx),%r13
	movq	24(%rbx),%r14
	leaq	32(%rbx),%rbx
	sbbq	0(%rcx),%r11
	sbbq	8(%rcx),%r12
	sbbq	16(%rcx),%r13
	sbbq	24(%rcx),%r14
	leaq	32(%rcx),%rcx
	movq	%r11,0(%rdi)
	movq	%r12,8(%rdi)
	movq	%r13,16(%rdi)
	movq	%r14,24(%rdi)
	leaq	32(%rdi),%rdi
	decq	%rax
	jnz	.Lmulx4x_sub

	sbbq	$0,%r15
	leaq	64(%rsp),%rbx
	subq	%rdx,%rdi

.byte	102,73,15,110,207
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	jmp	.Lmulx4x_cond_copy

.align	32
.Lmulx4x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	pcmpeqd	%xmm1,%xmm0
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	subq	$32,%rdx
	jnz	.Lmulx4x_cond_copy

	movq	%rdx,(%rbx)

	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmulx4x_epilogue:
	ret
.cfi_endproc
.size	bn_mulx4x_mont,.-bn_mulx4x_mont
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
#endif