1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#include <openssl/asm_base.h> 5 6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) 7.text 8 9.extern OPENSSL_ia32cap_P 10.hidden OPENSSL_ia32cap_P 11 12.globl bn_mul_mont_gather5 13.hidden bn_mul_mont_gather5 14.type bn_mul_mont_gather5,@function 15.align 64 16bn_mul_mont_gather5: 17.cfi_startproc 18_CET_ENDBR 19 movl %r9d,%r9d 20 movq %rsp,%rax 21.cfi_def_cfa_register %rax 22 testl $7,%r9d 23 jnz .Lmul_enter 24 leaq OPENSSL_ia32cap_P(%rip),%r11 25 movl 8(%r11),%r11d 26 jmp .Lmul4x_enter 27 28.align 16 29.Lmul_enter: 30 movd 8(%rsp),%xmm5 31 pushq %rbx 32.cfi_offset %rbx,-16 33 pushq %rbp 34.cfi_offset %rbp,-24 35 pushq %r12 36.cfi_offset %r12,-32 37 pushq %r13 38.cfi_offset %r13,-40 39 pushq %r14 40.cfi_offset %r14,-48 41 pushq %r15 42.cfi_offset %r15,-56 43 44 negq %r9 45 movq %rsp,%r11 46 leaq -280(%rsp,%r9,8),%r10 47 negq %r9 48 andq $-1024,%r10 49 50 51 52 53 54 55 56 57 58 subq %r10,%r11 59 andq $-4096,%r11 60 leaq (%r10,%r11,1),%rsp 61 movq (%rsp),%r11 62 cmpq %r10,%rsp 63 ja .Lmul_page_walk 64 jmp .Lmul_page_walk_done 65 66.Lmul_page_walk: 67 leaq -4096(%rsp),%rsp 68 movq (%rsp),%r11 69 cmpq %r10,%rsp 70 ja .Lmul_page_walk 71.Lmul_page_walk_done: 72 73 leaq .Linc(%rip),%r10 74 movq %rax,8(%rsp,%r9,8) 75.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 76.Lmul_body: 77 78 leaq 128(%rdx),%r12 79 movdqa 0(%r10),%xmm0 80 movdqa 16(%r10),%xmm1 81 leaq 24-112(%rsp,%r9,8),%r10 82 andq $-16,%r10 83 84 pshufd $0,%xmm5,%xmm5 85 movdqa %xmm1,%xmm4 86 movdqa %xmm1,%xmm2 87 paddd %xmm0,%xmm1 88 pcmpeqd %xmm5,%xmm0 89.byte 0x67 90 movdqa %xmm4,%xmm3 91 paddd %xmm1,%xmm2 92 pcmpeqd %xmm5,%xmm1 93 movdqa %xmm0,112(%r10) 94 movdqa %xmm4,%xmm0 95 96 paddd %xmm2,%xmm3 97 pcmpeqd %xmm5,%xmm2 98 movdqa %xmm1,128(%r10) 99 movdqa %xmm4,%xmm1 100 101 paddd %xmm3,%xmm0 102 pcmpeqd %xmm5,%xmm3 103 movdqa %xmm2,144(%r10) 104 movdqa %xmm4,%xmm2 105 106 paddd %xmm0,%xmm1 107 pcmpeqd %xmm5,%xmm0 108 movdqa %xmm3,160(%r10) 109 movdqa %xmm4,%xmm3 110 paddd %xmm1,%xmm2 111 pcmpeqd %xmm5,%xmm1 112 movdqa %xmm0,176(%r10) 113 movdqa %xmm4,%xmm0 114 115 paddd %xmm2,%xmm3 116 pcmpeqd %xmm5,%xmm2 117 movdqa %xmm1,192(%r10) 118 movdqa %xmm4,%xmm1 119 120 paddd %xmm3,%xmm0 121 pcmpeqd %xmm5,%xmm3 122 movdqa %xmm2,208(%r10) 123 movdqa %xmm4,%xmm2 124 125 paddd %xmm0,%xmm1 126 pcmpeqd %xmm5,%xmm0 127 movdqa %xmm3,224(%r10) 128 movdqa %xmm4,%xmm3 129 paddd %xmm1,%xmm2 130 pcmpeqd %xmm5,%xmm1 131 movdqa %xmm0,240(%r10) 132 movdqa %xmm4,%xmm0 133 134 paddd %xmm2,%xmm3 135 pcmpeqd %xmm5,%xmm2 136 movdqa %xmm1,256(%r10) 137 movdqa %xmm4,%xmm1 138 139 paddd %xmm3,%xmm0 140 pcmpeqd %xmm5,%xmm3 141 movdqa %xmm2,272(%r10) 142 movdqa %xmm4,%xmm2 143 144 paddd %xmm0,%xmm1 145 pcmpeqd %xmm5,%xmm0 146 movdqa %xmm3,288(%r10) 147 movdqa %xmm4,%xmm3 148 paddd %xmm1,%xmm2 149 pcmpeqd %xmm5,%xmm1 150 movdqa %xmm0,304(%r10) 151 152 paddd %xmm2,%xmm3 153.byte 0x67 154 pcmpeqd %xmm5,%xmm2 155 movdqa %xmm1,320(%r10) 156 157 pcmpeqd %xmm5,%xmm3 158 movdqa %xmm2,336(%r10) 159 pand 64(%r12),%xmm0 160 161 pand 80(%r12),%xmm1 162 pand 96(%r12),%xmm2 163 movdqa %xmm3,352(%r10) 164 pand 112(%r12),%xmm3 165 por %xmm2,%xmm0 166 por %xmm3,%xmm1 167 movdqa -128(%r12),%xmm4 168 movdqa -112(%r12),%xmm5 169 movdqa -96(%r12),%xmm2 170 pand 112(%r10),%xmm4 171 movdqa -80(%r12),%xmm3 172 pand 128(%r10),%xmm5 173 por %xmm4,%xmm0 174 pand 144(%r10),%xmm2 175 por %xmm5,%xmm1 176 pand 160(%r10),%xmm3 177 por %xmm2,%xmm0 178 por %xmm3,%xmm1 179 movdqa -64(%r12),%xmm4 180 movdqa -48(%r12),%xmm5 181 movdqa -32(%r12),%xmm2 182 pand 176(%r10),%xmm4 183 movdqa -16(%r12),%xmm3 184 pand 192(%r10),%xmm5 185 por %xmm4,%xmm0 186 pand 208(%r10),%xmm2 187 por %xmm5,%xmm1 188 pand 224(%r10),%xmm3 189 por %xmm2,%xmm0 190 por %xmm3,%xmm1 191 movdqa 0(%r12),%xmm4 192 movdqa 16(%r12),%xmm5 193 movdqa 32(%r12),%xmm2 194 pand 240(%r10),%xmm4 195 movdqa 48(%r12),%xmm3 196 pand 256(%r10),%xmm5 197 por %xmm4,%xmm0 198 pand 272(%r10),%xmm2 199 por %xmm5,%xmm1 200 pand 288(%r10),%xmm3 201 por %xmm2,%xmm0 202 por %xmm3,%xmm1 203 por %xmm1,%xmm0 204 205 pshufd $0x4e,%xmm0,%xmm1 206 por %xmm1,%xmm0 207 leaq 256(%r12),%r12 208.byte 102,72,15,126,195 209 210 movq (%r8),%r8 211 movq (%rsi),%rax 212 213 xorq %r14,%r14 214 xorq %r15,%r15 215 216 movq %r8,%rbp 217 mulq %rbx 218 movq %rax,%r10 219 movq (%rcx),%rax 220 221 imulq %r10,%rbp 222 movq %rdx,%r11 223 224 mulq %rbp 225 addq %rax,%r10 226 movq 8(%rsi),%rax 227 adcq $0,%rdx 228 movq %rdx,%r13 229 230 leaq 1(%r15),%r15 231 jmp .L1st_enter 232 233.align 16 234.L1st: 235 addq %rax,%r13 236 movq (%rsi,%r15,8),%rax 237 adcq $0,%rdx 238 addq %r11,%r13 239 movq %r10,%r11 240 adcq $0,%rdx 241 movq %r13,-16(%rsp,%r15,8) 242 movq %rdx,%r13 243 244.L1st_enter: 245 mulq %rbx 246 addq %rax,%r11 247 movq (%rcx,%r15,8),%rax 248 adcq $0,%rdx 249 leaq 1(%r15),%r15 250 movq %rdx,%r10 251 252 mulq %rbp 253 cmpq %r9,%r15 254 jne .L1st 255 256 257 addq %rax,%r13 258 adcq $0,%rdx 259 addq %r11,%r13 260 adcq $0,%rdx 261 movq %r13,-16(%rsp,%r9,8) 262 movq %rdx,%r13 263 movq %r10,%r11 264 265 xorq %rdx,%rdx 266 addq %r11,%r13 267 adcq $0,%rdx 268 movq %r13,-8(%rsp,%r9,8) 269 movq %rdx,(%rsp,%r9,8) 270 271 leaq 1(%r14),%r14 272 jmp .Louter 273.align 16 274.Louter: 275 leaq 24+128(%rsp,%r9,8),%rdx 276 andq $-16,%rdx 277 pxor %xmm4,%xmm4 278 pxor %xmm5,%xmm5 279 movdqa -128(%r12),%xmm0 280 movdqa -112(%r12),%xmm1 281 movdqa -96(%r12),%xmm2 282 movdqa -80(%r12),%xmm3 283 pand -128(%rdx),%xmm0 284 pand -112(%rdx),%xmm1 285 por %xmm0,%xmm4 286 pand -96(%rdx),%xmm2 287 por %xmm1,%xmm5 288 pand -80(%rdx),%xmm3 289 por %xmm2,%xmm4 290 por %xmm3,%xmm5 291 movdqa -64(%r12),%xmm0 292 movdqa -48(%r12),%xmm1 293 movdqa -32(%r12),%xmm2 294 movdqa -16(%r12),%xmm3 295 pand -64(%rdx),%xmm0 296 pand -48(%rdx),%xmm1 297 por %xmm0,%xmm4 298 pand -32(%rdx),%xmm2 299 por %xmm1,%xmm5 300 pand -16(%rdx),%xmm3 301 por %xmm2,%xmm4 302 por %xmm3,%xmm5 303 movdqa 0(%r12),%xmm0 304 movdqa 16(%r12),%xmm1 305 movdqa 32(%r12),%xmm2 306 movdqa 48(%r12),%xmm3 307 pand 0(%rdx),%xmm0 308 pand 16(%rdx),%xmm1 309 por %xmm0,%xmm4 310 pand 32(%rdx),%xmm2 311 por %xmm1,%xmm5 312 pand 48(%rdx),%xmm3 313 por %xmm2,%xmm4 314 por %xmm3,%xmm5 315 movdqa 64(%r12),%xmm0 316 movdqa 80(%r12),%xmm1 317 movdqa 96(%r12),%xmm2 318 movdqa 112(%r12),%xmm3 319 pand 64(%rdx),%xmm0 320 pand 80(%rdx),%xmm1 321 por %xmm0,%xmm4 322 pand 96(%rdx),%xmm2 323 por %xmm1,%xmm5 324 pand 112(%rdx),%xmm3 325 por %xmm2,%xmm4 326 por %xmm3,%xmm5 327 por %xmm5,%xmm4 328 329 pshufd $0x4e,%xmm4,%xmm0 330 por %xmm4,%xmm0 331 leaq 256(%r12),%r12 332 333 movq (%rsi),%rax 334.byte 102,72,15,126,195 335 336 xorq %r15,%r15 337 movq %r8,%rbp 338 movq (%rsp),%r10 339 340 mulq %rbx 341 addq %rax,%r10 342 movq (%rcx),%rax 343 adcq $0,%rdx 344 345 imulq %r10,%rbp 346 movq %rdx,%r11 347 348 mulq %rbp 349 addq %rax,%r10 350 movq 8(%rsi),%rax 351 adcq $0,%rdx 352 movq 8(%rsp),%r10 353 movq %rdx,%r13 354 355 leaq 1(%r15),%r15 356 jmp .Linner_enter 357 358.align 16 359.Linner: 360 addq %rax,%r13 361 movq (%rsi,%r15,8),%rax 362 adcq $0,%rdx 363 addq %r10,%r13 364 movq (%rsp,%r15,8),%r10 365 adcq $0,%rdx 366 movq %r13,-16(%rsp,%r15,8) 367 movq %rdx,%r13 368 369.Linner_enter: 370 mulq %rbx 371 addq %rax,%r11 372 movq (%rcx,%r15,8),%rax 373 adcq $0,%rdx 374 addq %r11,%r10 375 movq %rdx,%r11 376 adcq $0,%r11 377 leaq 1(%r15),%r15 378 379 mulq %rbp 380 cmpq %r9,%r15 381 jne .Linner 382 383 addq %rax,%r13 384 adcq $0,%rdx 385 addq %r10,%r13 386 movq (%rsp,%r9,8),%r10 387 adcq $0,%rdx 388 movq %r13,-16(%rsp,%r9,8) 389 movq %rdx,%r13 390 391 xorq %rdx,%rdx 392 addq %r11,%r13 393 adcq $0,%rdx 394 addq %r10,%r13 395 adcq $0,%rdx 396 movq %r13,-8(%rsp,%r9,8) 397 movq %rdx,(%rsp,%r9,8) 398 399 leaq 1(%r14),%r14 400 cmpq %r9,%r14 401 jb .Louter 402 403 xorq %r14,%r14 404 movq (%rsp),%rax 405 leaq (%rsp),%rsi 406 movq %r9,%r15 407 jmp .Lsub 408.align 16 409.Lsub: sbbq (%rcx,%r14,8),%rax 410 movq %rax,(%rdi,%r14,8) 411 movq 8(%rsi,%r14,8),%rax 412 leaq 1(%r14),%r14 413 decq %r15 414 jnz .Lsub 415 416 sbbq $0,%rax 417 movq $-1,%rbx 418 xorq %rax,%rbx 419 xorq %r14,%r14 420 movq %r9,%r15 421 422.Lcopy: 423 movq (%rdi,%r14,8),%rcx 424 movq (%rsp,%r14,8),%rdx 425 andq %rbx,%rcx 426 andq %rax,%rdx 427 movq %r14,(%rsp,%r14,8) 428 orq %rcx,%rdx 429 movq %rdx,(%rdi,%r14,8) 430 leaq 1(%r14),%r14 431 subq $1,%r15 432 jnz .Lcopy 433 434 movq 8(%rsp,%r9,8),%rsi 435.cfi_def_cfa %rsi,8 436 movq $1,%rax 437 438 movq -48(%rsi),%r15 439.cfi_restore %r15 440 movq -40(%rsi),%r14 441.cfi_restore %r14 442 movq -32(%rsi),%r13 443.cfi_restore %r13 444 movq -24(%rsi),%r12 445.cfi_restore %r12 446 movq -16(%rsi),%rbp 447.cfi_restore %rbp 448 movq -8(%rsi),%rbx 449.cfi_restore %rbx 450 leaq (%rsi),%rsp 451.cfi_def_cfa_register %rsp 452.Lmul_epilogue: 453 ret 454.cfi_endproc 455.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 456.type bn_mul4x_mont_gather5,@function 457.align 32 458bn_mul4x_mont_gather5: 459.cfi_startproc 460.byte 0x67 461 movq %rsp,%rax 462.cfi_def_cfa_register %rax 463.Lmul4x_enter: 464 andl $0x80108,%r11d 465 cmpl $0x80108,%r11d 466 je .Lmulx4x_enter 467 pushq %rbx 468.cfi_offset %rbx,-16 469 pushq %rbp 470.cfi_offset %rbp,-24 471 pushq %r12 472.cfi_offset %r12,-32 473 pushq %r13 474.cfi_offset %r13,-40 475 pushq %r14 476.cfi_offset %r14,-48 477 pushq %r15 478.cfi_offset %r15,-56 479.Lmul4x_prologue: 480 481.byte 0x67 482 shll $3,%r9d 483 leaq (%r9,%r9,2),%r10 484 negq %r9 485 486 487 488 489 490 491 492 493 494 495 leaq -320(%rsp,%r9,2),%r11 496 movq %rsp,%rbp 497 subq %rdi,%r11 498 andq $4095,%r11 499 cmpq %r11,%r10 500 jb .Lmul4xsp_alt 501 subq %r11,%rbp 502 leaq -320(%rbp,%r9,2),%rbp 503 jmp .Lmul4xsp_done 504 505.align 32 506.Lmul4xsp_alt: 507 leaq 4096-320(,%r9,2),%r10 508 leaq -320(%rbp,%r9,2),%rbp 509 subq %r10,%r11 510 movq $0,%r10 511 cmovcq %r10,%r11 512 subq %r11,%rbp 513.Lmul4xsp_done: 514 andq $-64,%rbp 515 movq %rsp,%r11 516 subq %rbp,%r11 517 andq $-4096,%r11 518 leaq (%r11,%rbp,1),%rsp 519 movq (%rsp),%r10 520 cmpq %rbp,%rsp 521 ja .Lmul4x_page_walk 522 jmp .Lmul4x_page_walk_done 523 524.Lmul4x_page_walk: 525 leaq -4096(%rsp),%rsp 526 movq (%rsp),%r10 527 cmpq %rbp,%rsp 528 ja .Lmul4x_page_walk 529.Lmul4x_page_walk_done: 530 531 negq %r9 532 533 movq %rax,40(%rsp) 534.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 535.Lmul4x_body: 536 537 call mul4x_internal 538 539 movq 40(%rsp),%rsi 540.cfi_def_cfa %rsi,8 541 movq $1,%rax 542 543 movq -48(%rsi),%r15 544.cfi_restore %r15 545 movq -40(%rsi),%r14 546.cfi_restore %r14 547 movq -32(%rsi),%r13 548.cfi_restore %r13 549 movq -24(%rsi),%r12 550.cfi_restore %r12 551 movq -16(%rsi),%rbp 552.cfi_restore %rbp 553 movq -8(%rsi),%rbx 554.cfi_restore %rbx 555 leaq (%rsi),%rsp 556.cfi_def_cfa_register %rsp 557.Lmul4x_epilogue: 558 ret 559.cfi_endproc 560.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 561 562.type mul4x_internal,@function 563.align 32 564mul4x_internal: 565.cfi_startproc 566 shlq $5,%r9 567 movd 8(%rax),%xmm5 568 leaq .Linc(%rip),%rax 569 leaq 128(%rdx,%r9,1),%r13 570 shrq $5,%r9 571 movdqa 0(%rax),%xmm0 572 movdqa 16(%rax),%xmm1 573 leaq 88-112(%rsp,%r9,1),%r10 574 leaq 128(%rdx),%r12 575 576 pshufd $0,%xmm5,%xmm5 577 movdqa %xmm1,%xmm4 578.byte 0x67,0x67 579 movdqa %xmm1,%xmm2 580 paddd %xmm0,%xmm1 581 pcmpeqd %xmm5,%xmm0 582.byte 0x67 583 movdqa %xmm4,%xmm3 584 paddd %xmm1,%xmm2 585 pcmpeqd %xmm5,%xmm1 586 movdqa %xmm0,112(%r10) 587 movdqa %xmm4,%xmm0 588 589 paddd %xmm2,%xmm3 590 pcmpeqd %xmm5,%xmm2 591 movdqa %xmm1,128(%r10) 592 movdqa %xmm4,%xmm1 593 594 paddd %xmm3,%xmm0 595 pcmpeqd %xmm5,%xmm3 596 movdqa %xmm2,144(%r10) 597 movdqa %xmm4,%xmm2 598 599 paddd %xmm0,%xmm1 600 pcmpeqd %xmm5,%xmm0 601 movdqa %xmm3,160(%r10) 602 movdqa %xmm4,%xmm3 603 paddd %xmm1,%xmm2 604 pcmpeqd %xmm5,%xmm1 605 movdqa %xmm0,176(%r10) 606 movdqa %xmm4,%xmm0 607 608 paddd %xmm2,%xmm3 609 pcmpeqd %xmm5,%xmm2 610 movdqa %xmm1,192(%r10) 611 movdqa %xmm4,%xmm1 612 613 paddd %xmm3,%xmm0 614 pcmpeqd %xmm5,%xmm3 615 movdqa %xmm2,208(%r10) 616 movdqa %xmm4,%xmm2 617 618 paddd %xmm0,%xmm1 619 pcmpeqd %xmm5,%xmm0 620 movdqa %xmm3,224(%r10) 621 movdqa %xmm4,%xmm3 622 paddd %xmm1,%xmm2 623 pcmpeqd %xmm5,%xmm1 624 movdqa %xmm0,240(%r10) 625 movdqa %xmm4,%xmm0 626 627 paddd %xmm2,%xmm3 628 pcmpeqd %xmm5,%xmm2 629 movdqa %xmm1,256(%r10) 630 movdqa %xmm4,%xmm1 631 632 paddd %xmm3,%xmm0 633 pcmpeqd %xmm5,%xmm3 634 movdqa %xmm2,272(%r10) 635 movdqa %xmm4,%xmm2 636 637 paddd %xmm0,%xmm1 638 pcmpeqd %xmm5,%xmm0 639 movdqa %xmm3,288(%r10) 640 movdqa %xmm4,%xmm3 641 paddd %xmm1,%xmm2 642 pcmpeqd %xmm5,%xmm1 643 movdqa %xmm0,304(%r10) 644 645 paddd %xmm2,%xmm3 646.byte 0x67 647 pcmpeqd %xmm5,%xmm2 648 movdqa %xmm1,320(%r10) 649 650 pcmpeqd %xmm5,%xmm3 651 movdqa %xmm2,336(%r10) 652 pand 64(%r12),%xmm0 653 654 pand 80(%r12),%xmm1 655 pand 96(%r12),%xmm2 656 movdqa %xmm3,352(%r10) 657 pand 112(%r12),%xmm3 658 por %xmm2,%xmm0 659 por %xmm3,%xmm1 660 movdqa -128(%r12),%xmm4 661 movdqa -112(%r12),%xmm5 662 movdqa -96(%r12),%xmm2 663 pand 112(%r10),%xmm4 664 movdqa -80(%r12),%xmm3 665 pand 128(%r10),%xmm5 666 por %xmm4,%xmm0 667 pand 144(%r10),%xmm2 668 por %xmm5,%xmm1 669 pand 160(%r10),%xmm3 670 por %xmm2,%xmm0 671 por %xmm3,%xmm1 672 movdqa -64(%r12),%xmm4 673 movdqa -48(%r12),%xmm5 674 movdqa -32(%r12),%xmm2 675 pand 176(%r10),%xmm4 676 movdqa -16(%r12),%xmm3 677 pand 192(%r10),%xmm5 678 por %xmm4,%xmm0 679 pand 208(%r10),%xmm2 680 por %xmm5,%xmm1 681 pand 224(%r10),%xmm3 682 por %xmm2,%xmm0 683 por %xmm3,%xmm1 684 movdqa 0(%r12),%xmm4 685 movdqa 16(%r12),%xmm5 686 movdqa 32(%r12),%xmm2 687 pand 240(%r10),%xmm4 688 movdqa 48(%r12),%xmm3 689 pand 256(%r10),%xmm5 690 por %xmm4,%xmm0 691 pand 272(%r10),%xmm2 692 por %xmm5,%xmm1 693 pand 288(%r10),%xmm3 694 por %xmm2,%xmm0 695 por %xmm3,%xmm1 696 por %xmm1,%xmm0 697 698 pshufd $0x4e,%xmm0,%xmm1 699 por %xmm1,%xmm0 700 leaq 256(%r12),%r12 701.byte 102,72,15,126,195 702 703 movq %r13,16+8(%rsp) 704 movq %rdi,56+8(%rsp) 705 706 movq (%r8),%r8 707 movq (%rsi),%rax 708 leaq (%rsi,%r9,1),%rsi 709 negq %r9 710 711 movq %r8,%rbp 712 mulq %rbx 713 movq %rax,%r10 714 movq (%rcx),%rax 715 716 imulq %r10,%rbp 717 leaq 64+8(%rsp),%r14 718 movq %rdx,%r11 719 720 mulq %rbp 721 addq %rax,%r10 722 movq 8(%rsi,%r9,1),%rax 723 adcq $0,%rdx 724 movq %rdx,%rdi 725 726 mulq %rbx 727 addq %rax,%r11 728 movq 8(%rcx),%rax 729 adcq $0,%rdx 730 movq %rdx,%r10 731 732 mulq %rbp 733 addq %rax,%rdi 734 movq 16(%rsi,%r9,1),%rax 735 adcq $0,%rdx 736 addq %r11,%rdi 737 leaq 32(%r9),%r15 738 leaq 32(%rcx),%rcx 739 adcq $0,%rdx 740 movq %rdi,(%r14) 741 movq %rdx,%r13 742 jmp .L1st4x 743 744.align 32 745.L1st4x: 746 mulq %rbx 747 addq %rax,%r10 748 movq -16(%rcx),%rax 749 leaq 32(%r14),%r14 750 adcq $0,%rdx 751 movq %rdx,%r11 752 753 mulq %rbp 754 addq %rax,%r13 755 movq -8(%rsi,%r15,1),%rax 756 adcq $0,%rdx 757 addq %r10,%r13 758 adcq $0,%rdx 759 movq %r13,-24(%r14) 760 movq %rdx,%rdi 761 762 mulq %rbx 763 addq %rax,%r11 764 movq -8(%rcx),%rax 765 adcq $0,%rdx 766 movq %rdx,%r10 767 768 mulq %rbp 769 addq %rax,%rdi 770 movq (%rsi,%r15,1),%rax 771 adcq $0,%rdx 772 addq %r11,%rdi 773 adcq $0,%rdx 774 movq %rdi,-16(%r14) 775 movq %rdx,%r13 776 777 mulq %rbx 778 addq %rax,%r10 779 movq 0(%rcx),%rax 780 adcq $0,%rdx 781 movq %rdx,%r11 782 783 mulq %rbp 784 addq %rax,%r13 785 movq 8(%rsi,%r15,1),%rax 786 adcq $0,%rdx 787 addq %r10,%r13 788 adcq $0,%rdx 789 movq %r13,-8(%r14) 790 movq %rdx,%rdi 791 792 mulq %rbx 793 addq %rax,%r11 794 movq 8(%rcx),%rax 795 adcq $0,%rdx 796 movq %rdx,%r10 797 798 mulq %rbp 799 addq %rax,%rdi 800 movq 16(%rsi,%r15,1),%rax 801 adcq $0,%rdx 802 addq %r11,%rdi 803 leaq 32(%rcx),%rcx 804 adcq $0,%rdx 805 movq %rdi,(%r14) 806 movq %rdx,%r13 807 808 addq $32,%r15 809 jnz .L1st4x 810 811 mulq %rbx 812 addq %rax,%r10 813 movq -16(%rcx),%rax 814 leaq 32(%r14),%r14 815 adcq $0,%rdx 816 movq %rdx,%r11 817 818 mulq %rbp 819 addq %rax,%r13 820 movq -8(%rsi),%rax 821 adcq $0,%rdx 822 addq %r10,%r13 823 adcq $0,%rdx 824 movq %r13,-24(%r14) 825 movq %rdx,%rdi 826 827 mulq %rbx 828 addq %rax,%r11 829 movq -8(%rcx),%rax 830 adcq $0,%rdx 831 movq %rdx,%r10 832 833 mulq %rbp 834 addq %rax,%rdi 835 movq (%rsi,%r9,1),%rax 836 adcq $0,%rdx 837 addq %r11,%rdi 838 adcq $0,%rdx 839 movq %rdi,-16(%r14) 840 movq %rdx,%r13 841 842 leaq (%rcx,%r9,1),%rcx 843 844 xorq %rdi,%rdi 845 addq %r10,%r13 846 adcq $0,%rdi 847 movq %r13,-8(%r14) 848 849 jmp .Louter4x 850 851.align 32 852.Louter4x: 853 leaq 16+128(%r14),%rdx 854 pxor %xmm4,%xmm4 855 pxor %xmm5,%xmm5 856 movdqa -128(%r12),%xmm0 857 movdqa -112(%r12),%xmm1 858 movdqa -96(%r12),%xmm2 859 movdqa -80(%r12),%xmm3 860 pand -128(%rdx),%xmm0 861 pand -112(%rdx),%xmm1 862 por %xmm0,%xmm4 863 pand -96(%rdx),%xmm2 864 por %xmm1,%xmm5 865 pand -80(%rdx),%xmm3 866 por %xmm2,%xmm4 867 por %xmm3,%xmm5 868 movdqa -64(%r12),%xmm0 869 movdqa -48(%r12),%xmm1 870 movdqa -32(%r12),%xmm2 871 movdqa -16(%r12),%xmm3 872 pand -64(%rdx),%xmm0 873 pand -48(%rdx),%xmm1 874 por %xmm0,%xmm4 875 pand -32(%rdx),%xmm2 876 por %xmm1,%xmm5 877 pand -16(%rdx),%xmm3 878 por %xmm2,%xmm4 879 por %xmm3,%xmm5 880 movdqa 0(%r12),%xmm0 881 movdqa 16(%r12),%xmm1 882 movdqa 32(%r12),%xmm2 883 movdqa 48(%r12),%xmm3 884 pand 0(%rdx),%xmm0 885 pand 16(%rdx),%xmm1 886 por %xmm0,%xmm4 887 pand 32(%rdx),%xmm2 888 por %xmm1,%xmm5 889 pand 48(%rdx),%xmm3 890 por %xmm2,%xmm4 891 por %xmm3,%xmm5 892 movdqa 64(%r12),%xmm0 893 movdqa 80(%r12),%xmm1 894 movdqa 96(%r12),%xmm2 895 movdqa 112(%r12),%xmm3 896 pand 64(%rdx),%xmm0 897 pand 80(%rdx),%xmm1 898 por %xmm0,%xmm4 899 pand 96(%rdx),%xmm2 900 por %xmm1,%xmm5 901 pand 112(%rdx),%xmm3 902 por %xmm2,%xmm4 903 por %xmm3,%xmm5 904 por %xmm5,%xmm4 905 906 pshufd $0x4e,%xmm4,%xmm0 907 por %xmm4,%xmm0 908 leaq 256(%r12),%r12 909.byte 102,72,15,126,195 910 911 movq (%r14,%r9,1),%r10 912 movq %r8,%rbp 913 mulq %rbx 914 addq %rax,%r10 915 movq (%rcx),%rax 916 adcq $0,%rdx 917 918 imulq %r10,%rbp 919 movq %rdx,%r11 920 movq %rdi,(%r14) 921 922 leaq (%r14,%r9,1),%r14 923 924 mulq %rbp 925 addq %rax,%r10 926 movq 8(%rsi,%r9,1),%rax 927 adcq $0,%rdx 928 movq %rdx,%rdi 929 930 mulq %rbx 931 addq %rax,%r11 932 movq 8(%rcx),%rax 933 adcq $0,%rdx 934 addq 8(%r14),%r11 935 adcq $0,%rdx 936 movq %rdx,%r10 937 938 mulq %rbp 939 addq %rax,%rdi 940 movq 16(%rsi,%r9,1),%rax 941 adcq $0,%rdx 942 addq %r11,%rdi 943 leaq 32(%r9),%r15 944 leaq 32(%rcx),%rcx 945 adcq $0,%rdx 946 movq %rdx,%r13 947 jmp .Linner4x 948 949.align 32 950.Linner4x: 951 mulq %rbx 952 addq %rax,%r10 953 movq -16(%rcx),%rax 954 adcq $0,%rdx 955 addq 16(%r14),%r10 956 leaq 32(%r14),%r14 957 adcq $0,%rdx 958 movq %rdx,%r11 959 960 mulq %rbp 961 addq %rax,%r13 962 movq -8(%rsi,%r15,1),%rax 963 adcq $0,%rdx 964 addq %r10,%r13 965 adcq $0,%rdx 966 movq %rdi,-32(%r14) 967 movq %rdx,%rdi 968 969 mulq %rbx 970 addq %rax,%r11 971 movq -8(%rcx),%rax 972 adcq $0,%rdx 973 addq -8(%r14),%r11 974 adcq $0,%rdx 975 movq %rdx,%r10 976 977 mulq %rbp 978 addq %rax,%rdi 979 movq (%rsi,%r15,1),%rax 980 adcq $0,%rdx 981 addq %r11,%rdi 982 adcq $0,%rdx 983 movq %r13,-24(%r14) 984 movq %rdx,%r13 985 986 mulq %rbx 987 addq %rax,%r10 988 movq 0(%rcx),%rax 989 adcq $0,%rdx 990 addq (%r14),%r10 991 adcq $0,%rdx 992 movq %rdx,%r11 993 994 mulq %rbp 995 addq %rax,%r13 996 movq 8(%rsi,%r15,1),%rax 997 adcq $0,%rdx 998 addq %r10,%r13 999 adcq $0,%rdx 1000 movq %rdi,-16(%r14) 1001 movq %rdx,%rdi 1002 1003 mulq %rbx 1004 addq %rax,%r11 1005 movq 8(%rcx),%rax 1006 adcq $0,%rdx 1007 addq 8(%r14),%r11 1008 adcq $0,%rdx 1009 movq %rdx,%r10 1010 1011 mulq %rbp 1012 addq %rax,%rdi 1013 movq 16(%rsi,%r15,1),%rax 1014 adcq $0,%rdx 1015 addq %r11,%rdi 1016 leaq 32(%rcx),%rcx 1017 adcq $0,%rdx 1018 movq %r13,-8(%r14) 1019 movq %rdx,%r13 1020 1021 addq $32,%r15 1022 jnz .Linner4x 1023 1024 mulq %rbx 1025 addq %rax,%r10 1026 movq -16(%rcx),%rax 1027 adcq $0,%rdx 1028 addq 16(%r14),%r10 1029 leaq 32(%r14),%r14 1030 adcq $0,%rdx 1031 movq %rdx,%r11 1032 1033 mulq %rbp 1034 addq %rax,%r13 1035 movq -8(%rsi),%rax 1036 adcq $0,%rdx 1037 addq %r10,%r13 1038 adcq $0,%rdx 1039 movq %rdi,-32(%r14) 1040 movq %rdx,%rdi 1041 1042 mulq %rbx 1043 addq %rax,%r11 1044 movq %rbp,%rax 1045 movq -8(%rcx),%rbp 1046 adcq $0,%rdx 1047 addq -8(%r14),%r11 1048 adcq $0,%rdx 1049 movq %rdx,%r10 1050 1051 mulq %rbp 1052 addq %rax,%rdi 1053 movq (%rsi,%r9,1),%rax 1054 adcq $0,%rdx 1055 addq %r11,%rdi 1056 adcq $0,%rdx 1057 movq %r13,-24(%r14) 1058 movq %rdx,%r13 1059 1060 movq %rdi,-16(%r14) 1061 leaq (%rcx,%r9,1),%rcx 1062 1063 xorq %rdi,%rdi 1064 addq %r10,%r13 1065 adcq $0,%rdi 1066 addq (%r14),%r13 1067 adcq $0,%rdi 1068 movq %r13,-8(%r14) 1069 1070 cmpq 16+8(%rsp),%r12 1071 jb .Louter4x 1072 xorq %rax,%rax 1073 subq %r13,%rbp 1074 adcq %r15,%r15 1075 orq %r15,%rdi 1076 subq %rdi,%rax 1077 leaq (%r14,%r9,1),%rbx 1078 movq (%rcx),%r12 1079 leaq (%rcx),%rbp 1080 movq %r9,%rcx 1081 sarq $3+2,%rcx 1082 movq 56+8(%rsp),%rdi 1083 decq %r12 1084 xorq %r10,%r10 1085 movq 8(%rbp),%r13 1086 movq 16(%rbp),%r14 1087 movq 24(%rbp),%r15 1088 jmp .Lsqr4x_sub_entry 1089.cfi_endproc 1090.size mul4x_internal,.-mul4x_internal 1091.globl bn_power5 1092.hidden bn_power5 1093.type bn_power5,@function 1094.align 32 1095bn_power5: 1096.cfi_startproc 1097_CET_ENDBR 1098 movq %rsp,%rax 1099.cfi_def_cfa_register %rax 1100 leaq OPENSSL_ia32cap_P(%rip),%r11 1101 movl 8(%r11),%r11d 1102 andl $0x80108,%r11d 1103 cmpl $0x80108,%r11d 1104 je .Lpowerx5_enter 1105 pushq %rbx 1106.cfi_offset %rbx,-16 1107 pushq %rbp 1108.cfi_offset %rbp,-24 1109 pushq %r12 1110.cfi_offset %r12,-32 1111 pushq %r13 1112.cfi_offset %r13,-40 1113 pushq %r14 1114.cfi_offset %r14,-48 1115 pushq %r15 1116.cfi_offset %r15,-56 1117.Lpower5_prologue: 1118 1119 shll $3,%r9d 1120 leal (%r9,%r9,2),%r10d 1121 negq %r9 1122 movq (%r8),%r8 1123 1124 1125 1126 1127 1128 1129 1130 1131 leaq -320(%rsp,%r9,2),%r11 1132 movq %rsp,%rbp 1133 subq %rdi,%r11 1134 andq $4095,%r11 1135 cmpq %r11,%r10 1136 jb .Lpwr_sp_alt 1137 subq %r11,%rbp 1138 leaq -320(%rbp,%r9,2),%rbp 1139 jmp .Lpwr_sp_done 1140 1141.align 32 1142.Lpwr_sp_alt: 1143 leaq 4096-320(,%r9,2),%r10 1144 leaq -320(%rbp,%r9,2),%rbp 1145 subq %r10,%r11 1146 movq $0,%r10 1147 cmovcq %r10,%r11 1148 subq %r11,%rbp 1149.Lpwr_sp_done: 1150 andq $-64,%rbp 1151 movq %rsp,%r11 1152 subq %rbp,%r11 1153 andq $-4096,%r11 1154 leaq (%r11,%rbp,1),%rsp 1155 movq (%rsp),%r10 1156 cmpq %rbp,%rsp 1157 ja .Lpwr_page_walk 1158 jmp .Lpwr_page_walk_done 1159 1160.Lpwr_page_walk: 1161 leaq -4096(%rsp),%rsp 1162 movq (%rsp),%r10 1163 cmpq %rbp,%rsp 1164 ja .Lpwr_page_walk 1165.Lpwr_page_walk_done: 1166 1167 movq %r9,%r10 1168 negq %r9 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 movq %r8,32(%rsp) 1180 movq %rax,40(%rsp) 1181.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 1182.Lpower5_body: 1183.byte 102,72,15,110,207 1184.byte 102,72,15,110,209 1185.byte 102,73,15,110,218 1186.byte 102,72,15,110,226 1187 1188 call __bn_sqr8x_internal 1189 call __bn_post4x_internal 1190 call __bn_sqr8x_internal 1191 call __bn_post4x_internal 1192 call __bn_sqr8x_internal 1193 call __bn_post4x_internal 1194 call __bn_sqr8x_internal 1195 call __bn_post4x_internal 1196 call __bn_sqr8x_internal 1197 call __bn_post4x_internal 1198 1199.byte 102,72,15,126,209 1200.byte 102,72,15,126,226 1201 movq %rsi,%rdi 1202 movq 40(%rsp),%rax 1203 leaq 32(%rsp),%r8 1204 1205 call mul4x_internal 1206 1207 movq 40(%rsp),%rsi 1208.cfi_def_cfa %rsi,8 1209 movq $1,%rax 1210 movq -48(%rsi),%r15 1211.cfi_restore %r15 1212 movq -40(%rsi),%r14 1213.cfi_restore %r14 1214 movq -32(%rsi),%r13 1215.cfi_restore %r13 1216 movq -24(%rsi),%r12 1217.cfi_restore %r12 1218 movq -16(%rsi),%rbp 1219.cfi_restore %rbp 1220 movq -8(%rsi),%rbx 1221.cfi_restore %rbx 1222 leaq (%rsi),%rsp 1223.cfi_def_cfa_register %rsp 1224.Lpower5_epilogue: 1225 ret 1226.cfi_endproc 1227.size bn_power5,.-bn_power5 1228 1229.globl bn_sqr8x_internal 1230.hidden bn_sqr8x_internal 1231.hidden bn_sqr8x_internal 1232.type bn_sqr8x_internal,@function 1233.align 32 1234bn_sqr8x_internal: 1235__bn_sqr8x_internal: 1236.cfi_startproc 1237_CET_ENDBR 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 leaq 32(%r10),%rbp 1312 leaq (%rsi,%r9,1),%rsi 1313 1314 movq %r9,%rcx 1315 1316 1317 movq -32(%rsi,%rbp,1),%r14 1318 leaq 48+8(%rsp,%r9,2),%rdi 1319 movq -24(%rsi,%rbp,1),%rax 1320 leaq -32(%rdi,%rbp,1),%rdi 1321 movq -16(%rsi,%rbp,1),%rbx 1322 movq %rax,%r15 1323 1324 mulq %r14 1325 movq %rax,%r10 1326 movq %rbx,%rax 1327 movq %rdx,%r11 1328 movq %r10,-24(%rdi,%rbp,1) 1329 1330 mulq %r14 1331 addq %rax,%r11 1332 movq %rbx,%rax 1333 adcq $0,%rdx 1334 movq %r11,-16(%rdi,%rbp,1) 1335 movq %rdx,%r10 1336 1337 1338 movq -8(%rsi,%rbp,1),%rbx 1339 mulq %r15 1340 movq %rax,%r12 1341 movq %rbx,%rax 1342 movq %rdx,%r13 1343 1344 leaq (%rbp),%rcx 1345 mulq %r14 1346 addq %rax,%r10 1347 movq %rbx,%rax 1348 movq %rdx,%r11 1349 adcq $0,%r11 1350 addq %r12,%r10 1351 adcq $0,%r11 1352 movq %r10,-8(%rdi,%rcx,1) 1353 jmp .Lsqr4x_1st 1354 1355.align 32 1356.Lsqr4x_1st: 1357 movq (%rsi,%rcx,1),%rbx 1358 mulq %r15 1359 addq %rax,%r13 1360 movq %rbx,%rax 1361 movq %rdx,%r12 1362 adcq $0,%r12 1363 1364 mulq %r14 1365 addq %rax,%r11 1366 movq %rbx,%rax 1367 movq 8(%rsi,%rcx,1),%rbx 1368 movq %rdx,%r10 1369 adcq $0,%r10 1370 addq %r13,%r11 1371 adcq $0,%r10 1372 1373 1374 mulq %r15 1375 addq %rax,%r12 1376 movq %rbx,%rax 1377 movq %r11,(%rdi,%rcx,1) 1378 movq %rdx,%r13 1379 adcq $0,%r13 1380 1381 mulq %r14 1382 addq %rax,%r10 1383 movq %rbx,%rax 1384 movq 16(%rsi,%rcx,1),%rbx 1385 movq %rdx,%r11 1386 adcq $0,%r11 1387 addq %r12,%r10 1388 adcq $0,%r11 1389 1390 mulq %r15 1391 addq %rax,%r13 1392 movq %rbx,%rax 1393 movq %r10,8(%rdi,%rcx,1) 1394 movq %rdx,%r12 1395 adcq $0,%r12 1396 1397 mulq %r14 1398 addq %rax,%r11 1399 movq %rbx,%rax 1400 movq 24(%rsi,%rcx,1),%rbx 1401 movq %rdx,%r10 1402 adcq $0,%r10 1403 addq %r13,%r11 1404 adcq $0,%r10 1405 1406 1407 mulq %r15 1408 addq %rax,%r12 1409 movq %rbx,%rax 1410 movq %r11,16(%rdi,%rcx,1) 1411 movq %rdx,%r13 1412 adcq $0,%r13 1413 leaq 32(%rcx),%rcx 1414 1415 mulq %r14 1416 addq %rax,%r10 1417 movq %rbx,%rax 1418 movq %rdx,%r11 1419 adcq $0,%r11 1420 addq %r12,%r10 1421 adcq $0,%r11 1422 movq %r10,-8(%rdi,%rcx,1) 1423 1424 cmpq $0,%rcx 1425 jne .Lsqr4x_1st 1426 1427 mulq %r15 1428 addq %rax,%r13 1429 leaq 16(%rbp),%rbp 1430 adcq $0,%rdx 1431 addq %r11,%r13 1432 adcq $0,%rdx 1433 1434 movq %r13,(%rdi) 1435 movq %rdx,%r12 1436 movq %rdx,8(%rdi) 1437 jmp .Lsqr4x_outer 1438 1439.align 32 1440.Lsqr4x_outer: 1441 movq -32(%rsi,%rbp,1),%r14 1442 leaq 48+8(%rsp,%r9,2),%rdi 1443 movq -24(%rsi,%rbp,1),%rax 1444 leaq -32(%rdi,%rbp,1),%rdi 1445 movq -16(%rsi,%rbp,1),%rbx 1446 movq %rax,%r15 1447 1448 mulq %r14 1449 movq -24(%rdi,%rbp,1),%r10 1450 addq %rax,%r10 1451 movq %rbx,%rax 1452 adcq $0,%rdx 1453 movq %r10,-24(%rdi,%rbp,1) 1454 movq %rdx,%r11 1455 1456 mulq %r14 1457 addq %rax,%r11 1458 movq %rbx,%rax 1459 adcq $0,%rdx 1460 addq -16(%rdi,%rbp,1),%r11 1461 movq %rdx,%r10 1462 adcq $0,%r10 1463 movq %r11,-16(%rdi,%rbp,1) 1464 1465 xorq %r12,%r12 1466 1467 movq -8(%rsi,%rbp,1),%rbx 1468 mulq %r15 1469 addq %rax,%r12 1470 movq %rbx,%rax 1471 adcq $0,%rdx 1472 addq -8(%rdi,%rbp,1),%r12 1473 movq %rdx,%r13 1474 adcq $0,%r13 1475 1476 mulq %r14 1477 addq %rax,%r10 1478 movq %rbx,%rax 1479 adcq $0,%rdx 1480 addq %r12,%r10 1481 movq %rdx,%r11 1482 adcq $0,%r11 1483 movq %r10,-8(%rdi,%rbp,1) 1484 1485 leaq (%rbp),%rcx 1486 jmp .Lsqr4x_inner 1487 1488.align 32 1489.Lsqr4x_inner: 1490 movq (%rsi,%rcx,1),%rbx 1491 mulq %r15 1492 addq %rax,%r13 1493 movq %rbx,%rax 1494 movq %rdx,%r12 1495 adcq $0,%r12 1496 addq (%rdi,%rcx,1),%r13 1497 adcq $0,%r12 1498 1499.byte 0x67 1500 mulq %r14 1501 addq %rax,%r11 1502 movq %rbx,%rax 1503 movq 8(%rsi,%rcx,1),%rbx 1504 movq %rdx,%r10 1505 adcq $0,%r10 1506 addq %r13,%r11 1507 adcq $0,%r10 1508 1509 mulq %r15 1510 addq %rax,%r12 1511 movq %r11,(%rdi,%rcx,1) 1512 movq %rbx,%rax 1513 movq %rdx,%r13 1514 adcq $0,%r13 1515 addq 8(%rdi,%rcx,1),%r12 1516 leaq 16(%rcx),%rcx 1517 adcq $0,%r13 1518 1519 mulq %r14 1520 addq %rax,%r10 1521 movq %rbx,%rax 1522 adcq $0,%rdx 1523 addq %r12,%r10 1524 movq %rdx,%r11 1525 adcq $0,%r11 1526 movq %r10,-8(%rdi,%rcx,1) 1527 1528 cmpq $0,%rcx 1529 jne .Lsqr4x_inner 1530 1531.byte 0x67 1532 mulq %r15 1533 addq %rax,%r13 1534 adcq $0,%rdx 1535 addq %r11,%r13 1536 adcq $0,%rdx 1537 1538 movq %r13,(%rdi) 1539 movq %rdx,%r12 1540 movq %rdx,8(%rdi) 1541 1542 addq $16,%rbp 1543 jnz .Lsqr4x_outer 1544 1545 1546 movq -32(%rsi),%r14 1547 leaq 48+8(%rsp,%r9,2),%rdi 1548 movq -24(%rsi),%rax 1549 leaq -32(%rdi,%rbp,1),%rdi 1550 movq -16(%rsi),%rbx 1551 movq %rax,%r15 1552 1553 mulq %r14 1554 addq %rax,%r10 1555 movq %rbx,%rax 1556 movq %rdx,%r11 1557 adcq $0,%r11 1558 1559 mulq %r14 1560 addq %rax,%r11 1561 movq %rbx,%rax 1562 movq %r10,-24(%rdi) 1563 movq %rdx,%r10 1564 adcq $0,%r10 1565 addq %r13,%r11 1566 movq -8(%rsi),%rbx 1567 adcq $0,%r10 1568 1569 mulq %r15 1570 addq %rax,%r12 1571 movq %rbx,%rax 1572 movq %r11,-16(%rdi) 1573 movq %rdx,%r13 1574 adcq $0,%r13 1575 1576 mulq %r14 1577 addq %rax,%r10 1578 movq %rbx,%rax 1579 movq %rdx,%r11 1580 adcq $0,%r11 1581 addq %r12,%r10 1582 adcq $0,%r11 1583 movq %r10,-8(%rdi) 1584 1585 mulq %r15 1586 addq %rax,%r13 1587 movq -16(%rsi),%rax 1588 adcq $0,%rdx 1589 addq %r11,%r13 1590 adcq $0,%rdx 1591 1592 movq %r13,(%rdi) 1593 movq %rdx,%r12 1594 movq %rdx,8(%rdi) 1595 1596 mulq %rbx 1597 addq $16,%rbp 1598 xorq %r14,%r14 1599 subq %r9,%rbp 1600 xorq %r15,%r15 1601 1602 addq %r12,%rax 1603 adcq $0,%rdx 1604 movq %rax,8(%rdi) 1605 movq %rdx,16(%rdi) 1606 movq %r15,24(%rdi) 1607 1608 movq -16(%rsi,%rbp,1),%rax 1609 leaq 48+8(%rsp),%rdi 1610 xorq %r10,%r10 1611 movq 8(%rdi),%r11 1612 1613 leaq (%r14,%r10,2),%r12 1614 shrq $63,%r10 1615 leaq (%rcx,%r11,2),%r13 1616 shrq $63,%r11 1617 orq %r10,%r13 1618 movq 16(%rdi),%r10 1619 movq %r11,%r14 1620 mulq %rax 1621 negq %r15 1622 movq 24(%rdi),%r11 1623 adcq %rax,%r12 1624 movq -8(%rsi,%rbp,1),%rax 1625 movq %r12,(%rdi) 1626 adcq %rdx,%r13 1627 1628 leaq (%r14,%r10,2),%rbx 1629 movq %r13,8(%rdi) 1630 sbbq %r15,%r15 1631 shrq $63,%r10 1632 leaq (%rcx,%r11,2),%r8 1633 shrq $63,%r11 1634 orq %r10,%r8 1635 movq 32(%rdi),%r10 1636 movq %r11,%r14 1637 mulq %rax 1638 negq %r15 1639 movq 40(%rdi),%r11 1640 adcq %rax,%rbx 1641 movq 0(%rsi,%rbp,1),%rax 1642 movq %rbx,16(%rdi) 1643 adcq %rdx,%r8 1644 leaq 16(%rbp),%rbp 1645 movq %r8,24(%rdi) 1646 sbbq %r15,%r15 1647 leaq 64(%rdi),%rdi 1648 jmp .Lsqr4x_shift_n_add 1649 1650.align 32 1651.Lsqr4x_shift_n_add: 1652 leaq (%r14,%r10,2),%r12 1653 shrq $63,%r10 1654 leaq (%rcx,%r11,2),%r13 1655 shrq $63,%r11 1656 orq %r10,%r13 1657 movq -16(%rdi),%r10 1658 movq %r11,%r14 1659 mulq %rax 1660 negq %r15 1661 movq -8(%rdi),%r11 1662 adcq %rax,%r12 1663 movq -8(%rsi,%rbp,1),%rax 1664 movq %r12,-32(%rdi) 1665 adcq %rdx,%r13 1666 1667 leaq (%r14,%r10,2),%rbx 1668 movq %r13,-24(%rdi) 1669 sbbq %r15,%r15 1670 shrq $63,%r10 1671 leaq (%rcx,%r11,2),%r8 1672 shrq $63,%r11 1673 orq %r10,%r8 1674 movq 0(%rdi),%r10 1675 movq %r11,%r14 1676 mulq %rax 1677 negq %r15 1678 movq 8(%rdi),%r11 1679 adcq %rax,%rbx 1680 movq 0(%rsi,%rbp,1),%rax 1681 movq %rbx,-16(%rdi) 1682 adcq %rdx,%r8 1683 1684 leaq (%r14,%r10,2),%r12 1685 movq %r8,-8(%rdi) 1686 sbbq %r15,%r15 1687 shrq $63,%r10 1688 leaq (%rcx,%r11,2),%r13 1689 shrq $63,%r11 1690 orq %r10,%r13 1691 movq 16(%rdi),%r10 1692 movq %r11,%r14 1693 mulq %rax 1694 negq %r15 1695 movq 24(%rdi),%r11 1696 adcq %rax,%r12 1697 movq 8(%rsi,%rbp,1),%rax 1698 movq %r12,0(%rdi) 1699 adcq %rdx,%r13 1700 1701 leaq (%r14,%r10,2),%rbx 1702 movq %r13,8(%rdi) 1703 sbbq %r15,%r15 1704 shrq $63,%r10 1705 leaq (%rcx,%r11,2),%r8 1706 shrq $63,%r11 1707 orq %r10,%r8 1708 movq 32(%rdi),%r10 1709 movq %r11,%r14 1710 mulq %rax 1711 negq %r15 1712 movq 40(%rdi),%r11 1713 adcq %rax,%rbx 1714 movq 16(%rsi,%rbp,1),%rax 1715 movq %rbx,16(%rdi) 1716 adcq %rdx,%r8 1717 movq %r8,24(%rdi) 1718 sbbq %r15,%r15 1719 leaq 64(%rdi),%rdi 1720 addq $32,%rbp 1721 jnz .Lsqr4x_shift_n_add 1722 1723 leaq (%r14,%r10,2),%r12 1724.byte 0x67 1725 shrq $63,%r10 1726 leaq (%rcx,%r11,2),%r13 1727 shrq $63,%r11 1728 orq %r10,%r13 1729 movq -16(%rdi),%r10 1730 movq %r11,%r14 1731 mulq %rax 1732 negq %r15 1733 movq -8(%rdi),%r11 1734 adcq %rax,%r12 1735 movq -8(%rsi),%rax 1736 movq %r12,-32(%rdi) 1737 adcq %rdx,%r13 1738 1739 leaq (%r14,%r10,2),%rbx 1740 movq %r13,-24(%rdi) 1741 sbbq %r15,%r15 1742 shrq $63,%r10 1743 leaq (%rcx,%r11,2),%r8 1744 shrq $63,%r11 1745 orq %r10,%r8 1746 mulq %rax 1747 negq %r15 1748 adcq %rax,%rbx 1749 adcq %rdx,%r8 1750 movq %rbx,-16(%rdi) 1751 movq %r8,-8(%rdi) 1752.byte 102,72,15,126,213 1753__bn_sqr8x_reduction: 1754 xorq %rax,%rax 1755 leaq (%r9,%rbp,1),%rcx 1756 leaq 48+8(%rsp,%r9,2),%rdx 1757 movq %rcx,0+8(%rsp) 1758 leaq 48+8(%rsp,%r9,1),%rdi 1759 movq %rdx,8+8(%rsp) 1760 negq %r9 1761 jmp .L8x_reduction_loop 1762 1763.align 32 1764.L8x_reduction_loop: 1765 leaq (%rdi,%r9,1),%rdi 1766.byte 0x66 1767 movq 0(%rdi),%rbx 1768 movq 8(%rdi),%r9 1769 movq 16(%rdi),%r10 1770 movq 24(%rdi),%r11 1771 movq 32(%rdi),%r12 1772 movq 40(%rdi),%r13 1773 movq 48(%rdi),%r14 1774 movq 56(%rdi),%r15 1775 movq %rax,(%rdx) 1776 leaq 64(%rdi),%rdi 1777 1778.byte 0x67 1779 movq %rbx,%r8 1780 imulq 32+8(%rsp),%rbx 1781 movq 0(%rbp),%rax 1782 movl $8,%ecx 1783 jmp .L8x_reduce 1784 1785.align 32 1786.L8x_reduce: 1787 mulq %rbx 1788 movq 8(%rbp),%rax 1789 negq %r8 1790 movq %rdx,%r8 1791 adcq $0,%r8 1792 1793 mulq %rbx 1794 addq %rax,%r9 1795 movq 16(%rbp),%rax 1796 adcq $0,%rdx 1797 addq %r9,%r8 1798 movq %rbx,48-8+8(%rsp,%rcx,8) 1799 movq %rdx,%r9 1800 adcq $0,%r9 1801 1802 mulq %rbx 1803 addq %rax,%r10 1804 movq 24(%rbp),%rax 1805 adcq $0,%rdx 1806 addq %r10,%r9 1807 movq 32+8(%rsp),%rsi 1808 movq %rdx,%r10 1809 adcq $0,%r10 1810 1811 mulq %rbx 1812 addq %rax,%r11 1813 movq 32(%rbp),%rax 1814 adcq $0,%rdx 1815 imulq %r8,%rsi 1816 addq %r11,%r10 1817 movq %rdx,%r11 1818 adcq $0,%r11 1819 1820 mulq %rbx 1821 addq %rax,%r12 1822 movq 40(%rbp),%rax 1823 adcq $0,%rdx 1824 addq %r12,%r11 1825 movq %rdx,%r12 1826 adcq $0,%r12 1827 1828 mulq %rbx 1829 addq %rax,%r13 1830 movq 48(%rbp),%rax 1831 adcq $0,%rdx 1832 addq %r13,%r12 1833 movq %rdx,%r13 1834 adcq $0,%r13 1835 1836 mulq %rbx 1837 addq %rax,%r14 1838 movq 56(%rbp),%rax 1839 adcq $0,%rdx 1840 addq %r14,%r13 1841 movq %rdx,%r14 1842 adcq $0,%r14 1843 1844 mulq %rbx 1845 movq %rsi,%rbx 1846 addq %rax,%r15 1847 movq 0(%rbp),%rax 1848 adcq $0,%rdx 1849 addq %r15,%r14 1850 movq %rdx,%r15 1851 adcq $0,%r15 1852 1853 decl %ecx 1854 jnz .L8x_reduce 1855 1856 leaq 64(%rbp),%rbp 1857 xorq %rax,%rax 1858 movq 8+8(%rsp),%rdx 1859 cmpq 0+8(%rsp),%rbp 1860 jae .L8x_no_tail 1861 1862.byte 0x66 1863 addq 0(%rdi),%r8 1864 adcq 8(%rdi),%r9 1865 adcq 16(%rdi),%r10 1866 adcq 24(%rdi),%r11 1867 adcq 32(%rdi),%r12 1868 adcq 40(%rdi),%r13 1869 adcq 48(%rdi),%r14 1870 adcq 56(%rdi),%r15 1871 sbbq %rsi,%rsi 1872 1873 movq 48+56+8(%rsp),%rbx 1874 movl $8,%ecx 1875 movq 0(%rbp),%rax 1876 jmp .L8x_tail 1877 1878.align 32 1879.L8x_tail: 1880 mulq %rbx 1881 addq %rax,%r8 1882 movq 8(%rbp),%rax 1883 movq %r8,(%rdi) 1884 movq %rdx,%r8 1885 adcq $0,%r8 1886 1887 mulq %rbx 1888 addq %rax,%r9 1889 movq 16(%rbp),%rax 1890 adcq $0,%rdx 1891 addq %r9,%r8 1892 leaq 8(%rdi),%rdi 1893 movq %rdx,%r9 1894 adcq $0,%r9 1895 1896 mulq %rbx 1897 addq %rax,%r10 1898 movq 24(%rbp),%rax 1899 adcq $0,%rdx 1900 addq %r10,%r9 1901 movq %rdx,%r10 1902 adcq $0,%r10 1903 1904 mulq %rbx 1905 addq %rax,%r11 1906 movq 32(%rbp),%rax 1907 adcq $0,%rdx 1908 addq %r11,%r10 1909 movq %rdx,%r11 1910 adcq $0,%r11 1911 1912 mulq %rbx 1913 addq %rax,%r12 1914 movq 40(%rbp),%rax 1915 adcq $0,%rdx 1916 addq %r12,%r11 1917 movq %rdx,%r12 1918 adcq $0,%r12 1919 1920 mulq %rbx 1921 addq %rax,%r13 1922 movq 48(%rbp),%rax 1923 adcq $0,%rdx 1924 addq %r13,%r12 1925 movq %rdx,%r13 1926 adcq $0,%r13 1927 1928 mulq %rbx 1929 addq %rax,%r14 1930 movq 56(%rbp),%rax 1931 adcq $0,%rdx 1932 addq %r14,%r13 1933 movq %rdx,%r14 1934 adcq $0,%r14 1935 1936 mulq %rbx 1937 movq 48-16+8(%rsp,%rcx,8),%rbx 1938 addq %rax,%r15 1939 adcq $0,%rdx 1940 addq %r15,%r14 1941 movq 0(%rbp),%rax 1942 movq %rdx,%r15 1943 adcq $0,%r15 1944 1945 decl %ecx 1946 jnz .L8x_tail 1947 1948 leaq 64(%rbp),%rbp 1949 movq 8+8(%rsp),%rdx 1950 cmpq 0+8(%rsp),%rbp 1951 jae .L8x_tail_done 1952 1953 movq 48+56+8(%rsp),%rbx 1954 negq %rsi 1955 movq 0(%rbp),%rax 1956 adcq 0(%rdi),%r8 1957 adcq 8(%rdi),%r9 1958 adcq 16(%rdi),%r10 1959 adcq 24(%rdi),%r11 1960 adcq 32(%rdi),%r12 1961 adcq 40(%rdi),%r13 1962 adcq 48(%rdi),%r14 1963 adcq 56(%rdi),%r15 1964 sbbq %rsi,%rsi 1965 1966 movl $8,%ecx 1967 jmp .L8x_tail 1968 1969.align 32 1970.L8x_tail_done: 1971 xorq %rax,%rax 1972 addq (%rdx),%r8 1973 adcq $0,%r9 1974 adcq $0,%r10 1975 adcq $0,%r11 1976 adcq $0,%r12 1977 adcq $0,%r13 1978 adcq $0,%r14 1979 adcq $0,%r15 1980 adcq $0,%rax 1981 1982 negq %rsi 1983.L8x_no_tail: 1984 adcq 0(%rdi),%r8 1985 adcq 8(%rdi),%r9 1986 adcq 16(%rdi),%r10 1987 adcq 24(%rdi),%r11 1988 adcq 32(%rdi),%r12 1989 adcq 40(%rdi),%r13 1990 adcq 48(%rdi),%r14 1991 adcq 56(%rdi),%r15 1992 adcq $0,%rax 1993 movq -8(%rbp),%rcx 1994 xorq %rsi,%rsi 1995 1996.byte 102,72,15,126,213 1997 1998 movq %r8,0(%rdi) 1999 movq %r9,8(%rdi) 2000.byte 102,73,15,126,217 2001 movq %r10,16(%rdi) 2002 movq %r11,24(%rdi) 2003 movq %r12,32(%rdi) 2004 movq %r13,40(%rdi) 2005 movq %r14,48(%rdi) 2006 movq %r15,56(%rdi) 2007 leaq 64(%rdi),%rdi 2008 2009 cmpq %rdx,%rdi 2010 jb .L8x_reduction_loop 2011 ret 2012.cfi_endproc 2013.size bn_sqr8x_internal,.-bn_sqr8x_internal 2014.type __bn_post4x_internal,@function 2015.align 32 2016__bn_post4x_internal: 2017.cfi_startproc 2018 movq 0(%rbp),%r12 2019 leaq (%rdi,%r9,1),%rbx 2020 movq %r9,%rcx 2021.byte 102,72,15,126,207 2022 negq %rax 2023.byte 102,72,15,126,206 2024 sarq $3+2,%rcx 2025 decq %r12 2026 xorq %r10,%r10 2027 movq 8(%rbp),%r13 2028 movq 16(%rbp),%r14 2029 movq 24(%rbp),%r15 2030 jmp .Lsqr4x_sub_entry 2031 2032.align 16 2033.Lsqr4x_sub: 2034 movq 0(%rbp),%r12 2035 movq 8(%rbp),%r13 2036 movq 16(%rbp),%r14 2037 movq 24(%rbp),%r15 2038.Lsqr4x_sub_entry: 2039 leaq 32(%rbp),%rbp 2040 notq %r12 2041 notq %r13 2042 notq %r14 2043 notq %r15 2044 andq %rax,%r12 2045 andq %rax,%r13 2046 andq %rax,%r14 2047 andq %rax,%r15 2048 2049 negq %r10 2050 adcq 0(%rbx),%r12 2051 adcq 8(%rbx),%r13 2052 adcq 16(%rbx),%r14 2053 adcq 24(%rbx),%r15 2054 movq %r12,0(%rdi) 2055 leaq 32(%rbx),%rbx 2056 movq %r13,8(%rdi) 2057 sbbq %r10,%r10 2058 movq %r14,16(%rdi) 2059 movq %r15,24(%rdi) 2060 leaq 32(%rdi),%rdi 2061 2062 incq %rcx 2063 jnz .Lsqr4x_sub 2064 2065 movq %r9,%r10 2066 negq %r9 2067 ret 2068.cfi_endproc 2069.size __bn_post4x_internal,.-__bn_post4x_internal 2070.type bn_mulx4x_mont_gather5,@function 2071.align 32 2072bn_mulx4x_mont_gather5: 2073.cfi_startproc 2074 movq %rsp,%rax 2075.cfi_def_cfa_register %rax 2076.Lmulx4x_enter: 2077 pushq %rbx 2078.cfi_offset %rbx,-16 2079 pushq %rbp 2080.cfi_offset %rbp,-24 2081 pushq %r12 2082.cfi_offset %r12,-32 2083 pushq %r13 2084.cfi_offset %r13,-40 2085 pushq %r14 2086.cfi_offset %r14,-48 2087 pushq %r15 2088.cfi_offset %r15,-56 2089.Lmulx4x_prologue: 2090 2091 shll $3,%r9d 2092 leaq (%r9,%r9,2),%r10 2093 negq %r9 2094 movq (%r8),%r8 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 leaq -320(%rsp,%r9,2),%r11 2106 movq %rsp,%rbp 2107 subq %rdi,%r11 2108 andq $4095,%r11 2109 cmpq %r11,%r10 2110 jb .Lmulx4xsp_alt 2111 subq %r11,%rbp 2112 leaq -320(%rbp,%r9,2),%rbp 2113 jmp .Lmulx4xsp_done 2114 2115.Lmulx4xsp_alt: 2116 leaq 4096-320(,%r9,2),%r10 2117 leaq -320(%rbp,%r9,2),%rbp 2118 subq %r10,%r11 2119 movq $0,%r10 2120 cmovcq %r10,%r11 2121 subq %r11,%rbp 2122.Lmulx4xsp_done: 2123 andq $-64,%rbp 2124 movq %rsp,%r11 2125 subq %rbp,%r11 2126 andq $-4096,%r11 2127 leaq (%r11,%rbp,1),%rsp 2128 movq (%rsp),%r10 2129 cmpq %rbp,%rsp 2130 ja .Lmulx4x_page_walk 2131 jmp .Lmulx4x_page_walk_done 2132 2133.Lmulx4x_page_walk: 2134 leaq -4096(%rsp),%rsp 2135 movq (%rsp),%r10 2136 cmpq %rbp,%rsp 2137 ja .Lmulx4x_page_walk 2138.Lmulx4x_page_walk_done: 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 movq %r8,32(%rsp) 2153 movq %rax,40(%rsp) 2154.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2155.Lmulx4x_body: 2156 call mulx4x_internal 2157 2158 movq 40(%rsp),%rsi 2159.cfi_def_cfa %rsi,8 2160 movq $1,%rax 2161 2162 movq -48(%rsi),%r15 2163.cfi_restore %r15 2164 movq -40(%rsi),%r14 2165.cfi_restore %r14 2166 movq -32(%rsi),%r13 2167.cfi_restore %r13 2168 movq -24(%rsi),%r12 2169.cfi_restore %r12 2170 movq -16(%rsi),%rbp 2171.cfi_restore %rbp 2172 movq -8(%rsi),%rbx 2173.cfi_restore %rbx 2174 leaq (%rsi),%rsp 2175.cfi_def_cfa_register %rsp 2176.Lmulx4x_epilogue: 2177 ret 2178.cfi_endproc 2179.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2180 2181.type mulx4x_internal,@function 2182.align 32 2183mulx4x_internal: 2184.cfi_startproc 2185 movq %r9,8(%rsp) 2186 movq %r9,%r10 2187 negq %r9 2188 shlq $5,%r9 2189 negq %r10 2190 leaq 128(%rdx,%r9,1),%r13 2191 shrq $5+5,%r9 2192 movd 8(%rax),%xmm5 2193 subq $1,%r9 2194 leaq .Linc(%rip),%rax 2195 movq %r13,16+8(%rsp) 2196 movq %r9,24+8(%rsp) 2197 movq %rdi,56+8(%rsp) 2198 movdqa 0(%rax),%xmm0 2199 movdqa 16(%rax),%xmm1 2200 leaq 88-112(%rsp,%r10,1),%r10 2201 leaq 128(%rdx),%rdi 2202 2203 pshufd $0,%xmm5,%xmm5 2204 movdqa %xmm1,%xmm4 2205.byte 0x67 2206 movdqa %xmm1,%xmm2 2207.byte 0x67 2208 paddd %xmm0,%xmm1 2209 pcmpeqd %xmm5,%xmm0 2210 movdqa %xmm4,%xmm3 2211 paddd %xmm1,%xmm2 2212 pcmpeqd %xmm5,%xmm1 2213 movdqa %xmm0,112(%r10) 2214 movdqa %xmm4,%xmm0 2215 2216 paddd %xmm2,%xmm3 2217 pcmpeqd %xmm5,%xmm2 2218 movdqa %xmm1,128(%r10) 2219 movdqa %xmm4,%xmm1 2220 2221 paddd %xmm3,%xmm0 2222 pcmpeqd %xmm5,%xmm3 2223 movdqa %xmm2,144(%r10) 2224 movdqa %xmm4,%xmm2 2225 2226 paddd %xmm0,%xmm1 2227 pcmpeqd %xmm5,%xmm0 2228 movdqa %xmm3,160(%r10) 2229 movdqa %xmm4,%xmm3 2230 paddd %xmm1,%xmm2 2231 pcmpeqd %xmm5,%xmm1 2232 movdqa %xmm0,176(%r10) 2233 movdqa %xmm4,%xmm0 2234 2235 paddd %xmm2,%xmm3 2236 pcmpeqd %xmm5,%xmm2 2237 movdqa %xmm1,192(%r10) 2238 movdqa %xmm4,%xmm1 2239 2240 paddd %xmm3,%xmm0 2241 pcmpeqd %xmm5,%xmm3 2242 movdqa %xmm2,208(%r10) 2243 movdqa %xmm4,%xmm2 2244 2245 paddd %xmm0,%xmm1 2246 pcmpeqd %xmm5,%xmm0 2247 movdqa %xmm3,224(%r10) 2248 movdqa %xmm4,%xmm3 2249 paddd %xmm1,%xmm2 2250 pcmpeqd %xmm5,%xmm1 2251 movdqa %xmm0,240(%r10) 2252 movdqa %xmm4,%xmm0 2253 2254 paddd %xmm2,%xmm3 2255 pcmpeqd %xmm5,%xmm2 2256 movdqa %xmm1,256(%r10) 2257 movdqa %xmm4,%xmm1 2258 2259 paddd %xmm3,%xmm0 2260 pcmpeqd %xmm5,%xmm3 2261 movdqa %xmm2,272(%r10) 2262 movdqa %xmm4,%xmm2 2263 2264 paddd %xmm0,%xmm1 2265 pcmpeqd %xmm5,%xmm0 2266 movdqa %xmm3,288(%r10) 2267 movdqa %xmm4,%xmm3 2268.byte 0x67 2269 paddd %xmm1,%xmm2 2270 pcmpeqd %xmm5,%xmm1 2271 movdqa %xmm0,304(%r10) 2272 2273 paddd %xmm2,%xmm3 2274 pcmpeqd %xmm5,%xmm2 2275 movdqa %xmm1,320(%r10) 2276 2277 pcmpeqd %xmm5,%xmm3 2278 movdqa %xmm2,336(%r10) 2279 2280 pand 64(%rdi),%xmm0 2281 pand 80(%rdi),%xmm1 2282 pand 96(%rdi),%xmm2 2283 movdqa %xmm3,352(%r10) 2284 pand 112(%rdi),%xmm3 2285 por %xmm2,%xmm0 2286 por %xmm3,%xmm1 2287 movdqa -128(%rdi),%xmm4 2288 movdqa -112(%rdi),%xmm5 2289 movdqa -96(%rdi),%xmm2 2290 pand 112(%r10),%xmm4 2291 movdqa -80(%rdi),%xmm3 2292 pand 128(%r10),%xmm5 2293 por %xmm4,%xmm0 2294 pand 144(%r10),%xmm2 2295 por %xmm5,%xmm1 2296 pand 160(%r10),%xmm3 2297 por %xmm2,%xmm0 2298 por %xmm3,%xmm1 2299 movdqa -64(%rdi),%xmm4 2300 movdqa -48(%rdi),%xmm5 2301 movdqa -32(%rdi),%xmm2 2302 pand 176(%r10),%xmm4 2303 movdqa -16(%rdi),%xmm3 2304 pand 192(%r10),%xmm5 2305 por %xmm4,%xmm0 2306 pand 208(%r10),%xmm2 2307 por %xmm5,%xmm1 2308 pand 224(%r10),%xmm3 2309 por %xmm2,%xmm0 2310 por %xmm3,%xmm1 2311 movdqa 0(%rdi),%xmm4 2312 movdqa 16(%rdi),%xmm5 2313 movdqa 32(%rdi),%xmm2 2314 pand 240(%r10),%xmm4 2315 movdqa 48(%rdi),%xmm3 2316 pand 256(%r10),%xmm5 2317 por %xmm4,%xmm0 2318 pand 272(%r10),%xmm2 2319 por %xmm5,%xmm1 2320 pand 288(%r10),%xmm3 2321 por %xmm2,%xmm0 2322 por %xmm3,%xmm1 2323 pxor %xmm1,%xmm0 2324 2325 pshufd $0x4e,%xmm0,%xmm1 2326 por %xmm1,%xmm0 2327 leaq 256(%rdi),%rdi 2328.byte 102,72,15,126,194 2329 leaq 64+32+8(%rsp),%rbx 2330 2331 movq %rdx,%r9 2332 mulxq 0(%rsi),%r8,%rax 2333 mulxq 8(%rsi),%r11,%r12 2334 addq %rax,%r11 2335 mulxq 16(%rsi),%rax,%r13 2336 adcq %rax,%r12 2337 adcq $0,%r13 2338 mulxq 24(%rsi),%rax,%r14 2339 2340 movq %r8,%r15 2341 imulq 32+8(%rsp),%r8 2342 xorq %rbp,%rbp 2343 movq %r8,%rdx 2344 2345 movq %rdi,8+8(%rsp) 2346 2347 leaq 32(%rsi),%rsi 2348 adcxq %rax,%r13 2349 adcxq %rbp,%r14 2350 2351 mulxq 0(%rcx),%rax,%r10 2352 adcxq %rax,%r15 2353 adoxq %r11,%r10 2354 mulxq 8(%rcx),%rax,%r11 2355 adcxq %rax,%r10 2356 adoxq %r12,%r11 2357 mulxq 16(%rcx),%rax,%r12 2358 movq 24+8(%rsp),%rdi 2359 movq %r10,-32(%rbx) 2360 adcxq %rax,%r11 2361 adoxq %r13,%r12 2362 mulxq 24(%rcx),%rax,%r15 2363 movq %r9,%rdx 2364 movq %r11,-24(%rbx) 2365 adcxq %rax,%r12 2366 adoxq %rbp,%r15 2367 leaq 32(%rcx),%rcx 2368 movq %r12,-16(%rbx) 2369 jmp .Lmulx4x_1st 2370 2371.align 32 2372.Lmulx4x_1st: 2373 adcxq %rbp,%r15 2374 mulxq 0(%rsi),%r10,%rax 2375 adcxq %r14,%r10 2376 mulxq 8(%rsi),%r11,%r14 2377 adcxq %rax,%r11 2378 mulxq 16(%rsi),%r12,%rax 2379 adcxq %r14,%r12 2380 mulxq 24(%rsi),%r13,%r14 2381.byte 0x67,0x67 2382 movq %r8,%rdx 2383 adcxq %rax,%r13 2384 adcxq %rbp,%r14 2385 leaq 32(%rsi),%rsi 2386 leaq 32(%rbx),%rbx 2387 2388 adoxq %r15,%r10 2389 mulxq 0(%rcx),%rax,%r15 2390 adcxq %rax,%r10 2391 adoxq %r15,%r11 2392 mulxq 8(%rcx),%rax,%r15 2393 adcxq %rax,%r11 2394 adoxq %r15,%r12 2395 mulxq 16(%rcx),%rax,%r15 2396 movq %r10,-40(%rbx) 2397 adcxq %rax,%r12 2398 movq %r11,-32(%rbx) 2399 adoxq %r15,%r13 2400 mulxq 24(%rcx),%rax,%r15 2401 movq %r9,%rdx 2402 movq %r12,-24(%rbx) 2403 adcxq %rax,%r13 2404 adoxq %rbp,%r15 2405 leaq 32(%rcx),%rcx 2406 movq %r13,-16(%rbx) 2407 2408 decq %rdi 2409 jnz .Lmulx4x_1st 2410 2411 movq 8(%rsp),%rax 2412 adcq %rbp,%r15 2413 leaq (%rsi,%rax,1),%rsi 2414 addq %r15,%r14 2415 movq 8+8(%rsp),%rdi 2416 adcq %rbp,%rbp 2417 movq %r14,-8(%rbx) 2418 jmp .Lmulx4x_outer 2419 2420.align 32 2421.Lmulx4x_outer: 2422 leaq 16-256(%rbx),%r10 2423 pxor %xmm4,%xmm4 2424.byte 0x67,0x67 2425 pxor %xmm5,%xmm5 2426 movdqa -128(%rdi),%xmm0 2427 movdqa -112(%rdi),%xmm1 2428 movdqa -96(%rdi),%xmm2 2429 pand 256(%r10),%xmm0 2430 movdqa -80(%rdi),%xmm3 2431 pand 272(%r10),%xmm1 2432 por %xmm0,%xmm4 2433 pand 288(%r10),%xmm2 2434 por %xmm1,%xmm5 2435 pand 304(%r10),%xmm3 2436 por %xmm2,%xmm4 2437 por %xmm3,%xmm5 2438 movdqa -64(%rdi),%xmm0 2439 movdqa -48(%rdi),%xmm1 2440 movdqa -32(%rdi),%xmm2 2441 pand 320(%r10),%xmm0 2442 movdqa -16(%rdi),%xmm3 2443 pand 336(%r10),%xmm1 2444 por %xmm0,%xmm4 2445 pand 352(%r10),%xmm2 2446 por %xmm1,%xmm5 2447 pand 368(%r10),%xmm3 2448 por %xmm2,%xmm4 2449 por %xmm3,%xmm5 2450 movdqa 0(%rdi),%xmm0 2451 movdqa 16(%rdi),%xmm1 2452 movdqa 32(%rdi),%xmm2 2453 pand 384(%r10),%xmm0 2454 movdqa 48(%rdi),%xmm3 2455 pand 400(%r10),%xmm1 2456 por %xmm0,%xmm4 2457 pand 416(%r10),%xmm2 2458 por %xmm1,%xmm5 2459 pand 432(%r10),%xmm3 2460 por %xmm2,%xmm4 2461 por %xmm3,%xmm5 2462 movdqa 64(%rdi),%xmm0 2463 movdqa 80(%rdi),%xmm1 2464 movdqa 96(%rdi),%xmm2 2465 pand 448(%r10),%xmm0 2466 movdqa 112(%rdi),%xmm3 2467 pand 464(%r10),%xmm1 2468 por %xmm0,%xmm4 2469 pand 480(%r10),%xmm2 2470 por %xmm1,%xmm5 2471 pand 496(%r10),%xmm3 2472 por %xmm2,%xmm4 2473 por %xmm3,%xmm5 2474 por %xmm5,%xmm4 2475 2476 pshufd $0x4e,%xmm4,%xmm0 2477 por %xmm4,%xmm0 2478 leaq 256(%rdi),%rdi 2479.byte 102,72,15,126,194 2480 2481 movq %rbp,(%rbx) 2482 leaq 32(%rbx,%rax,1),%rbx 2483 mulxq 0(%rsi),%r8,%r11 2484 xorq %rbp,%rbp 2485 movq %rdx,%r9 2486 mulxq 8(%rsi),%r14,%r12 2487 adoxq -32(%rbx),%r8 2488 adcxq %r14,%r11 2489 mulxq 16(%rsi),%r15,%r13 2490 adoxq -24(%rbx),%r11 2491 adcxq %r15,%r12 2492 mulxq 24(%rsi),%rdx,%r14 2493 adoxq -16(%rbx),%r12 2494 adcxq %rdx,%r13 2495 leaq (%rcx,%rax,1),%rcx 2496 leaq 32(%rsi),%rsi 2497 adoxq -8(%rbx),%r13 2498 adcxq %rbp,%r14 2499 adoxq %rbp,%r14 2500 2501 movq %r8,%r15 2502 imulq 32+8(%rsp),%r8 2503 2504 movq %r8,%rdx 2505 xorq %rbp,%rbp 2506 movq %rdi,8+8(%rsp) 2507 2508 mulxq 0(%rcx),%rax,%r10 2509 adcxq %rax,%r15 2510 adoxq %r11,%r10 2511 mulxq 8(%rcx),%rax,%r11 2512 adcxq %rax,%r10 2513 adoxq %r12,%r11 2514 mulxq 16(%rcx),%rax,%r12 2515 adcxq %rax,%r11 2516 adoxq %r13,%r12 2517 mulxq 24(%rcx),%rax,%r15 2518 movq %r9,%rdx 2519 movq 24+8(%rsp),%rdi 2520 movq %r10,-32(%rbx) 2521 adcxq %rax,%r12 2522 movq %r11,-24(%rbx) 2523 adoxq %rbp,%r15 2524 movq %r12,-16(%rbx) 2525 leaq 32(%rcx),%rcx 2526 jmp .Lmulx4x_inner 2527 2528.align 32 2529.Lmulx4x_inner: 2530 mulxq 0(%rsi),%r10,%rax 2531 adcxq %rbp,%r15 2532 adoxq %r14,%r10 2533 mulxq 8(%rsi),%r11,%r14 2534 adcxq 0(%rbx),%r10 2535 adoxq %rax,%r11 2536 mulxq 16(%rsi),%r12,%rax 2537 adcxq 8(%rbx),%r11 2538 adoxq %r14,%r12 2539 mulxq 24(%rsi),%r13,%r14 2540 movq %r8,%rdx 2541 adcxq 16(%rbx),%r12 2542 adoxq %rax,%r13 2543 adcxq 24(%rbx),%r13 2544 adoxq %rbp,%r14 2545 leaq 32(%rsi),%rsi 2546 leaq 32(%rbx),%rbx 2547 adcxq %rbp,%r14 2548 2549 adoxq %r15,%r10 2550 mulxq 0(%rcx),%rax,%r15 2551 adcxq %rax,%r10 2552 adoxq %r15,%r11 2553 mulxq 8(%rcx),%rax,%r15 2554 adcxq %rax,%r11 2555 adoxq %r15,%r12 2556 mulxq 16(%rcx),%rax,%r15 2557 movq %r10,-40(%rbx) 2558 adcxq %rax,%r12 2559 adoxq %r15,%r13 2560 movq %r11,-32(%rbx) 2561 mulxq 24(%rcx),%rax,%r15 2562 movq %r9,%rdx 2563 leaq 32(%rcx),%rcx 2564 movq %r12,-24(%rbx) 2565 adcxq %rax,%r13 2566 adoxq %rbp,%r15 2567 movq %r13,-16(%rbx) 2568 2569 decq %rdi 2570 jnz .Lmulx4x_inner 2571 2572 movq 0+8(%rsp),%rax 2573 adcq %rbp,%r15 2574 subq 0(%rbx),%rdi 2575 movq 8+8(%rsp),%rdi 2576 movq 16+8(%rsp),%r10 2577 adcq %r15,%r14 2578 leaq (%rsi,%rax,1),%rsi 2579 adcq %rbp,%rbp 2580 movq %r14,-8(%rbx) 2581 2582 cmpq %r10,%rdi 2583 jb .Lmulx4x_outer 2584 2585 movq -8(%rcx),%r10 2586 movq %rbp,%r8 2587 movq (%rcx,%rax,1),%r12 2588 leaq (%rcx,%rax,1),%rbp 2589 movq %rax,%rcx 2590 leaq (%rbx,%rax,1),%rdi 2591 xorl %eax,%eax 2592 xorq %r15,%r15 2593 subq %r14,%r10 2594 adcq %r15,%r15 2595 orq %r15,%r8 2596 sarq $3+2,%rcx 2597 subq %r8,%rax 2598 movq 56+8(%rsp),%rdx 2599 decq %r12 2600 movq 8(%rbp),%r13 2601 xorq %r8,%r8 2602 movq 16(%rbp),%r14 2603 movq 24(%rbp),%r15 2604 jmp .Lsqrx4x_sub_entry 2605.cfi_endproc 2606.size mulx4x_internal,.-mulx4x_internal 2607.type bn_powerx5,@function 2608.align 32 2609bn_powerx5: 2610.cfi_startproc 2611 movq %rsp,%rax 2612.cfi_def_cfa_register %rax 2613.Lpowerx5_enter: 2614 pushq %rbx 2615.cfi_offset %rbx,-16 2616 pushq %rbp 2617.cfi_offset %rbp,-24 2618 pushq %r12 2619.cfi_offset %r12,-32 2620 pushq %r13 2621.cfi_offset %r13,-40 2622 pushq %r14 2623.cfi_offset %r14,-48 2624 pushq %r15 2625.cfi_offset %r15,-56 2626.Lpowerx5_prologue: 2627 2628 shll $3,%r9d 2629 leaq (%r9,%r9,2),%r10 2630 negq %r9 2631 movq (%r8),%r8 2632 2633 2634 2635 2636 2637 2638 2639 2640 leaq -320(%rsp,%r9,2),%r11 2641 movq %rsp,%rbp 2642 subq %rdi,%r11 2643 andq $4095,%r11 2644 cmpq %r11,%r10 2645 jb .Lpwrx_sp_alt 2646 subq %r11,%rbp 2647 leaq -320(%rbp,%r9,2),%rbp 2648 jmp .Lpwrx_sp_done 2649 2650.align 32 2651.Lpwrx_sp_alt: 2652 leaq 4096-320(,%r9,2),%r10 2653 leaq -320(%rbp,%r9,2),%rbp 2654 subq %r10,%r11 2655 movq $0,%r10 2656 cmovcq %r10,%r11 2657 subq %r11,%rbp 2658.Lpwrx_sp_done: 2659 andq $-64,%rbp 2660 movq %rsp,%r11 2661 subq %rbp,%r11 2662 andq $-4096,%r11 2663 leaq (%r11,%rbp,1),%rsp 2664 movq (%rsp),%r10 2665 cmpq %rbp,%rsp 2666 ja .Lpwrx_page_walk 2667 jmp .Lpwrx_page_walk_done 2668 2669.Lpwrx_page_walk: 2670 leaq -4096(%rsp),%rsp 2671 movq (%rsp),%r10 2672 cmpq %rbp,%rsp 2673 ja .Lpwrx_page_walk 2674.Lpwrx_page_walk_done: 2675 2676 movq %r9,%r10 2677 negq %r9 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 pxor %xmm0,%xmm0 2691.byte 102,72,15,110,207 2692.byte 102,72,15,110,209 2693.byte 102,73,15,110,218 2694.byte 102,72,15,110,226 2695 movq %r8,32(%rsp) 2696 movq %rax,40(%rsp) 2697.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2698.Lpowerx5_body: 2699 2700 call __bn_sqrx8x_internal 2701 call __bn_postx4x_internal 2702 call __bn_sqrx8x_internal 2703 call __bn_postx4x_internal 2704 call __bn_sqrx8x_internal 2705 call __bn_postx4x_internal 2706 call __bn_sqrx8x_internal 2707 call __bn_postx4x_internal 2708 call __bn_sqrx8x_internal 2709 call __bn_postx4x_internal 2710 2711 movq %r10,%r9 2712 movq %rsi,%rdi 2713.byte 102,72,15,126,209 2714.byte 102,72,15,126,226 2715 movq 40(%rsp),%rax 2716 2717 call mulx4x_internal 2718 2719 movq 40(%rsp),%rsi 2720.cfi_def_cfa %rsi,8 2721 movq $1,%rax 2722 2723 movq -48(%rsi),%r15 2724.cfi_restore %r15 2725 movq -40(%rsi),%r14 2726.cfi_restore %r14 2727 movq -32(%rsi),%r13 2728.cfi_restore %r13 2729 movq -24(%rsi),%r12 2730.cfi_restore %r12 2731 movq -16(%rsi),%rbp 2732.cfi_restore %rbp 2733 movq -8(%rsi),%rbx 2734.cfi_restore %rbx 2735 leaq (%rsi),%rsp 2736.cfi_def_cfa_register %rsp 2737.Lpowerx5_epilogue: 2738 ret 2739.cfi_endproc 2740.size bn_powerx5,.-bn_powerx5 2741 2742.globl bn_sqrx8x_internal 2743.hidden bn_sqrx8x_internal 2744.hidden bn_sqrx8x_internal 2745.type bn_sqrx8x_internal,@function 2746.align 32 2747bn_sqrx8x_internal: 2748__bn_sqrx8x_internal: 2749.cfi_startproc 2750_CET_ENDBR 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 leaq 48+8(%rsp),%rdi 2792 leaq (%rsi,%r9,1),%rbp 2793 movq %r9,0+8(%rsp) 2794 movq %rbp,8+8(%rsp) 2795 jmp .Lsqr8x_zero_start 2796 2797.align 32 2798.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2799.Lsqrx8x_zero: 2800.byte 0x3e 2801 movdqa %xmm0,0(%rdi) 2802 movdqa %xmm0,16(%rdi) 2803 movdqa %xmm0,32(%rdi) 2804 movdqa %xmm0,48(%rdi) 2805.Lsqr8x_zero_start: 2806 movdqa %xmm0,64(%rdi) 2807 movdqa %xmm0,80(%rdi) 2808 movdqa %xmm0,96(%rdi) 2809 movdqa %xmm0,112(%rdi) 2810 leaq 128(%rdi),%rdi 2811 subq $64,%r9 2812 jnz .Lsqrx8x_zero 2813 2814 movq 0(%rsi),%rdx 2815 2816 xorq %r10,%r10 2817 xorq %r11,%r11 2818 xorq %r12,%r12 2819 xorq %r13,%r13 2820 xorq %r14,%r14 2821 xorq %r15,%r15 2822 leaq 48+8(%rsp),%rdi 2823 xorq %rbp,%rbp 2824 jmp .Lsqrx8x_outer_loop 2825 2826.align 32 2827.Lsqrx8x_outer_loop: 2828 mulxq 8(%rsi),%r8,%rax 2829 adcxq %r9,%r8 2830 adoxq %rax,%r10 2831 mulxq 16(%rsi),%r9,%rax 2832 adcxq %r10,%r9 2833 adoxq %rax,%r11 2834.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2835 adcxq %r11,%r10 2836 adoxq %rax,%r12 2837.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2838 adcxq %r12,%r11 2839 adoxq %rax,%r13 2840 mulxq 40(%rsi),%r12,%rax 2841 adcxq %r13,%r12 2842 adoxq %rax,%r14 2843 mulxq 48(%rsi),%r13,%rax 2844 adcxq %r14,%r13 2845 adoxq %r15,%rax 2846 mulxq 56(%rsi),%r14,%r15 2847 movq 8(%rsi),%rdx 2848 adcxq %rax,%r14 2849 adoxq %rbp,%r15 2850 adcq 64(%rdi),%r15 2851 movq %r8,8(%rdi) 2852 movq %r9,16(%rdi) 2853 sbbq %rcx,%rcx 2854 xorq %rbp,%rbp 2855 2856 2857 mulxq 16(%rsi),%r8,%rbx 2858 mulxq 24(%rsi),%r9,%rax 2859 adcxq %r10,%r8 2860 adoxq %rbx,%r9 2861 mulxq 32(%rsi),%r10,%rbx 2862 adcxq %r11,%r9 2863 adoxq %rax,%r10 2864.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 2865 adcxq %r12,%r10 2866 adoxq %rbx,%r11 2867.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 2868 adcxq %r13,%r11 2869 adoxq %r14,%r12 2870.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 2871 movq 16(%rsi),%rdx 2872 adcxq %rax,%r12 2873 adoxq %rbx,%r13 2874 adcxq %r15,%r13 2875 adoxq %rbp,%r14 2876 adcxq %rbp,%r14 2877 2878 movq %r8,24(%rdi) 2879 movq %r9,32(%rdi) 2880 2881 mulxq 24(%rsi),%r8,%rbx 2882 mulxq 32(%rsi),%r9,%rax 2883 adcxq %r10,%r8 2884 adoxq %rbx,%r9 2885 mulxq 40(%rsi),%r10,%rbx 2886 adcxq %r11,%r9 2887 adoxq %rax,%r10 2888.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 2889 adcxq %r12,%r10 2890 adoxq %r13,%r11 2891.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 2892.byte 0x3e 2893 movq 24(%rsi),%rdx 2894 adcxq %rbx,%r11 2895 adoxq %rax,%r12 2896 adcxq %r14,%r12 2897 movq %r8,40(%rdi) 2898 movq %r9,48(%rdi) 2899 mulxq 32(%rsi),%r8,%rax 2900 adoxq %rbp,%r13 2901 adcxq %rbp,%r13 2902 2903 mulxq 40(%rsi),%r9,%rbx 2904 adcxq %r10,%r8 2905 adoxq %rax,%r9 2906 mulxq 48(%rsi),%r10,%rax 2907 adcxq %r11,%r9 2908 adoxq %r12,%r10 2909 mulxq 56(%rsi),%r11,%r12 2910 movq 32(%rsi),%rdx 2911 movq 40(%rsi),%r14 2912 adcxq %rbx,%r10 2913 adoxq %rax,%r11 2914 movq 48(%rsi),%r15 2915 adcxq %r13,%r11 2916 adoxq %rbp,%r12 2917 adcxq %rbp,%r12 2918 2919 movq %r8,56(%rdi) 2920 movq %r9,64(%rdi) 2921 2922 mulxq %r14,%r9,%rax 2923 movq 56(%rsi),%r8 2924 adcxq %r10,%r9 2925 mulxq %r15,%r10,%rbx 2926 adoxq %rax,%r10 2927 adcxq %r11,%r10 2928 mulxq %r8,%r11,%rax 2929 movq %r14,%rdx 2930 adoxq %rbx,%r11 2931 adcxq %r12,%r11 2932 2933 adcxq %rbp,%rax 2934 2935 mulxq %r15,%r14,%rbx 2936 mulxq %r8,%r12,%r13 2937 movq %r15,%rdx 2938 leaq 64(%rsi),%rsi 2939 adcxq %r14,%r11 2940 adoxq %rbx,%r12 2941 adcxq %rax,%r12 2942 adoxq %rbp,%r13 2943 2944.byte 0x67,0x67 2945 mulxq %r8,%r8,%r14 2946 adcxq %r8,%r13 2947 adcxq %rbp,%r14 2948 2949 cmpq 8+8(%rsp),%rsi 2950 je .Lsqrx8x_outer_break 2951 2952 negq %rcx 2953 movq $-8,%rcx 2954 movq %rbp,%r15 2955 movq 64(%rdi),%r8 2956 adcxq 72(%rdi),%r9 2957 adcxq 80(%rdi),%r10 2958 adcxq 88(%rdi),%r11 2959 adcq 96(%rdi),%r12 2960 adcq 104(%rdi),%r13 2961 adcq 112(%rdi),%r14 2962 adcq 120(%rdi),%r15 2963 leaq (%rsi),%rbp 2964 leaq 128(%rdi),%rdi 2965 sbbq %rax,%rax 2966 2967 movq -64(%rsi),%rdx 2968 movq %rax,16+8(%rsp) 2969 movq %rdi,24+8(%rsp) 2970 2971 2972 xorl %eax,%eax 2973 jmp .Lsqrx8x_loop 2974 2975.align 32 2976.Lsqrx8x_loop: 2977 movq %r8,%rbx 2978 mulxq 0(%rbp),%rax,%r8 2979 adcxq %rax,%rbx 2980 adoxq %r9,%r8 2981 2982 mulxq 8(%rbp),%rax,%r9 2983 adcxq %rax,%r8 2984 adoxq %r10,%r9 2985 2986 mulxq 16(%rbp),%rax,%r10 2987 adcxq %rax,%r9 2988 adoxq %r11,%r10 2989 2990 mulxq 24(%rbp),%rax,%r11 2991 adcxq %rax,%r10 2992 adoxq %r12,%r11 2993 2994.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 2995 adcxq %rax,%r11 2996 adoxq %r13,%r12 2997 2998 mulxq 40(%rbp),%rax,%r13 2999 adcxq %rax,%r12 3000 adoxq %r14,%r13 3001 3002 mulxq 48(%rbp),%rax,%r14 3003 movq %rbx,(%rdi,%rcx,8) 3004 movl $0,%ebx 3005 adcxq %rax,%r13 3006 adoxq %r15,%r14 3007 3008.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3009 movq 8(%rsi,%rcx,8),%rdx 3010 adcxq %rax,%r14 3011 adoxq %rbx,%r15 3012 adcxq %rbx,%r15 3013 3014.byte 0x67 3015 incq %rcx 3016 jnz .Lsqrx8x_loop 3017 3018 leaq 64(%rbp),%rbp 3019 movq $-8,%rcx 3020 cmpq 8+8(%rsp),%rbp 3021 je .Lsqrx8x_break 3022 3023 subq 16+8(%rsp),%rbx 3024.byte 0x66 3025 movq -64(%rsi),%rdx 3026 adcxq 0(%rdi),%r8 3027 adcxq 8(%rdi),%r9 3028 adcq 16(%rdi),%r10 3029 adcq 24(%rdi),%r11 3030 adcq 32(%rdi),%r12 3031 adcq 40(%rdi),%r13 3032 adcq 48(%rdi),%r14 3033 adcq 56(%rdi),%r15 3034 leaq 64(%rdi),%rdi 3035.byte 0x67 3036 sbbq %rax,%rax 3037 xorl %ebx,%ebx 3038 movq %rax,16+8(%rsp) 3039 jmp .Lsqrx8x_loop 3040 3041.align 32 3042.Lsqrx8x_break: 3043 xorq %rbp,%rbp 3044 subq 16+8(%rsp),%rbx 3045 adcxq %rbp,%r8 3046 movq 24+8(%rsp),%rcx 3047 adcxq %rbp,%r9 3048 movq 0(%rsi),%rdx 3049 adcq $0,%r10 3050 movq %r8,0(%rdi) 3051 adcq $0,%r11 3052 adcq $0,%r12 3053 adcq $0,%r13 3054 adcq $0,%r14 3055 adcq $0,%r15 3056 cmpq %rcx,%rdi 3057 je .Lsqrx8x_outer_loop 3058 3059 movq %r9,8(%rdi) 3060 movq 8(%rcx),%r9 3061 movq %r10,16(%rdi) 3062 movq 16(%rcx),%r10 3063 movq %r11,24(%rdi) 3064 movq 24(%rcx),%r11 3065 movq %r12,32(%rdi) 3066 movq 32(%rcx),%r12 3067 movq %r13,40(%rdi) 3068 movq 40(%rcx),%r13 3069 movq %r14,48(%rdi) 3070 movq 48(%rcx),%r14 3071 movq %r15,56(%rdi) 3072 movq 56(%rcx),%r15 3073 movq %rcx,%rdi 3074 jmp .Lsqrx8x_outer_loop 3075 3076.align 32 3077.Lsqrx8x_outer_break: 3078 movq %r9,72(%rdi) 3079.byte 102,72,15,126,217 3080 movq %r10,80(%rdi) 3081 movq %r11,88(%rdi) 3082 movq %r12,96(%rdi) 3083 movq %r13,104(%rdi) 3084 movq %r14,112(%rdi) 3085 leaq 48+8(%rsp),%rdi 3086 movq (%rsi,%rcx,1),%rdx 3087 3088 movq 8(%rdi),%r11 3089 xorq %r10,%r10 3090 movq 0+8(%rsp),%r9 3091 adoxq %r11,%r11 3092 movq 16(%rdi),%r12 3093 movq 24(%rdi),%r13 3094 3095 3096.align 32 3097.Lsqrx4x_shift_n_add: 3098 mulxq %rdx,%rax,%rbx 3099 adoxq %r12,%r12 3100 adcxq %r10,%rax 3101.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3102.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3103 adoxq %r13,%r13 3104 adcxq %r11,%rbx 3105 movq 40(%rdi),%r11 3106 movq %rax,0(%rdi) 3107 movq %rbx,8(%rdi) 3108 3109 mulxq %rdx,%rax,%rbx 3110 adoxq %r10,%r10 3111 adcxq %r12,%rax 3112 movq 16(%rsi,%rcx,1),%rdx 3113 movq 48(%rdi),%r12 3114 adoxq %r11,%r11 3115 adcxq %r13,%rbx 3116 movq 56(%rdi),%r13 3117 movq %rax,16(%rdi) 3118 movq %rbx,24(%rdi) 3119 3120 mulxq %rdx,%rax,%rbx 3121 adoxq %r12,%r12 3122 adcxq %r10,%rax 3123 movq 24(%rsi,%rcx,1),%rdx 3124 leaq 32(%rcx),%rcx 3125 movq 64(%rdi),%r10 3126 adoxq %r13,%r13 3127 adcxq %r11,%rbx 3128 movq 72(%rdi),%r11 3129 movq %rax,32(%rdi) 3130 movq %rbx,40(%rdi) 3131 3132 mulxq %rdx,%rax,%rbx 3133 adoxq %r10,%r10 3134 adcxq %r12,%rax 3135 jrcxz .Lsqrx4x_shift_n_add_break 3136.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3137 adoxq %r11,%r11 3138 adcxq %r13,%rbx 3139 movq 80(%rdi),%r12 3140 movq 88(%rdi),%r13 3141 movq %rax,48(%rdi) 3142 movq %rbx,56(%rdi) 3143 leaq 64(%rdi),%rdi 3144 nop 3145 jmp .Lsqrx4x_shift_n_add 3146 3147.align 32 3148.Lsqrx4x_shift_n_add_break: 3149 adcxq %r13,%rbx 3150 movq %rax,48(%rdi) 3151 movq %rbx,56(%rdi) 3152 leaq 64(%rdi),%rdi 3153.byte 102,72,15,126,213 3154__bn_sqrx8x_reduction: 3155 xorl %eax,%eax 3156 movq 32+8(%rsp),%rbx 3157 movq 48+8(%rsp),%rdx 3158 leaq -64(%rbp,%r9,1),%rcx 3159 3160 movq %rcx,0+8(%rsp) 3161 movq %rdi,8+8(%rsp) 3162 3163 leaq 48+8(%rsp),%rdi 3164 jmp .Lsqrx8x_reduction_loop 3165 3166.align 32 3167.Lsqrx8x_reduction_loop: 3168 movq 8(%rdi),%r9 3169 movq 16(%rdi),%r10 3170 movq 24(%rdi),%r11 3171 movq 32(%rdi),%r12 3172 movq %rdx,%r8 3173 imulq %rbx,%rdx 3174 movq 40(%rdi),%r13 3175 movq 48(%rdi),%r14 3176 movq 56(%rdi),%r15 3177 movq %rax,24+8(%rsp) 3178 3179 leaq 64(%rdi),%rdi 3180 xorq %rsi,%rsi 3181 movq $-8,%rcx 3182 jmp .Lsqrx8x_reduce 3183 3184.align 32 3185.Lsqrx8x_reduce: 3186 movq %r8,%rbx 3187 mulxq 0(%rbp),%rax,%r8 3188 adcxq %rbx,%rax 3189 adoxq %r9,%r8 3190 3191 mulxq 8(%rbp),%rbx,%r9 3192 adcxq %rbx,%r8 3193 adoxq %r10,%r9 3194 3195 mulxq 16(%rbp),%rbx,%r10 3196 adcxq %rbx,%r9 3197 adoxq %r11,%r10 3198 3199 mulxq 24(%rbp),%rbx,%r11 3200 adcxq %rbx,%r10 3201 adoxq %r12,%r11 3202 3203.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3204 movq %rdx,%rax 3205 movq %r8,%rdx 3206 adcxq %rbx,%r11 3207 adoxq %r13,%r12 3208 3209 mulxq 32+8(%rsp),%rbx,%rdx 3210 movq %rax,%rdx 3211 movq %rax,64+48+8(%rsp,%rcx,8) 3212 3213 mulxq 40(%rbp),%rax,%r13 3214 adcxq %rax,%r12 3215 adoxq %r14,%r13 3216 3217 mulxq 48(%rbp),%rax,%r14 3218 adcxq %rax,%r13 3219 adoxq %r15,%r14 3220 3221 mulxq 56(%rbp),%rax,%r15 3222 movq %rbx,%rdx 3223 adcxq %rax,%r14 3224 adoxq %rsi,%r15 3225 adcxq %rsi,%r15 3226 3227.byte 0x67,0x67,0x67 3228 incq %rcx 3229 jnz .Lsqrx8x_reduce 3230 3231 movq %rsi,%rax 3232 cmpq 0+8(%rsp),%rbp 3233 jae .Lsqrx8x_no_tail 3234 3235 movq 48+8(%rsp),%rdx 3236 addq 0(%rdi),%r8 3237 leaq 64(%rbp),%rbp 3238 movq $-8,%rcx 3239 adcxq 8(%rdi),%r9 3240 adcxq 16(%rdi),%r10 3241 adcq 24(%rdi),%r11 3242 adcq 32(%rdi),%r12 3243 adcq 40(%rdi),%r13 3244 adcq 48(%rdi),%r14 3245 adcq 56(%rdi),%r15 3246 leaq 64(%rdi),%rdi 3247 sbbq %rax,%rax 3248 3249 xorq %rsi,%rsi 3250 movq %rax,16+8(%rsp) 3251 jmp .Lsqrx8x_tail 3252 3253.align 32 3254.Lsqrx8x_tail: 3255 movq %r8,%rbx 3256 mulxq 0(%rbp),%rax,%r8 3257 adcxq %rax,%rbx 3258 adoxq %r9,%r8 3259 3260 mulxq 8(%rbp),%rax,%r9 3261 adcxq %rax,%r8 3262 adoxq %r10,%r9 3263 3264 mulxq 16(%rbp),%rax,%r10 3265 adcxq %rax,%r9 3266 adoxq %r11,%r10 3267 3268 mulxq 24(%rbp),%rax,%r11 3269 adcxq %rax,%r10 3270 adoxq %r12,%r11 3271 3272.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3273 adcxq %rax,%r11 3274 adoxq %r13,%r12 3275 3276 mulxq 40(%rbp),%rax,%r13 3277 adcxq %rax,%r12 3278 adoxq %r14,%r13 3279 3280 mulxq 48(%rbp),%rax,%r14 3281 adcxq %rax,%r13 3282 adoxq %r15,%r14 3283 3284 mulxq 56(%rbp),%rax,%r15 3285 movq 72+48+8(%rsp,%rcx,8),%rdx 3286 adcxq %rax,%r14 3287 adoxq %rsi,%r15 3288 movq %rbx,(%rdi,%rcx,8) 3289 movq %r8,%rbx 3290 adcxq %rsi,%r15 3291 3292 incq %rcx 3293 jnz .Lsqrx8x_tail 3294 3295 cmpq 0+8(%rsp),%rbp 3296 jae .Lsqrx8x_tail_done 3297 3298 subq 16+8(%rsp),%rsi 3299 movq 48+8(%rsp),%rdx 3300 leaq 64(%rbp),%rbp 3301 adcq 0(%rdi),%r8 3302 adcq 8(%rdi),%r9 3303 adcq 16(%rdi),%r10 3304 adcq 24(%rdi),%r11 3305 adcq 32(%rdi),%r12 3306 adcq 40(%rdi),%r13 3307 adcq 48(%rdi),%r14 3308 adcq 56(%rdi),%r15 3309 leaq 64(%rdi),%rdi 3310 sbbq %rax,%rax 3311 subq $8,%rcx 3312 3313 xorq %rsi,%rsi 3314 movq %rax,16+8(%rsp) 3315 jmp .Lsqrx8x_tail 3316 3317.align 32 3318.Lsqrx8x_tail_done: 3319 xorq %rax,%rax 3320 addq 24+8(%rsp),%r8 3321 adcq $0,%r9 3322 adcq $0,%r10 3323 adcq $0,%r11 3324 adcq $0,%r12 3325 adcq $0,%r13 3326 adcq $0,%r14 3327 adcq $0,%r15 3328 adcq $0,%rax 3329 3330 subq 16+8(%rsp),%rsi 3331.Lsqrx8x_no_tail: 3332 adcq 0(%rdi),%r8 3333.byte 102,72,15,126,217 3334 adcq 8(%rdi),%r9 3335 movq 56(%rbp),%rsi 3336.byte 102,72,15,126,213 3337 adcq 16(%rdi),%r10 3338 adcq 24(%rdi),%r11 3339 adcq 32(%rdi),%r12 3340 adcq 40(%rdi),%r13 3341 adcq 48(%rdi),%r14 3342 adcq 56(%rdi),%r15 3343 adcq $0,%rax 3344 3345 movq 32+8(%rsp),%rbx 3346 movq 64(%rdi,%rcx,1),%rdx 3347 3348 movq %r8,0(%rdi) 3349 leaq 64(%rdi),%r8 3350 movq %r9,8(%rdi) 3351 movq %r10,16(%rdi) 3352 movq %r11,24(%rdi) 3353 movq %r12,32(%rdi) 3354 movq %r13,40(%rdi) 3355 movq %r14,48(%rdi) 3356 movq %r15,56(%rdi) 3357 3358 leaq 64(%rdi,%rcx,1),%rdi 3359 cmpq 8+8(%rsp),%r8 3360 jb .Lsqrx8x_reduction_loop 3361 ret 3362.cfi_endproc 3363.size bn_sqrx8x_internal,.-bn_sqrx8x_internal 3364.align 32 3365.type __bn_postx4x_internal,@function 3366__bn_postx4x_internal: 3367.cfi_startproc 3368 movq 0(%rbp),%r12 3369 movq %rcx,%r10 3370 movq %rcx,%r9 3371 negq %rax 3372 sarq $3+2,%rcx 3373 3374.byte 102,72,15,126,202 3375.byte 102,72,15,126,206 3376 decq %r12 3377 movq 8(%rbp),%r13 3378 xorq %r8,%r8 3379 movq 16(%rbp),%r14 3380 movq 24(%rbp),%r15 3381 jmp .Lsqrx4x_sub_entry 3382 3383.align 16 3384.Lsqrx4x_sub: 3385 movq 0(%rbp),%r12 3386 movq 8(%rbp),%r13 3387 movq 16(%rbp),%r14 3388 movq 24(%rbp),%r15 3389.Lsqrx4x_sub_entry: 3390 andnq %rax,%r12,%r12 3391 leaq 32(%rbp),%rbp 3392 andnq %rax,%r13,%r13 3393 andnq %rax,%r14,%r14 3394 andnq %rax,%r15,%r15 3395 3396 negq %r8 3397 adcq 0(%rdi),%r12 3398 adcq 8(%rdi),%r13 3399 adcq 16(%rdi),%r14 3400 adcq 24(%rdi),%r15 3401 movq %r12,0(%rdx) 3402 leaq 32(%rdi),%rdi 3403 movq %r13,8(%rdx) 3404 sbbq %r8,%r8 3405 movq %r14,16(%rdx) 3406 movq %r15,24(%rdx) 3407 leaq 32(%rdx),%rdx 3408 3409 incq %rcx 3410 jnz .Lsqrx4x_sub 3411 3412 negq %r9 3413 3414 ret 3415.cfi_endproc 3416.size __bn_postx4x_internal,.-__bn_postx4x_internal 3417.globl bn_scatter5 3418.hidden bn_scatter5 3419.type bn_scatter5,@function 3420.align 16 3421bn_scatter5: 3422.cfi_startproc 3423_CET_ENDBR 3424 cmpl $0,%esi 3425 jz .Lscatter_epilogue 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 leaq (%rdx,%rcx,8),%rdx 3436.Lscatter: 3437 movq (%rdi),%rax 3438 leaq 8(%rdi),%rdi 3439 movq %rax,(%rdx) 3440 leaq 256(%rdx),%rdx 3441 subl $1,%esi 3442 jnz .Lscatter 3443.Lscatter_epilogue: 3444 ret 3445.cfi_endproc 3446.size bn_scatter5,.-bn_scatter5 3447 3448.globl bn_gather5 3449.hidden bn_gather5 3450.type bn_gather5,@function 3451.align 32 3452bn_gather5: 3453.cfi_startproc 3454.LSEH_begin_bn_gather5: 3455_CET_ENDBR 3456 3457.byte 0x4c,0x8d,0x14,0x24 3458.cfi_def_cfa_register %r10 3459.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 3460 leaq .Linc(%rip),%rax 3461 andq $-16,%rsp 3462 3463 movd %ecx,%xmm5 3464 movdqa 0(%rax),%xmm0 3465 movdqa 16(%rax),%xmm1 3466 leaq 128(%rdx),%r11 3467 leaq 128(%rsp),%rax 3468 3469 pshufd $0,%xmm5,%xmm5 3470 movdqa %xmm1,%xmm4 3471 movdqa %xmm1,%xmm2 3472 paddd %xmm0,%xmm1 3473 pcmpeqd %xmm5,%xmm0 3474 movdqa %xmm4,%xmm3 3475 3476 paddd %xmm1,%xmm2 3477 pcmpeqd %xmm5,%xmm1 3478 movdqa %xmm0,-128(%rax) 3479 movdqa %xmm4,%xmm0 3480 3481 paddd %xmm2,%xmm3 3482 pcmpeqd %xmm5,%xmm2 3483 movdqa %xmm1,-112(%rax) 3484 movdqa %xmm4,%xmm1 3485 3486 paddd %xmm3,%xmm0 3487 pcmpeqd %xmm5,%xmm3 3488 movdqa %xmm2,-96(%rax) 3489 movdqa %xmm4,%xmm2 3490 paddd %xmm0,%xmm1 3491 pcmpeqd %xmm5,%xmm0 3492 movdqa %xmm3,-80(%rax) 3493 movdqa %xmm4,%xmm3 3494 3495 paddd %xmm1,%xmm2 3496 pcmpeqd %xmm5,%xmm1 3497 movdqa %xmm0,-64(%rax) 3498 movdqa %xmm4,%xmm0 3499 3500 paddd %xmm2,%xmm3 3501 pcmpeqd %xmm5,%xmm2 3502 movdqa %xmm1,-48(%rax) 3503 movdqa %xmm4,%xmm1 3504 3505 paddd %xmm3,%xmm0 3506 pcmpeqd %xmm5,%xmm3 3507 movdqa %xmm2,-32(%rax) 3508 movdqa %xmm4,%xmm2 3509 paddd %xmm0,%xmm1 3510 pcmpeqd %xmm5,%xmm0 3511 movdqa %xmm3,-16(%rax) 3512 movdqa %xmm4,%xmm3 3513 3514 paddd %xmm1,%xmm2 3515 pcmpeqd %xmm5,%xmm1 3516 movdqa %xmm0,0(%rax) 3517 movdqa %xmm4,%xmm0 3518 3519 paddd %xmm2,%xmm3 3520 pcmpeqd %xmm5,%xmm2 3521 movdqa %xmm1,16(%rax) 3522 movdqa %xmm4,%xmm1 3523 3524 paddd %xmm3,%xmm0 3525 pcmpeqd %xmm5,%xmm3 3526 movdqa %xmm2,32(%rax) 3527 movdqa %xmm4,%xmm2 3528 paddd %xmm0,%xmm1 3529 pcmpeqd %xmm5,%xmm0 3530 movdqa %xmm3,48(%rax) 3531 movdqa %xmm4,%xmm3 3532 3533 paddd %xmm1,%xmm2 3534 pcmpeqd %xmm5,%xmm1 3535 movdqa %xmm0,64(%rax) 3536 movdqa %xmm4,%xmm0 3537 3538 paddd %xmm2,%xmm3 3539 pcmpeqd %xmm5,%xmm2 3540 movdqa %xmm1,80(%rax) 3541 movdqa %xmm4,%xmm1 3542 3543 paddd %xmm3,%xmm0 3544 pcmpeqd %xmm5,%xmm3 3545 movdqa %xmm2,96(%rax) 3546 movdqa %xmm4,%xmm2 3547 movdqa %xmm3,112(%rax) 3548 jmp .Lgather 3549 3550.align 32 3551.Lgather: 3552 pxor %xmm4,%xmm4 3553 pxor %xmm5,%xmm5 3554 movdqa -128(%r11),%xmm0 3555 movdqa -112(%r11),%xmm1 3556 movdqa -96(%r11),%xmm2 3557 pand -128(%rax),%xmm0 3558 movdqa -80(%r11),%xmm3 3559 pand -112(%rax),%xmm1 3560 por %xmm0,%xmm4 3561 pand -96(%rax),%xmm2 3562 por %xmm1,%xmm5 3563 pand -80(%rax),%xmm3 3564 por %xmm2,%xmm4 3565 por %xmm3,%xmm5 3566 movdqa -64(%r11),%xmm0 3567 movdqa -48(%r11),%xmm1 3568 movdqa -32(%r11),%xmm2 3569 pand -64(%rax),%xmm0 3570 movdqa -16(%r11),%xmm3 3571 pand -48(%rax),%xmm1 3572 por %xmm0,%xmm4 3573 pand -32(%rax),%xmm2 3574 por %xmm1,%xmm5 3575 pand -16(%rax),%xmm3 3576 por %xmm2,%xmm4 3577 por %xmm3,%xmm5 3578 movdqa 0(%r11),%xmm0 3579 movdqa 16(%r11),%xmm1 3580 movdqa 32(%r11),%xmm2 3581 pand 0(%rax),%xmm0 3582 movdqa 48(%r11),%xmm3 3583 pand 16(%rax),%xmm1 3584 por %xmm0,%xmm4 3585 pand 32(%rax),%xmm2 3586 por %xmm1,%xmm5 3587 pand 48(%rax),%xmm3 3588 por %xmm2,%xmm4 3589 por %xmm3,%xmm5 3590 movdqa 64(%r11),%xmm0 3591 movdqa 80(%r11),%xmm1 3592 movdqa 96(%r11),%xmm2 3593 pand 64(%rax),%xmm0 3594 movdqa 112(%r11),%xmm3 3595 pand 80(%rax),%xmm1 3596 por %xmm0,%xmm4 3597 pand 96(%rax),%xmm2 3598 por %xmm1,%xmm5 3599 pand 112(%rax),%xmm3 3600 por %xmm2,%xmm4 3601 por %xmm3,%xmm5 3602 por %xmm5,%xmm4 3603 leaq 256(%r11),%r11 3604 3605 pshufd $0x4e,%xmm4,%xmm0 3606 por %xmm4,%xmm0 3607 movq %xmm0,(%rdi) 3608 leaq 8(%rdi),%rdi 3609 subl $1,%esi 3610 jnz .Lgather 3611 3612 leaq (%r10),%rsp 3613.cfi_def_cfa_register %rsp 3614 ret 3615.LSEH_end_bn_gather5: 3616.cfi_endproc 3617.size bn_gather5,.-bn_gather5 3618.section .rodata 3619.align 64 3620.Linc: 3621.long 0,0, 1,1 3622.long 2,2, 2,2 3623.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 3624.text 3625#endif 3626