1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#include <openssl/asm_base.h> 5 6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) 7.text 8 9.globl bn_mul_mont_gather5_nohw 10.hidden bn_mul_mont_gather5_nohw 11.type bn_mul_mont_gather5_nohw,@function 12.align 64 13bn_mul_mont_gather5_nohw: 14.cfi_startproc 15_CET_ENDBR 16 17 18 movl %r9d,%r9d 19 movq %rsp,%rax 20.cfi_def_cfa_register %rax 21 movd 8(%rsp),%xmm5 22 pushq %rbx 23.cfi_offset %rbx,-16 24 pushq %rbp 25.cfi_offset %rbp,-24 26 pushq %r12 27.cfi_offset %r12,-32 28 pushq %r13 29.cfi_offset %r13,-40 30 pushq %r14 31.cfi_offset %r14,-48 32 pushq %r15 33.cfi_offset %r15,-56 34 35 negq %r9 36 movq %rsp,%r11 37 leaq -280(%rsp,%r9,8),%r10 38 negq %r9 39 andq $-1024,%r10 40 41 42 43 44 45 46 47 48 49 subq %r10,%r11 50 andq $-4096,%r11 51 leaq (%r10,%r11,1),%rsp 52 movq (%rsp),%r11 53 cmpq %r10,%rsp 54 ja .Lmul_page_walk 55 jmp .Lmul_page_walk_done 56 57.Lmul_page_walk: 58 leaq -4096(%rsp),%rsp 59 movq (%rsp),%r11 60 cmpq %r10,%rsp 61 ja .Lmul_page_walk 62.Lmul_page_walk_done: 63 64 leaq .Linc(%rip),%r10 65 movq %rax,8(%rsp,%r9,8) 66.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 67.Lmul_body: 68 69 leaq 128(%rdx),%r12 70 movdqa 0(%r10),%xmm0 71 movdqa 16(%r10),%xmm1 72 leaq 24-112(%rsp,%r9,8),%r10 73 andq $-16,%r10 74 75 pshufd $0,%xmm5,%xmm5 76 movdqa %xmm1,%xmm4 77 movdqa %xmm1,%xmm2 78 paddd %xmm0,%xmm1 79 pcmpeqd %xmm5,%xmm0 80.byte 0x67 81 movdqa %xmm4,%xmm3 82 paddd %xmm1,%xmm2 83 pcmpeqd %xmm5,%xmm1 84 movdqa %xmm0,112(%r10) 85 movdqa %xmm4,%xmm0 86 87 paddd %xmm2,%xmm3 88 pcmpeqd %xmm5,%xmm2 89 movdqa %xmm1,128(%r10) 90 movdqa %xmm4,%xmm1 91 92 paddd %xmm3,%xmm0 93 pcmpeqd %xmm5,%xmm3 94 movdqa %xmm2,144(%r10) 95 movdqa %xmm4,%xmm2 96 97 paddd %xmm0,%xmm1 98 pcmpeqd %xmm5,%xmm0 99 movdqa %xmm3,160(%r10) 100 movdqa %xmm4,%xmm3 101 paddd %xmm1,%xmm2 102 pcmpeqd %xmm5,%xmm1 103 movdqa %xmm0,176(%r10) 104 movdqa %xmm4,%xmm0 105 106 paddd %xmm2,%xmm3 107 pcmpeqd %xmm5,%xmm2 108 movdqa %xmm1,192(%r10) 109 movdqa %xmm4,%xmm1 110 111 paddd %xmm3,%xmm0 112 pcmpeqd %xmm5,%xmm3 113 movdqa %xmm2,208(%r10) 114 movdqa %xmm4,%xmm2 115 116 paddd %xmm0,%xmm1 117 pcmpeqd %xmm5,%xmm0 118 movdqa %xmm3,224(%r10) 119 movdqa %xmm4,%xmm3 120 paddd %xmm1,%xmm2 121 pcmpeqd %xmm5,%xmm1 122 movdqa %xmm0,240(%r10) 123 movdqa %xmm4,%xmm0 124 125 paddd %xmm2,%xmm3 126 pcmpeqd %xmm5,%xmm2 127 movdqa %xmm1,256(%r10) 128 movdqa %xmm4,%xmm1 129 130 paddd %xmm3,%xmm0 131 pcmpeqd %xmm5,%xmm3 132 movdqa %xmm2,272(%r10) 133 movdqa %xmm4,%xmm2 134 135 paddd %xmm0,%xmm1 136 pcmpeqd %xmm5,%xmm0 137 movdqa %xmm3,288(%r10) 138 movdqa %xmm4,%xmm3 139 paddd %xmm1,%xmm2 140 pcmpeqd %xmm5,%xmm1 141 movdqa %xmm0,304(%r10) 142 143 paddd %xmm2,%xmm3 144.byte 0x67 145 pcmpeqd %xmm5,%xmm2 146 movdqa %xmm1,320(%r10) 147 148 pcmpeqd %xmm5,%xmm3 149 movdqa %xmm2,336(%r10) 150 pand 64(%r12),%xmm0 151 152 pand 80(%r12),%xmm1 153 pand 96(%r12),%xmm2 154 movdqa %xmm3,352(%r10) 155 pand 112(%r12),%xmm3 156 por %xmm2,%xmm0 157 por %xmm3,%xmm1 158 movdqa -128(%r12),%xmm4 159 movdqa -112(%r12),%xmm5 160 movdqa -96(%r12),%xmm2 161 pand 112(%r10),%xmm4 162 movdqa -80(%r12),%xmm3 163 pand 128(%r10),%xmm5 164 por %xmm4,%xmm0 165 pand 144(%r10),%xmm2 166 por %xmm5,%xmm1 167 pand 160(%r10),%xmm3 168 por %xmm2,%xmm0 169 por %xmm3,%xmm1 170 movdqa -64(%r12),%xmm4 171 movdqa -48(%r12),%xmm5 172 movdqa -32(%r12),%xmm2 173 pand 176(%r10),%xmm4 174 movdqa -16(%r12),%xmm3 175 pand 192(%r10),%xmm5 176 por %xmm4,%xmm0 177 pand 208(%r10),%xmm2 178 por %xmm5,%xmm1 179 pand 224(%r10),%xmm3 180 por %xmm2,%xmm0 181 por %xmm3,%xmm1 182 movdqa 0(%r12),%xmm4 183 movdqa 16(%r12),%xmm5 184 movdqa 32(%r12),%xmm2 185 pand 240(%r10),%xmm4 186 movdqa 48(%r12),%xmm3 187 pand 256(%r10),%xmm5 188 por %xmm4,%xmm0 189 pand 272(%r10),%xmm2 190 por %xmm5,%xmm1 191 pand 288(%r10),%xmm3 192 por %xmm2,%xmm0 193 por %xmm3,%xmm1 194 por %xmm1,%xmm0 195 196 pshufd $0x4e,%xmm0,%xmm1 197 por %xmm1,%xmm0 198 leaq 256(%r12),%r12 199.byte 102,72,15,126,195 200 201 movq (%r8),%r8 202 movq (%rsi),%rax 203 204 xorq %r14,%r14 205 xorq %r15,%r15 206 207 movq %r8,%rbp 208 mulq %rbx 209 movq %rax,%r10 210 movq (%rcx),%rax 211 212 imulq %r10,%rbp 213 movq %rdx,%r11 214 215 mulq %rbp 216 addq %rax,%r10 217 movq 8(%rsi),%rax 218 adcq $0,%rdx 219 movq %rdx,%r13 220 221 leaq 1(%r15),%r15 222 jmp .L1st_enter 223 224.align 16 225.L1st: 226 addq %rax,%r13 227 movq (%rsi,%r15,8),%rax 228 adcq $0,%rdx 229 addq %r11,%r13 230 movq %r10,%r11 231 adcq $0,%rdx 232 movq %r13,-16(%rsp,%r15,8) 233 movq %rdx,%r13 234 235.L1st_enter: 236 mulq %rbx 237 addq %rax,%r11 238 movq (%rcx,%r15,8),%rax 239 adcq $0,%rdx 240 leaq 1(%r15),%r15 241 movq %rdx,%r10 242 243 mulq %rbp 244 cmpq %r9,%r15 245 jne .L1st 246 247 248 addq %rax,%r13 249 adcq $0,%rdx 250 addq %r11,%r13 251 adcq $0,%rdx 252 movq %r13,-16(%rsp,%r9,8) 253 movq %rdx,%r13 254 movq %r10,%r11 255 256 xorq %rdx,%rdx 257 addq %r11,%r13 258 adcq $0,%rdx 259 movq %r13,-8(%rsp,%r9,8) 260 movq %rdx,(%rsp,%r9,8) 261 262 leaq 1(%r14),%r14 263 jmp .Louter 264.align 16 265.Louter: 266 leaq 24+128(%rsp,%r9,8),%rdx 267 andq $-16,%rdx 268 pxor %xmm4,%xmm4 269 pxor %xmm5,%xmm5 270 movdqa -128(%r12),%xmm0 271 movdqa -112(%r12),%xmm1 272 movdqa -96(%r12),%xmm2 273 movdqa -80(%r12),%xmm3 274 pand -128(%rdx),%xmm0 275 pand -112(%rdx),%xmm1 276 por %xmm0,%xmm4 277 pand -96(%rdx),%xmm2 278 por %xmm1,%xmm5 279 pand -80(%rdx),%xmm3 280 por %xmm2,%xmm4 281 por %xmm3,%xmm5 282 movdqa -64(%r12),%xmm0 283 movdqa -48(%r12),%xmm1 284 movdqa -32(%r12),%xmm2 285 movdqa -16(%r12),%xmm3 286 pand -64(%rdx),%xmm0 287 pand -48(%rdx),%xmm1 288 por %xmm0,%xmm4 289 pand -32(%rdx),%xmm2 290 por %xmm1,%xmm5 291 pand -16(%rdx),%xmm3 292 por %xmm2,%xmm4 293 por %xmm3,%xmm5 294 movdqa 0(%r12),%xmm0 295 movdqa 16(%r12),%xmm1 296 movdqa 32(%r12),%xmm2 297 movdqa 48(%r12),%xmm3 298 pand 0(%rdx),%xmm0 299 pand 16(%rdx),%xmm1 300 por %xmm0,%xmm4 301 pand 32(%rdx),%xmm2 302 por %xmm1,%xmm5 303 pand 48(%rdx),%xmm3 304 por %xmm2,%xmm4 305 por %xmm3,%xmm5 306 movdqa 64(%r12),%xmm0 307 movdqa 80(%r12),%xmm1 308 movdqa 96(%r12),%xmm2 309 movdqa 112(%r12),%xmm3 310 pand 64(%rdx),%xmm0 311 pand 80(%rdx),%xmm1 312 por %xmm0,%xmm4 313 pand 96(%rdx),%xmm2 314 por %xmm1,%xmm5 315 pand 112(%rdx),%xmm3 316 por %xmm2,%xmm4 317 por %xmm3,%xmm5 318 por %xmm5,%xmm4 319 320 pshufd $0x4e,%xmm4,%xmm0 321 por %xmm4,%xmm0 322 leaq 256(%r12),%r12 323 324 movq (%rsi),%rax 325.byte 102,72,15,126,195 326 327 xorq %r15,%r15 328 movq %r8,%rbp 329 movq (%rsp),%r10 330 331 mulq %rbx 332 addq %rax,%r10 333 movq (%rcx),%rax 334 adcq $0,%rdx 335 336 imulq %r10,%rbp 337 movq %rdx,%r11 338 339 mulq %rbp 340 addq %rax,%r10 341 movq 8(%rsi),%rax 342 adcq $0,%rdx 343 movq 8(%rsp),%r10 344 movq %rdx,%r13 345 346 leaq 1(%r15),%r15 347 jmp .Linner_enter 348 349.align 16 350.Linner: 351 addq %rax,%r13 352 movq (%rsi,%r15,8),%rax 353 adcq $0,%rdx 354 addq %r10,%r13 355 movq (%rsp,%r15,8),%r10 356 adcq $0,%rdx 357 movq %r13,-16(%rsp,%r15,8) 358 movq %rdx,%r13 359 360.Linner_enter: 361 mulq %rbx 362 addq %rax,%r11 363 movq (%rcx,%r15,8),%rax 364 adcq $0,%rdx 365 addq %r11,%r10 366 movq %rdx,%r11 367 adcq $0,%r11 368 leaq 1(%r15),%r15 369 370 mulq %rbp 371 cmpq %r9,%r15 372 jne .Linner 373 374 addq %rax,%r13 375 adcq $0,%rdx 376 addq %r10,%r13 377 movq (%rsp,%r9,8),%r10 378 adcq $0,%rdx 379 movq %r13,-16(%rsp,%r9,8) 380 movq %rdx,%r13 381 382 xorq %rdx,%rdx 383 addq %r11,%r13 384 adcq $0,%rdx 385 addq %r10,%r13 386 adcq $0,%rdx 387 movq %r13,-8(%rsp,%r9,8) 388 movq %rdx,(%rsp,%r9,8) 389 390 leaq 1(%r14),%r14 391 cmpq %r9,%r14 392 jb .Louter 393 394 xorq %r14,%r14 395 movq (%rsp),%rax 396 leaq (%rsp),%rsi 397 movq %r9,%r15 398 jmp .Lsub 399.align 16 400.Lsub: sbbq (%rcx,%r14,8),%rax 401 movq %rax,(%rdi,%r14,8) 402 movq 8(%rsi,%r14,8),%rax 403 leaq 1(%r14),%r14 404 decq %r15 405 jnz .Lsub 406 407 sbbq $0,%rax 408 movq $-1,%rbx 409 xorq %rax,%rbx 410 xorq %r14,%r14 411 movq %r9,%r15 412 413.Lcopy: 414 movq (%rdi,%r14,8),%rcx 415 movq (%rsp,%r14,8),%rdx 416 andq %rbx,%rcx 417 andq %rax,%rdx 418 movq %r14,(%rsp,%r14,8) 419 orq %rcx,%rdx 420 movq %rdx,(%rdi,%r14,8) 421 leaq 1(%r14),%r14 422 subq $1,%r15 423 jnz .Lcopy 424 425 movq 8(%rsp,%r9,8),%rsi 426.cfi_def_cfa %rsi,8 427 movq $1,%rax 428 429 movq -48(%rsi),%r15 430.cfi_restore %r15 431 movq -40(%rsi),%r14 432.cfi_restore %r14 433 movq -32(%rsi),%r13 434.cfi_restore %r13 435 movq -24(%rsi),%r12 436.cfi_restore %r12 437 movq -16(%rsi),%rbp 438.cfi_restore %rbp 439 movq -8(%rsi),%rbx 440.cfi_restore %rbx 441 leaq (%rsi),%rsp 442.cfi_def_cfa_register %rsp 443.Lmul_epilogue: 444 ret 445.cfi_endproc 446.size bn_mul_mont_gather5_nohw,.-bn_mul_mont_gather5_nohw 447.globl bn_mul4x_mont_gather5 448.hidden bn_mul4x_mont_gather5 449.type bn_mul4x_mont_gather5,@function 450.align 32 451bn_mul4x_mont_gather5: 452.cfi_startproc 453_CET_ENDBR 454.byte 0x67 455 movq %rsp,%rax 456.cfi_def_cfa_register %rax 457 pushq %rbx 458.cfi_offset %rbx,-16 459 pushq %rbp 460.cfi_offset %rbp,-24 461 pushq %r12 462.cfi_offset %r12,-32 463 pushq %r13 464.cfi_offset %r13,-40 465 pushq %r14 466.cfi_offset %r14,-48 467 pushq %r15 468.cfi_offset %r15,-56 469.Lmul4x_prologue: 470 471.byte 0x67 472 473 474 475 shll $3,%r9d 476 leaq (%r9,%r9,2),%r10 477 negq %r9 478 479 480 481 482 483 484 485 486 487 488 leaq -320(%rsp,%r9,2),%r11 489 movq %rsp,%rbp 490 subq %rdi,%r11 491 andq $4095,%r11 492 cmpq %r11,%r10 493 jb .Lmul4xsp_alt 494 subq %r11,%rbp 495 leaq -320(%rbp,%r9,2),%rbp 496 jmp .Lmul4xsp_done 497 498.align 32 499.Lmul4xsp_alt: 500 leaq 4096-320(,%r9,2),%r10 501 leaq -320(%rbp,%r9,2),%rbp 502 subq %r10,%r11 503 movq $0,%r10 504 cmovcq %r10,%r11 505 subq %r11,%rbp 506.Lmul4xsp_done: 507 andq $-64,%rbp 508 movq %rsp,%r11 509 subq %rbp,%r11 510 andq $-4096,%r11 511 leaq (%r11,%rbp,1),%rsp 512 movq (%rsp),%r10 513 cmpq %rbp,%rsp 514 ja .Lmul4x_page_walk 515 jmp .Lmul4x_page_walk_done 516 517.Lmul4x_page_walk: 518 leaq -4096(%rsp),%rsp 519 movq (%rsp),%r10 520 cmpq %rbp,%rsp 521 ja .Lmul4x_page_walk 522.Lmul4x_page_walk_done: 523 524 negq %r9 525 526 movq %rax,40(%rsp) 527.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 528.Lmul4x_body: 529 530 call mul4x_internal 531 532 movq 40(%rsp),%rsi 533.cfi_def_cfa %rsi,8 534 movq $1,%rax 535 536 movq -48(%rsi),%r15 537.cfi_restore %r15 538 movq -40(%rsi),%r14 539.cfi_restore %r14 540 movq -32(%rsi),%r13 541.cfi_restore %r13 542 movq -24(%rsi),%r12 543.cfi_restore %r12 544 movq -16(%rsi),%rbp 545.cfi_restore %rbp 546 movq -8(%rsi),%rbx 547.cfi_restore %rbx 548 leaq (%rsi),%rsp 549.cfi_def_cfa_register %rsp 550.Lmul4x_epilogue: 551 ret 552.cfi_endproc 553.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 554 555.type mul4x_internal,@function 556.align 32 557mul4x_internal: 558.cfi_startproc 559 shlq $5,%r9 560 movd 8(%rax),%xmm5 561 leaq .Linc(%rip),%rax 562 leaq 128(%rdx,%r9,1),%r13 563 shrq $5,%r9 564 movdqa 0(%rax),%xmm0 565 movdqa 16(%rax),%xmm1 566 leaq 88-112(%rsp,%r9,1),%r10 567 leaq 128(%rdx),%r12 568 569 pshufd $0,%xmm5,%xmm5 570 movdqa %xmm1,%xmm4 571.byte 0x67,0x67 572 movdqa %xmm1,%xmm2 573 paddd %xmm0,%xmm1 574 pcmpeqd %xmm5,%xmm0 575.byte 0x67 576 movdqa %xmm4,%xmm3 577 paddd %xmm1,%xmm2 578 pcmpeqd %xmm5,%xmm1 579 movdqa %xmm0,112(%r10) 580 movdqa %xmm4,%xmm0 581 582 paddd %xmm2,%xmm3 583 pcmpeqd %xmm5,%xmm2 584 movdqa %xmm1,128(%r10) 585 movdqa %xmm4,%xmm1 586 587 paddd %xmm3,%xmm0 588 pcmpeqd %xmm5,%xmm3 589 movdqa %xmm2,144(%r10) 590 movdqa %xmm4,%xmm2 591 592 paddd %xmm0,%xmm1 593 pcmpeqd %xmm5,%xmm0 594 movdqa %xmm3,160(%r10) 595 movdqa %xmm4,%xmm3 596 paddd %xmm1,%xmm2 597 pcmpeqd %xmm5,%xmm1 598 movdqa %xmm0,176(%r10) 599 movdqa %xmm4,%xmm0 600 601 paddd %xmm2,%xmm3 602 pcmpeqd %xmm5,%xmm2 603 movdqa %xmm1,192(%r10) 604 movdqa %xmm4,%xmm1 605 606 paddd %xmm3,%xmm0 607 pcmpeqd %xmm5,%xmm3 608 movdqa %xmm2,208(%r10) 609 movdqa %xmm4,%xmm2 610 611 paddd %xmm0,%xmm1 612 pcmpeqd %xmm5,%xmm0 613 movdqa %xmm3,224(%r10) 614 movdqa %xmm4,%xmm3 615 paddd %xmm1,%xmm2 616 pcmpeqd %xmm5,%xmm1 617 movdqa %xmm0,240(%r10) 618 movdqa %xmm4,%xmm0 619 620 paddd %xmm2,%xmm3 621 pcmpeqd %xmm5,%xmm2 622 movdqa %xmm1,256(%r10) 623 movdqa %xmm4,%xmm1 624 625 paddd %xmm3,%xmm0 626 pcmpeqd %xmm5,%xmm3 627 movdqa %xmm2,272(%r10) 628 movdqa %xmm4,%xmm2 629 630 paddd %xmm0,%xmm1 631 pcmpeqd %xmm5,%xmm0 632 movdqa %xmm3,288(%r10) 633 movdqa %xmm4,%xmm3 634 paddd %xmm1,%xmm2 635 pcmpeqd %xmm5,%xmm1 636 movdqa %xmm0,304(%r10) 637 638 paddd %xmm2,%xmm3 639.byte 0x67 640 pcmpeqd %xmm5,%xmm2 641 movdqa %xmm1,320(%r10) 642 643 pcmpeqd %xmm5,%xmm3 644 movdqa %xmm2,336(%r10) 645 pand 64(%r12),%xmm0 646 647 pand 80(%r12),%xmm1 648 pand 96(%r12),%xmm2 649 movdqa %xmm3,352(%r10) 650 pand 112(%r12),%xmm3 651 por %xmm2,%xmm0 652 por %xmm3,%xmm1 653 movdqa -128(%r12),%xmm4 654 movdqa -112(%r12),%xmm5 655 movdqa -96(%r12),%xmm2 656 pand 112(%r10),%xmm4 657 movdqa -80(%r12),%xmm3 658 pand 128(%r10),%xmm5 659 por %xmm4,%xmm0 660 pand 144(%r10),%xmm2 661 por %xmm5,%xmm1 662 pand 160(%r10),%xmm3 663 por %xmm2,%xmm0 664 por %xmm3,%xmm1 665 movdqa -64(%r12),%xmm4 666 movdqa -48(%r12),%xmm5 667 movdqa -32(%r12),%xmm2 668 pand 176(%r10),%xmm4 669 movdqa -16(%r12),%xmm3 670 pand 192(%r10),%xmm5 671 por %xmm4,%xmm0 672 pand 208(%r10),%xmm2 673 por %xmm5,%xmm1 674 pand 224(%r10),%xmm3 675 por %xmm2,%xmm0 676 por %xmm3,%xmm1 677 movdqa 0(%r12),%xmm4 678 movdqa 16(%r12),%xmm5 679 movdqa 32(%r12),%xmm2 680 pand 240(%r10),%xmm4 681 movdqa 48(%r12),%xmm3 682 pand 256(%r10),%xmm5 683 por %xmm4,%xmm0 684 pand 272(%r10),%xmm2 685 por %xmm5,%xmm1 686 pand 288(%r10),%xmm3 687 por %xmm2,%xmm0 688 por %xmm3,%xmm1 689 por %xmm1,%xmm0 690 691 pshufd $0x4e,%xmm0,%xmm1 692 por %xmm1,%xmm0 693 leaq 256(%r12),%r12 694.byte 102,72,15,126,195 695 696 movq %r13,16+8(%rsp) 697 movq %rdi,56+8(%rsp) 698 699 movq (%r8),%r8 700 movq (%rsi),%rax 701 leaq (%rsi,%r9,1),%rsi 702 negq %r9 703 704 movq %r8,%rbp 705 mulq %rbx 706 movq %rax,%r10 707 movq (%rcx),%rax 708 709 imulq %r10,%rbp 710 leaq 64+8(%rsp),%r14 711 movq %rdx,%r11 712 713 mulq %rbp 714 addq %rax,%r10 715 movq 8(%rsi,%r9,1),%rax 716 adcq $0,%rdx 717 movq %rdx,%rdi 718 719 mulq %rbx 720 addq %rax,%r11 721 movq 8(%rcx),%rax 722 adcq $0,%rdx 723 movq %rdx,%r10 724 725 mulq %rbp 726 addq %rax,%rdi 727 movq 16(%rsi,%r9,1),%rax 728 adcq $0,%rdx 729 addq %r11,%rdi 730 leaq 32(%r9),%r15 731 leaq 32(%rcx),%rcx 732 adcq $0,%rdx 733 movq %rdi,(%r14) 734 movq %rdx,%r13 735 jmp .L1st4x 736 737.align 32 738.L1st4x: 739 mulq %rbx 740 addq %rax,%r10 741 movq -16(%rcx),%rax 742 leaq 32(%r14),%r14 743 adcq $0,%rdx 744 movq %rdx,%r11 745 746 mulq %rbp 747 addq %rax,%r13 748 movq -8(%rsi,%r15,1),%rax 749 adcq $0,%rdx 750 addq %r10,%r13 751 adcq $0,%rdx 752 movq %r13,-24(%r14) 753 movq %rdx,%rdi 754 755 mulq %rbx 756 addq %rax,%r11 757 movq -8(%rcx),%rax 758 adcq $0,%rdx 759 movq %rdx,%r10 760 761 mulq %rbp 762 addq %rax,%rdi 763 movq (%rsi,%r15,1),%rax 764 adcq $0,%rdx 765 addq %r11,%rdi 766 adcq $0,%rdx 767 movq %rdi,-16(%r14) 768 movq %rdx,%r13 769 770 mulq %rbx 771 addq %rax,%r10 772 movq 0(%rcx),%rax 773 adcq $0,%rdx 774 movq %rdx,%r11 775 776 mulq %rbp 777 addq %rax,%r13 778 movq 8(%rsi,%r15,1),%rax 779 adcq $0,%rdx 780 addq %r10,%r13 781 adcq $0,%rdx 782 movq %r13,-8(%r14) 783 movq %rdx,%rdi 784 785 mulq %rbx 786 addq %rax,%r11 787 movq 8(%rcx),%rax 788 adcq $0,%rdx 789 movq %rdx,%r10 790 791 mulq %rbp 792 addq %rax,%rdi 793 movq 16(%rsi,%r15,1),%rax 794 adcq $0,%rdx 795 addq %r11,%rdi 796 leaq 32(%rcx),%rcx 797 adcq $0,%rdx 798 movq %rdi,(%r14) 799 movq %rdx,%r13 800 801 addq $32,%r15 802 jnz .L1st4x 803 804 mulq %rbx 805 addq %rax,%r10 806 movq -16(%rcx),%rax 807 leaq 32(%r14),%r14 808 adcq $0,%rdx 809 movq %rdx,%r11 810 811 mulq %rbp 812 addq %rax,%r13 813 movq -8(%rsi),%rax 814 adcq $0,%rdx 815 addq %r10,%r13 816 adcq $0,%rdx 817 movq %r13,-24(%r14) 818 movq %rdx,%rdi 819 820 mulq %rbx 821 addq %rax,%r11 822 movq -8(%rcx),%rax 823 adcq $0,%rdx 824 movq %rdx,%r10 825 826 mulq %rbp 827 addq %rax,%rdi 828 movq (%rsi,%r9,1),%rax 829 adcq $0,%rdx 830 addq %r11,%rdi 831 adcq $0,%rdx 832 movq %rdi,-16(%r14) 833 movq %rdx,%r13 834 835 leaq (%rcx,%r9,1),%rcx 836 837 xorq %rdi,%rdi 838 addq %r10,%r13 839 adcq $0,%rdi 840 movq %r13,-8(%r14) 841 842 jmp .Louter4x 843 844.align 32 845.Louter4x: 846 leaq 16+128(%r14),%rdx 847 pxor %xmm4,%xmm4 848 pxor %xmm5,%xmm5 849 movdqa -128(%r12),%xmm0 850 movdqa -112(%r12),%xmm1 851 movdqa -96(%r12),%xmm2 852 movdqa -80(%r12),%xmm3 853 pand -128(%rdx),%xmm0 854 pand -112(%rdx),%xmm1 855 por %xmm0,%xmm4 856 pand -96(%rdx),%xmm2 857 por %xmm1,%xmm5 858 pand -80(%rdx),%xmm3 859 por %xmm2,%xmm4 860 por %xmm3,%xmm5 861 movdqa -64(%r12),%xmm0 862 movdqa -48(%r12),%xmm1 863 movdqa -32(%r12),%xmm2 864 movdqa -16(%r12),%xmm3 865 pand -64(%rdx),%xmm0 866 pand -48(%rdx),%xmm1 867 por %xmm0,%xmm4 868 pand -32(%rdx),%xmm2 869 por %xmm1,%xmm5 870 pand -16(%rdx),%xmm3 871 por %xmm2,%xmm4 872 por %xmm3,%xmm5 873 movdqa 0(%r12),%xmm0 874 movdqa 16(%r12),%xmm1 875 movdqa 32(%r12),%xmm2 876 movdqa 48(%r12),%xmm3 877 pand 0(%rdx),%xmm0 878 pand 16(%rdx),%xmm1 879 por %xmm0,%xmm4 880 pand 32(%rdx),%xmm2 881 por %xmm1,%xmm5 882 pand 48(%rdx),%xmm3 883 por %xmm2,%xmm4 884 por %xmm3,%xmm5 885 movdqa 64(%r12),%xmm0 886 movdqa 80(%r12),%xmm1 887 movdqa 96(%r12),%xmm2 888 movdqa 112(%r12),%xmm3 889 pand 64(%rdx),%xmm0 890 pand 80(%rdx),%xmm1 891 por %xmm0,%xmm4 892 pand 96(%rdx),%xmm2 893 por %xmm1,%xmm5 894 pand 112(%rdx),%xmm3 895 por %xmm2,%xmm4 896 por %xmm3,%xmm5 897 por %xmm5,%xmm4 898 899 pshufd $0x4e,%xmm4,%xmm0 900 por %xmm4,%xmm0 901 leaq 256(%r12),%r12 902.byte 102,72,15,126,195 903 904 movq (%r14,%r9,1),%r10 905 movq %r8,%rbp 906 mulq %rbx 907 addq %rax,%r10 908 movq (%rcx),%rax 909 adcq $0,%rdx 910 911 imulq %r10,%rbp 912 movq %rdx,%r11 913 movq %rdi,(%r14) 914 915 leaq (%r14,%r9,1),%r14 916 917 mulq %rbp 918 addq %rax,%r10 919 movq 8(%rsi,%r9,1),%rax 920 adcq $0,%rdx 921 movq %rdx,%rdi 922 923 mulq %rbx 924 addq %rax,%r11 925 movq 8(%rcx),%rax 926 adcq $0,%rdx 927 addq 8(%r14),%r11 928 adcq $0,%rdx 929 movq %rdx,%r10 930 931 mulq %rbp 932 addq %rax,%rdi 933 movq 16(%rsi,%r9,1),%rax 934 adcq $0,%rdx 935 addq %r11,%rdi 936 leaq 32(%r9),%r15 937 leaq 32(%rcx),%rcx 938 adcq $0,%rdx 939 movq %rdx,%r13 940 jmp .Linner4x 941 942.align 32 943.Linner4x: 944 mulq %rbx 945 addq %rax,%r10 946 movq -16(%rcx),%rax 947 adcq $0,%rdx 948 addq 16(%r14),%r10 949 leaq 32(%r14),%r14 950 adcq $0,%rdx 951 movq %rdx,%r11 952 953 mulq %rbp 954 addq %rax,%r13 955 movq -8(%rsi,%r15,1),%rax 956 adcq $0,%rdx 957 addq %r10,%r13 958 adcq $0,%rdx 959 movq %rdi,-32(%r14) 960 movq %rdx,%rdi 961 962 mulq %rbx 963 addq %rax,%r11 964 movq -8(%rcx),%rax 965 adcq $0,%rdx 966 addq -8(%r14),%r11 967 adcq $0,%rdx 968 movq %rdx,%r10 969 970 mulq %rbp 971 addq %rax,%rdi 972 movq (%rsi,%r15,1),%rax 973 adcq $0,%rdx 974 addq %r11,%rdi 975 adcq $0,%rdx 976 movq %r13,-24(%r14) 977 movq %rdx,%r13 978 979 mulq %rbx 980 addq %rax,%r10 981 movq 0(%rcx),%rax 982 adcq $0,%rdx 983 addq (%r14),%r10 984 adcq $0,%rdx 985 movq %rdx,%r11 986 987 mulq %rbp 988 addq %rax,%r13 989 movq 8(%rsi,%r15,1),%rax 990 adcq $0,%rdx 991 addq %r10,%r13 992 adcq $0,%rdx 993 movq %rdi,-16(%r14) 994 movq %rdx,%rdi 995 996 mulq %rbx 997 addq %rax,%r11 998 movq 8(%rcx),%rax 999 adcq $0,%rdx 1000 addq 8(%r14),%r11 1001 adcq $0,%rdx 1002 movq %rdx,%r10 1003 1004 mulq %rbp 1005 addq %rax,%rdi 1006 movq 16(%rsi,%r15,1),%rax 1007 adcq $0,%rdx 1008 addq %r11,%rdi 1009 leaq 32(%rcx),%rcx 1010 adcq $0,%rdx 1011 movq %r13,-8(%r14) 1012 movq %rdx,%r13 1013 1014 addq $32,%r15 1015 jnz .Linner4x 1016 1017 mulq %rbx 1018 addq %rax,%r10 1019 movq -16(%rcx),%rax 1020 adcq $0,%rdx 1021 addq 16(%r14),%r10 1022 leaq 32(%r14),%r14 1023 adcq $0,%rdx 1024 movq %rdx,%r11 1025 1026 mulq %rbp 1027 addq %rax,%r13 1028 movq -8(%rsi),%rax 1029 adcq $0,%rdx 1030 addq %r10,%r13 1031 adcq $0,%rdx 1032 movq %rdi,-32(%r14) 1033 movq %rdx,%rdi 1034 1035 mulq %rbx 1036 addq %rax,%r11 1037 movq %rbp,%rax 1038 movq -8(%rcx),%rbp 1039 adcq $0,%rdx 1040 addq -8(%r14),%r11 1041 adcq $0,%rdx 1042 movq %rdx,%r10 1043 1044 mulq %rbp 1045 addq %rax,%rdi 1046 movq (%rsi,%r9,1),%rax 1047 adcq $0,%rdx 1048 addq %r11,%rdi 1049 adcq $0,%rdx 1050 movq %r13,-24(%r14) 1051 movq %rdx,%r13 1052 1053 movq %rdi,-16(%r14) 1054 leaq (%rcx,%r9,1),%rcx 1055 1056 xorq %rdi,%rdi 1057 addq %r10,%r13 1058 adcq $0,%rdi 1059 addq (%r14),%r13 1060 adcq $0,%rdi 1061 movq %r13,-8(%r14) 1062 1063 cmpq 16+8(%rsp),%r12 1064 jb .Louter4x 1065 xorq %rax,%rax 1066 subq %r13,%rbp 1067 adcq %r15,%r15 1068 orq %r15,%rdi 1069 subq %rdi,%rax 1070 leaq (%r14,%r9,1),%rbx 1071 movq (%rcx),%r12 1072 leaq (%rcx),%rbp 1073 movq %r9,%rcx 1074 sarq $3+2,%rcx 1075 movq 56+8(%rsp),%rdi 1076 decq %r12 1077 xorq %r10,%r10 1078 movq 8(%rbp),%r13 1079 movq 16(%rbp),%r14 1080 movq 24(%rbp),%r15 1081 jmp .Lsqr4x_sub_entry 1082.cfi_endproc 1083.size mul4x_internal,.-mul4x_internal 1084.globl bn_power5_nohw 1085.hidden bn_power5_nohw 1086.type bn_power5_nohw,@function 1087.align 32 1088bn_power5_nohw: 1089.cfi_startproc 1090_CET_ENDBR 1091 movq %rsp,%rax 1092.cfi_def_cfa_register %rax 1093 pushq %rbx 1094.cfi_offset %rbx,-16 1095 pushq %rbp 1096.cfi_offset %rbp,-24 1097 pushq %r12 1098.cfi_offset %r12,-32 1099 pushq %r13 1100.cfi_offset %r13,-40 1101 pushq %r14 1102.cfi_offset %r14,-48 1103 pushq %r15 1104.cfi_offset %r15,-56 1105.Lpower5_prologue: 1106 1107 1108 1109 1110 shll $3,%r9d 1111 leal (%r9,%r9,2),%r10d 1112 negq %r9 1113 movq (%r8),%r8 1114 1115 1116 1117 1118 1119 1120 1121 1122 leaq -320(%rsp,%r9,2),%r11 1123 movq %rsp,%rbp 1124 subq %rdi,%r11 1125 andq $4095,%r11 1126 cmpq %r11,%r10 1127 jb .Lpwr_sp_alt 1128 subq %r11,%rbp 1129 leaq -320(%rbp,%r9,2),%rbp 1130 jmp .Lpwr_sp_done 1131 1132.align 32 1133.Lpwr_sp_alt: 1134 leaq 4096-320(,%r9,2),%r10 1135 leaq -320(%rbp,%r9,2),%rbp 1136 subq %r10,%r11 1137 movq $0,%r10 1138 cmovcq %r10,%r11 1139 subq %r11,%rbp 1140.Lpwr_sp_done: 1141 andq $-64,%rbp 1142 movq %rsp,%r11 1143 subq %rbp,%r11 1144 andq $-4096,%r11 1145 leaq (%r11,%rbp,1),%rsp 1146 movq (%rsp),%r10 1147 cmpq %rbp,%rsp 1148 ja .Lpwr_page_walk 1149 jmp .Lpwr_page_walk_done 1150 1151.Lpwr_page_walk: 1152 leaq -4096(%rsp),%rsp 1153 movq (%rsp),%r10 1154 cmpq %rbp,%rsp 1155 ja .Lpwr_page_walk 1156.Lpwr_page_walk_done: 1157 1158 movq %r9,%r10 1159 negq %r9 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 movq %r8,32(%rsp) 1171 movq %rax,40(%rsp) 1172.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 1173.Lpower5_body: 1174.byte 102,72,15,110,207 1175.byte 102,72,15,110,209 1176.byte 102,73,15,110,218 1177.byte 102,72,15,110,226 1178 1179 call __bn_sqr8x_internal 1180 call __bn_post4x_internal 1181 call __bn_sqr8x_internal 1182 call __bn_post4x_internal 1183 call __bn_sqr8x_internal 1184 call __bn_post4x_internal 1185 call __bn_sqr8x_internal 1186 call __bn_post4x_internal 1187 call __bn_sqr8x_internal 1188 call __bn_post4x_internal 1189 1190.byte 102,72,15,126,209 1191.byte 102,72,15,126,226 1192 movq %rsi,%rdi 1193 movq 40(%rsp),%rax 1194 leaq 32(%rsp),%r8 1195 1196 call mul4x_internal 1197 1198 movq 40(%rsp),%rsi 1199.cfi_def_cfa %rsi,8 1200 movq $1,%rax 1201 movq -48(%rsi),%r15 1202.cfi_restore %r15 1203 movq -40(%rsi),%r14 1204.cfi_restore %r14 1205 movq -32(%rsi),%r13 1206.cfi_restore %r13 1207 movq -24(%rsi),%r12 1208.cfi_restore %r12 1209 movq -16(%rsi),%rbp 1210.cfi_restore %rbp 1211 movq -8(%rsi),%rbx 1212.cfi_restore %rbx 1213 leaq (%rsi),%rsp 1214.cfi_def_cfa_register %rsp 1215.Lpower5_epilogue: 1216 ret 1217.cfi_endproc 1218.size bn_power5_nohw,.-bn_power5_nohw 1219 1220.globl bn_sqr8x_internal 1221.hidden bn_sqr8x_internal 1222.hidden bn_sqr8x_internal 1223.type bn_sqr8x_internal,@function 1224.align 32 1225bn_sqr8x_internal: 1226__bn_sqr8x_internal: 1227.cfi_startproc 1228_CET_ENDBR 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 leaq 32(%r10),%rbp 1303 leaq (%rsi,%r9,1),%rsi 1304 1305 movq %r9,%rcx 1306 1307 1308 movq -32(%rsi,%rbp,1),%r14 1309 leaq 48+8(%rsp,%r9,2),%rdi 1310 movq -24(%rsi,%rbp,1),%rax 1311 leaq -32(%rdi,%rbp,1),%rdi 1312 movq -16(%rsi,%rbp,1),%rbx 1313 movq %rax,%r15 1314 1315 mulq %r14 1316 movq %rax,%r10 1317 movq %rbx,%rax 1318 movq %rdx,%r11 1319 movq %r10,-24(%rdi,%rbp,1) 1320 1321 mulq %r14 1322 addq %rax,%r11 1323 movq %rbx,%rax 1324 adcq $0,%rdx 1325 movq %r11,-16(%rdi,%rbp,1) 1326 movq %rdx,%r10 1327 1328 1329 movq -8(%rsi,%rbp,1),%rbx 1330 mulq %r15 1331 movq %rax,%r12 1332 movq %rbx,%rax 1333 movq %rdx,%r13 1334 1335 leaq (%rbp),%rcx 1336 mulq %r14 1337 addq %rax,%r10 1338 movq %rbx,%rax 1339 movq %rdx,%r11 1340 adcq $0,%r11 1341 addq %r12,%r10 1342 adcq $0,%r11 1343 movq %r10,-8(%rdi,%rcx,1) 1344 jmp .Lsqr4x_1st 1345 1346.align 32 1347.Lsqr4x_1st: 1348 movq (%rsi,%rcx,1),%rbx 1349 mulq %r15 1350 addq %rax,%r13 1351 movq %rbx,%rax 1352 movq %rdx,%r12 1353 adcq $0,%r12 1354 1355 mulq %r14 1356 addq %rax,%r11 1357 movq %rbx,%rax 1358 movq 8(%rsi,%rcx,1),%rbx 1359 movq %rdx,%r10 1360 adcq $0,%r10 1361 addq %r13,%r11 1362 adcq $0,%r10 1363 1364 1365 mulq %r15 1366 addq %rax,%r12 1367 movq %rbx,%rax 1368 movq %r11,(%rdi,%rcx,1) 1369 movq %rdx,%r13 1370 adcq $0,%r13 1371 1372 mulq %r14 1373 addq %rax,%r10 1374 movq %rbx,%rax 1375 movq 16(%rsi,%rcx,1),%rbx 1376 movq %rdx,%r11 1377 adcq $0,%r11 1378 addq %r12,%r10 1379 adcq $0,%r11 1380 1381 mulq %r15 1382 addq %rax,%r13 1383 movq %rbx,%rax 1384 movq %r10,8(%rdi,%rcx,1) 1385 movq %rdx,%r12 1386 adcq $0,%r12 1387 1388 mulq %r14 1389 addq %rax,%r11 1390 movq %rbx,%rax 1391 movq 24(%rsi,%rcx,1),%rbx 1392 movq %rdx,%r10 1393 adcq $0,%r10 1394 addq %r13,%r11 1395 adcq $0,%r10 1396 1397 1398 mulq %r15 1399 addq %rax,%r12 1400 movq %rbx,%rax 1401 movq %r11,16(%rdi,%rcx,1) 1402 movq %rdx,%r13 1403 adcq $0,%r13 1404 leaq 32(%rcx),%rcx 1405 1406 mulq %r14 1407 addq %rax,%r10 1408 movq %rbx,%rax 1409 movq %rdx,%r11 1410 adcq $0,%r11 1411 addq %r12,%r10 1412 adcq $0,%r11 1413 movq %r10,-8(%rdi,%rcx,1) 1414 1415 cmpq $0,%rcx 1416 jne .Lsqr4x_1st 1417 1418 mulq %r15 1419 addq %rax,%r13 1420 leaq 16(%rbp),%rbp 1421 adcq $0,%rdx 1422 addq %r11,%r13 1423 adcq $0,%rdx 1424 1425 movq %r13,(%rdi) 1426 movq %rdx,%r12 1427 movq %rdx,8(%rdi) 1428 jmp .Lsqr4x_outer 1429 1430.align 32 1431.Lsqr4x_outer: 1432 movq -32(%rsi,%rbp,1),%r14 1433 leaq 48+8(%rsp,%r9,2),%rdi 1434 movq -24(%rsi,%rbp,1),%rax 1435 leaq -32(%rdi,%rbp,1),%rdi 1436 movq -16(%rsi,%rbp,1),%rbx 1437 movq %rax,%r15 1438 1439 mulq %r14 1440 movq -24(%rdi,%rbp,1),%r10 1441 addq %rax,%r10 1442 movq %rbx,%rax 1443 adcq $0,%rdx 1444 movq %r10,-24(%rdi,%rbp,1) 1445 movq %rdx,%r11 1446 1447 mulq %r14 1448 addq %rax,%r11 1449 movq %rbx,%rax 1450 adcq $0,%rdx 1451 addq -16(%rdi,%rbp,1),%r11 1452 movq %rdx,%r10 1453 adcq $0,%r10 1454 movq %r11,-16(%rdi,%rbp,1) 1455 1456 xorq %r12,%r12 1457 1458 movq -8(%rsi,%rbp,1),%rbx 1459 mulq %r15 1460 addq %rax,%r12 1461 movq %rbx,%rax 1462 adcq $0,%rdx 1463 addq -8(%rdi,%rbp,1),%r12 1464 movq %rdx,%r13 1465 adcq $0,%r13 1466 1467 mulq %r14 1468 addq %rax,%r10 1469 movq %rbx,%rax 1470 adcq $0,%rdx 1471 addq %r12,%r10 1472 movq %rdx,%r11 1473 adcq $0,%r11 1474 movq %r10,-8(%rdi,%rbp,1) 1475 1476 leaq (%rbp),%rcx 1477 jmp .Lsqr4x_inner 1478 1479.align 32 1480.Lsqr4x_inner: 1481 movq (%rsi,%rcx,1),%rbx 1482 mulq %r15 1483 addq %rax,%r13 1484 movq %rbx,%rax 1485 movq %rdx,%r12 1486 adcq $0,%r12 1487 addq (%rdi,%rcx,1),%r13 1488 adcq $0,%r12 1489 1490.byte 0x67 1491 mulq %r14 1492 addq %rax,%r11 1493 movq %rbx,%rax 1494 movq 8(%rsi,%rcx,1),%rbx 1495 movq %rdx,%r10 1496 adcq $0,%r10 1497 addq %r13,%r11 1498 adcq $0,%r10 1499 1500 mulq %r15 1501 addq %rax,%r12 1502 movq %r11,(%rdi,%rcx,1) 1503 movq %rbx,%rax 1504 movq %rdx,%r13 1505 adcq $0,%r13 1506 addq 8(%rdi,%rcx,1),%r12 1507 leaq 16(%rcx),%rcx 1508 adcq $0,%r13 1509 1510 mulq %r14 1511 addq %rax,%r10 1512 movq %rbx,%rax 1513 adcq $0,%rdx 1514 addq %r12,%r10 1515 movq %rdx,%r11 1516 adcq $0,%r11 1517 movq %r10,-8(%rdi,%rcx,1) 1518 1519 cmpq $0,%rcx 1520 jne .Lsqr4x_inner 1521 1522.byte 0x67 1523 mulq %r15 1524 addq %rax,%r13 1525 adcq $0,%rdx 1526 addq %r11,%r13 1527 adcq $0,%rdx 1528 1529 movq %r13,(%rdi) 1530 movq %rdx,%r12 1531 movq %rdx,8(%rdi) 1532 1533 addq $16,%rbp 1534 jnz .Lsqr4x_outer 1535 1536 1537 movq -32(%rsi),%r14 1538 leaq 48+8(%rsp,%r9,2),%rdi 1539 movq -24(%rsi),%rax 1540 leaq -32(%rdi,%rbp,1),%rdi 1541 movq -16(%rsi),%rbx 1542 movq %rax,%r15 1543 1544 mulq %r14 1545 addq %rax,%r10 1546 movq %rbx,%rax 1547 movq %rdx,%r11 1548 adcq $0,%r11 1549 1550 mulq %r14 1551 addq %rax,%r11 1552 movq %rbx,%rax 1553 movq %r10,-24(%rdi) 1554 movq %rdx,%r10 1555 adcq $0,%r10 1556 addq %r13,%r11 1557 movq -8(%rsi),%rbx 1558 adcq $0,%r10 1559 1560 mulq %r15 1561 addq %rax,%r12 1562 movq %rbx,%rax 1563 movq %r11,-16(%rdi) 1564 movq %rdx,%r13 1565 adcq $0,%r13 1566 1567 mulq %r14 1568 addq %rax,%r10 1569 movq %rbx,%rax 1570 movq %rdx,%r11 1571 adcq $0,%r11 1572 addq %r12,%r10 1573 adcq $0,%r11 1574 movq %r10,-8(%rdi) 1575 1576 mulq %r15 1577 addq %rax,%r13 1578 movq -16(%rsi),%rax 1579 adcq $0,%rdx 1580 addq %r11,%r13 1581 adcq $0,%rdx 1582 1583 movq %r13,(%rdi) 1584 movq %rdx,%r12 1585 movq %rdx,8(%rdi) 1586 1587 mulq %rbx 1588 addq $16,%rbp 1589 xorq %r14,%r14 1590 subq %r9,%rbp 1591 xorq %r15,%r15 1592 1593 addq %r12,%rax 1594 adcq $0,%rdx 1595 movq %rax,8(%rdi) 1596 movq %rdx,16(%rdi) 1597 movq %r15,24(%rdi) 1598 1599 movq -16(%rsi,%rbp,1),%rax 1600 leaq 48+8(%rsp),%rdi 1601 xorq %r10,%r10 1602 movq 8(%rdi),%r11 1603 1604 leaq (%r14,%r10,2),%r12 1605 shrq $63,%r10 1606 leaq (%rcx,%r11,2),%r13 1607 shrq $63,%r11 1608 orq %r10,%r13 1609 movq 16(%rdi),%r10 1610 movq %r11,%r14 1611 mulq %rax 1612 negq %r15 1613 movq 24(%rdi),%r11 1614 adcq %rax,%r12 1615 movq -8(%rsi,%rbp,1),%rax 1616 movq %r12,(%rdi) 1617 adcq %rdx,%r13 1618 1619 leaq (%r14,%r10,2),%rbx 1620 movq %r13,8(%rdi) 1621 sbbq %r15,%r15 1622 shrq $63,%r10 1623 leaq (%rcx,%r11,2),%r8 1624 shrq $63,%r11 1625 orq %r10,%r8 1626 movq 32(%rdi),%r10 1627 movq %r11,%r14 1628 mulq %rax 1629 negq %r15 1630 movq 40(%rdi),%r11 1631 adcq %rax,%rbx 1632 movq 0(%rsi,%rbp,1),%rax 1633 movq %rbx,16(%rdi) 1634 adcq %rdx,%r8 1635 leaq 16(%rbp),%rbp 1636 movq %r8,24(%rdi) 1637 sbbq %r15,%r15 1638 leaq 64(%rdi),%rdi 1639 jmp .Lsqr4x_shift_n_add 1640 1641.align 32 1642.Lsqr4x_shift_n_add: 1643 leaq (%r14,%r10,2),%r12 1644 shrq $63,%r10 1645 leaq (%rcx,%r11,2),%r13 1646 shrq $63,%r11 1647 orq %r10,%r13 1648 movq -16(%rdi),%r10 1649 movq %r11,%r14 1650 mulq %rax 1651 negq %r15 1652 movq -8(%rdi),%r11 1653 adcq %rax,%r12 1654 movq -8(%rsi,%rbp,1),%rax 1655 movq %r12,-32(%rdi) 1656 adcq %rdx,%r13 1657 1658 leaq (%r14,%r10,2),%rbx 1659 movq %r13,-24(%rdi) 1660 sbbq %r15,%r15 1661 shrq $63,%r10 1662 leaq (%rcx,%r11,2),%r8 1663 shrq $63,%r11 1664 orq %r10,%r8 1665 movq 0(%rdi),%r10 1666 movq %r11,%r14 1667 mulq %rax 1668 negq %r15 1669 movq 8(%rdi),%r11 1670 adcq %rax,%rbx 1671 movq 0(%rsi,%rbp,1),%rax 1672 movq %rbx,-16(%rdi) 1673 adcq %rdx,%r8 1674 1675 leaq (%r14,%r10,2),%r12 1676 movq %r8,-8(%rdi) 1677 sbbq %r15,%r15 1678 shrq $63,%r10 1679 leaq (%rcx,%r11,2),%r13 1680 shrq $63,%r11 1681 orq %r10,%r13 1682 movq 16(%rdi),%r10 1683 movq %r11,%r14 1684 mulq %rax 1685 negq %r15 1686 movq 24(%rdi),%r11 1687 adcq %rax,%r12 1688 movq 8(%rsi,%rbp,1),%rax 1689 movq %r12,0(%rdi) 1690 adcq %rdx,%r13 1691 1692 leaq (%r14,%r10,2),%rbx 1693 movq %r13,8(%rdi) 1694 sbbq %r15,%r15 1695 shrq $63,%r10 1696 leaq (%rcx,%r11,2),%r8 1697 shrq $63,%r11 1698 orq %r10,%r8 1699 movq 32(%rdi),%r10 1700 movq %r11,%r14 1701 mulq %rax 1702 negq %r15 1703 movq 40(%rdi),%r11 1704 adcq %rax,%rbx 1705 movq 16(%rsi,%rbp,1),%rax 1706 movq %rbx,16(%rdi) 1707 adcq %rdx,%r8 1708 movq %r8,24(%rdi) 1709 sbbq %r15,%r15 1710 leaq 64(%rdi),%rdi 1711 addq $32,%rbp 1712 jnz .Lsqr4x_shift_n_add 1713 1714 leaq (%r14,%r10,2),%r12 1715.byte 0x67 1716 shrq $63,%r10 1717 leaq (%rcx,%r11,2),%r13 1718 shrq $63,%r11 1719 orq %r10,%r13 1720 movq -16(%rdi),%r10 1721 movq %r11,%r14 1722 mulq %rax 1723 negq %r15 1724 movq -8(%rdi),%r11 1725 adcq %rax,%r12 1726 movq -8(%rsi),%rax 1727 movq %r12,-32(%rdi) 1728 adcq %rdx,%r13 1729 1730 leaq (%r14,%r10,2),%rbx 1731 movq %r13,-24(%rdi) 1732 sbbq %r15,%r15 1733 shrq $63,%r10 1734 leaq (%rcx,%r11,2),%r8 1735 shrq $63,%r11 1736 orq %r10,%r8 1737 mulq %rax 1738 negq %r15 1739 adcq %rax,%rbx 1740 adcq %rdx,%r8 1741 movq %rbx,-16(%rdi) 1742 movq %r8,-8(%rdi) 1743.byte 102,72,15,126,213 1744__bn_sqr8x_reduction: 1745 xorq %rax,%rax 1746 leaq (%r9,%rbp,1),%rcx 1747 leaq 48+8(%rsp,%r9,2),%rdx 1748 movq %rcx,0+8(%rsp) 1749 leaq 48+8(%rsp,%r9,1),%rdi 1750 movq %rdx,8+8(%rsp) 1751 negq %r9 1752 jmp .L8x_reduction_loop 1753 1754.align 32 1755.L8x_reduction_loop: 1756 leaq (%rdi,%r9,1),%rdi 1757.byte 0x66 1758 movq 0(%rdi),%rbx 1759 movq 8(%rdi),%r9 1760 movq 16(%rdi),%r10 1761 movq 24(%rdi),%r11 1762 movq 32(%rdi),%r12 1763 movq 40(%rdi),%r13 1764 movq 48(%rdi),%r14 1765 movq 56(%rdi),%r15 1766 movq %rax,(%rdx) 1767 leaq 64(%rdi),%rdi 1768 1769.byte 0x67 1770 movq %rbx,%r8 1771 imulq 32+8(%rsp),%rbx 1772 movq 0(%rbp),%rax 1773 movl $8,%ecx 1774 jmp .L8x_reduce 1775 1776.align 32 1777.L8x_reduce: 1778 mulq %rbx 1779 movq 8(%rbp),%rax 1780 negq %r8 1781 movq %rdx,%r8 1782 adcq $0,%r8 1783 1784 mulq %rbx 1785 addq %rax,%r9 1786 movq 16(%rbp),%rax 1787 adcq $0,%rdx 1788 addq %r9,%r8 1789 movq %rbx,48-8+8(%rsp,%rcx,8) 1790 movq %rdx,%r9 1791 adcq $0,%r9 1792 1793 mulq %rbx 1794 addq %rax,%r10 1795 movq 24(%rbp),%rax 1796 adcq $0,%rdx 1797 addq %r10,%r9 1798 movq 32+8(%rsp),%rsi 1799 movq %rdx,%r10 1800 adcq $0,%r10 1801 1802 mulq %rbx 1803 addq %rax,%r11 1804 movq 32(%rbp),%rax 1805 adcq $0,%rdx 1806 imulq %r8,%rsi 1807 addq %r11,%r10 1808 movq %rdx,%r11 1809 adcq $0,%r11 1810 1811 mulq %rbx 1812 addq %rax,%r12 1813 movq 40(%rbp),%rax 1814 adcq $0,%rdx 1815 addq %r12,%r11 1816 movq %rdx,%r12 1817 adcq $0,%r12 1818 1819 mulq %rbx 1820 addq %rax,%r13 1821 movq 48(%rbp),%rax 1822 adcq $0,%rdx 1823 addq %r13,%r12 1824 movq %rdx,%r13 1825 adcq $0,%r13 1826 1827 mulq %rbx 1828 addq %rax,%r14 1829 movq 56(%rbp),%rax 1830 adcq $0,%rdx 1831 addq %r14,%r13 1832 movq %rdx,%r14 1833 adcq $0,%r14 1834 1835 mulq %rbx 1836 movq %rsi,%rbx 1837 addq %rax,%r15 1838 movq 0(%rbp),%rax 1839 adcq $0,%rdx 1840 addq %r15,%r14 1841 movq %rdx,%r15 1842 adcq $0,%r15 1843 1844 decl %ecx 1845 jnz .L8x_reduce 1846 1847 leaq 64(%rbp),%rbp 1848 xorq %rax,%rax 1849 movq 8+8(%rsp),%rdx 1850 cmpq 0+8(%rsp),%rbp 1851 jae .L8x_no_tail 1852 1853.byte 0x66 1854 addq 0(%rdi),%r8 1855 adcq 8(%rdi),%r9 1856 adcq 16(%rdi),%r10 1857 adcq 24(%rdi),%r11 1858 adcq 32(%rdi),%r12 1859 adcq 40(%rdi),%r13 1860 adcq 48(%rdi),%r14 1861 adcq 56(%rdi),%r15 1862 sbbq %rsi,%rsi 1863 1864 movq 48+56+8(%rsp),%rbx 1865 movl $8,%ecx 1866 movq 0(%rbp),%rax 1867 jmp .L8x_tail 1868 1869.align 32 1870.L8x_tail: 1871 mulq %rbx 1872 addq %rax,%r8 1873 movq 8(%rbp),%rax 1874 movq %r8,(%rdi) 1875 movq %rdx,%r8 1876 adcq $0,%r8 1877 1878 mulq %rbx 1879 addq %rax,%r9 1880 movq 16(%rbp),%rax 1881 adcq $0,%rdx 1882 addq %r9,%r8 1883 leaq 8(%rdi),%rdi 1884 movq %rdx,%r9 1885 adcq $0,%r9 1886 1887 mulq %rbx 1888 addq %rax,%r10 1889 movq 24(%rbp),%rax 1890 adcq $0,%rdx 1891 addq %r10,%r9 1892 movq %rdx,%r10 1893 adcq $0,%r10 1894 1895 mulq %rbx 1896 addq %rax,%r11 1897 movq 32(%rbp),%rax 1898 adcq $0,%rdx 1899 addq %r11,%r10 1900 movq %rdx,%r11 1901 adcq $0,%r11 1902 1903 mulq %rbx 1904 addq %rax,%r12 1905 movq 40(%rbp),%rax 1906 adcq $0,%rdx 1907 addq %r12,%r11 1908 movq %rdx,%r12 1909 adcq $0,%r12 1910 1911 mulq %rbx 1912 addq %rax,%r13 1913 movq 48(%rbp),%rax 1914 adcq $0,%rdx 1915 addq %r13,%r12 1916 movq %rdx,%r13 1917 adcq $0,%r13 1918 1919 mulq %rbx 1920 addq %rax,%r14 1921 movq 56(%rbp),%rax 1922 adcq $0,%rdx 1923 addq %r14,%r13 1924 movq %rdx,%r14 1925 adcq $0,%r14 1926 1927 mulq %rbx 1928 movq 48-16+8(%rsp,%rcx,8),%rbx 1929 addq %rax,%r15 1930 adcq $0,%rdx 1931 addq %r15,%r14 1932 movq 0(%rbp),%rax 1933 movq %rdx,%r15 1934 adcq $0,%r15 1935 1936 decl %ecx 1937 jnz .L8x_tail 1938 1939 leaq 64(%rbp),%rbp 1940 movq 8+8(%rsp),%rdx 1941 cmpq 0+8(%rsp),%rbp 1942 jae .L8x_tail_done 1943 1944 movq 48+56+8(%rsp),%rbx 1945 negq %rsi 1946 movq 0(%rbp),%rax 1947 adcq 0(%rdi),%r8 1948 adcq 8(%rdi),%r9 1949 adcq 16(%rdi),%r10 1950 adcq 24(%rdi),%r11 1951 adcq 32(%rdi),%r12 1952 adcq 40(%rdi),%r13 1953 adcq 48(%rdi),%r14 1954 adcq 56(%rdi),%r15 1955 sbbq %rsi,%rsi 1956 1957 movl $8,%ecx 1958 jmp .L8x_tail 1959 1960.align 32 1961.L8x_tail_done: 1962 xorq %rax,%rax 1963 addq (%rdx),%r8 1964 adcq $0,%r9 1965 adcq $0,%r10 1966 adcq $0,%r11 1967 adcq $0,%r12 1968 adcq $0,%r13 1969 adcq $0,%r14 1970 adcq $0,%r15 1971 adcq $0,%rax 1972 1973 negq %rsi 1974.L8x_no_tail: 1975 adcq 0(%rdi),%r8 1976 adcq 8(%rdi),%r9 1977 adcq 16(%rdi),%r10 1978 adcq 24(%rdi),%r11 1979 adcq 32(%rdi),%r12 1980 adcq 40(%rdi),%r13 1981 adcq 48(%rdi),%r14 1982 adcq 56(%rdi),%r15 1983 adcq $0,%rax 1984 movq -8(%rbp),%rcx 1985 xorq %rsi,%rsi 1986 1987.byte 102,72,15,126,213 1988 1989 movq %r8,0(%rdi) 1990 movq %r9,8(%rdi) 1991.byte 102,73,15,126,217 1992 movq %r10,16(%rdi) 1993 movq %r11,24(%rdi) 1994 movq %r12,32(%rdi) 1995 movq %r13,40(%rdi) 1996 movq %r14,48(%rdi) 1997 movq %r15,56(%rdi) 1998 leaq 64(%rdi),%rdi 1999 2000 cmpq %rdx,%rdi 2001 jb .L8x_reduction_loop 2002 ret 2003.cfi_endproc 2004.size bn_sqr8x_internal,.-bn_sqr8x_internal 2005.type __bn_post4x_internal,@function 2006.align 32 2007__bn_post4x_internal: 2008.cfi_startproc 2009 movq 0(%rbp),%r12 2010 leaq (%rdi,%r9,1),%rbx 2011 movq %r9,%rcx 2012.byte 102,72,15,126,207 2013 negq %rax 2014.byte 102,72,15,126,206 2015 sarq $3+2,%rcx 2016 decq %r12 2017 xorq %r10,%r10 2018 movq 8(%rbp),%r13 2019 movq 16(%rbp),%r14 2020 movq 24(%rbp),%r15 2021 jmp .Lsqr4x_sub_entry 2022 2023.align 16 2024.Lsqr4x_sub: 2025 movq 0(%rbp),%r12 2026 movq 8(%rbp),%r13 2027 movq 16(%rbp),%r14 2028 movq 24(%rbp),%r15 2029.Lsqr4x_sub_entry: 2030 leaq 32(%rbp),%rbp 2031 notq %r12 2032 notq %r13 2033 notq %r14 2034 notq %r15 2035 andq %rax,%r12 2036 andq %rax,%r13 2037 andq %rax,%r14 2038 andq %rax,%r15 2039 2040 negq %r10 2041 adcq 0(%rbx),%r12 2042 adcq 8(%rbx),%r13 2043 adcq 16(%rbx),%r14 2044 adcq 24(%rbx),%r15 2045 movq %r12,0(%rdi) 2046 leaq 32(%rbx),%rbx 2047 movq %r13,8(%rdi) 2048 sbbq %r10,%r10 2049 movq %r14,16(%rdi) 2050 movq %r15,24(%rdi) 2051 leaq 32(%rdi),%rdi 2052 2053 incq %rcx 2054 jnz .Lsqr4x_sub 2055 2056 movq %r9,%r10 2057 negq %r9 2058 ret 2059.cfi_endproc 2060.size __bn_post4x_internal,.-__bn_post4x_internal 2061.globl bn_mulx4x_mont_gather5 2062.hidden bn_mulx4x_mont_gather5 2063.type bn_mulx4x_mont_gather5,@function 2064.align 32 2065bn_mulx4x_mont_gather5: 2066.cfi_startproc 2067_CET_ENDBR 2068 movq %rsp,%rax 2069.cfi_def_cfa_register %rax 2070 pushq %rbx 2071.cfi_offset %rbx,-16 2072 pushq %rbp 2073.cfi_offset %rbp,-24 2074 pushq %r12 2075.cfi_offset %r12,-32 2076 pushq %r13 2077.cfi_offset %r13,-40 2078 pushq %r14 2079.cfi_offset %r14,-48 2080 pushq %r15 2081.cfi_offset %r15,-56 2082.Lmulx4x_prologue: 2083 2084 2085 2086 2087 shll $3,%r9d 2088 leaq (%r9,%r9,2),%r10 2089 negq %r9 2090 movq (%r8),%r8 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 leaq -320(%rsp,%r9,2),%r11 2102 movq %rsp,%rbp 2103 subq %rdi,%r11 2104 andq $4095,%r11 2105 cmpq %r11,%r10 2106 jb .Lmulx4xsp_alt 2107 subq %r11,%rbp 2108 leaq -320(%rbp,%r9,2),%rbp 2109 jmp .Lmulx4xsp_done 2110 2111.Lmulx4xsp_alt: 2112 leaq 4096-320(,%r9,2),%r10 2113 leaq -320(%rbp,%r9,2),%rbp 2114 subq %r10,%r11 2115 movq $0,%r10 2116 cmovcq %r10,%r11 2117 subq %r11,%rbp 2118.Lmulx4xsp_done: 2119 andq $-64,%rbp 2120 movq %rsp,%r11 2121 subq %rbp,%r11 2122 andq $-4096,%r11 2123 leaq (%r11,%rbp,1),%rsp 2124 movq (%rsp),%r10 2125 cmpq %rbp,%rsp 2126 ja .Lmulx4x_page_walk 2127 jmp .Lmulx4x_page_walk_done 2128 2129.Lmulx4x_page_walk: 2130 leaq -4096(%rsp),%rsp 2131 movq (%rsp),%r10 2132 cmpq %rbp,%rsp 2133 ja .Lmulx4x_page_walk 2134.Lmulx4x_page_walk_done: 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 movq %r8,32(%rsp) 2149 movq %rax,40(%rsp) 2150.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2151.Lmulx4x_body: 2152 call mulx4x_internal 2153 2154 movq 40(%rsp),%rsi 2155.cfi_def_cfa %rsi,8 2156 movq $1,%rax 2157 2158 movq -48(%rsi),%r15 2159.cfi_restore %r15 2160 movq -40(%rsi),%r14 2161.cfi_restore %r14 2162 movq -32(%rsi),%r13 2163.cfi_restore %r13 2164 movq -24(%rsi),%r12 2165.cfi_restore %r12 2166 movq -16(%rsi),%rbp 2167.cfi_restore %rbp 2168 movq -8(%rsi),%rbx 2169.cfi_restore %rbx 2170 leaq (%rsi),%rsp 2171.cfi_def_cfa_register %rsp 2172.Lmulx4x_epilogue: 2173 ret 2174.cfi_endproc 2175.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2176 2177.type mulx4x_internal,@function 2178.align 32 2179mulx4x_internal: 2180.cfi_startproc 2181 movq %r9,8(%rsp) 2182 movq %r9,%r10 2183 negq %r9 2184 shlq $5,%r9 2185 negq %r10 2186 leaq 128(%rdx,%r9,1),%r13 2187 shrq $5+5,%r9 2188 movd 8(%rax),%xmm5 2189 subq $1,%r9 2190 leaq .Linc(%rip),%rax 2191 movq %r13,16+8(%rsp) 2192 movq %r9,24+8(%rsp) 2193 movq %rdi,56+8(%rsp) 2194 movdqa 0(%rax),%xmm0 2195 movdqa 16(%rax),%xmm1 2196 leaq 88-112(%rsp,%r10,1),%r10 2197 leaq 128(%rdx),%rdi 2198 2199 pshufd $0,%xmm5,%xmm5 2200 movdqa %xmm1,%xmm4 2201.byte 0x67 2202 movdqa %xmm1,%xmm2 2203.byte 0x67 2204 paddd %xmm0,%xmm1 2205 pcmpeqd %xmm5,%xmm0 2206 movdqa %xmm4,%xmm3 2207 paddd %xmm1,%xmm2 2208 pcmpeqd %xmm5,%xmm1 2209 movdqa %xmm0,112(%r10) 2210 movdqa %xmm4,%xmm0 2211 2212 paddd %xmm2,%xmm3 2213 pcmpeqd %xmm5,%xmm2 2214 movdqa %xmm1,128(%r10) 2215 movdqa %xmm4,%xmm1 2216 2217 paddd %xmm3,%xmm0 2218 pcmpeqd %xmm5,%xmm3 2219 movdqa %xmm2,144(%r10) 2220 movdqa %xmm4,%xmm2 2221 2222 paddd %xmm0,%xmm1 2223 pcmpeqd %xmm5,%xmm0 2224 movdqa %xmm3,160(%r10) 2225 movdqa %xmm4,%xmm3 2226 paddd %xmm1,%xmm2 2227 pcmpeqd %xmm5,%xmm1 2228 movdqa %xmm0,176(%r10) 2229 movdqa %xmm4,%xmm0 2230 2231 paddd %xmm2,%xmm3 2232 pcmpeqd %xmm5,%xmm2 2233 movdqa %xmm1,192(%r10) 2234 movdqa %xmm4,%xmm1 2235 2236 paddd %xmm3,%xmm0 2237 pcmpeqd %xmm5,%xmm3 2238 movdqa %xmm2,208(%r10) 2239 movdqa %xmm4,%xmm2 2240 2241 paddd %xmm0,%xmm1 2242 pcmpeqd %xmm5,%xmm0 2243 movdqa %xmm3,224(%r10) 2244 movdqa %xmm4,%xmm3 2245 paddd %xmm1,%xmm2 2246 pcmpeqd %xmm5,%xmm1 2247 movdqa %xmm0,240(%r10) 2248 movdqa %xmm4,%xmm0 2249 2250 paddd %xmm2,%xmm3 2251 pcmpeqd %xmm5,%xmm2 2252 movdqa %xmm1,256(%r10) 2253 movdqa %xmm4,%xmm1 2254 2255 paddd %xmm3,%xmm0 2256 pcmpeqd %xmm5,%xmm3 2257 movdqa %xmm2,272(%r10) 2258 movdqa %xmm4,%xmm2 2259 2260 paddd %xmm0,%xmm1 2261 pcmpeqd %xmm5,%xmm0 2262 movdqa %xmm3,288(%r10) 2263 movdqa %xmm4,%xmm3 2264.byte 0x67 2265 paddd %xmm1,%xmm2 2266 pcmpeqd %xmm5,%xmm1 2267 movdqa %xmm0,304(%r10) 2268 2269 paddd %xmm2,%xmm3 2270 pcmpeqd %xmm5,%xmm2 2271 movdqa %xmm1,320(%r10) 2272 2273 pcmpeqd %xmm5,%xmm3 2274 movdqa %xmm2,336(%r10) 2275 2276 pand 64(%rdi),%xmm0 2277 pand 80(%rdi),%xmm1 2278 pand 96(%rdi),%xmm2 2279 movdqa %xmm3,352(%r10) 2280 pand 112(%rdi),%xmm3 2281 por %xmm2,%xmm0 2282 por %xmm3,%xmm1 2283 movdqa -128(%rdi),%xmm4 2284 movdqa -112(%rdi),%xmm5 2285 movdqa -96(%rdi),%xmm2 2286 pand 112(%r10),%xmm4 2287 movdqa -80(%rdi),%xmm3 2288 pand 128(%r10),%xmm5 2289 por %xmm4,%xmm0 2290 pand 144(%r10),%xmm2 2291 por %xmm5,%xmm1 2292 pand 160(%r10),%xmm3 2293 por %xmm2,%xmm0 2294 por %xmm3,%xmm1 2295 movdqa -64(%rdi),%xmm4 2296 movdqa -48(%rdi),%xmm5 2297 movdqa -32(%rdi),%xmm2 2298 pand 176(%r10),%xmm4 2299 movdqa -16(%rdi),%xmm3 2300 pand 192(%r10),%xmm5 2301 por %xmm4,%xmm0 2302 pand 208(%r10),%xmm2 2303 por %xmm5,%xmm1 2304 pand 224(%r10),%xmm3 2305 por %xmm2,%xmm0 2306 por %xmm3,%xmm1 2307 movdqa 0(%rdi),%xmm4 2308 movdqa 16(%rdi),%xmm5 2309 movdqa 32(%rdi),%xmm2 2310 pand 240(%r10),%xmm4 2311 movdqa 48(%rdi),%xmm3 2312 pand 256(%r10),%xmm5 2313 por %xmm4,%xmm0 2314 pand 272(%r10),%xmm2 2315 por %xmm5,%xmm1 2316 pand 288(%r10),%xmm3 2317 por %xmm2,%xmm0 2318 por %xmm3,%xmm1 2319 pxor %xmm1,%xmm0 2320 2321 pshufd $0x4e,%xmm0,%xmm1 2322 por %xmm1,%xmm0 2323 leaq 256(%rdi),%rdi 2324.byte 102,72,15,126,194 2325 leaq 64+32+8(%rsp),%rbx 2326 2327 movq %rdx,%r9 2328 mulxq 0(%rsi),%r8,%rax 2329 mulxq 8(%rsi),%r11,%r12 2330 addq %rax,%r11 2331 mulxq 16(%rsi),%rax,%r13 2332 adcq %rax,%r12 2333 adcq $0,%r13 2334 mulxq 24(%rsi),%rax,%r14 2335 2336 movq %r8,%r15 2337 imulq 32+8(%rsp),%r8 2338 xorq %rbp,%rbp 2339 movq %r8,%rdx 2340 2341 movq %rdi,8+8(%rsp) 2342 2343 leaq 32(%rsi),%rsi 2344 adcxq %rax,%r13 2345 adcxq %rbp,%r14 2346 2347 mulxq 0(%rcx),%rax,%r10 2348 adcxq %rax,%r15 2349 adoxq %r11,%r10 2350 mulxq 8(%rcx),%rax,%r11 2351 adcxq %rax,%r10 2352 adoxq %r12,%r11 2353 mulxq 16(%rcx),%rax,%r12 2354 movq 24+8(%rsp),%rdi 2355 movq %r10,-32(%rbx) 2356 adcxq %rax,%r11 2357 adoxq %r13,%r12 2358 mulxq 24(%rcx),%rax,%r15 2359 movq %r9,%rdx 2360 movq %r11,-24(%rbx) 2361 adcxq %rax,%r12 2362 adoxq %rbp,%r15 2363 leaq 32(%rcx),%rcx 2364 movq %r12,-16(%rbx) 2365 jmp .Lmulx4x_1st 2366 2367.align 32 2368.Lmulx4x_1st: 2369 adcxq %rbp,%r15 2370 mulxq 0(%rsi),%r10,%rax 2371 adcxq %r14,%r10 2372 mulxq 8(%rsi),%r11,%r14 2373 adcxq %rax,%r11 2374 mulxq 16(%rsi),%r12,%rax 2375 adcxq %r14,%r12 2376 mulxq 24(%rsi),%r13,%r14 2377.byte 0x67,0x67 2378 movq %r8,%rdx 2379 adcxq %rax,%r13 2380 adcxq %rbp,%r14 2381 leaq 32(%rsi),%rsi 2382 leaq 32(%rbx),%rbx 2383 2384 adoxq %r15,%r10 2385 mulxq 0(%rcx),%rax,%r15 2386 adcxq %rax,%r10 2387 adoxq %r15,%r11 2388 mulxq 8(%rcx),%rax,%r15 2389 adcxq %rax,%r11 2390 adoxq %r15,%r12 2391 mulxq 16(%rcx),%rax,%r15 2392 movq %r10,-40(%rbx) 2393 adcxq %rax,%r12 2394 movq %r11,-32(%rbx) 2395 adoxq %r15,%r13 2396 mulxq 24(%rcx),%rax,%r15 2397 movq %r9,%rdx 2398 movq %r12,-24(%rbx) 2399 adcxq %rax,%r13 2400 adoxq %rbp,%r15 2401 leaq 32(%rcx),%rcx 2402 movq %r13,-16(%rbx) 2403 2404 decq %rdi 2405 jnz .Lmulx4x_1st 2406 2407 movq 8(%rsp),%rax 2408 adcq %rbp,%r15 2409 leaq (%rsi,%rax,1),%rsi 2410 addq %r15,%r14 2411 movq 8+8(%rsp),%rdi 2412 adcq %rbp,%rbp 2413 movq %r14,-8(%rbx) 2414 jmp .Lmulx4x_outer 2415 2416.align 32 2417.Lmulx4x_outer: 2418 leaq 16-256(%rbx),%r10 2419 pxor %xmm4,%xmm4 2420.byte 0x67,0x67 2421 pxor %xmm5,%xmm5 2422 movdqa -128(%rdi),%xmm0 2423 movdqa -112(%rdi),%xmm1 2424 movdqa -96(%rdi),%xmm2 2425 pand 256(%r10),%xmm0 2426 movdqa -80(%rdi),%xmm3 2427 pand 272(%r10),%xmm1 2428 por %xmm0,%xmm4 2429 pand 288(%r10),%xmm2 2430 por %xmm1,%xmm5 2431 pand 304(%r10),%xmm3 2432 por %xmm2,%xmm4 2433 por %xmm3,%xmm5 2434 movdqa -64(%rdi),%xmm0 2435 movdqa -48(%rdi),%xmm1 2436 movdqa -32(%rdi),%xmm2 2437 pand 320(%r10),%xmm0 2438 movdqa -16(%rdi),%xmm3 2439 pand 336(%r10),%xmm1 2440 por %xmm0,%xmm4 2441 pand 352(%r10),%xmm2 2442 por %xmm1,%xmm5 2443 pand 368(%r10),%xmm3 2444 por %xmm2,%xmm4 2445 por %xmm3,%xmm5 2446 movdqa 0(%rdi),%xmm0 2447 movdqa 16(%rdi),%xmm1 2448 movdqa 32(%rdi),%xmm2 2449 pand 384(%r10),%xmm0 2450 movdqa 48(%rdi),%xmm3 2451 pand 400(%r10),%xmm1 2452 por %xmm0,%xmm4 2453 pand 416(%r10),%xmm2 2454 por %xmm1,%xmm5 2455 pand 432(%r10),%xmm3 2456 por %xmm2,%xmm4 2457 por %xmm3,%xmm5 2458 movdqa 64(%rdi),%xmm0 2459 movdqa 80(%rdi),%xmm1 2460 movdqa 96(%rdi),%xmm2 2461 pand 448(%r10),%xmm0 2462 movdqa 112(%rdi),%xmm3 2463 pand 464(%r10),%xmm1 2464 por %xmm0,%xmm4 2465 pand 480(%r10),%xmm2 2466 por %xmm1,%xmm5 2467 pand 496(%r10),%xmm3 2468 por %xmm2,%xmm4 2469 por %xmm3,%xmm5 2470 por %xmm5,%xmm4 2471 2472 pshufd $0x4e,%xmm4,%xmm0 2473 por %xmm4,%xmm0 2474 leaq 256(%rdi),%rdi 2475.byte 102,72,15,126,194 2476 2477 movq %rbp,(%rbx) 2478 leaq 32(%rbx,%rax,1),%rbx 2479 mulxq 0(%rsi),%r8,%r11 2480 xorq %rbp,%rbp 2481 movq %rdx,%r9 2482 mulxq 8(%rsi),%r14,%r12 2483 adoxq -32(%rbx),%r8 2484 adcxq %r14,%r11 2485 mulxq 16(%rsi),%r15,%r13 2486 adoxq -24(%rbx),%r11 2487 adcxq %r15,%r12 2488 mulxq 24(%rsi),%rdx,%r14 2489 adoxq -16(%rbx),%r12 2490 adcxq %rdx,%r13 2491 leaq (%rcx,%rax,1),%rcx 2492 leaq 32(%rsi),%rsi 2493 adoxq -8(%rbx),%r13 2494 adcxq %rbp,%r14 2495 adoxq %rbp,%r14 2496 2497 movq %r8,%r15 2498 imulq 32+8(%rsp),%r8 2499 2500 movq %r8,%rdx 2501 xorq %rbp,%rbp 2502 movq %rdi,8+8(%rsp) 2503 2504 mulxq 0(%rcx),%rax,%r10 2505 adcxq %rax,%r15 2506 adoxq %r11,%r10 2507 mulxq 8(%rcx),%rax,%r11 2508 adcxq %rax,%r10 2509 adoxq %r12,%r11 2510 mulxq 16(%rcx),%rax,%r12 2511 adcxq %rax,%r11 2512 adoxq %r13,%r12 2513 mulxq 24(%rcx),%rax,%r15 2514 movq %r9,%rdx 2515 movq 24+8(%rsp),%rdi 2516 movq %r10,-32(%rbx) 2517 adcxq %rax,%r12 2518 movq %r11,-24(%rbx) 2519 adoxq %rbp,%r15 2520 movq %r12,-16(%rbx) 2521 leaq 32(%rcx),%rcx 2522 jmp .Lmulx4x_inner 2523 2524.align 32 2525.Lmulx4x_inner: 2526 mulxq 0(%rsi),%r10,%rax 2527 adcxq %rbp,%r15 2528 adoxq %r14,%r10 2529 mulxq 8(%rsi),%r11,%r14 2530 adcxq 0(%rbx),%r10 2531 adoxq %rax,%r11 2532 mulxq 16(%rsi),%r12,%rax 2533 adcxq 8(%rbx),%r11 2534 adoxq %r14,%r12 2535 mulxq 24(%rsi),%r13,%r14 2536 movq %r8,%rdx 2537 adcxq 16(%rbx),%r12 2538 adoxq %rax,%r13 2539 adcxq 24(%rbx),%r13 2540 adoxq %rbp,%r14 2541 leaq 32(%rsi),%rsi 2542 leaq 32(%rbx),%rbx 2543 adcxq %rbp,%r14 2544 2545 adoxq %r15,%r10 2546 mulxq 0(%rcx),%rax,%r15 2547 adcxq %rax,%r10 2548 adoxq %r15,%r11 2549 mulxq 8(%rcx),%rax,%r15 2550 adcxq %rax,%r11 2551 adoxq %r15,%r12 2552 mulxq 16(%rcx),%rax,%r15 2553 movq %r10,-40(%rbx) 2554 adcxq %rax,%r12 2555 adoxq %r15,%r13 2556 movq %r11,-32(%rbx) 2557 mulxq 24(%rcx),%rax,%r15 2558 movq %r9,%rdx 2559 leaq 32(%rcx),%rcx 2560 movq %r12,-24(%rbx) 2561 adcxq %rax,%r13 2562 adoxq %rbp,%r15 2563 movq %r13,-16(%rbx) 2564 2565 decq %rdi 2566 jnz .Lmulx4x_inner 2567 2568 movq 0+8(%rsp),%rax 2569 adcq %rbp,%r15 2570 subq 0(%rbx),%rdi 2571 movq 8+8(%rsp),%rdi 2572 movq 16+8(%rsp),%r10 2573 adcq %r15,%r14 2574 leaq (%rsi,%rax,1),%rsi 2575 adcq %rbp,%rbp 2576 movq %r14,-8(%rbx) 2577 2578 cmpq %r10,%rdi 2579 jb .Lmulx4x_outer 2580 2581 movq -8(%rcx),%r10 2582 movq %rbp,%r8 2583 movq (%rcx,%rax,1),%r12 2584 leaq (%rcx,%rax,1),%rbp 2585 movq %rax,%rcx 2586 leaq (%rbx,%rax,1),%rdi 2587 xorl %eax,%eax 2588 xorq %r15,%r15 2589 subq %r14,%r10 2590 adcq %r15,%r15 2591 orq %r15,%r8 2592 sarq $3+2,%rcx 2593 subq %r8,%rax 2594 movq 56+8(%rsp),%rdx 2595 decq %r12 2596 movq 8(%rbp),%r13 2597 xorq %r8,%r8 2598 movq 16(%rbp),%r14 2599 movq 24(%rbp),%r15 2600 jmp .Lsqrx4x_sub_entry 2601.cfi_endproc 2602.size mulx4x_internal,.-mulx4x_internal 2603.globl bn_powerx5 2604.hidden bn_powerx5 2605.type bn_powerx5,@function 2606.align 32 2607bn_powerx5: 2608.cfi_startproc 2609_CET_ENDBR 2610 movq %rsp,%rax 2611.cfi_def_cfa_register %rax 2612 pushq %rbx 2613.cfi_offset %rbx,-16 2614 pushq %rbp 2615.cfi_offset %rbp,-24 2616 pushq %r12 2617.cfi_offset %r12,-32 2618 pushq %r13 2619.cfi_offset %r13,-40 2620 pushq %r14 2621.cfi_offset %r14,-48 2622 pushq %r15 2623.cfi_offset %r15,-56 2624.Lpowerx5_prologue: 2625 2626 2627 2628 2629 shll $3,%r9d 2630 leaq (%r9,%r9,2),%r10 2631 negq %r9 2632 movq (%r8),%r8 2633 2634 2635 2636 2637 2638 2639 2640 2641 leaq -320(%rsp,%r9,2),%r11 2642 movq %rsp,%rbp 2643 subq %rdi,%r11 2644 andq $4095,%r11 2645 cmpq %r11,%r10 2646 jb .Lpwrx_sp_alt 2647 subq %r11,%rbp 2648 leaq -320(%rbp,%r9,2),%rbp 2649 jmp .Lpwrx_sp_done 2650 2651.align 32 2652.Lpwrx_sp_alt: 2653 leaq 4096-320(,%r9,2),%r10 2654 leaq -320(%rbp,%r9,2),%rbp 2655 subq %r10,%r11 2656 movq $0,%r10 2657 cmovcq %r10,%r11 2658 subq %r11,%rbp 2659.Lpwrx_sp_done: 2660 andq $-64,%rbp 2661 movq %rsp,%r11 2662 subq %rbp,%r11 2663 andq $-4096,%r11 2664 leaq (%r11,%rbp,1),%rsp 2665 movq (%rsp),%r10 2666 cmpq %rbp,%rsp 2667 ja .Lpwrx_page_walk 2668 jmp .Lpwrx_page_walk_done 2669 2670.Lpwrx_page_walk: 2671 leaq -4096(%rsp),%rsp 2672 movq (%rsp),%r10 2673 cmpq %rbp,%rsp 2674 ja .Lpwrx_page_walk 2675.Lpwrx_page_walk_done: 2676 2677 movq %r9,%r10 2678 negq %r9 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 pxor %xmm0,%xmm0 2692.byte 102,72,15,110,207 2693.byte 102,72,15,110,209 2694.byte 102,73,15,110,218 2695.byte 102,72,15,110,226 2696 movq %r8,32(%rsp) 2697 movq %rax,40(%rsp) 2698.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2699.Lpowerx5_body: 2700 2701 call __bn_sqrx8x_internal 2702 call __bn_postx4x_internal 2703 call __bn_sqrx8x_internal 2704 call __bn_postx4x_internal 2705 call __bn_sqrx8x_internal 2706 call __bn_postx4x_internal 2707 call __bn_sqrx8x_internal 2708 call __bn_postx4x_internal 2709 call __bn_sqrx8x_internal 2710 call __bn_postx4x_internal 2711 2712 movq %r10,%r9 2713 movq %rsi,%rdi 2714.byte 102,72,15,126,209 2715.byte 102,72,15,126,226 2716 movq 40(%rsp),%rax 2717 2718 call mulx4x_internal 2719 2720 movq 40(%rsp),%rsi 2721.cfi_def_cfa %rsi,8 2722 movq $1,%rax 2723 2724 movq -48(%rsi),%r15 2725.cfi_restore %r15 2726 movq -40(%rsi),%r14 2727.cfi_restore %r14 2728 movq -32(%rsi),%r13 2729.cfi_restore %r13 2730 movq -24(%rsi),%r12 2731.cfi_restore %r12 2732 movq -16(%rsi),%rbp 2733.cfi_restore %rbp 2734 movq -8(%rsi),%rbx 2735.cfi_restore %rbx 2736 leaq (%rsi),%rsp 2737.cfi_def_cfa_register %rsp 2738.Lpowerx5_epilogue: 2739 ret 2740.cfi_endproc 2741.size bn_powerx5,.-bn_powerx5 2742 2743.globl bn_sqrx8x_internal 2744.hidden bn_sqrx8x_internal 2745.hidden bn_sqrx8x_internal 2746.type bn_sqrx8x_internal,@function 2747.align 32 2748bn_sqrx8x_internal: 2749__bn_sqrx8x_internal: 2750.cfi_startproc 2751_CET_ENDBR 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 leaq 48+8(%rsp),%rdi 2793 leaq (%rsi,%r9,1),%rbp 2794 movq %r9,0+8(%rsp) 2795 movq %rbp,8+8(%rsp) 2796 jmp .Lsqr8x_zero_start 2797 2798.align 32 2799.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2800.Lsqrx8x_zero: 2801.byte 0x3e 2802 movdqa %xmm0,0(%rdi) 2803 movdqa %xmm0,16(%rdi) 2804 movdqa %xmm0,32(%rdi) 2805 movdqa %xmm0,48(%rdi) 2806.Lsqr8x_zero_start: 2807 movdqa %xmm0,64(%rdi) 2808 movdqa %xmm0,80(%rdi) 2809 movdqa %xmm0,96(%rdi) 2810 movdqa %xmm0,112(%rdi) 2811 leaq 128(%rdi),%rdi 2812 subq $64,%r9 2813 jnz .Lsqrx8x_zero 2814 2815 movq 0(%rsi),%rdx 2816 2817 xorq %r10,%r10 2818 xorq %r11,%r11 2819 xorq %r12,%r12 2820 xorq %r13,%r13 2821 xorq %r14,%r14 2822 xorq %r15,%r15 2823 leaq 48+8(%rsp),%rdi 2824 xorq %rbp,%rbp 2825 jmp .Lsqrx8x_outer_loop 2826 2827.align 32 2828.Lsqrx8x_outer_loop: 2829 mulxq 8(%rsi),%r8,%rax 2830 adcxq %r9,%r8 2831 adoxq %rax,%r10 2832 mulxq 16(%rsi),%r9,%rax 2833 adcxq %r10,%r9 2834 adoxq %rax,%r11 2835.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2836 adcxq %r11,%r10 2837 adoxq %rax,%r12 2838.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2839 adcxq %r12,%r11 2840 adoxq %rax,%r13 2841 mulxq 40(%rsi),%r12,%rax 2842 adcxq %r13,%r12 2843 adoxq %rax,%r14 2844 mulxq 48(%rsi),%r13,%rax 2845 adcxq %r14,%r13 2846 adoxq %r15,%rax 2847 mulxq 56(%rsi),%r14,%r15 2848 movq 8(%rsi),%rdx 2849 adcxq %rax,%r14 2850 adoxq %rbp,%r15 2851 adcq 64(%rdi),%r15 2852 movq %r8,8(%rdi) 2853 movq %r9,16(%rdi) 2854 sbbq %rcx,%rcx 2855 xorq %rbp,%rbp 2856 2857 2858 mulxq 16(%rsi),%r8,%rbx 2859 mulxq 24(%rsi),%r9,%rax 2860 adcxq %r10,%r8 2861 adoxq %rbx,%r9 2862 mulxq 32(%rsi),%r10,%rbx 2863 adcxq %r11,%r9 2864 adoxq %rax,%r10 2865.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 2866 adcxq %r12,%r10 2867 adoxq %rbx,%r11 2868.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 2869 adcxq %r13,%r11 2870 adoxq %r14,%r12 2871.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 2872 movq 16(%rsi),%rdx 2873 adcxq %rax,%r12 2874 adoxq %rbx,%r13 2875 adcxq %r15,%r13 2876 adoxq %rbp,%r14 2877 adcxq %rbp,%r14 2878 2879 movq %r8,24(%rdi) 2880 movq %r9,32(%rdi) 2881 2882 mulxq 24(%rsi),%r8,%rbx 2883 mulxq 32(%rsi),%r9,%rax 2884 adcxq %r10,%r8 2885 adoxq %rbx,%r9 2886 mulxq 40(%rsi),%r10,%rbx 2887 adcxq %r11,%r9 2888 adoxq %rax,%r10 2889.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 2890 adcxq %r12,%r10 2891 adoxq %r13,%r11 2892.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 2893.byte 0x3e 2894 movq 24(%rsi),%rdx 2895 adcxq %rbx,%r11 2896 adoxq %rax,%r12 2897 adcxq %r14,%r12 2898 movq %r8,40(%rdi) 2899 movq %r9,48(%rdi) 2900 mulxq 32(%rsi),%r8,%rax 2901 adoxq %rbp,%r13 2902 adcxq %rbp,%r13 2903 2904 mulxq 40(%rsi),%r9,%rbx 2905 adcxq %r10,%r8 2906 adoxq %rax,%r9 2907 mulxq 48(%rsi),%r10,%rax 2908 adcxq %r11,%r9 2909 adoxq %r12,%r10 2910 mulxq 56(%rsi),%r11,%r12 2911 movq 32(%rsi),%rdx 2912 movq 40(%rsi),%r14 2913 adcxq %rbx,%r10 2914 adoxq %rax,%r11 2915 movq 48(%rsi),%r15 2916 adcxq %r13,%r11 2917 adoxq %rbp,%r12 2918 adcxq %rbp,%r12 2919 2920 movq %r8,56(%rdi) 2921 movq %r9,64(%rdi) 2922 2923 mulxq %r14,%r9,%rax 2924 movq 56(%rsi),%r8 2925 adcxq %r10,%r9 2926 mulxq %r15,%r10,%rbx 2927 adoxq %rax,%r10 2928 adcxq %r11,%r10 2929 mulxq %r8,%r11,%rax 2930 movq %r14,%rdx 2931 adoxq %rbx,%r11 2932 adcxq %r12,%r11 2933 2934 adcxq %rbp,%rax 2935 2936 mulxq %r15,%r14,%rbx 2937 mulxq %r8,%r12,%r13 2938 movq %r15,%rdx 2939 leaq 64(%rsi),%rsi 2940 adcxq %r14,%r11 2941 adoxq %rbx,%r12 2942 adcxq %rax,%r12 2943 adoxq %rbp,%r13 2944 2945.byte 0x67,0x67 2946 mulxq %r8,%r8,%r14 2947 adcxq %r8,%r13 2948 adcxq %rbp,%r14 2949 2950 cmpq 8+8(%rsp),%rsi 2951 je .Lsqrx8x_outer_break 2952 2953 negq %rcx 2954 movq $-8,%rcx 2955 movq %rbp,%r15 2956 movq 64(%rdi),%r8 2957 adcxq 72(%rdi),%r9 2958 adcxq 80(%rdi),%r10 2959 adcxq 88(%rdi),%r11 2960 adcq 96(%rdi),%r12 2961 adcq 104(%rdi),%r13 2962 adcq 112(%rdi),%r14 2963 adcq 120(%rdi),%r15 2964 leaq (%rsi),%rbp 2965 leaq 128(%rdi),%rdi 2966 sbbq %rax,%rax 2967 2968 movq -64(%rsi),%rdx 2969 movq %rax,16+8(%rsp) 2970 movq %rdi,24+8(%rsp) 2971 2972 2973 xorl %eax,%eax 2974 jmp .Lsqrx8x_loop 2975 2976.align 32 2977.Lsqrx8x_loop: 2978 movq %r8,%rbx 2979 mulxq 0(%rbp),%rax,%r8 2980 adcxq %rax,%rbx 2981 adoxq %r9,%r8 2982 2983 mulxq 8(%rbp),%rax,%r9 2984 adcxq %rax,%r8 2985 adoxq %r10,%r9 2986 2987 mulxq 16(%rbp),%rax,%r10 2988 adcxq %rax,%r9 2989 adoxq %r11,%r10 2990 2991 mulxq 24(%rbp),%rax,%r11 2992 adcxq %rax,%r10 2993 adoxq %r12,%r11 2994 2995.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 2996 adcxq %rax,%r11 2997 adoxq %r13,%r12 2998 2999 mulxq 40(%rbp),%rax,%r13 3000 adcxq %rax,%r12 3001 adoxq %r14,%r13 3002 3003 mulxq 48(%rbp),%rax,%r14 3004 movq %rbx,(%rdi,%rcx,8) 3005 movl $0,%ebx 3006 adcxq %rax,%r13 3007 adoxq %r15,%r14 3008 3009.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3010 movq 8(%rsi,%rcx,8),%rdx 3011 adcxq %rax,%r14 3012 adoxq %rbx,%r15 3013 adcxq %rbx,%r15 3014 3015.byte 0x67 3016 incq %rcx 3017 jnz .Lsqrx8x_loop 3018 3019 leaq 64(%rbp),%rbp 3020 movq $-8,%rcx 3021 cmpq 8+8(%rsp),%rbp 3022 je .Lsqrx8x_break 3023 3024 subq 16+8(%rsp),%rbx 3025.byte 0x66 3026 movq -64(%rsi),%rdx 3027 adcxq 0(%rdi),%r8 3028 adcxq 8(%rdi),%r9 3029 adcq 16(%rdi),%r10 3030 adcq 24(%rdi),%r11 3031 adcq 32(%rdi),%r12 3032 adcq 40(%rdi),%r13 3033 adcq 48(%rdi),%r14 3034 adcq 56(%rdi),%r15 3035 leaq 64(%rdi),%rdi 3036.byte 0x67 3037 sbbq %rax,%rax 3038 xorl %ebx,%ebx 3039 movq %rax,16+8(%rsp) 3040 jmp .Lsqrx8x_loop 3041 3042.align 32 3043.Lsqrx8x_break: 3044 xorq %rbp,%rbp 3045 subq 16+8(%rsp),%rbx 3046 adcxq %rbp,%r8 3047 movq 24+8(%rsp),%rcx 3048 adcxq %rbp,%r9 3049 movq 0(%rsi),%rdx 3050 adcq $0,%r10 3051 movq %r8,0(%rdi) 3052 adcq $0,%r11 3053 adcq $0,%r12 3054 adcq $0,%r13 3055 adcq $0,%r14 3056 adcq $0,%r15 3057 cmpq %rcx,%rdi 3058 je .Lsqrx8x_outer_loop 3059 3060 movq %r9,8(%rdi) 3061 movq 8(%rcx),%r9 3062 movq %r10,16(%rdi) 3063 movq 16(%rcx),%r10 3064 movq %r11,24(%rdi) 3065 movq 24(%rcx),%r11 3066 movq %r12,32(%rdi) 3067 movq 32(%rcx),%r12 3068 movq %r13,40(%rdi) 3069 movq 40(%rcx),%r13 3070 movq %r14,48(%rdi) 3071 movq 48(%rcx),%r14 3072 movq %r15,56(%rdi) 3073 movq 56(%rcx),%r15 3074 movq %rcx,%rdi 3075 jmp .Lsqrx8x_outer_loop 3076 3077.align 32 3078.Lsqrx8x_outer_break: 3079 movq %r9,72(%rdi) 3080.byte 102,72,15,126,217 3081 movq %r10,80(%rdi) 3082 movq %r11,88(%rdi) 3083 movq %r12,96(%rdi) 3084 movq %r13,104(%rdi) 3085 movq %r14,112(%rdi) 3086 leaq 48+8(%rsp),%rdi 3087 movq (%rsi,%rcx,1),%rdx 3088 3089 movq 8(%rdi),%r11 3090 xorq %r10,%r10 3091 movq 0+8(%rsp),%r9 3092 adoxq %r11,%r11 3093 movq 16(%rdi),%r12 3094 movq 24(%rdi),%r13 3095 3096 3097.align 32 3098.Lsqrx4x_shift_n_add: 3099 mulxq %rdx,%rax,%rbx 3100 adoxq %r12,%r12 3101 adcxq %r10,%rax 3102.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3103.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3104 adoxq %r13,%r13 3105 adcxq %r11,%rbx 3106 movq 40(%rdi),%r11 3107 movq %rax,0(%rdi) 3108 movq %rbx,8(%rdi) 3109 3110 mulxq %rdx,%rax,%rbx 3111 adoxq %r10,%r10 3112 adcxq %r12,%rax 3113 movq 16(%rsi,%rcx,1),%rdx 3114 movq 48(%rdi),%r12 3115 adoxq %r11,%r11 3116 adcxq %r13,%rbx 3117 movq 56(%rdi),%r13 3118 movq %rax,16(%rdi) 3119 movq %rbx,24(%rdi) 3120 3121 mulxq %rdx,%rax,%rbx 3122 adoxq %r12,%r12 3123 adcxq %r10,%rax 3124 movq 24(%rsi,%rcx,1),%rdx 3125 leaq 32(%rcx),%rcx 3126 movq 64(%rdi),%r10 3127 adoxq %r13,%r13 3128 adcxq %r11,%rbx 3129 movq 72(%rdi),%r11 3130 movq %rax,32(%rdi) 3131 movq %rbx,40(%rdi) 3132 3133 mulxq %rdx,%rax,%rbx 3134 adoxq %r10,%r10 3135 adcxq %r12,%rax 3136 jrcxz .Lsqrx4x_shift_n_add_break 3137.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3138 adoxq %r11,%r11 3139 adcxq %r13,%rbx 3140 movq 80(%rdi),%r12 3141 movq 88(%rdi),%r13 3142 movq %rax,48(%rdi) 3143 movq %rbx,56(%rdi) 3144 leaq 64(%rdi),%rdi 3145 nop 3146 jmp .Lsqrx4x_shift_n_add 3147 3148.align 32 3149.Lsqrx4x_shift_n_add_break: 3150 adcxq %r13,%rbx 3151 movq %rax,48(%rdi) 3152 movq %rbx,56(%rdi) 3153 leaq 64(%rdi),%rdi 3154.byte 102,72,15,126,213 3155__bn_sqrx8x_reduction: 3156 xorl %eax,%eax 3157 movq 32+8(%rsp),%rbx 3158 movq 48+8(%rsp),%rdx 3159 leaq -64(%rbp,%r9,1),%rcx 3160 3161 movq %rcx,0+8(%rsp) 3162 movq %rdi,8+8(%rsp) 3163 3164 leaq 48+8(%rsp),%rdi 3165 jmp .Lsqrx8x_reduction_loop 3166 3167.align 32 3168.Lsqrx8x_reduction_loop: 3169 movq 8(%rdi),%r9 3170 movq 16(%rdi),%r10 3171 movq 24(%rdi),%r11 3172 movq 32(%rdi),%r12 3173 movq %rdx,%r8 3174 imulq %rbx,%rdx 3175 movq 40(%rdi),%r13 3176 movq 48(%rdi),%r14 3177 movq 56(%rdi),%r15 3178 movq %rax,24+8(%rsp) 3179 3180 leaq 64(%rdi),%rdi 3181 xorq %rsi,%rsi 3182 movq $-8,%rcx 3183 jmp .Lsqrx8x_reduce 3184 3185.align 32 3186.Lsqrx8x_reduce: 3187 movq %r8,%rbx 3188 mulxq 0(%rbp),%rax,%r8 3189 adcxq %rbx,%rax 3190 adoxq %r9,%r8 3191 3192 mulxq 8(%rbp),%rbx,%r9 3193 adcxq %rbx,%r8 3194 adoxq %r10,%r9 3195 3196 mulxq 16(%rbp),%rbx,%r10 3197 adcxq %rbx,%r9 3198 adoxq %r11,%r10 3199 3200 mulxq 24(%rbp),%rbx,%r11 3201 adcxq %rbx,%r10 3202 adoxq %r12,%r11 3203 3204.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3205 movq %rdx,%rax 3206 movq %r8,%rdx 3207 adcxq %rbx,%r11 3208 adoxq %r13,%r12 3209 3210 mulxq 32+8(%rsp),%rbx,%rdx 3211 movq %rax,%rdx 3212 movq %rax,64+48+8(%rsp,%rcx,8) 3213 3214 mulxq 40(%rbp),%rax,%r13 3215 adcxq %rax,%r12 3216 adoxq %r14,%r13 3217 3218 mulxq 48(%rbp),%rax,%r14 3219 adcxq %rax,%r13 3220 adoxq %r15,%r14 3221 3222 mulxq 56(%rbp),%rax,%r15 3223 movq %rbx,%rdx 3224 adcxq %rax,%r14 3225 adoxq %rsi,%r15 3226 adcxq %rsi,%r15 3227 3228.byte 0x67,0x67,0x67 3229 incq %rcx 3230 jnz .Lsqrx8x_reduce 3231 3232 movq %rsi,%rax 3233 cmpq 0+8(%rsp),%rbp 3234 jae .Lsqrx8x_no_tail 3235 3236 movq 48+8(%rsp),%rdx 3237 addq 0(%rdi),%r8 3238 leaq 64(%rbp),%rbp 3239 movq $-8,%rcx 3240 adcxq 8(%rdi),%r9 3241 adcxq 16(%rdi),%r10 3242 adcq 24(%rdi),%r11 3243 adcq 32(%rdi),%r12 3244 adcq 40(%rdi),%r13 3245 adcq 48(%rdi),%r14 3246 adcq 56(%rdi),%r15 3247 leaq 64(%rdi),%rdi 3248 sbbq %rax,%rax 3249 3250 xorq %rsi,%rsi 3251 movq %rax,16+8(%rsp) 3252 jmp .Lsqrx8x_tail 3253 3254.align 32 3255.Lsqrx8x_tail: 3256 movq %r8,%rbx 3257 mulxq 0(%rbp),%rax,%r8 3258 adcxq %rax,%rbx 3259 adoxq %r9,%r8 3260 3261 mulxq 8(%rbp),%rax,%r9 3262 adcxq %rax,%r8 3263 adoxq %r10,%r9 3264 3265 mulxq 16(%rbp),%rax,%r10 3266 adcxq %rax,%r9 3267 adoxq %r11,%r10 3268 3269 mulxq 24(%rbp),%rax,%r11 3270 adcxq %rax,%r10 3271 adoxq %r12,%r11 3272 3273.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3274 adcxq %rax,%r11 3275 adoxq %r13,%r12 3276 3277 mulxq 40(%rbp),%rax,%r13 3278 adcxq %rax,%r12 3279 adoxq %r14,%r13 3280 3281 mulxq 48(%rbp),%rax,%r14 3282 adcxq %rax,%r13 3283 adoxq %r15,%r14 3284 3285 mulxq 56(%rbp),%rax,%r15 3286 movq 72+48+8(%rsp,%rcx,8),%rdx 3287 adcxq %rax,%r14 3288 adoxq %rsi,%r15 3289 movq %rbx,(%rdi,%rcx,8) 3290 movq %r8,%rbx 3291 adcxq %rsi,%r15 3292 3293 incq %rcx 3294 jnz .Lsqrx8x_tail 3295 3296 cmpq 0+8(%rsp),%rbp 3297 jae .Lsqrx8x_tail_done 3298 3299 subq 16+8(%rsp),%rsi 3300 movq 48+8(%rsp),%rdx 3301 leaq 64(%rbp),%rbp 3302 adcq 0(%rdi),%r8 3303 adcq 8(%rdi),%r9 3304 adcq 16(%rdi),%r10 3305 adcq 24(%rdi),%r11 3306 adcq 32(%rdi),%r12 3307 adcq 40(%rdi),%r13 3308 adcq 48(%rdi),%r14 3309 adcq 56(%rdi),%r15 3310 leaq 64(%rdi),%rdi 3311 sbbq %rax,%rax 3312 subq $8,%rcx 3313 3314 xorq %rsi,%rsi 3315 movq %rax,16+8(%rsp) 3316 jmp .Lsqrx8x_tail 3317 3318.align 32 3319.Lsqrx8x_tail_done: 3320 xorq %rax,%rax 3321 addq 24+8(%rsp),%r8 3322 adcq $0,%r9 3323 adcq $0,%r10 3324 adcq $0,%r11 3325 adcq $0,%r12 3326 adcq $0,%r13 3327 adcq $0,%r14 3328 adcq $0,%r15 3329 adcq $0,%rax 3330 3331 subq 16+8(%rsp),%rsi 3332.Lsqrx8x_no_tail: 3333 adcq 0(%rdi),%r8 3334.byte 102,72,15,126,217 3335 adcq 8(%rdi),%r9 3336 movq 56(%rbp),%rsi 3337.byte 102,72,15,126,213 3338 adcq 16(%rdi),%r10 3339 adcq 24(%rdi),%r11 3340 adcq 32(%rdi),%r12 3341 adcq 40(%rdi),%r13 3342 adcq 48(%rdi),%r14 3343 adcq 56(%rdi),%r15 3344 adcq $0,%rax 3345 3346 movq 32+8(%rsp),%rbx 3347 movq 64(%rdi,%rcx,1),%rdx 3348 3349 movq %r8,0(%rdi) 3350 leaq 64(%rdi),%r8 3351 movq %r9,8(%rdi) 3352 movq %r10,16(%rdi) 3353 movq %r11,24(%rdi) 3354 movq %r12,32(%rdi) 3355 movq %r13,40(%rdi) 3356 movq %r14,48(%rdi) 3357 movq %r15,56(%rdi) 3358 3359 leaq 64(%rdi,%rcx,1),%rdi 3360 cmpq 8+8(%rsp),%r8 3361 jb .Lsqrx8x_reduction_loop 3362 ret 3363.cfi_endproc 3364.size bn_sqrx8x_internal,.-bn_sqrx8x_internal 3365.align 32 3366.type __bn_postx4x_internal,@function 3367__bn_postx4x_internal: 3368.cfi_startproc 3369 movq 0(%rbp),%r12 3370 movq %rcx,%r10 3371 movq %rcx,%r9 3372 negq %rax 3373 sarq $3+2,%rcx 3374 3375.byte 102,72,15,126,202 3376.byte 102,72,15,126,206 3377 decq %r12 3378 movq 8(%rbp),%r13 3379 xorq %r8,%r8 3380 movq 16(%rbp),%r14 3381 movq 24(%rbp),%r15 3382 jmp .Lsqrx4x_sub_entry 3383 3384.align 16 3385.Lsqrx4x_sub: 3386 movq 0(%rbp),%r12 3387 movq 8(%rbp),%r13 3388 movq 16(%rbp),%r14 3389 movq 24(%rbp),%r15 3390.Lsqrx4x_sub_entry: 3391 andnq %rax,%r12,%r12 3392 leaq 32(%rbp),%rbp 3393 andnq %rax,%r13,%r13 3394 andnq %rax,%r14,%r14 3395 andnq %rax,%r15,%r15 3396 3397 negq %r8 3398 adcq 0(%rdi),%r12 3399 adcq 8(%rdi),%r13 3400 adcq 16(%rdi),%r14 3401 adcq 24(%rdi),%r15 3402 movq %r12,0(%rdx) 3403 leaq 32(%rdi),%rdi 3404 movq %r13,8(%rdx) 3405 sbbq %r8,%r8 3406 movq %r14,16(%rdx) 3407 movq %r15,24(%rdx) 3408 leaq 32(%rdx),%rdx 3409 3410 incq %rcx 3411 jnz .Lsqrx4x_sub 3412 3413 negq %r9 3414 3415 ret 3416.cfi_endproc 3417.size __bn_postx4x_internal,.-__bn_postx4x_internal 3418.globl bn_scatter5 3419.hidden bn_scatter5 3420.type bn_scatter5,@function 3421.align 16 3422bn_scatter5: 3423.cfi_startproc 3424_CET_ENDBR 3425 cmpl $0,%esi 3426 jz .Lscatter_epilogue 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 leaq (%rdx,%rcx,8),%rdx 3437.Lscatter: 3438 movq (%rdi),%rax 3439 leaq 8(%rdi),%rdi 3440 movq %rax,(%rdx) 3441 leaq 256(%rdx),%rdx 3442 subl $1,%esi 3443 jnz .Lscatter 3444.Lscatter_epilogue: 3445 ret 3446.cfi_endproc 3447.size bn_scatter5,.-bn_scatter5 3448 3449.globl bn_gather5 3450.hidden bn_gather5 3451.type bn_gather5,@function 3452.align 32 3453bn_gather5: 3454.cfi_startproc 3455.LSEH_begin_bn_gather5: 3456_CET_ENDBR 3457 3458.byte 0x4c,0x8d,0x14,0x24 3459.cfi_def_cfa_register %r10 3460.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 3461 leaq .Linc(%rip),%rax 3462 andq $-16,%rsp 3463 3464 movd %ecx,%xmm5 3465 movdqa 0(%rax),%xmm0 3466 movdqa 16(%rax),%xmm1 3467 leaq 128(%rdx),%r11 3468 leaq 128(%rsp),%rax 3469 3470 pshufd $0,%xmm5,%xmm5 3471 movdqa %xmm1,%xmm4 3472 movdqa %xmm1,%xmm2 3473 paddd %xmm0,%xmm1 3474 pcmpeqd %xmm5,%xmm0 3475 movdqa %xmm4,%xmm3 3476 3477 paddd %xmm1,%xmm2 3478 pcmpeqd %xmm5,%xmm1 3479 movdqa %xmm0,-128(%rax) 3480 movdqa %xmm4,%xmm0 3481 3482 paddd %xmm2,%xmm3 3483 pcmpeqd %xmm5,%xmm2 3484 movdqa %xmm1,-112(%rax) 3485 movdqa %xmm4,%xmm1 3486 3487 paddd %xmm3,%xmm0 3488 pcmpeqd %xmm5,%xmm3 3489 movdqa %xmm2,-96(%rax) 3490 movdqa %xmm4,%xmm2 3491 paddd %xmm0,%xmm1 3492 pcmpeqd %xmm5,%xmm0 3493 movdqa %xmm3,-80(%rax) 3494 movdqa %xmm4,%xmm3 3495 3496 paddd %xmm1,%xmm2 3497 pcmpeqd %xmm5,%xmm1 3498 movdqa %xmm0,-64(%rax) 3499 movdqa %xmm4,%xmm0 3500 3501 paddd %xmm2,%xmm3 3502 pcmpeqd %xmm5,%xmm2 3503 movdqa %xmm1,-48(%rax) 3504 movdqa %xmm4,%xmm1 3505 3506 paddd %xmm3,%xmm0 3507 pcmpeqd %xmm5,%xmm3 3508 movdqa %xmm2,-32(%rax) 3509 movdqa %xmm4,%xmm2 3510 paddd %xmm0,%xmm1 3511 pcmpeqd %xmm5,%xmm0 3512 movdqa %xmm3,-16(%rax) 3513 movdqa %xmm4,%xmm3 3514 3515 paddd %xmm1,%xmm2 3516 pcmpeqd %xmm5,%xmm1 3517 movdqa %xmm0,0(%rax) 3518 movdqa %xmm4,%xmm0 3519 3520 paddd %xmm2,%xmm3 3521 pcmpeqd %xmm5,%xmm2 3522 movdqa %xmm1,16(%rax) 3523 movdqa %xmm4,%xmm1 3524 3525 paddd %xmm3,%xmm0 3526 pcmpeqd %xmm5,%xmm3 3527 movdqa %xmm2,32(%rax) 3528 movdqa %xmm4,%xmm2 3529 paddd %xmm0,%xmm1 3530 pcmpeqd %xmm5,%xmm0 3531 movdqa %xmm3,48(%rax) 3532 movdqa %xmm4,%xmm3 3533 3534 paddd %xmm1,%xmm2 3535 pcmpeqd %xmm5,%xmm1 3536 movdqa %xmm0,64(%rax) 3537 movdqa %xmm4,%xmm0 3538 3539 paddd %xmm2,%xmm3 3540 pcmpeqd %xmm5,%xmm2 3541 movdqa %xmm1,80(%rax) 3542 movdqa %xmm4,%xmm1 3543 3544 paddd %xmm3,%xmm0 3545 pcmpeqd %xmm5,%xmm3 3546 movdqa %xmm2,96(%rax) 3547 movdqa %xmm4,%xmm2 3548 movdqa %xmm3,112(%rax) 3549 jmp .Lgather 3550 3551.align 32 3552.Lgather: 3553 pxor %xmm4,%xmm4 3554 pxor %xmm5,%xmm5 3555 movdqa -128(%r11),%xmm0 3556 movdqa -112(%r11),%xmm1 3557 movdqa -96(%r11),%xmm2 3558 pand -128(%rax),%xmm0 3559 movdqa -80(%r11),%xmm3 3560 pand -112(%rax),%xmm1 3561 por %xmm0,%xmm4 3562 pand -96(%rax),%xmm2 3563 por %xmm1,%xmm5 3564 pand -80(%rax),%xmm3 3565 por %xmm2,%xmm4 3566 por %xmm3,%xmm5 3567 movdqa -64(%r11),%xmm0 3568 movdqa -48(%r11),%xmm1 3569 movdqa -32(%r11),%xmm2 3570 pand -64(%rax),%xmm0 3571 movdqa -16(%r11),%xmm3 3572 pand -48(%rax),%xmm1 3573 por %xmm0,%xmm4 3574 pand -32(%rax),%xmm2 3575 por %xmm1,%xmm5 3576 pand -16(%rax),%xmm3 3577 por %xmm2,%xmm4 3578 por %xmm3,%xmm5 3579 movdqa 0(%r11),%xmm0 3580 movdqa 16(%r11),%xmm1 3581 movdqa 32(%r11),%xmm2 3582 pand 0(%rax),%xmm0 3583 movdqa 48(%r11),%xmm3 3584 pand 16(%rax),%xmm1 3585 por %xmm0,%xmm4 3586 pand 32(%rax),%xmm2 3587 por %xmm1,%xmm5 3588 pand 48(%rax),%xmm3 3589 por %xmm2,%xmm4 3590 por %xmm3,%xmm5 3591 movdqa 64(%r11),%xmm0 3592 movdqa 80(%r11),%xmm1 3593 movdqa 96(%r11),%xmm2 3594 pand 64(%rax),%xmm0 3595 movdqa 112(%r11),%xmm3 3596 pand 80(%rax),%xmm1 3597 por %xmm0,%xmm4 3598 pand 96(%rax),%xmm2 3599 por %xmm1,%xmm5 3600 pand 112(%rax),%xmm3 3601 por %xmm2,%xmm4 3602 por %xmm3,%xmm5 3603 por %xmm5,%xmm4 3604 leaq 256(%r11),%r11 3605 3606 pshufd $0x4e,%xmm4,%xmm0 3607 por %xmm4,%xmm0 3608 movq %xmm0,(%rdi) 3609 leaq 8(%rdi),%rdi 3610 subl $1,%esi 3611 jnz .Lgather 3612 3613 leaq (%r10),%rsp 3614.cfi_def_cfa_register %rsp 3615 ret 3616.LSEH_end_bn_gather5: 3617.cfi_endproc 3618.size bn_gather5,.-bn_gather5 3619.section .rodata 3620.align 64 3621.Linc: 3622.long 0,0, 1,1 3623.long 2,2, 2,2 3624.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 3625.text 3626#endif 3627