; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; NOTE(review): in this copy the unrolled multiply-accumulate steps are written
; through the three macros below. Each macro expands to the generator's
; instruction sequence in the generator's order; regenerate from the Perl
; script rather than hand-tuning the schedule.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__, win32
%ifidn __OUTPUT_FORMAT__,obj
section code    use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section .text   code align=64
%else
section .text   code
%endif

;-----------------------------------------------------------------------
; Shared building blocks.
;
; All routines keep a three-word column accumulator in a rotating triple
; of ebx / ecx / ebp, written here as (c2:c1:c0).  eax and edx carry the
; two factors into every step and receive the product from `mul`.
;-----------------------------------------------------------------------

; COMBA_MUL_STEP c0, c1, c2, next_eax, next_edx
;   In:   eax, edx = the two words to multiply
;   Does: (c2:c1:c0) += eax * edx
;   Out:  eax = dword [next_eax], edx = dword [next_edx]
;   The reloads sit between add/adc on purpose; `mov` leaves the carry
;   flag untouched, so the carry chain is not disturbed.
%macro COMBA_MUL_STEP 5
        mul     edx
        add     %1, eax
        mov     eax, dword [%4]
        adc     %2, edx
        mov     edx, dword [%5]
        adc     %3, 0
%endmacro

; COMBA_SQR_CROSS c0, c1, c2, next_eax
;   In:   eax, edx = two different words a[i], a[j]
;   Does: (c2:c1:c0) += 2 * eax * edx   (product doubled in edx:eax first,
;         the bit shifted out of edx goes straight into c2)
;   Out:  eax = dword [next_eax]; edx is NOT reloaded by this macro.
%macro COMBA_SQR_CROSS 4
        mul     edx
        add     eax, eax
        adc     edx, edx
        adc     %3, 0
        add     %1, eax
        adc     %2, edx
        mov     eax, dword [%4]
        adc     %3, 0
%endmacro

; COMBA_SQR_DIAG c0, c1, c2, next_edx
;   In:   eax = a[i]
;   Does: (c2:c1:c0) += eax * eax
;   Out:  edx = dword [next_edx]; eax is NOT reloaded by this macro.
%macro COMBA_SQR_DIAG 4
        mul     eax
        add     %1, eax
        adc     %2, edx
        mov     edx, dword [%4]
        adc     %3, 0
%endmacro

;-----------------------------------------------------------------------
; _bn_mul_comba8 -- r[0..15] = a[0..7] * b[0..7], 32-bit words.
;
; Stack arguments at entry: [esp+4] = r, [esp+8] = a, [esp+12] = b.
; (presumably the C prototype is bn_mul_comba8(r, a, b) -- confirm
; against the C header.)
;
; Registers:
;   esi = a, edi = b (both fixed for the whole routine)
;   [20+esp] = r once the four pushes are done; it is reloaded into eax
;              just before every store
;   ebx/ecx/ebp = rotating column accumulator
; esi, edi, ebp and ebx are saved on entry and restored before `ret`.
;-----------------------------------------------------------------------
global  _bn_mul_comba8
align   16
_bn_mul_comba8:
L$_bn_mul_comba8_begin:
        push    esi
        mov     esi, dword [12+esp]             ; esi = a
        push    edi
        mov     edi, dword [20+esp]             ; edi = b
        push    ebp
        push    ebx
        xor     ebx, ebx
        mov     eax, dword [esi]                ; eax = a[0]
        xor     ecx, ecx
        mov     edx, dword [edi]                ; edx = b[0]

        ; ---- word 0 ----
        xor     ebp, ebp
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esp, edi       ; a[0]*b[0]
        mov     dword [eax], ebx                        ; r[0]
        mov     eax, dword [4+esi]

        ; ---- word 1 ----
        xor     ebx, ebx
        COMBA_MUL_STEP ecx, ebp, ebx, esi, 4+edi        ; a[1]*b[0]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esp, edi       ; a[0]*b[1]
        mov     dword [4+eax], ecx                      ; r[1]
        mov     eax, dword [8+esi]

        ; ---- word 2 ----
        xor     ecx, ecx
        COMBA_MUL_STEP ebp, ebx, ecx, 4+esi, 4+edi      ; a[2]*b[0]
        COMBA_MUL_STEP ebp, ebx, ecx, esi, 8+edi        ; a[1]*b[1]
        COMBA_MUL_STEP ebp, ebx, ecx, 20+esp, edi       ; a[0]*b[2]
        mov     dword [8+eax], ebp                      ; r[2]
        mov     eax, dword [12+esi]

        ; ---- word 3 ----
        xor     ebp, ebp
        COMBA_MUL_STEP ebx, ecx, ebp, 8+esi, 4+edi      ; a[3]*b[0]
        COMBA_MUL_STEP ebx, ecx, ebp, 4+esi, 8+edi      ; a[2]*b[1]
        COMBA_MUL_STEP ebx, ecx, ebp, esi, 12+edi       ; a[1]*b[2]
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esp, edi       ; a[0]*b[3]
        mov     dword [12+eax], ebx                     ; r[3]
        mov     eax, dword [16+esi]

        ; ---- word 4 ----
        xor     ebx, ebx
        COMBA_MUL_STEP ecx, ebp, ebx, 12+esi, 4+edi     ; a[4]*b[0]
        COMBA_MUL_STEP ecx, ebp, ebx, 8+esi, 8+edi      ; a[3]*b[1]
        COMBA_MUL_STEP ecx, ebp, ebx, 4+esi, 12+edi     ; a[2]*b[2]
        COMBA_MUL_STEP ecx, ebp, ebx, esi, 16+edi       ; a[1]*b[3]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esp, edi       ; a[0]*b[4]
        mov     dword [16+eax], ecx                     ; r[4]
        mov     eax, dword [20+esi]

        ; ---- word 5 ----
        xor     ecx, ecx
        COMBA_MUL_STEP ebp, ebx, ecx, 16+esi, 4+edi     ; a[5]*b[0]
        COMBA_MUL_STEP ebp, ebx, ecx, 12+esi, 8+edi     ; a[4]*b[1]
        COMBA_MUL_STEP ebp, ebx, ecx, 8+esi, 12+edi     ; a[3]*b[2]
        COMBA_MUL_STEP ebp, ebx, ecx, 4+esi, 16+edi     ; a[2]*b[3]
        COMBA_MUL_STEP ebp, ebx, ecx, esi, 20+edi       ; a[1]*b[4]
        COMBA_MUL_STEP ebp, ebx, ecx, 20+esp, edi       ; a[0]*b[5]
        mov     dword [20+eax], ebp                     ; r[5]
        mov     eax, dword [24+esi]

        ; ---- word 6 ----
        xor     ebp, ebp
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esi, 4+edi     ; a[6]*b[0]
        COMBA_MUL_STEP ebx, ecx, ebp, 16+esi, 8+edi     ; a[5]*b[1]
        COMBA_MUL_STEP ebx, ecx, ebp, 12+esi, 12+edi    ; a[4]*b[2]
        COMBA_MUL_STEP ebx, ecx, ebp, 8+esi, 16+edi     ; a[3]*b[3]
        COMBA_MUL_STEP ebx, ecx, ebp, 4+esi, 20+edi     ; a[2]*b[4]
        COMBA_MUL_STEP ebx, ecx, ebp, esi, 24+edi       ; a[1]*b[5]
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esp, edi       ; a[0]*b[6]
        mov     dword [24+eax], ebx                     ; r[6]
        mov     eax, dword [28+esi]

        ; ---- word 7 ----
        xor     ebx, ebx
        COMBA_MUL_STEP ecx, ebp, ebx, 24+esi, 4+edi     ; a[7]*b[0]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esi, 8+edi     ; a[6]*b[1]
        COMBA_MUL_STEP ecx, ebp, ebx, 16+esi, 12+edi    ; a[5]*b[2]
        COMBA_MUL_STEP ecx, ebp, ebx, 12+esi, 16+edi    ; a[4]*b[3]
        COMBA_MUL_STEP ecx, ebp, ebx, 8+esi, 20+edi     ; a[3]*b[4]
        COMBA_MUL_STEP ecx, ebp, ebx, 4+esi, 24+edi     ; a[2]*b[5]
        COMBA_MUL_STEP ecx, ebp, ebx, esi, 28+edi       ; a[1]*b[6]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esp, 4+edi     ; a[0]*b[7]
        mov     dword [28+eax], ecx                     ; r[7]
        mov     eax, dword [28+esi]

        ; ---- word 8 ----
        xor     ecx, ecx
        COMBA_MUL_STEP ebp, ebx, ecx, 24+esi, 8+edi     ; a[7]*b[1]
        COMBA_MUL_STEP ebp, ebx, ecx, 20+esi, 12+edi    ; a[6]*b[2]
        COMBA_MUL_STEP ebp, ebx, ecx, 16+esi, 16+edi    ; a[5]*b[3]
        COMBA_MUL_STEP ebp, ebx, ecx, 12+esi, 20+edi    ; a[4]*b[4]
        COMBA_MUL_STEP ebp, ebx, ecx, 8+esi, 24+edi     ; a[3]*b[5]
        COMBA_MUL_STEP ebp, ebx, ecx, 4+esi, 28+edi     ; a[2]*b[6]
        COMBA_MUL_STEP ebp, ebx, ecx, 20+esp, 8+edi     ; a[1]*b[7]
        mov     dword [32+eax], ebp                     ; r[8]
        mov     eax, dword [28+esi]

        ; ---- word 9 ----
        xor     ebp, ebp
        COMBA_MUL_STEP ebx, ecx, ebp, 24+esi, 12+edi    ; a[7]*b[2]
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esi, 16+edi    ; a[6]*b[3]
        COMBA_MUL_STEP ebx, ecx, ebp, 16+esi, 20+edi    ; a[5]*b[4]
        COMBA_MUL_STEP ebx, ecx, ebp, 12+esi, 24+edi    ; a[4]*b[5]
        COMBA_MUL_STEP ebx, ecx, ebp, 8+esi, 28+edi     ; a[3]*b[6]
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esp, 12+edi    ; a[2]*b[7]
        mov     dword [36+eax], ebx                     ; r[9]
        mov     eax, dword [28+esi]

        ; ---- word 10 ----
        xor     ebx, ebx
        COMBA_MUL_STEP ecx, ebp, ebx, 24+esi, 16+edi    ; a[7]*b[3]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esi, 20+edi    ; a[6]*b[4]
        COMBA_MUL_STEP ecx, ebp, ebx, 16+esi, 24+edi    ; a[5]*b[5]
        COMBA_MUL_STEP ecx, ebp, ebx, 12+esi, 28+edi    ; a[4]*b[6]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esp, 16+edi    ; a[3]*b[7]
        mov     dword [40+eax], ecx                     ; r[10]
        mov     eax, dword [28+esi]

        ; ---- word 11 ----
        xor     ecx, ecx
        COMBA_MUL_STEP ebp, ebx, ecx, 24+esi, 20+edi    ; a[7]*b[4]
        COMBA_MUL_STEP ebp, ebx, ecx, 20+esi, 24+edi    ; a[6]*b[5]
        COMBA_MUL_STEP ebp, ebx, ecx, 16+esi, 28+edi    ; a[5]*b[6]
        COMBA_MUL_STEP ebp, ebx, ecx, 20+esp, 20+edi    ; a[4]*b[7]
        mov     dword [44+eax], ebp                     ; r[11]
        mov     eax, dword [28+esi]

        ; ---- word 12 ----
        xor     ebp, ebp
        COMBA_MUL_STEP ebx, ecx, ebp, 24+esi, 24+edi    ; a[7]*b[5]
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esi, 28+edi    ; a[6]*b[6]
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esp, 24+edi    ; a[5]*b[7]
        mov     dword [48+eax], ebx                     ; r[12]
        mov     eax, dword [28+esi]

        ; ---- word 13 ----
        xor     ebx, ebx
        COMBA_MUL_STEP ecx, ebp, ebx, 24+esi, 28+edi    ; a[7]*b[6]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esp, 28+edi    ; a[6]*b[7]
        mov     dword [52+eax], ecx                     ; r[13]
        mov     eax, dword [28+esi]

        ; ---- word 14 (last product, no further operand to preload) ----
        xor     ecx, ecx
        mul     edx                                     ; a[7]*b[7]
        add     ebp, eax
        mov     eax, dword [20+esp]                     ; eax = r
        adc     ebx, edx
        adc     ecx, 0
        mov     dword [56+eax], ebp                     ; r[14]
        mov     dword [60+eax], ebx                     ; r[15]

        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        ret

;-----------------------------------------------------------------------
; _bn_mul_comba4 -- r[0..7] = a[0..3] * b[0..3], 32-bit words.
;
; Same argument layout and register roles as _bn_mul_comba8:
; entry [esp+4] = r, [esp+8] = a, [esp+12] = b; esi = a, edi = b,
; ebx/ecx/ebp rotate as the column accumulator.
;-----------------------------------------------------------------------
global  _bn_mul_comba4
align   16
_bn_mul_comba4:
L$_bn_mul_comba4_begin:
        push    esi
        mov     esi, dword [12+esp]             ; esi = a
        push    edi
        mov     edi, dword [20+esp]             ; edi = b
        push    ebp
        push    ebx
        xor     ebx, ebx
        mov     eax, dword [esi]                ; eax = a[0]
        xor     ecx, ecx
        mov     edx, dword [edi]                ; edx = b[0]

        ; ---- word 0 ----
        xor     ebp, ebp
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esp, edi       ; a[0]*b[0]
        mov     dword [eax], ebx                        ; r[0]
        mov     eax, dword [4+esi]

        ; ---- word 1 ----
        xor     ebx, ebx
        COMBA_MUL_STEP ecx, ebp, ebx, esi, 4+edi        ; a[1]*b[0]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esp, edi       ; a[0]*b[1]
        mov     dword [4+eax], ecx                      ; r[1]
        mov     eax, dword [8+esi]

        ; ---- word 2 ----
        xor     ecx, ecx
        COMBA_MUL_STEP ebp, ebx, ecx, 4+esi, 4+edi      ; a[2]*b[0]
        COMBA_MUL_STEP ebp, ebx, ecx, esi, 8+edi        ; a[1]*b[1]
        COMBA_MUL_STEP ebp, ebx, ecx, 20+esp, edi       ; a[0]*b[2]
        mov     dword [8+eax], ebp                      ; r[2]
        mov     eax, dword [12+esi]

        ; ---- word 3 ----
        xor     ebp, ebp
        COMBA_MUL_STEP ebx, ecx, ebp, 8+esi, 4+edi      ; a[3]*b[0]
        COMBA_MUL_STEP ebx, ecx, ebp, 4+esi, 8+edi      ; a[2]*b[1]
        COMBA_MUL_STEP ebx, ecx, ebp, esi, 12+edi       ; a[1]*b[2]
        COMBA_MUL_STEP ebx, ecx, ebp, 20+esp, 4+edi     ; a[0]*b[3]
        mov     dword [12+eax], ebx                     ; r[3]
        mov     eax, dword [12+esi]

        ; ---- word 4 ----
        xor     ebx, ebx
        COMBA_MUL_STEP ecx, ebp, ebx, 8+esi, 8+edi      ; a[3]*b[1]
        COMBA_MUL_STEP ecx, ebp, ebx, 4+esi, 12+edi     ; a[2]*b[2]
        COMBA_MUL_STEP ecx, ebp, ebx, 20+esp, 8+edi     ; a[1]*b[3]
        mov     dword [16+eax], ecx                     ; r[4]
        mov     eax, dword [12+esi]

        ; ---- word 5 ----
        xor     ecx, ecx
        COMBA_MUL_STEP ebp, ebx, ecx, 8+esi, 12+edi     ; a[3]*b[2]
        COMBA_MUL_STEP ebp, ebx, ecx, 20+esp, 12+edi    ; a[2]*b[3]
        mov     dword [20+eax], ebp                     ; r[5]
        mov     eax, dword [12+esi]

        ; ---- word 6 (last product, no further operand to preload) ----
        xor     ebp, ebp
        mul     edx                                     ; a[3]*b[3]
        add     ebx, eax
        mov     eax, dword [20+esp]                     ; eax = r
        adc     ecx, edx
        adc     ebp, 0
        mov     dword [24+eax], ebx                     ; r[6]
        mov     dword [28+eax], ecx                     ; r[7]

        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        ret

;-----------------------------------------------------------------------
; _bn_sqr_comba8 -- r[0..15] = a[0..7] squared, 32-bit words.
;
; Stack arguments at entry: [esp+4] = r, [esp+8] = a.
; Registers: edi = r, esi = a, ebx/ecx/ebp = rotating column accumulator.
; Each off-diagonal product a[i]*a[j] (i != j) is computed once and added
; twice (COMBA_SQR_CROSS); squares a[i]*a[i] are added once
; (COMBA_SQR_DIAG).  esi, edi, ebp and ebx are saved and restored.
;-----------------------------------------------------------------------
global  _bn_sqr_comba8
align   16
_bn_sqr_comba8:
L$_bn_sqr_comba8_begin:
        push    esi
        push    edi
        push    ebp
        push    ebx
        mov     edi, dword [20+esp]             ; edi = r
        mov     esi, dword [24+esp]             ; esi = a
        xor     ebx, ebx
        xor     ecx, ecx
        mov     eax, dword [esi]                ; eax = a[0]

        ; ---- word 0 ----
        xor     ebp, ebp
        COMBA_SQR_DIAG  ebx, ecx, ebp, esi              ; a[0]*a[0]
        mov     dword [edi], ebx                        ; r[0]
        mov     eax, dword [4+esi]

        ; ---- word 1 ----
        xor     ebx, ebx
        COMBA_SQR_CROSS ecx, ebp, ebx, 8+esi            ; a[1]*a[0]
        mov     dword [4+edi], ecx                      ; r[1]
        mov     edx, dword [esi]

        ; ---- word 2 ----
        xor     ecx, ecx
        COMBA_SQR_CROSS ebp, ebx, ecx, 4+esi            ; a[2]*a[0]
        COMBA_SQR_DIAG  ebp, ebx, ecx, esi              ; a[1]*a[1]
        mov     dword [8+edi], ebp                      ; r[2]
        mov     eax, dword [12+esi]

        ; ---- word 3 ----
        xor     ebp, ebp
        COMBA_SQR_CROSS ebx, ecx, ebp, 8+esi            ; a[3]*a[0]
        mov     edx, dword [4+esi]
        COMBA_SQR_CROSS ebx, ecx, ebp, 16+esi           ; a[2]*a[1]
        mov     dword [12+edi], ebx                     ; r[3]
        mov     edx, dword [esi]

        ; ---- word 4 ----
        xor     ebx, ebx
        COMBA_SQR_CROSS ecx, ebp, ebx, 12+esi           ; a[4]*a[0]
        mov     edx, dword [4+esi]
        COMBA_SQR_CROSS ecx, ebp, ebx, 8+esi            ; a[3]*a[1]
        COMBA_SQR_DIAG  ecx, ebp, ebx, esi              ; a[2]*a[2]
        mov     dword [16+edi], ecx                     ; r[4]
        mov     eax, dword [20+esi]

        ; ---- word 5 ----
        xor     ecx, ecx
        COMBA_SQR_CROSS ebp, ebx, ecx, 16+esi           ; a[5]*a[0]
        mov     edx, dword [4+esi]
        COMBA_SQR_CROSS ebp, ebx, ecx, 12+esi           ; a[4]*a[1]
        mov     edx, dword [8+esi]
        COMBA_SQR_CROSS ebp, ebx, ecx, 24+esi           ; a[3]*a[2]
        mov     dword [20+edi], ebp                     ; r[5]
        mov     edx, dword [esi]

        ; ---- word 6 ----
        xor     ebp, ebp
        COMBA_SQR_CROSS ebx, ecx, ebp, 20+esi           ; a[6]*a[0]
        mov     edx, dword [4+esi]
        COMBA_SQR_CROSS ebx, ecx, ebp, 16+esi           ; a[5]*a[1]
        mov     edx, dword [8+esi]
        COMBA_SQR_CROSS ebx, ecx, ebp, 12+esi           ; a[4]*a[2]
        COMBA_SQR_DIAG  ebx, ecx, ebp, esi              ; a[3]*a[3]
        mov     dword [24+edi], ebx                     ; r[6]
        mov     eax, dword [28+esi]

        ; ---- word 7 ----
        xor     ebx, ebx
        COMBA_SQR_CROSS ecx, ebp, ebx, 24+esi           ; a[7]*a[0]
        mov     edx, dword [4+esi]
        COMBA_SQR_CROSS ecx, ebp, ebx, 20+esi           ; a[6]*a[1]
        mov     edx, dword [8+esi]
        COMBA_SQR_CROSS ecx, ebp, ebx, 16+esi           ; a[5]*a[2]
        mov     edx, dword [12+esi]
        COMBA_SQR_CROSS ecx, ebp, ebx, 28+esi           ; a[4]*a[3]
        mov     dword [28+edi], ecx                     ; r[7]
        mov     edx, dword [4+esi]

        ; ---- word 8 ----
        xor     ecx, ecx
        COMBA_SQR_CROSS ebp, ebx, ecx, 24+esi           ; a[7]*a[1]
        mov     edx, dword [8+esi]
        COMBA_SQR_CROSS ebp, ebx, ecx, 20+esi           ; a[6]*a[2]
        mov     edx, dword [12+esi]
        COMBA_SQR_CROSS ebp, ebx, ecx, 16+esi           ; a[5]*a[3]
        COMBA_SQR_DIAG  ebp, ebx, ecx, 8+esi            ; a[4]*a[4]
        mov     dword [32+edi], ebp                     ; r[8]
        mov     eax, dword [28+esi]

        ; ---- word 9 ----
        xor     ebp, ebp
        COMBA_SQR_CROSS ebx, ecx, ebp, 24+esi           ; a[7]*a[2]
        mov     edx, dword [12+esi]
        COMBA_SQR_CROSS ebx, ecx, ebp, 20+esi           ; a[6]*a[3]
        mov     edx, dword [16+esi]
        COMBA_SQR_CROSS ebx, ecx, ebp, 28+esi           ; a[5]*a[4]
        mov     dword [36+edi], ebx                     ; r[9]
        mov     edx, dword [12+esi]

        ; ---- word 10 ----
        xor     ebx, ebx
        COMBA_SQR_CROSS ecx, ebp, ebx, 24+esi           ; a[7]*a[3]
        mov     edx, dword [16+esi]
        COMBA_SQR_CROSS ecx, ebp, ebx, 20+esi           ; a[6]*a[4]
        COMBA_SQR_DIAG  ecx, ebp, ebx, 16+esi           ; a[5]*a[5]
        mov     dword [40+edi], ecx                     ; r[10]
        mov     eax, dword [28+esi]

        ; ---- word 11 ----
        xor     ecx, ecx
        COMBA_SQR_CROSS ebp, ebx, ecx, 24+esi           ; a[7]*a[4]
        mov     edx, dword [20+esi]
        COMBA_SQR_CROSS ebp, ebx, ecx, 28+esi           ; a[6]*a[5]
        mov     dword [44+edi], ebp                     ; r[11]
        mov     edx, dword [20+esi]

        ; ---- word 12 ----
        xor     ebp, ebp
        COMBA_SQR_CROSS ebx, ecx, ebp, 24+esi           ; a[7]*a[5]
        COMBA_SQR_DIAG  ebx, ecx, ebp, 24+esi           ; a[6]*a[6]
        mov     dword [48+edi], ebx                     ; r[12]
        mov     eax, dword [28+esi]

        ; ---- word 13 ----
        xor     ebx, ebx
        COMBA_SQR_CROSS ecx, ebp, ebx, 28+esi           ; a[7]*a[6]
        mov     dword [52+edi], ecx                     ; r[13]

        ; ---- word 14 (last square, nothing left to preload) ----
        xor     ecx, ecx
        mul     eax                                     ; a[7]*a[7]
        add     ebp, eax
        adc     ebx, edx
        adc     ecx, 0
        mov     dword [56+edi], ebp                     ; r[14]
        mov     dword [60+edi], ebx                     ; r[15]

        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        ret
;-----------------------------------------------------------------------
; Squaring step helpers.  Defined only if no macro of the same name and
; arity is visible at this point, so the definitions can safely coexist
; with identical ones elsewhere in the file.
;
; (c2:c1:c0) is a three-word column accumulator held in a rotating
; triple of ebx / ecx / ebp.
;-----------------------------------------------------------------------

; COMBA_SQR_CROSS c0, c1, c2, next_eax
;   In:   eax, edx = two different words a[i], a[j]
;   Does: (c2:c1:c0) += 2 * eax * edx   (product doubled in edx:eax first,
;         the bit shifted out of edx goes straight into c2)
;   Out:  eax = dword [next_eax]; edx is NOT reloaded by this macro.
%ifnmacro COMBA_SQR_CROSS 4
%macro COMBA_SQR_CROSS 4
        mul     edx
        add     eax, eax
        adc     edx, edx
        adc     %3, 0
        add     %1, eax
        adc     %2, edx
        mov     eax, dword [%4]
        adc     %3, 0
%endmacro
%endif

; COMBA_SQR_DIAG c0, c1, c2, next_edx
;   In:   eax = a[i]
;   Does: (c2:c1:c0) += eax * eax
;   Out:  edx = dword [next_edx]; eax is NOT reloaded by this macro.
;   The reload sits inside the add/adc chain; `mov` leaves the carry
;   flag untouched.
%ifnmacro COMBA_SQR_DIAG 4
%macro COMBA_SQR_DIAG 4
        mul     eax
        add     %1, eax
        adc     %2, edx
        mov     edx, dword [%4]
        adc     %3, 0
%endmacro
%endif

;-----------------------------------------------------------------------
; _bn_sqr_comba4 -- r[0..7] = a[0..3] squared, 32-bit words.
;
; Stack arguments at entry: [esp+4] = r, [esp+8] = a.
; Registers: edi = r, esi = a, ebx/ecx/ebp = rotating column accumulator.
; Off-diagonal products are computed once and added twice; squares are
; added once.  esi, edi, ebp and ebx are saved on entry and restored
; before `ret`.
;-----------------------------------------------------------------------
global  _bn_sqr_comba4
align   16
_bn_sqr_comba4:
L$_bn_sqr_comba4_begin:
        push    esi
        push    edi
        push    ebp
        push    ebx
        mov     edi, dword [20+esp]             ; edi = r
        mov     esi, dword [24+esp]             ; esi = a
        xor     ebx, ebx
        xor     ecx, ecx
        mov     eax, dword [esi]                ; eax = a[0]

        ; ---- word 0 ----
        xor     ebp, ebp
        COMBA_SQR_DIAG  ebx, ecx, ebp, esi              ; a[0]*a[0]
        mov     dword [edi], ebx                        ; r[0]
        mov     eax, dword [4+esi]

        ; ---- word 1 ----
        xor     ebx, ebx
        COMBA_SQR_CROSS ecx, ebp, ebx, 8+esi            ; a[1]*a[0]
        mov     dword [4+edi], ecx                      ; r[1]
        mov     edx, dword [esi]

        ; ---- word 2 ----
        xor     ecx, ecx
        COMBA_SQR_CROSS ebp, ebx, ecx, 4+esi            ; a[2]*a[0]
        COMBA_SQR_DIAG  ebp, ebx, ecx, esi              ; a[1]*a[1]
        mov     dword [8+edi], ebp                      ; r[2]
        mov     eax, dword [12+esi]

        ; ---- word 3 ----
        xor     ebp, ebp
        COMBA_SQR_CROSS ebx, ecx, ebp, 8+esi            ; a[3]*a[0]
        mov     edx, dword [4+esi]
        COMBA_SQR_CROSS ebx, ecx, ebp, 12+esi           ; a[2]*a[1]
        mov     dword [12+edi], ebx                     ; r[3]
        mov     edx, dword [4+esi]

        ; ---- word 4 ----
        xor     ebx, ebx
        COMBA_SQR_CROSS ecx, ebp, ebx, 8+esi            ; a[3]*a[1]
        COMBA_SQR_DIAG  ecx, ebp, ebx, 8+esi            ; a[2]*a[2]
        mov     dword [16+edi], ecx                     ; r[4]
        mov     eax, dword [12+esi]

        ; ---- word 5 ----
        xor     ecx, ecx
        COMBA_SQR_CROSS ebp, ebx, ecx, 12+esi           ; a[3]*a[2]
        mov     dword [20+edi], ebp                     ; r[5]

        ; ---- word 6 (last square, nothing left to preload) ----
        xor     ebp, ebp
        mul     eax                                     ; a[3]*a[3]
        add     ebx, eax
        adc     ecx, edx
        adc     ebp, 0
        mov     dword [24+edi], ebx                     ; r[6]
        mov     dword [28+edi], ecx                     ; r[7]

        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        ret
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif