// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// Contents: AES routines built on byte shuffles (pshufb/palignr) with 16-byte
// lookup tables held in XMM registers; the embedded identification string at
// the end of the constant area reads "Vector Permutation AES for
// x86_64/SSSE3, Mike Hamburg (Stanford University)".
//
// Syntax is AT&T (GNU as) and the file is passed through the C preprocessor.
// Every pshufb/palignr with register operands is emitted as a raw .byte
// sequence; the decoded mnemonic is given in a trailing comment on each line.
// Register arguments follow the System V AMD64 order as used by the exported
// entry points below (%rdi, %rsi, %rdx, %rcx, %r8).

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text

// _vpaes_encrypt_core: transforms the single 16-byte block held in %xmm0.
//
// In:   %xmm0        = input block
//       %rdx         = key schedule; round keys are read with movdqu starting
//                      at offset 0, and the loop count is read from 240(%rdx)
//       %xmm9-%xmm15 = constants as loaded by _vpaes_preheat
// Out:  %xmm0        = result
// Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
// Touches no stack memory apart from the return address.
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
.cfi_startproc
	movq %rdx,%r9	// r9 walks the round keys
	movq $16,%r11	// r11 = byte offset into the shuffle tables, kept in {0,16,32,48}
	movl 240(%rdx),%eax	// loop count; the 32-bit load zero-extends into rax
	// Split each input byte into nibbles: xmm0 = low nibbles, xmm1 = high
	// nibbles shifted down. xmm9 is the 0x0F byte mask from _vpaes_preheat.
	movdqa %xmm9,%xmm1
	movdqa .Lk_ipt(%rip),%xmm2
	pandn %xmm0,%xmm1
	movdqu (%r9),%xmm5	// first round key (unaligned load)
	psrld $4,%xmm1
	pand %xmm9,%xmm0
	// Look up .Lk_ipt by low nibble and .Lk_ipt+16 by high nibble, then fold
	// in the first round key.
.byte 102,15,56,0,208	// pshufb %xmm0,%xmm2
	movdqa .Lk_ipt+16(%rip),%xmm0
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
	pxor %xmm5,%xmm2
	// This addq is the last flag-writing instruction before the jnz at the end
	// of .Lenc_entry, so the first pass through that jnz is taken (ZF is clear
	// unless the pointer wrapped to zero).
	addq $16,%r9
	pxor %xmm2,%xmm0
	leaq .Lk_mc_backward(%rip),%r10
	jmp .Lenc_entry

.align 16
.Lenc_loop:
	// Loop body: runs once per count loaded into %eax. On entry xmm2 and xmm3
	// hold the two index vectors produced by .Lenc_entry and xmm5 holds the
	// round key for this iteration.
	// xmm13/xmm12 = .Lk_sb1 / .Lk_sb1+16, xmm15/xmm14 = .Lk_sb2 / .Lk_sb2+16
	// (see _vpaes_preheat).
	movdqa %xmm13,%xmm4
	movdqa %xmm12,%xmm0
.byte 102,15,56,0,226	// pshufb %xmm2,%xmm4
.byte 102,15,56,0,195	// pshufb %xmm3,%xmm0
	pxor %xmm5,%xmm4
	movdqa %xmm15,%xmm5
	pxor %xmm4,%xmm0
	// r10 = .Lk_mc_backward, so -64(%r11,%r10) addresses .Lk_mc_forward + r11
	// (.Lk_mc_forward is the 64 bytes immediately before .Lk_mc_backward).
	movdqa -64(%r11,%r10,1),%xmm1
.byte 102,15,56,0,234	// pshufb %xmm2,%xmm5
	movdqa (%r11,%r10,1),%xmm4	// .Lk_mc_backward + r11
	movdqa %xmm14,%xmm2
.byte 102,15,56,0,211	// pshufb %xmm3,%xmm2
	movdqa %xmm0,%xmm3
	pxor %xmm5,%xmm2
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
	addq $16,%r9	// advance to the next round key
	pxor %xmm2,%xmm0
.byte 102,15,56,0,220	// pshufb %xmm4,%xmm3
	addq $16,%r11
	pxor %xmm0,%xmm3
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
	andq $0x30,%r11	// wrap the table offset to 0/16/32/48
	// Sets ZF for the jnz below. Every instruction between here and that jnz
	// is an SSE move/logic/shuffle, none of which modify the flags.
	subq $1,%rax
	pxor %xmm3,%xmm0

.Lenc_entry:
	// Shared front half of every iteration: split xmm0 into nibbles and run
	// the lookups through xmm11 (.Lk_inv+16) and xmm10 (.Lk_inv), leaving the
	// two index vectors in xmm2 and xmm3.
	movdqa %xmm9,%xmm1
	movdqa %xmm11,%xmm5
	pandn %xmm0,%xmm1
	psrld $4,%xmm1
	pand %xmm9,%xmm0
.byte 102,15,56,0,232	// pshufb %xmm0,%xmm5
	movdqa %xmm10,%xmm3
	pxor %xmm1,%xmm0
.byte 102,15,56,0,217	// pshufb %xmm1,%xmm3
	movdqa %xmm10,%xmm4
	pxor %xmm5,%xmm3
.byte 102,15,56,0,224	// pshufb %xmm0,%xmm4
	movdqa %xmm10,%xmm2
	pxor %xmm5,%xmm4
.byte 102,15,56,0,211	// pshufb %xmm3,%xmm2
	movdqa %xmm10,%xmm3
	pxor %xmm0,%xmm2
.byte 102,15,56,0,220	// pshufb %xmm4,%xmm3
	movdqu (%r9),%xmm5	// round key for the next step
	pxor %xmm1,%xmm3
	jnz .Lenc_loop

	// Final step. -96(%r10) and -80(%r10) address .Lk_sbo and .Lk_sbo+16
	// (.Lk_sbo is the 32 bytes immediately before .Lk_mc_forward).
	movdqa -96(%r10),%xmm4
	movdqa -80(%r10),%xmm0
.byte 102,15,56,0,226	// pshufb %xmm2,%xmm4
	pxor %xmm5,%xmm4
.byte 102,15,56,0,195	// pshufb %xmm3,%xmm0
	// 64(%r11,%r10) addresses .Lk_sr + r11 (.Lk_sr follows the 64-byte
	// .Lk_mc_backward table).
	movdqa 64(%r11,%r10,1),%xmm1
	pxor %xmm4,%xmm0
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
	ret
.cfi_endproc
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core

// _vpaes_encrypt_core_2x: same instruction sequence as _vpaes_encrypt_core,
// applied to two blocks at once with the two instruction streams interleaved.
//
// In:   %xmm0, %xmm6 = the two input blocks
//       %rdx         = key schedule (same layout as _vpaes_encrypt_core)
//       %xmm9, %xmm10 = .Lk_s0F and .Lk_inv as loaded by _vpaes_preheat
// Out:  %xmm0, %xmm6 = the two results
// Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, %xmm7, %xmm8, %xmm11-%xmm13,
//           flags
// %xmm14 and %xmm15 are neither read nor written here. Because xmm11-xmm13
// are used as scratch for the second block, the tables that
// _vpaes_encrypt_core takes from xmm11-xmm15 are loaded from memory instead.
// Register pairing (first block / second block): xmm0/xmm6, xmm1/xmm7,
// xmm2/xmm8, xmm3/xmm11, xmm4/xmm12, xmm5/xmm13.
.type _vpaes_encrypt_core_2x,@function
.align 16
_vpaes_encrypt_core_2x:
.cfi_startproc
	movq %rdx,%r9
	movq $16,%r11
	movl 240(%rdx),%eax
	movdqa %xmm9,%xmm1
	movdqa %xmm9,%xmm7
	movdqa .Lk_ipt(%rip),%xmm2
	movdqa %xmm2,%xmm8
	pandn %xmm0,%xmm1
	pandn %xmm6,%xmm7
	// The round key in xmm5 is shared by both blocks.
	movdqu (%r9),%xmm5

	psrld $4,%xmm1
	psrld $4,%xmm7
	pand %xmm9,%xmm0
	pand %xmm9,%xmm6
.byte 102,15,56,0,208	// pshufb %xmm0,%xmm2
.byte 102,68,15,56,0,198	// pshufb %xmm6,%xmm8
	movdqa .Lk_ipt+16(%rip),%xmm0
	movdqa %xmm0,%xmm6
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
.byte 102,15,56,0,247	// pshufb %xmm7,%xmm6
	pxor %xmm5,%xmm2
	pxor %xmm5,%xmm8
	// Last flag-writing instruction before the first jnz at the end of
	// .Lenc2x_entry (same arrangement as in _vpaes_encrypt_core).
	addq $16,%r9
	pxor %xmm2,%xmm0
	pxor %xmm8,%xmm6
	leaq .Lk_mc_backward(%rip),%r10
	jmp .Lenc2x_entry

.align 16
.Lenc2x_loop:
	// Loop body, two blocks wide. .Lk_sb1 and .Lk_sb2 come from memory here.
	movdqa .Lk_sb1(%rip),%xmm4
	movdqa .Lk_sb1+16(%rip),%xmm0
	movdqa %xmm4,%xmm12
	movdqa %xmm0,%xmm6
.byte 102,15,56,0,226	// pshufb %xmm2,%xmm4
.byte 102,69,15,56,0,224	// pshufb %xmm8,%xmm12
.byte 102,15,56,0,195	// pshufb %xmm3,%xmm0
.byte 102,65,15,56,0,243	// pshufb %xmm11,%xmm6
	pxor %xmm5,%xmm4
	pxor %xmm5,%xmm12
	movdqa .Lk_sb2(%rip),%xmm5
	movdqa %xmm5,%xmm13
	pxor %xmm4,%xmm0
	pxor %xmm12,%xmm6
	// .Lk_mc_forward + r11; xmm1 is shared by both blocks.
	movdqa -64(%r11,%r10,1),%xmm1

.byte 102,15,56,0,234	// pshufb %xmm2,%xmm5
.byte 102,69,15,56,0,232	// pshufb %xmm8,%xmm13
	// .Lk_mc_backward + r11; xmm4 is shared by both blocks.
	movdqa (%r11,%r10,1),%xmm4

	movdqa .Lk_sb2+16(%rip),%xmm2
	movdqa %xmm2,%xmm8
.byte 102,15,56,0,211	// pshufb %xmm3,%xmm2
.byte 102,69,15,56,0,195	// pshufb %xmm11,%xmm8
	movdqa %xmm0,%xmm3
	movdqa %xmm6,%xmm11
	pxor %xmm5,%xmm2
	pxor %xmm13,%xmm8
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
.byte 102,15,56,0,241	// pshufb %xmm1,%xmm6
	addq $16,%r9	// advance to the next round key
	pxor %xmm2,%xmm0
	pxor %xmm8,%xmm6
.byte 102,15,56,0,220	// pshufb %xmm4,%xmm3
.byte 102,68,15,56,0,220	// pshufb %xmm4,%xmm11
	addq $16,%r11
	pxor %xmm0,%xmm3
	pxor %xmm6,%xmm11
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
.byte 102,15,56,0,241	// pshufb %xmm1,%xmm6
	andq $0x30,%r11	// wrap the table offset to 0/16/32/48
	// Sets ZF for the jnz at the end of .Lenc2x_entry; only SSE instructions
	// execute in between, and they leave the flags alone.
	subq $1,%rax
	pxor %xmm3,%xmm0
	pxor %xmm11,%xmm6

.Lenc2x_entry:
	// Front half of every iteration, two blocks wide. .Lk_inv+16 is loaded
	// from memory (the single-block routine takes it from xmm11).
	movdqa %xmm9,%xmm1
	movdqa %xmm9,%xmm7
	movdqa .Lk_inv+16(%rip),%xmm5
	movdqa %xmm5,%xmm13
	pandn %xmm0,%xmm1
	pandn %xmm6,%xmm7
	psrld $4,%xmm1
	psrld $4,%xmm7
	pand %xmm9,%xmm0
	pand %xmm9,%xmm6
.byte 102,15,56,0,232	// pshufb %xmm0,%xmm5
.byte 102,68,15,56,0,238	// pshufb %xmm6,%xmm13
	movdqa %xmm10,%xmm3
	movdqa %xmm10,%xmm11
	pxor %xmm1,%xmm0
	pxor %xmm7,%xmm6
.byte 102,15,56,0,217	// pshufb %xmm1,%xmm3
.byte 102,68,15,56,0,223	// pshufb %xmm7,%xmm11
	movdqa %xmm10,%xmm4
	movdqa %xmm10,%xmm12
	pxor %xmm5,%xmm3
	pxor %xmm13,%xmm11
.byte 102,15,56,0,224	// pshufb %xmm0,%xmm4
.byte 102,68,15,56,0,230	// pshufb %xmm6,%xmm12
	movdqa %xmm10,%xmm2
	movdqa %xmm10,%xmm8
	pxor %xmm5,%xmm4
	pxor %xmm13,%xmm12
.byte 102,15,56,0,211	// pshufb %xmm3,%xmm2
.byte 102,69,15,56,0,195	// pshufb %xmm11,%xmm8
	movdqa %xmm10,%xmm3
	movdqa %xmm10,%xmm11
	pxor %xmm0,%xmm2
	pxor %xmm6,%xmm8
.byte 102,15,56,0,220	// pshufb %xmm4,%xmm3
.byte 102,69,15,56,0,220	// pshufb %xmm12,%xmm11
	// Round key for the next step, shared by both blocks.
	movdqu (%r9),%xmm5

	pxor %xmm1,%xmm3
	pxor %xmm7,%xmm11
	jnz .Lenc2x_loop

	// Final step: .Lk_sbo / .Lk_sbo+16 via -96(%r10) / -80(%r10).
	movdqa -96(%r10),%xmm4
	movdqa -80(%r10),%xmm0
	movdqa %xmm4,%xmm12
	movdqa %xmm0,%xmm6
.byte 102,15,56,0,226	// pshufb %xmm2,%xmm4
.byte 102,69,15,56,0,224	// pshufb %xmm8,%xmm12
	pxor %xmm5,%xmm4
	pxor %xmm5,%xmm12
.byte 102,15,56,0,195	// pshufb %xmm3,%xmm0
.byte 102,65,15,56,0,243	// pshufb %xmm11,%xmm6
	// .Lk_sr + r11; xmm1 is shared by both blocks.
	movdqa 64(%r11,%r10,1),%xmm1

	pxor %xmm4,%xmm0
	pxor %xmm12,%xmm6
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
.byte 102,15,56,0,241	// pshufb %xmm1,%xmm6
	ret
.cfi_endproc
.size _vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x

// _vpaes_schedule_core: expands a user key into the round-key array.
//
// In:   %rdi = user key; 16 bytes are read at (%rdi), and another 16 at
//              16(%rdi) on the .Lschedule_256 path
//       %esi = key size; compared against 192 to pick the path
//       %rdx = output buffer, written 16 bytes at a time with movdqu
//       %r8  = starting byte offset into .Lk_sr (the visible caller passes
//              0x30)
// Out:  round keys stored at (%rdx), 16(%rdx), ...
// Clobbers: %rsi, %rdx, %r8, %r10, %r11, %xmm0-%xmm15, flags
// %xmm0-%xmm7 are zeroed before returning.
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
.cfi_startproc
	call _vpaes_preheat
	movdqa .Lk_rcon(%rip),%xmm8
	movdqu (%rdi),%xmm0

	// NOTE(review): this copy in xmm3 is not read by any code visible in this
	// file before xmm3 is overwritten in _vpaes_schedule_mangle.
	movdqa %xmm0,%xmm3
	leaq .Lk_ipt(%rip),%r11
	call _vpaes_schedule_transform
	movdqa %xmm0,%xmm7

	// Replaces the r10 value left by _vpaes_preheat; the mangle steps index
	// .Lk_sr through (%r8,%r10).
	leaq .Lk_sr(%rip),%r10

	// The transformed key is stored as the first round key.
	movdqu %xmm0,(%rdx)

	// No branch in this file targets .Lschedule_go.
.Lschedule_go:
	// Unsigned compare: any size above 192 takes the 256-bit path, everything
	// else (including 192 itself) falls through to .Lschedule_128. There is no
	// separate 192-bit schedule in this file.
	cmpl $192,%esi
	ja .Lschedule_256

.Lschedule_128:
	movl $10,%esi	// esi is reused as the round counter

.Loop_schedule_128:
	call _vpaes_schedule_round
	decq %rsi
	jz .Lschedule_mangle_last	// the tenth result is written by the tail code
	call _vpaes_schedule_mangle
	jmp .Loop_schedule_128

.align 16
.Lschedule_256:
	// Second half of the user key, transformed with the table r11 still
	// points at (.Lk_ipt).
	movdqu 16(%rdi),%xmm0
	call _vpaes_schedule_transform
	movl $7,%esi	// esi is reused as the loop counter

.Loop_schedule_256:
	call _vpaes_schedule_mangle
	movdqa %xmm0,%xmm6	// keep the value just written for the low round below

	call _vpaes_schedule_round
	decq %rsi
	jz .Lschedule_mangle_last
	call _vpaes_schedule_mangle

	// Low round: broadcast the top dword of xmm0, run
	// _vpaes_schedule_low_round with xmm7 temporarily replaced by the value
	// saved in xmm6, then restore xmm7 from xmm5.
	pshufd $0xFF,%xmm0,%xmm0
	movdqa %xmm7,%xmm5
	movdqa %xmm6,%xmm7
	call _vpaes_schedule_low_round
	movdqa %xmm5,%xmm7

	jmp .Loop_schedule_256

.align 16
.Lschedule_mangle_last:
	// NOTE(review): r11 is reloaded with .Lk_opt three instructions below
	// without being read in between, so this .Lk_deskew address is not used by
	// the code in this file.
	leaq .Lk_deskew(%rip),%r11

	movdqa (%r8,%r10,1),%xmm1	// .Lk_sr + r8
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
	leaq .Lk_opt(%rip),%r11
	addq $32,%rdx

	// No branch in this file targets .Lschedule_mangle_last_dec.
.Lschedule_mangle_last_dec:
	// Together with the addq $32 above this advances rdx by a net 16 bytes,
	// i.e. to the slot after the last round key written so far.
	addq $-16,%rdx
	pxor .Lk_s63(%rip),%xmm0
	call _vpaes_schedule_transform	// uses the .Lk_opt tables via r11
	movdqu %xmm0,(%rdx)

	// Clear the registers that held key-derived values.
	pxor %xmm0,%xmm0
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
	pxor %xmm7,%xmm7
	ret
.cfi_endproc
.size _vpaes_schedule_core,.-_vpaes_schedule_core

// _vpaes_schedule_round: one key-schedule step.
//
// In:   %xmm0 = input to the table-lookup stage
//       %xmm7 = running key state
//       %xmm8 = round-constant vector (loaded from .Lk_rcon by the caller)
//       %xmm9-%xmm13 = constants from _vpaes_preheat
// Out:  %xmm0 = %xmm7 = new key state; %xmm8 rotated by one byte
// Clobbers: %xmm1-%xmm4
//
// _vpaes_schedule_low_round is a second entry point into the same body that
// skips the round-constant and rotate steps.
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
.cfi_startproc
	// With xmm1 zeroed, the first palignr leaves byte 15 of xmm8 in byte 0 of
	// xmm1 (all other bytes zero); the second rotates xmm8 so the following
	// byte is on top for the next call. The extracted byte is xored into xmm7.
	pxor %xmm1,%xmm1
.byte 102,65,15,58,15,200,15	// palignr $15,%xmm8,%xmm1
.byte 102,69,15,58,15,192,15	// palignr $15,%xmm8,%xmm8
	pxor %xmm1,%xmm7

	// Broadcast the top dword of xmm0 to all four lanes, then rotate the
	// register by one byte.
	pshufd $0xFF,%xmm0,%xmm0
.byte 102,15,58,15,192,1	// palignr $1,%xmm0,%xmm0

	// Falls through into the low round.
_vpaes_schedule_low_round:
	// xmm7 ^= xmm7 << 4 bytes, then xmm7 ^= xmm7 << 8 bytes: afterwards each
	// dword holds the xor of itself and every lower dword. Then xor in
	// .Lk_s63.
	movdqa %xmm7,%xmm1
	pslldq $4,%xmm7
	pxor %xmm1,%xmm7
	movdqa %xmm7,%xmm1
	pslldq $8,%xmm7
	pxor %xmm1,%xmm7
	pxor .Lk_s63(%rip),%xmm7

	// Same nibble-split and xmm10/xmm11 lookup sequence as .Lenc_entry,
	// followed by lookups in xmm13/xmm12 (.Lk_sb1 / .Lk_sb1+16).
	movdqa %xmm9,%xmm1
	pandn %xmm0,%xmm1
	psrld $4,%xmm1
	pand %xmm9,%xmm0
	movdqa %xmm11,%xmm2
.byte 102,15,56,0,208	// pshufb %xmm0,%xmm2
	pxor %xmm1,%xmm0
	movdqa %xmm10,%xmm3
.byte 102,15,56,0,217	// pshufb %xmm1,%xmm3
	pxor %xmm2,%xmm3
	movdqa %xmm10,%xmm4
.byte 102,15,56,0,224	// pshufb %xmm0,%xmm4
	pxor %xmm2,%xmm4
	movdqa %xmm10,%xmm2
.byte 102,15,56,0,211	// pshufb %xmm3,%xmm2
	pxor %xmm0,%xmm2
	movdqa %xmm10,%xmm3
.byte 102,15,56,0,220	// pshufb %xmm4,%xmm3
	pxor %xmm1,%xmm3
	movdqa %xmm13,%xmm4
.byte 102,15,56,0,226	// pshufb %xmm2,%xmm4
	movdqa %xmm12,%xmm0
.byte 102,15,56,0,195	// pshufb %xmm3,%xmm0
	pxor %xmm4,%xmm0

	// Combine with the smeared state; the result becomes the new state.
	pxor %xmm7,%xmm0
	movdqa %xmm0,%xmm7
	ret
.cfi_endproc
.size _vpaes_schedule_round,.-_vpaes_schedule_round

// _vpaes_schedule_transform: per-byte table transform of %xmm0.
//
// In:   %xmm0 = input
//       %r11  = address of two consecutive 16-byte tables (read with movdqa,
//               so it must be 16-byte aligned)
//       %xmm9 = 0x0F byte mask
// Out:  %xmm0 = table0[low nibble] ^ table1[high nibble], byte by byte
// Clobbers: %xmm1, %xmm2
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
.cfi_startproc
	movdqa %xmm9,%xmm1
	pandn %xmm0,%xmm1
	// The low nibble of every byte is already cleared, so the dword-wide
	// shift cannot carry bits from one byte into its neighbour.
	psrld $4,%xmm1
	pand %xmm9,%xmm0
	movdqa (%r11),%xmm2
.byte 102,15,56,0,208	// pshufb %xmm0,%xmm2
	movdqa 16(%r11),%xmm0
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
	pxor %xmm2,%xmm0
	ret
.cfi_endproc
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform

// _vpaes_schedule_mangle: converts the key state in %xmm0 to its stored form
// and writes it to the next 16-byte output slot.
//
// In:   %xmm0 = key state (left unmodified; a copy is worked on)
//       %rdx  = address of the previously written slot
//       %r8   = current byte offset into .Lk_sr
//       %r10  = address of .Lk_sr
// Out:  %rdx advanced by 16 and 16 bytes stored there;
//       %r8 = (%r8 - 16) & 0x30
// Clobbers: %xmm1, %xmm3, %xmm4, %xmm5, flags
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
.cfi_startproc
	movdqa %xmm0,%xmm4
	movdqa .Lk_mc_forward(%rip),%xmm5

	addq $16,%rdx
	pxor .Lk_s63(%rip),%xmm4
	// xmm3 = xor of xmm4 shuffled once, twice and three times by the first
	// .Lk_mc_forward mask.
.byte 102,15,56,0,229	// pshufb %xmm5,%xmm4
	movdqa %xmm4,%xmm3
.byte 102,15,56,0,229	// pshufb %xmm5,%xmm4
	pxor %xmm4,%xmm3
.byte 102,15,56,0,229	// pshufb %xmm5,%xmm4
	pxor %xmm4,%xmm3

	// No branch in this file targets .Lschedule_mangle_both.
.Lschedule_mangle_both:
	movdqa (%r8,%r10,1),%xmm1	// .Lk_sr + r8
.byte 102,15,56,0,217	// pshufb %xmm1,%xmm3
	// Step to the previous .Lk_sr entry, wrapping within the four entries.
	addq $-16,%r8
	andq $0x30,%r8
	movdqu %xmm3,(%rdx)
	ret
.cfi_endproc
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle

// vpaes_set_encrypt_key: exported entry point for key expansion.
//
// In:   %rdi = user key bytes
//       %esi = key size in bits (the 128 and 256 paths are the ones
//              distinguished by _vpaes_schedule_core)
//       %rdx = key structure: round keys are written from offset 0 and a
//              32-bit count is stored at offset 240
// Out:  %eax = 0, always
// The value stored at 240(%rdx) is (%esi >> 5) + 5, i.e. 9 for 128 and 13 for
// 256; _vpaes_encrypt_core reads it back as its loop count.
.globl vpaes_set_encrypt_key
.hidden vpaes_set_encrypt_key
.type vpaes_set_encrypt_key,@function
.align 16
vpaes_set_encrypt_key:
.cfi_startproc
// _CET_ENDBR is not defined in this file; presumably it comes from
// asm_base.h and expands to a CET landing pad - confirm there.
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
	// Test-only marker: sets byte 5 of BORINGSSL_function_hit.
	movb $1,BORINGSSL_function_hit+5(%rip)
#endif

	movl %esi,%eax
	shrl $5,%eax
	addl $5,%eax
	movl %eax,240(%rdx)

	// NOTE(review): %ecx is not read by _vpaes_schedule_core as it appears in
	// this file.
	movl $0,%ecx
	movl $0x30,%r8d	// initial .Lk_sr offset for the schedule
	call _vpaes_schedule_core
	xorl %eax,%eax
	ret
.cfi_endproc
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key

// vpaes_encrypt: exported single-block entry point.
//
// In:   %rdi = 16-byte input (unaligned access is used)
//       %rsi = 16-byte output (unaligned access is used)
//       %rdx = key structure produced by vpaes_set_encrypt_key
// Clobbers everything _vpaes_preheat and _vpaes_encrypt_core clobber,
// including %xmm9-%xmm15.
.globl vpaes_encrypt
.hidden vpaes_encrypt
.type vpaes_encrypt,@function
.align 16
vpaes_encrypt:
.cfi_startproc
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
	// Test-only marker: sets byte 4 of BORINGSSL_function_hit.
	movb $1,BORINGSSL_function_hit+4(%rip)
#endif
	movdqu (%rdi),%xmm0
	call _vpaes_preheat
	call _vpaes_encrypt_core
	movdqu %xmm0,(%rsi)
	ret
.cfi_endproc
.size vpaes_encrypt,.-vpaes_encrypt

// vpaes_ctr32_encrypt_blocks: exported counter-mode entry point.
//
// In:   %rdi = input
//       %rsi = output
//       %rdx = number of 16-byte blocks; zero returns immediately
//       %rcx = key structure produced by vpaes_set_encrypt_key
//       %r8  = 16-byte initial counter block (unaligned access is used)
// Each counter block is run through the core and xored with the input. Only
// the last four bytes of the counter block are incremented, as a big-endian
// 32-bit value that wraps without carrying into the other twelve bytes. The
// counter block at (%r8) is not written back.
// Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r9, %r10, %r11, %xmm0-%xmm15,
//           flags
.globl vpaes_ctr32_encrypt_blocks
.hidden vpaes_ctr32_encrypt_blocks
.type vpaes_ctr32_encrypt_blocks,@function
.align 16
vpaes_ctr32_encrypt_blocks:
.cfi_startproc
_CET_ENDBR

	// The core routines take the key in %rdx, so swap it with the block
	// count: from here on %rcx = remaining blocks and %rdx = key.
	xchgq %rcx,%rdx
	testq %rcx,%rcx
	jz .Lctr32_abort
	movdqu (%r8),%xmm0
	movdqa .Lctr_add_one(%rip),%xmm8
	// rsi becomes (output - input), so (%rsi,%rdi) addresses the output and
	// only rdi needs to advance.
	subq %rdi,%rsi
	call _vpaes_preheat
	// xmm6 = counter block with its last four bytes reversed, so that paddd
	// on the top dword increments the big-endian counter.
	movdqa %xmm0,%xmm6
	pshufb .Lrev_ctr(%rip),%xmm6

	testq $1,%rcx
	jz .Lctr32_prep_loop

	// Odd block count: handle one block with the single-block core so that
	// the two-block loop below sees an even count. xmm0 still holds the
	// counter block exactly as loaded.
	movdqu (%rdi),%xmm7
	call _vpaes_encrypt_core
	pxor %xmm7,%xmm0
	paddd %xmm8,%xmm6
	movdqu %xmm0,(%rsi,%rdi,1)
	subq $1,%rcx
	leaq 16(%rdi),%rdi	// lea leaves the flags from subq intact for the jz
	jz .Lctr32_done

.Lctr32_prep_loop:
	// xmm14 and xmm15 are the two registers _vpaes_encrypt_core_2x leaves
	// untouched; they carry the byte-reversed counters n and n+1.
	movdqa %xmm6,%xmm14
	movdqa %xmm6,%xmm15
	paddd %xmm8,%xmm15

.Lctr32_loop:
	// Convert both counters back to their stored byte order in xmm0 / xmm6.
	movdqa .Lrev_ctr(%rip),%xmm1
	movdqa %xmm14,%xmm0
	movdqa %xmm15,%xmm6
.byte 102,15,56,0,193	// pshufb %xmm1,%xmm0
.byte 102,15,56,0,241	// pshufb %xmm1,%xmm6
	call _vpaes_encrypt_core_2x
	movdqu (%rdi),%xmm1
	movdqu 16(%rdi),%xmm2
	movdqa .Lctr_add_two(%rip),%xmm3
	pxor %xmm1,%xmm0
	pxor %xmm2,%xmm6
	paddd %xmm3,%xmm14
	paddd %xmm3,%xmm15
	movdqu %xmm0,(%rsi,%rdi,1)
	movdqu %xmm6,16(%rsi,%rdi,1)
	subq $2,%rcx
	leaq 32(%rdi),%rdi	// lea leaves the flags from subq intact for the jnz
	jnz .Lctr32_loop

.Lctr32_done:
.Lctr32_abort:
	ret
.cfi_endproc
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks

// _vpaes_preheat: loads the shared lookup tables into %xmm9-%xmm15.
//
// Out:  %r10   = address of .Lk_s0F
//       %xmm9  = .Lk_s0F
//       %xmm10 = .Lk_inv       %xmm11 = .Lk_inv+16
//       %xmm13 = .Lk_sb1       %xmm12 = .Lk_sb1+16
//       %xmm15 = .Lk_sb2       %xmm14 = .Lk_sb2+16
// The displacements below depend on the order and sizes of the tables in
// _vpaes_consts.
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
.cfi_startproc
	leaq .Lk_s0F(%rip),%r10
	movdqa -32(%r10),%xmm10	// .Lk_inv
	movdqa -16(%r10),%xmm11	// .Lk_inv+16
	movdqa 0(%r10),%xmm9	// .Lk_s0F
	movdqa 48(%r10),%xmm13	// .Lk_sb1
	movdqa 64(%r10),%xmm12	// .Lk_sb1+16
	movdqa 80(%r10),%xmm15	// .Lk_sb2
	movdqa 96(%r10),%xmm14	// .Lk_sb2+16
	ret
.cfi_endproc
.size _vpaes_preheat,.-_vpaes_preheat

// Constant tables. The block is 64-byte aligned and every table is a multiple
// of 16 bytes long, so each one can be read with movdqa. Several tables are
// addressed by fixed displacement from a neighbouring label (see
// _vpaes_preheat and the core routines), so their order and sizes must not
// change.
.type _vpaes_consts,@object
.section .rodata
.align 64
_vpaes_consts:
// Two 16-entry tables; kept in xmm10 / xmm11.
.Lk_inv:
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
.quad 0x01040A060F0B0780, 0x030D0E0C02050809

// Byte mask selecting the low nibble; kept in xmm9.
.Lk_s0F:
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

// Table pair applied to the input block and to the user key (low nibble
// table, then high nibble table).
.Lk_ipt:
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

// Table pairs used in the loop body of the core routines (.Lk_sb1, .Lk_sb2)
// and in their final step (.Lk_sbo).
.Lk_sb1:
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.Lk_sb2:
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
.Lk_sbo:
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

// Four 16-byte shuffle masks each, selected by the offset in %r11.
.Lk_mc_forward:
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
.quad 0x000302010C0F0E0D, 0x080B0A0904070605

.Lk_mc_backward:
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
.quad 0x0A09080B06050407, 0x020100030E0D0C0F

// Four 16-byte shuffle masks, selected by %r11 in the core routines and by
// %r8 in the key schedule. The first entry is the identity permutation.
.Lk_sr:
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
.quad 0x0F060D040B020900, 0x070E050C030A0108
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508

// Round-constant bytes; consumed one byte per _vpaes_schedule_round call,
// starting from the highest byte.
.Lk_rcon:
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

// Constant xored into the key state during the schedule.
.Lk_s63:
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

// Table pair applied to the last round key in .Lschedule_mangle_last.
.Lk_opt:
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

// Only its address is taken in this file (see .Lschedule_mangle_last).
.Lk_deskew:
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

// Shuffle mask that keeps bytes 0-11 in place and reverses bytes 12-15.
.Lrev_ctr:
.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908

// Addends for paddd: 1 or 2 in the top 32-bit lane, zero elsewhere.
.Lctr_add_one:
.quad 0x0000000000000000, 0x0000000100000000
.Lctr_add_two:
.quad 0x0000000000000000, 0x0000000200000000

// NUL-terminated ASCII: "Vector Permutation AES for x86_64/SSSE3, Mike
// Hamburg (Stanford University)".
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 64
.size _vpaes_consts,.-_vpaes_consts
.text
#endif