// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// Reading notes:
//  * Operands are in AT&T order: source first, destination last.
//  * AES-NI and PINSRD instructions are emitted as raw opcode bytes:
//      .byte 102,15,56,220,modrm     66 0F 38 DC /r      aesenc
//      .byte 102,15,56,221,modrm     66 0F 38 DD /r      aesenclast
//      .byte 102,15,56,222,modrm     66 0F 38 DE /r      aesdec
//      .byte 102,15,56,223,modrm     66 0F 38 DF /r      aesdeclast
//      .byte 102,15,58,34,modrm,3    66 0F 3A 22 /r ib   pinsrd $3
//    A 65, 68 or 69 between the 102 and the 15 is a REX prefix that reaches
//    %xmm8-%xmm15. In the modrm byte, bits 5:3 name the destination (state)
//    register and bits 2:0 the source register, so 209 applies %xmm1 to
//    %xmm2 and 208 applies %xmm0 to %xmm2.
//  * .byte 0x66,0x90 and .byte 0x0f,0x1f,0x00 are multi-byte no-ops.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text

// _aes_hw_encrypt: runs one 16-byte block through the encryption rounds.
//   %rdi  address of the 16-byte input block
//   %rsi  address that receives the 16-byte result
//   %rdx  address of the round-key array; the 32-bit loop count is read
//         from offset 240
// Scratch: %eax, %rdx, %xmm0-%xmm2, flags. %xmm0-%xmm2 are zeroed before
// returning.
.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt

.p2align	4
_aes_hw_encrypt:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST

// Test builds only: set byte 1 of _BORINGSSL_function_hit to 1.
	movb	$1,_BORINGSSL_function_hit+1(%rip)
#endif
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
// Mix the first round key into the block.
	xorps	%xmm0,%xmm2
// One aesenc per pass; %rdx steps 16 bytes through the key array and %eax
// counts the passes down to zero.
L$oop_enc1_1:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	L$oop_enc1_1
// aesenclast with the key loaded on the final pass.
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	ret



// _aes_hw_decrypt: same register contract and structure as _aes_hw_encrypt,
// using aesdec/aesdeclast. It has no dispatch-test marker.
.globl	_aes_hw_decrypt
.private_extern	_aes_hw_decrypt

.p2align	4
_aes_hw_decrypt:

_CET_ENDBR
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
L$oop_dec1_2:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	L$oop_dec1_2
.byte	102,15,56,223,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	ret



// _aesni_encrypt2 .. _aesni_decrypt8: file-local helpers (no .globl) that
// run 2, 3, 4, 6 or 8 blocks through the rounds side by side.
//   In:   %rcx  address of the round-key array
//         %eax  loop count (callers load it from offset 240 of the key)
//         %xmm2 and the following registers: one block per register
//   Out:  the same %xmm registers hold the results
//   Scratch: %rax, %rcx, %xmm0, %xmm1, flags
// %eax is multiplied by 16 and negated, and %rcx is moved to the matching
// end address, so (%rcx,%rax) walks up the key array. Each loop pass adds 32
// to %rax and the jnz exits when that add reaches zero; the instructions
// between the addq and the jnz are movups and the raw-encoded AES rounds.
// %xmm1 and %xmm0 alternate as the current round key.
.p2align	4
_aesni_encrypt2:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$enc_loop2:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop2

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	ret



// Two-block decrypt helper; see the helper notes above _aesni_encrypt2.
.p2align	4
_aesni_decrypt2:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$dec_loop2:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop2

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208
.byte	102,15,56,223,216
	ret



// Three-block encrypt helper (%xmm2-%xmm4).
.p2align	4
_aesni_encrypt3:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$enc_loop3:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop3

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	ret



// Three-block decrypt helper (%xmm2-%xmm4).
.p2align	4
_aesni_decrypt3:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$dec_loop3:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop3

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	ret



// Four-block encrypt helper (%xmm2-%xmm5).
.p2align	4
_aesni_encrypt4:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

L$enc_loop4:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	ret



// Four-block decrypt helper (%xmm2-%xmm5).
.p2align	4
_aesni_decrypt4:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

L$dec_loop4:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop4

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	ret



// Six-block encrypt helper (%xmm2-%xmm7). The first round of the leading
// registers is interleaved with the key-0 XOR of the later ones, so the loop
// is entered part-way through at L$enc_loop6_enter.
.p2align	4
_aesni_encrypt6:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$enc_loop6_enter
.p2align	4
L$enc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
L$enc_loop6_enter:
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	ret



// Six-block decrypt helper (%xmm2-%xmm7); mirrors _aesni_encrypt6.
.p2align	4
_aesni_decrypt6:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$dec_loop6_enter
.p2align	4
L$dec_loop6:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
L$dec_loop6_enter:
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop6

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	ret



// Eight-block encrypt helper (%xmm2-%xmm9). L$enc_loop8_enter is also the
// target of a call from the 5-to-7-block tail of
// _aes_hw_ctr32_encrypt_blocks, which performs the preceding steps itself.
.p2align	4
_aesni_encrypt8:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$enc_loop8_inner
.p2align	4
L$enc_loop8:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
L$enc_loop8_inner:
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
L$enc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop8

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	ret



// Eight-block decrypt helper (%xmm2-%xmm9); mirrors _aesni_encrypt8.
.p2align	4
_aesni_decrypt8:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$dec_loop8_inner
.p2align	4
L$dec_loop8:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
L$dec_loop8_inner:
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
L$dec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop8

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
.byte	102,68,15,56,223,192
.byte	102,68,15,56,223,200
	ret


// _aes_hw_ecb_encrypt: transforms a buffer 16 bytes at a time, each block
// independently of the others.
//   %rdi  input address
//   %rsi  output address
//   %rdx  length in bytes; rounded down to a multiple of 16, and the routine
//         returns at once if that leaves zero
//   %rcx  address of the round-key array (loop count at offset 240)
//   %r8d  nonzero takes the encrypt path, zero the decrypt path
// %r11 and %r10d keep copies of the key address and loop count because the
// helpers overwrite %rcx and %eax. The decrypt path zeroes each data
// register after storing it; the encrypt path does not.
.globl	_aes_hw_ecb_encrypt
.private_extern	_aes_hw_ecb_encrypt

.p2align	4
_aes_hw_ecb_encrypt:

_CET_ENDBR
	andq	$-16,%rdx
	jz	L$ecb_ret

	movl	240(%rcx),%eax
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%eax,%r10d
	testl	%r8d,%r8d
	jz	L$ecb_decrypt

// Encrypt: fewer than 128 bytes goes straight to the tail.
	cmpq	$0x80,%rdx
	jb	L$ecb_enc_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	L$ecb_enc_loop8_enter
.p2align	4
// Store the previous eight results while loading the next eight inputs, and
// reload %rcx/%eax for the helper.
L$ecb_enc_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
L$ecb_enc_loop8_enter:

	call	_aesni_encrypt8

// Keep looping while at least another 128 bytes remain (no borrow).
	subq	$0x80,%rdx
	jnc	L$ecb_enc_loop8

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
// Undo the last subtraction; zero means nothing is left.
	addq	$0x80,%rdx
	jz	L$ecb_ret

// 16 to 112 bytes remain. Each cmpq feeds both the jb and the following je
// (the movups in between does not write flags). The fall-through case is
// seven blocks, run through the eight-block helper with %xmm9 zeroed.
L$ecb_enc_tail:
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	L$ecb_enc_one
	movups	16(%rdi),%xmm3
	je	L$ecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	L$ecb_enc_three
	movups	48(%rdi),%xmm5
	je	L$ecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	L$ecb_enc_five
	movups	80(%rdi),%xmm7
	je	L$ecb_enc_six
	movdqu	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	L$ecb_ret
.p2align	4
// Single block: the same inline loop as _aes_hw_encrypt, keyed off %rcx.
L$ecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_enc1_3:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_enc1_3
.byte	102,15,56,221,209
	movups	%xmm2,(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	L$ecb_ret
.p2align	4
// Five blocks use the six-block helper with %xmm7 zeroed; its result is
// not stored.
L$ecb_enc_five:
	xorps	%xmm7,%xmm7
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	L$ecb_ret

.p2align	4
// Decrypt: same shape as the encrypt path, with the decrypt helpers.
L$ecb_decrypt:
	cmpq	$0x80,%rdx
	jb	L$ecb_dec_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	L$ecb_dec_loop8_enter
.p2align	4
L$ecb_dec_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
L$ecb_dec_loop8_enter:

	call	_aesni_decrypt8

// Reload the first round key from the saved key address.
	movups	(%r11),%xmm0
	subq	$0x80,%rdx
	jnc	L$ecb_dec_loop8

	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx
	jz	L$ecb_ret

L$ecb_dec_tail:
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	L$ecb_dec_one
	movups	16(%rdi),%xmm3
	je	L$ecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	L$ecb_dec_three
	movups	48(%rdi),%xmm5
	je	L$ecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	L$ecb_dec_five
	movups	80(%rdi),%xmm7
	je	L$ecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_dec1_4:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_dec1_4
.byte	102,15,56,223,209
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_five:
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

// Common exit: zero the two round-key registers.
L$ecb_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	ret


// _aes_hw_ctr32_encrypt_blocks: XORs the input with encrypted counter
// blocks.
//   %rdi  input address
//   %rsi  output address
//   %rdx  number of 16-byte blocks (compared against 1, 8, 4, 6 and 2 below)
//   %rcx  address of the round-key array (loop count at offset 240)
//   %r8   address of the 16-byte counter block; its last four bytes are
//         byte-swapped and incremented as a 32-bit counter. Nothing is
//         written back through %r8.
// The bulk path keeps eight counter blocks, already XORed with the first
// round key, in a 16-byte-aligned 128-byte area on the stack. %r11 holds the
// entry %rsp and the caller's %rbp is pushed just below it.
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks

.p2align	4
_aes_hw_ctr32_encrypt_blocks:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
// Test builds only: set byte 0 of _BORINGSSL_function_hit to 1.
	movb	$1,_BORINGSSL_function_hit(%rip)
#endif
	cmpq	$1,%rdx
	jne	L$ctr32_bulk

// Exactly one block: encrypt the counter block as given, XOR it with the
// input block, store, and return without touching the stack.
	movups	(%r8),%xmm2
	movups	(%rdi),%xmm3
	movl	240(%rcx),%edx
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_enc1_5:
.byte	102,15,56,220,209
	decl	%edx
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_enc1_5
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	xorps	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	xorps	%xmm2,%xmm2
	jmp	L$ctr32_epilogue

.p2align	4
L$ctr32_bulk:
	leaq	(%rsp),%r11

	pushq	%rbp

	subq	$128,%rsp
	andq	$-16,%rsp

// %xmm2 = counter block XOR round key 0; %r8d = counter in host byte order;
// %ebp = last dword of round key 0, XORed into every counter word computed
// below so the stored blocks stay pre-whitened.
	movdqu	(%r8),%xmm2
	movdqu	(%rcx),%xmm0
	movl	12(%r8),%r8d
	pxor	%xmm0,%xmm2
	movl	12(%rcx),%ebp
	movdqa	%xmm2,0(%rsp)
	bswapl	%r8d
	movdqa	%xmm2,%xmm3
	movdqa	%xmm2,%xmm4
	movdqa	%xmm2,%xmm5
	movdqa	%xmm2,64(%rsp)
	movdqa	%xmm2,80(%rsp)
	movdqa	%xmm2,96(%rsp)
// %rdx is borrowed as scratch for counter+2; the block count waits in %r10.
	movq	%rdx,%r10
	movdqa	%xmm2,112(%rsp)

	leaq	1(%r8),%rax
	leaq	2(%r8),%rdx
	bswapl	%eax
	bswapl	%edx
	xorl	%ebp,%eax
	xorl	%ebp,%edx
// pinsrd $3,%eax,%xmm3
.byte	102,15,58,34,216,3
	leaq	3(%r8),%rax
	movdqa	%xmm3,16(%rsp)
// pinsrd $3,%edx,%xmm4
.byte	102,15,58,34,226,3
	bswapl	%eax
	movq	%r10,%rdx
	leaq	4(%r8),%r10
	movdqa	%xmm4,32(%rsp)
	xorl	%ebp,%eax
	bswapl	%r10d
// pinsrd $3,%eax,%xmm5
.byte	102,15,58,34,232,3
	xorl	%ebp,%r10d
	movdqa	%xmm5,48(%rsp)
	leaq	5(%r8),%r9
	movl	%r10d,64+12(%rsp)
	bswapl	%r9d
	leaq	6(%r8),%r10
	movl	240(%rcx),%eax
	xorl	%ebp,%r9d
	bswapl	%r10d
	movl	%r9d,80+12(%rsp)
	xorl	%ebp,%r10d
	leaq	7(%r8),%r9
	movl	%r10d,96+12(%rsp)
	bswapl	%r9d
	xorl	%ebp,%r9d
	movl	%r9d,112+12(%rsp)

	movups	16(%rcx),%xmm1

	movdqa	64(%rsp),%xmm6
	movdqa	80(%rsp),%xmm7

	cmpq	$8,%rdx
	jb	L$ctr32_tail

// %rcx is biased by 128 so the round keys are addressed as N-128(%rcx).
	leaq	128(%rcx),%rcx
	subq	$8,%rdx
	jmp	L$ctr32_loop8

.p2align	5
// Eight blocks per pass. The AES rounds on %xmm2-%xmm9 are interleaved with
// computing the next eight counter words (%r8d+0 .. %r8d+7 after the addl)
// and writing them into the last dword of each stack slot.
L$ctr32_loop8:
	addl	$8,%r8d
	movdqa	96(%rsp),%xmm8
.byte	102,15,56,220,209
	movl	%r8d,%r9d
	movdqa	112(%rsp),%xmm9
.byte	102,15,56,220,217
	bswapl	%r9d
	movups	32-128(%rcx),%xmm0
.byte	102,15,56,220,225
	xorl	%ebp,%r9d
	nop
.byte	102,15,56,220,233
	movl	%r9d,0+12(%rsp)
	leaq	1(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	48-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,16+12(%rsp)
	leaq	2(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	64-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,32+12(%rsp)
	leaq	3(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	80-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,48+12(%rsp)
	leaq	4(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	96-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,64+12(%rsp)
	leaq	5(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	112-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,80+12(%rsp)
	leaq	6(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	128-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,96+12(%rsp)
	leaq	7(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	144-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	xorl	%ebp,%r9d
	movdqu	0(%rdi),%xmm10
.byte	102,15,56,220,232
	movl	%r9d,112+12(%rsp)
// This compare drives both the jb below and the later je; only movups and
// raw-encoded AES rounds sit between them.
	cmpl	$11,%eax
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	160-128(%rcx),%xmm0

// Loop count below 11: no extra rounds.
	jb	L$ctr32_enc_done

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	176-128(%rcx),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	192-128(%rcx),%xmm0
// Loop count equal to 11: two extra rounds were enough.
	je	L$ctr32_enc_done

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	208-128(%rcx),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	224-128(%rcx),%xmm0
	jmp	L$ctr32_enc_done

.p2align	4
// %xmm0 holds the last round key. Each input block is XORed with it and the
// result is used as the aesenclast operand, so the final round and the XOR
// with the input happen in one instruction. The next pass's counter blocks
// are reloaded from the stack while results are stored.
L$ctr32_enc_done:
	movdqu	16(%rdi),%xmm11
	pxor	%xmm0,%xmm10
	movdqu	32(%rdi),%xmm12
	pxor	%xmm0,%xmm11
	movdqu	48(%rdi),%xmm13
	pxor	%xmm0,%xmm12
	movdqu	64(%rdi),%xmm14
	pxor	%xmm0,%xmm13
	movdqu	80(%rdi),%xmm15
	pxor	%xmm0,%xmm14
	prefetcht0	448(%rdi)
	prefetcht0	512(%rdi)
	pxor	%xmm0,%xmm15
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movdqu	96(%rdi),%xmm1
	leaq	128(%rdi),%rdi

.byte	102,65,15,56,221,210
	pxor	%xmm0,%xmm1
	movdqu	112-128(%rdi),%xmm10
.byte	102,65,15,56,221,219
	pxor	%xmm0,%xmm10
	movdqa	0(%rsp),%xmm11
.byte	102,65,15,56,221,228
.byte	102,65,15,56,221,237
	movdqa	16(%rsp),%xmm12
	movdqa	32(%rsp),%xmm13
.byte	102,65,15,56,221,246
.byte	102,65,15,56,221,255
	movdqa	48(%rsp),%xmm14
	movdqa	64(%rsp),%xmm15
.byte	102,68,15,56,221,193
	movdqa	80(%rsp),%xmm0
	movups	16-128(%rcx),%xmm1
.byte	102,69,15,56,221,202

	movups	%xmm2,(%rsi)
	movdqa	%xmm11,%xmm2
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6
	movups	%xmm7,80(%rsi)
	movdqa	%xmm0,%xmm7
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi

	subq	$8,%rdx
	jnc	L$ctr32_loop8

	addq	$8,%rdx
	jz	L$ctr32_done
// Remove the 128-byte bias before the tail.
	leaq	-128(%rcx),%rcx

// 1 to 7 blocks remain: fewer than four go to L$ctr32_loop3, exactly four to
// L$ctr32_loop4, and five to seven fall through.
L$ctr32_tail:


	leaq	16(%rcx),%rcx
	cmpq	$4,%rdx
	jb	L$ctr32_loop3
	je	L$ctr32_loop4

// Five to seven blocks: perform the first round here, then enter the
// eight-block helper at L$enc_loop8_enter with %xmm9 zeroed.
	shll	$4,%eax
	movdqa	96(%rsp),%xmm8
	pxor	%xmm9,%xmm9

	movups	16(%rcx),%xmm0
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	leaq	32-16(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,225
	addq	$16,%rax
	movups	(%rdi),%xmm10
.byte	102,15,56,220,233
.byte	102,15,56,220,241
	movups	16(%rdi),%xmm11
	movups	32(%rdi),%xmm12
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193

	call	L$enc_loop8_enter

	movdqu	48(%rdi),%xmm13
	pxor	%xmm10,%xmm2
	movdqu	64(%rdi),%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm10,%xmm6
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
// The cmpq result also feeds the je after the sixth block is stored.
	cmpq	$6,%rdx
	jb	L$ctr32_done

	movups	80(%rdi),%xmm11
	xorps	%xmm11,%xmm7
	movups	%xmm7,80(%rsi)
	je	L$ctr32_done

	movups	96(%rdi),%xmm12
	xorps	%xmm12,%xmm8
	movups	%xmm8,96(%rsi)
	jmp	L$ctr32_done

.p2align	5
// Exactly four blocks: one round per pass on %xmm2-%xmm5, counted by %eax.
L$ctr32_loop4:
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx),%xmm1
	jnz	L$ctr32_loop4
.byte	102,15,56,221,209
.byte	102,15,56,221,217
	movups	(%rdi),%xmm10
	movups	16(%rdi),%xmm11
.byte	102,15,56,221,225
.byte	102,15,56,221,233
	movups	32(%rdi),%xmm12
	movups	48(%rdi),%xmm13

	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm5,48(%rsi)
	jmp	L$ctr32_done

.p2align	5
// One to three blocks: three lanes are always computed, and only as many as
// %rdx asks for are XORed with input and stored.
L$ctr32_loop3:
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx),%xmm1
	jnz	L$ctr32_loop3
.byte	102,15,56,221,209
.byte	102,15,56,221,217
.byte	102,15,56,221,225

	movups	(%rdi),%xmm10
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	cmpq	$2,%rdx
	jb	L$ctr32_done

	movups	16(%rdi),%xmm11
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	je	L$ctr32_done

	movups	32(%rdi),%xmm12
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

// Zero %xmm0-%xmm15, %ebp and the 128-byte stack area, then restore the
// caller's %rbp (saved just below the entry %rsp) and %rsp from %r11.
L$ctr32_done:
	xorps	%xmm0,%xmm0
	xorl	%ebp,%ebp
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	movq	-8(%r11),%rbp

	leaq	(%r11),%rsp

L$ctr32_epilogue:
	ret


// _aes_hw_cbc_encrypt
//   %rdi  input address
//   %rsi  output address
//   %rdx  length in bytes; the routine returns at once when it is zero
//   %rcx  address of the round-key array (loop count at offset 240)
//   %r8   address of the 16-byte chaining value: read on entry, and written
//         back by the encrypt path and by the single-block decrypt path
//   %r9d  nonzero takes the encrypt path, zero the decrypt path
.globl	_aes_hw_cbc_encrypt
.private_extern	_aes_hw_cbc_encrypt

.p2align	4
_aes_hw_cbc_encrypt:

_CET_ENDBR
	testq	%rdx,%rdx
	jz	L$cbc_ret

	movl	240(%rcx),%r10d
	movq	%rcx,%r11
	testl	%r9d,%r9d
	jz	L$cbc_decrypt

	movups	(%r8),%xmm2
	movl	%r10d,%eax
	cmpq	$16,%rdx
	jb	L$cbc_enc_tail
	subq	$16,%rdx
	jmp	L$cbc_enc_loop
.p2align	4
// One block per pass: %xmm2 carries the previous output (initially the
// chaining value) and is XORed with the next input block and round key 0
// before the rounds.
L$cbc_enc_loop:
	movups	(%rdi),%xmm3
	leaq	16(%rdi),%rdi

	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm3
	leaq	32(%rcx),%rcx
	xorps	%xmm3,%xmm2
L$oop_enc1_6:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_enc1_6
.byte	102,15,56,221,209
	movl	%r10d,%eax
	movq	%r11,%rcx
	movups	%xmm2,0(%rsi)
	leaq	16(%rsi),%rsi
	subq	$16,%rdx
	jnc	L$cbc_enc_loop
	addq	$16,%rdx
	jnz	L$cbc_enc_tail
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
// Write the last output block back as the new chaining value.
	movups	%xmm2,(%r8)
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	jmp	L$cbc_ret

// Fewer than 16 bytes remain. Copy them to the output buffer, fill the rest
// of that 16-byte slot with zero bytes, then run the slot through the loop
// once, in place.
L$cbc_enc_tail:
	movq	%rdx,%rcx
	xchgq	%rdi,%rsi
// Bytes F3 A4 66 90: rep movsb followed by a two-byte no-op.
.long	0x9066A4F3
	movl	$16,%ecx
	subq	%rdx,%rcx
	xorl	%eax,%eax
// Bytes F3 AA 66 90: rep stosb followed by a two-byte no-op.
.long	0x9066AAF3
	leaq	-16(%rdi),%rdi
	movl	%r10d,%eax
	movq	%rdi,%rsi
	movq	%r11,%rcx
	xorq	%rdx,%rdx
	jmp	L$cbc_enc_loop

.p2align	4
L$cbc_decrypt:
	cmpq	$16,%rdx
	jne	L$cbc_decrypt_bulk

// Exactly one block: decrypt it, XOR with the chaining value, and store the
// original input block as the new chaining value.
	movdqu	(%rdi),%xmm2
	movdqu	(%r8),%xmm3
	movdqa	%xmm2,%xmm4
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_dec1_7:
.byte	102,15,56,222,209
	decl	%r10d
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_dec1_7
.byte	102,15,56,223,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movdqu	%xmm4,(%r8)
	xorps	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	L$cbc_ret
.p2align	4
// Bulk decrypt: %r11 keeps the entry %rsp, the caller's %rbp is pushed, and
// 16 bytes of 16-byte-aligned stack are reserved. %xmm10 holds the chaining
// value.
L$cbc_decrypt_bulk:
	leaq	(%rsp),%r11

	pushq	%rbp

	subq	$16,%rsp
	andq	$-16,%rsp
	movq	%rcx,%rbp
	movups	(%r8),%xmm10
	movl	%r10d,%eax
	cmpq	$0x50,%rdx
	jbe	L$cbc_dec_tail

// Load six input blocks; copies of the first five stay in %xmm11-%xmm15.
	movups	(%rcx),%xmm0
	movdqu	0(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqa	%xmm2,%xmm11
	movdqu	32(%rdi),%xmm4
	movdqa	%xmm3,%xmm12
	movdqu	48(%rdi),%xmm5
	movdqa	%xmm4,%xmm13
	movdqu	64(%rdi),%xmm6
	movdqa	%xmm5,%xmm14
	movdqu	80(%rdi),%xmm7
	movdqa	%xmm6,%xmm15
	cmpq	$0x70,%rdx
	jbe	L$cbc_dec_six_or_seven

// %rcx is biased by 112 so the round keys are addressed as N-112(%rcx).
	subq	$0x70,%rdx
	leaq	112(%rcx),%rcx
	jmp	L$cbc_dec_loop8_enter
.p2align	4
L$cbc_dec_loop8:
	movups	%xmm9,(%rsi)
	leaq	16(%rsi),%rsi
L$cbc_dec_loop8_enter:
	movdqu	96(%rdi),%xmm8
	pxor	%xmm0,%xmm2
	movdqu	112(%rdi),%xmm9
	pxor	%xmm0,%xmm3
	movups	16-112(%rcx),%xmm1
	pxor	%xmm0,%xmm4
// %rbp = -1 plus the carry of (%rdx - 0x70), masked with 128 and added to
// %rdi: it equals %rdi + 128 when %rdx is above 0x70 and %rdi otherwise.
// It is the address from which %xmm11-%xmm15 and %xmm1 are reloaded at
// L$cbc_dec_done.
	movq	$-1,%rbp
	cmpq	$0x70,%rdx
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8

.byte	102,15,56,222,209
	pxor	%xmm0,%xmm9
	movups	32-112(%rcx),%xmm0
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
	adcq	$0,%rbp
	andq	$128,%rbp
.byte	102,68,15,56,222,201
	addq	%rdi,%rbp
	movups	48-112(%rcx),%xmm1
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	64-112(%rcx),%xmm0
	nop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	80-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	96-112(%rcx),%xmm0
	nop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	112-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	128-112(%rcx),%xmm0
	nop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	144-112(%rcx),%xmm1
// As in the CTR loop, this compare drives both the jb and the later je.
	cmpl	$11,%eax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	160-112(%rcx),%xmm0
	jb	L$cbc_dec_done
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	176-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	192-112(%rcx),%xmm0
	je	L$cbc_dec_done
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	208-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	224-112(%rcx),%xmm0
	jmp	L$cbc_dec_done
.p2align	4
// %xmm0 holds the last round key. The chaining value and the saved input
// blocks are XORed with it and used as the aesdeclast operands, so the final
// round and the CBC XOR happen in one instruction.
L$cbc_dec_done:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm10
	pxor	%xmm0,%xmm11
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm12
	pxor	%xmm0,%xmm13
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	pxor	%xmm0,%xmm14
	pxor	%xmm0,%xmm15
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movdqu	80(%rdi),%xmm1

.byte	102,65,15,56,223,210
	movdqu	96(%rdi),%xmm10
	pxor	%xmm0,%xmm1
.byte	102,65,15,56,223,219
	pxor	%xmm0,%xmm10
	movdqu	112(%rdi),%xmm0
.byte	102,65,15,56,223,228
	leaq	128(%rdi),%rdi
	movdqu	0(%rbp),%xmm11
.byte	102,65,15,56,223,237
.byte	102,65,15,56,223,246
	movdqu	16(%rbp),%xmm12
	movdqu	32(%rbp),%xmm13
.byte	102,65,15,56,223,255
.byte	102,68,15,56,223,193
	movdqu	48(%rbp),%xmm14
	movdqu	64(%rbp),%xmm15
.byte	102,69,15,56,223,202
	movdqa	%xmm0,%xmm10
	movdqu	80(%rbp),%xmm1
	movups	-112(%rcx),%xmm0

	movups	%xmm2,(%rsi)
	movdqa	%xmm11,%xmm2
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6

movups %xmm7,80(%rsi) 1687 movdqa %xmm1,%xmm7 1688 movups %xmm8,96(%rsi) 1689 leaq 112(%rsi),%rsi 1690 1691 subq $0x80,%rdx 1692 ja L$cbc_dec_loop8 1693 1694 movaps %xmm9,%xmm2 1695 leaq -112(%rcx),%rcx 1696 addq $0x70,%rdx 1697 jle L$cbc_dec_clear_tail_collected 1698 movups %xmm9,(%rsi) 1699 leaq 16(%rsi),%rsi 1700 cmpq $0x50,%rdx 1701 jbe L$cbc_dec_tail 1702 1703 movaps %xmm11,%xmm2 1704L$cbc_dec_six_or_seven: 1705 cmpq $0x60,%rdx 1706 ja L$cbc_dec_seven 1707 1708 movaps %xmm7,%xmm8 1709 call _aesni_decrypt6 1710 pxor %xmm10,%xmm2 1711 movaps %xmm8,%xmm10 1712 pxor %xmm11,%xmm3 1713 movdqu %xmm2,(%rsi) 1714 pxor %xmm12,%xmm4 1715 movdqu %xmm3,16(%rsi) 1716 pxor %xmm3,%xmm3 1717 pxor %xmm13,%xmm5 1718 movdqu %xmm4,32(%rsi) 1719 pxor %xmm4,%xmm4 1720 pxor %xmm14,%xmm6 1721 movdqu %xmm5,48(%rsi) 1722 pxor %xmm5,%xmm5 1723 pxor %xmm15,%xmm7 1724 movdqu %xmm6,64(%rsi) 1725 pxor %xmm6,%xmm6 1726 leaq 80(%rsi),%rsi 1727 movdqa %xmm7,%xmm2 1728 pxor %xmm7,%xmm7 1729 jmp L$cbc_dec_tail_collected 1730 1731.p2align 4 1732L$cbc_dec_seven: 1733 movups 96(%rdi),%xmm8 1734 xorps %xmm9,%xmm9 1735 call _aesni_decrypt8 1736 movups 80(%rdi),%xmm9 1737 pxor %xmm10,%xmm2 1738 movups 96(%rdi),%xmm10 1739 pxor %xmm11,%xmm3 1740 movdqu %xmm2,(%rsi) 1741 pxor %xmm12,%xmm4 1742 movdqu %xmm3,16(%rsi) 1743 pxor %xmm3,%xmm3 1744 pxor %xmm13,%xmm5 1745 movdqu %xmm4,32(%rsi) 1746 pxor %xmm4,%xmm4 1747 pxor %xmm14,%xmm6 1748 movdqu %xmm5,48(%rsi) 1749 pxor %xmm5,%xmm5 1750 pxor %xmm15,%xmm7 1751 movdqu %xmm6,64(%rsi) 1752 pxor %xmm6,%xmm6 1753 pxor %xmm9,%xmm8 1754 movdqu %xmm7,80(%rsi) 1755 pxor %xmm7,%xmm7 1756 leaq 96(%rsi),%rsi 1757 movdqa %xmm8,%xmm2 1758 pxor %xmm8,%xmm8 1759 pxor %xmm9,%xmm9 1760 jmp L$cbc_dec_tail_collected 1761 1762L$cbc_dec_tail: 1763 movups (%rdi),%xmm2 1764 subq $0x10,%rdx 1765 jbe L$cbc_dec_one 1766 1767 movups 16(%rdi),%xmm3 1768 movaps %xmm2,%xmm11 1769 subq $0x10,%rdx 1770 jbe L$cbc_dec_two 1771 1772 movups 32(%rdi),%xmm4 1773 movaps %xmm3,%xmm12 1774 subq 
$0x10,%rdx 1775 jbe L$cbc_dec_three 1776 1777 movups 48(%rdi),%xmm5 1778 movaps %xmm4,%xmm13 1779 subq $0x10,%rdx 1780 jbe L$cbc_dec_four 1781 1782 movups 64(%rdi),%xmm6 1783 movaps %xmm5,%xmm14 1784 movaps %xmm6,%xmm15 1785 xorps %xmm7,%xmm7 1786 call _aesni_decrypt6 1787 pxor %xmm10,%xmm2 1788 movaps %xmm15,%xmm10 1789 pxor %xmm11,%xmm3 1790 movdqu %xmm2,(%rsi) 1791 pxor %xmm12,%xmm4 1792 movdqu %xmm3,16(%rsi) 1793 pxor %xmm3,%xmm3 1794 pxor %xmm13,%xmm5 1795 movdqu %xmm4,32(%rsi) 1796 pxor %xmm4,%xmm4 1797 pxor %xmm14,%xmm6 1798 movdqu %xmm5,48(%rsi) 1799 pxor %xmm5,%xmm5 1800 leaq 64(%rsi),%rsi 1801 movdqa %xmm6,%xmm2 1802 pxor %xmm6,%xmm6 1803 pxor %xmm7,%xmm7 1804 subq $0x10,%rdx 1805 jmp L$cbc_dec_tail_collected 1806 1807.p2align 4 1808L$cbc_dec_one: 1809 movaps %xmm2,%xmm11 1810 movups (%rcx),%xmm0 1811 movups 16(%rcx),%xmm1 1812 leaq 32(%rcx),%rcx 1813 xorps %xmm0,%xmm2 1814L$oop_dec1_8: 1815.byte 102,15,56,222,209 1816 decl %eax 1817 movups (%rcx),%xmm1 1818 leaq 16(%rcx),%rcx 1819 jnz L$oop_dec1_8 1820.byte 102,15,56,223,209 1821 xorps %xmm10,%xmm2 1822 movaps %xmm11,%xmm10 1823 jmp L$cbc_dec_tail_collected 1824.p2align 4 1825L$cbc_dec_two: 1826 movaps %xmm3,%xmm12 1827 call _aesni_decrypt2 1828 pxor %xmm10,%xmm2 1829 movaps %xmm12,%xmm10 1830 pxor %xmm11,%xmm3 1831 movdqu %xmm2,(%rsi) 1832 movdqa %xmm3,%xmm2 1833 pxor %xmm3,%xmm3 1834 leaq 16(%rsi),%rsi 1835 jmp L$cbc_dec_tail_collected 1836.p2align 4 1837L$cbc_dec_three: 1838 movaps %xmm4,%xmm13 1839 call _aesni_decrypt3 1840 pxor %xmm10,%xmm2 1841 movaps %xmm13,%xmm10 1842 pxor %xmm11,%xmm3 1843 movdqu %xmm2,(%rsi) 1844 pxor %xmm12,%xmm4 1845 movdqu %xmm3,16(%rsi) 1846 pxor %xmm3,%xmm3 1847 movdqa %xmm4,%xmm2 1848 pxor %xmm4,%xmm4 1849 leaq 32(%rsi),%rsi 1850 jmp L$cbc_dec_tail_collected 1851.p2align 4 1852L$cbc_dec_four: 1853 movaps %xmm5,%xmm14 1854 call _aesni_decrypt4 1855 pxor %xmm10,%xmm2 1856 movaps %xmm14,%xmm10 1857 pxor %xmm11,%xmm3 1858 movdqu %xmm2,(%rsi) 1859 pxor %xmm12,%xmm4 1860 movdqu 
%xmm3,16(%rsi) 1861 pxor %xmm3,%xmm3 1862 pxor %xmm13,%xmm5 1863 movdqu %xmm4,32(%rsi) 1864 pxor %xmm4,%xmm4 1865 movdqa %xmm5,%xmm2 1866 pxor %xmm5,%xmm5 1867 leaq 48(%rsi),%rsi 1868 jmp L$cbc_dec_tail_collected 1869 1870.p2align 4 1871L$cbc_dec_clear_tail_collected: 1872 pxor %xmm3,%xmm3 1873 pxor %xmm4,%xmm4 1874 pxor %xmm5,%xmm5 1875 pxor %xmm6,%xmm6 1876 pxor %xmm7,%xmm7 1877 pxor %xmm8,%xmm8 1878 pxor %xmm9,%xmm9 1879L$cbc_dec_tail_collected: 1880 movups %xmm10,(%r8) 1881 andq $15,%rdx 1882 jnz L$cbc_dec_tail_partial 1883 movups %xmm2,(%rsi) 1884 pxor %xmm2,%xmm2 1885 jmp L$cbc_dec_ret 1886.p2align 4 1887L$cbc_dec_tail_partial: 1888 movaps %xmm2,(%rsp) 1889 pxor %xmm2,%xmm2 1890 movq $16,%rcx 1891 movq %rsi,%rdi 1892 subq %rdx,%rcx 1893 leaq (%rsp),%rsi 1894.long 0x9066A4F3 1895 movdqa %xmm2,(%rsp) 1896 1897L$cbc_dec_ret: 1898 xorps %xmm0,%xmm0 1899 pxor %xmm1,%xmm1 1900 movq -8(%r11),%rbp 1901 1902 leaq (%r11),%rsp 1903 1904L$cbc_ret: 1905 ret 1906 1907 1908.globl _aes_hw_set_decrypt_key 1909.private_extern _aes_hw_set_decrypt_key 1910 1911.p2align 4 1912_aes_hw_set_decrypt_key: 1913 1914_CET_ENDBR 1915.byte 0x48,0x83,0xEC,0x08 1916 1917 call __aesni_set_encrypt_key 1918 shll $4,%esi 1919 testl %eax,%eax 1920 jnz L$dec_key_ret 1921 leaq 16(%rdx,%rsi,1),%rdi 1922 1923 movups (%rdx),%xmm0 1924 movups (%rdi),%xmm1 1925 movups %xmm0,(%rdi) 1926 movups %xmm1,(%rdx) 1927 leaq 16(%rdx),%rdx 1928 leaq -16(%rdi),%rdi 1929 1930L$dec_key_inverse: 1931 movups (%rdx),%xmm0 1932 movups (%rdi),%xmm1 1933.byte 102,15,56,219,192 1934.byte 102,15,56,219,201 1935 leaq 16(%rdx),%rdx 1936 leaq -16(%rdi),%rdi 1937 movups %xmm0,16(%rdi) 1938 movups %xmm1,-16(%rdx) 1939 cmpq %rdx,%rdi 1940 ja L$dec_key_inverse 1941 1942 movups (%rdx),%xmm0 1943.byte 102,15,56,219,192 1944 pxor %xmm1,%xmm1 1945 movups %xmm0,(%rdi) 1946 pxor %xmm0,%xmm0 1947L$dec_key_ret: 1948 addq $8,%rsp 1949 1950 ret 1951 1952L$SEH_end_set_decrypt_key: 1953 1954.globl _aes_hw_set_encrypt_key 1955.private_extern 
_aes_hw_set_encrypt_key 1956 1957.p2align 4 1958_aes_hw_set_encrypt_key: 1959__aesni_set_encrypt_key: 1960 1961_CET_ENDBR 1962#ifdef BORINGSSL_DISPATCH_TEST 1963 movb $1,_BORINGSSL_function_hit+3(%rip) 1964#endif 1965.byte 0x48,0x83,0xEC,0x08 1966 1967 movq $-1,%rax 1968 testq %rdi,%rdi 1969 jz L$enc_key_ret 1970 testq %rdx,%rdx 1971 jz L$enc_key_ret 1972 1973 movups (%rdi),%xmm0 1974 xorps %xmm4,%xmm4 1975 leaq _OPENSSL_ia32cap_P(%rip),%r10 1976 movl 4(%r10),%r10d 1977 andl $268437504,%r10d 1978 leaq 16(%rdx),%rax 1979 cmpl $256,%esi 1980 je L$14rounds 1981 cmpl $192,%esi 1982 je L$12rounds 1983 cmpl $128,%esi 1984 jne L$bad_keybits 1985 1986L$10rounds: 1987 movl $9,%esi 1988 cmpl $268435456,%r10d 1989 je L$10rounds_alt 1990 1991 movups %xmm0,(%rdx) 1992.byte 102,15,58,223,200,1 1993 call L$key_expansion_128_cold 1994.byte 102,15,58,223,200,2 1995 call L$key_expansion_128 1996.byte 102,15,58,223,200,4 1997 call L$key_expansion_128 1998.byte 102,15,58,223,200,8 1999 call L$key_expansion_128 2000.byte 102,15,58,223,200,16 2001 call L$key_expansion_128 2002.byte 102,15,58,223,200,32 2003 call L$key_expansion_128 2004.byte 102,15,58,223,200,64 2005 call L$key_expansion_128 2006.byte 102,15,58,223,200,128 2007 call L$key_expansion_128 2008.byte 102,15,58,223,200,27 2009 call L$key_expansion_128 2010.byte 102,15,58,223,200,54 2011 call L$key_expansion_128 2012 movups %xmm0,(%rax) 2013 movl %esi,80(%rax) 2014 xorl %eax,%eax 2015 jmp L$enc_key_ret 2016 2017.p2align 4 2018L$10rounds_alt: 2019 movdqa L$key_rotate(%rip),%xmm5 2020 movl $8,%r10d 2021 movdqa L$key_rcon1(%rip),%xmm4 2022 movdqa %xmm0,%xmm2 2023 movdqu %xmm0,(%rdx) 2024 jmp L$oop_key128 2025 2026.p2align 4 2027L$oop_key128: 2028.byte 102,15,56,0,197 2029.byte 102,15,56,221,196 2030 pslld $1,%xmm4 2031 leaq 16(%rax),%rax 2032 2033 movdqa %xmm2,%xmm3 2034 pslldq $4,%xmm2 2035 pxor %xmm2,%xmm3 2036 pslldq $4,%xmm2 2037 pxor %xmm2,%xmm3 2038 pslldq $4,%xmm2 2039 pxor %xmm3,%xmm2 2040 2041 pxor %xmm2,%xmm0 2042 
movdqu %xmm0,-16(%rax) 2043 movdqa %xmm0,%xmm2 2044 2045 decl %r10d 2046 jnz L$oop_key128 2047 2048 movdqa L$key_rcon1b(%rip),%xmm4 2049 2050.byte 102,15,56,0,197 2051.byte 102,15,56,221,196 2052 pslld $1,%xmm4 2053 2054 movdqa %xmm2,%xmm3 2055 pslldq $4,%xmm2 2056 pxor %xmm2,%xmm3 2057 pslldq $4,%xmm2 2058 pxor %xmm2,%xmm3 2059 pslldq $4,%xmm2 2060 pxor %xmm3,%xmm2 2061 2062 pxor %xmm2,%xmm0 2063 movdqu %xmm0,(%rax) 2064 2065 movdqa %xmm0,%xmm2 2066.byte 102,15,56,0,197 2067.byte 102,15,56,221,196 2068 2069 movdqa %xmm2,%xmm3 2070 pslldq $4,%xmm2 2071 pxor %xmm2,%xmm3 2072 pslldq $4,%xmm2 2073 pxor %xmm2,%xmm3 2074 pslldq $4,%xmm2 2075 pxor %xmm3,%xmm2 2076 2077 pxor %xmm2,%xmm0 2078 movdqu %xmm0,16(%rax) 2079 2080 movl %esi,96(%rax) 2081 xorl %eax,%eax 2082 jmp L$enc_key_ret 2083 2084.p2align 4 2085L$12rounds: 2086 movq 16(%rdi),%xmm2 2087 movl $11,%esi 2088 cmpl $268435456,%r10d 2089 je L$12rounds_alt 2090 2091 movups %xmm0,(%rdx) 2092.byte 102,15,58,223,202,1 2093 call L$key_expansion_192a_cold 2094.byte 102,15,58,223,202,2 2095 call L$key_expansion_192b 2096.byte 102,15,58,223,202,4 2097 call L$key_expansion_192a 2098.byte 102,15,58,223,202,8 2099 call L$key_expansion_192b 2100.byte 102,15,58,223,202,16 2101 call L$key_expansion_192a 2102.byte 102,15,58,223,202,32 2103 call L$key_expansion_192b 2104.byte 102,15,58,223,202,64 2105 call L$key_expansion_192a 2106.byte 102,15,58,223,202,128 2107 call L$key_expansion_192b 2108 movups %xmm0,(%rax) 2109 movl %esi,48(%rax) 2110 xorq %rax,%rax 2111 jmp L$enc_key_ret 2112 2113.p2align 4 2114L$12rounds_alt: 2115 movdqa L$key_rotate192(%rip),%xmm5 2116 movdqa L$key_rcon1(%rip),%xmm4 2117 movl $8,%r10d 2118 movdqu %xmm0,(%rdx) 2119 jmp L$oop_key192 2120 2121.p2align 4 2122L$oop_key192: 2123 movq %xmm2,0(%rax) 2124 movdqa %xmm2,%xmm1 2125.byte 102,15,56,0,213 2126.byte 102,15,56,221,212 2127 pslld $1,%xmm4 2128 leaq 24(%rax),%rax 2129 2130 movdqa %xmm0,%xmm3 2131 pslldq $4,%xmm0 2132 pxor %xmm0,%xmm3 2133 pslldq $4,%xmm0 
2134 pxor %xmm0,%xmm3 2135 pslldq $4,%xmm0 2136 pxor %xmm3,%xmm0 2137 2138 pshufd $0xff,%xmm0,%xmm3 2139 pxor %xmm1,%xmm3 2140 pslldq $4,%xmm1 2141 pxor %xmm1,%xmm3 2142 2143 pxor %xmm2,%xmm0 2144 pxor %xmm3,%xmm2 2145 movdqu %xmm0,-16(%rax) 2146 2147 decl %r10d 2148 jnz L$oop_key192 2149 2150 movl %esi,32(%rax) 2151 xorl %eax,%eax 2152 jmp L$enc_key_ret 2153 2154.p2align 4 2155L$14rounds: 2156 movups 16(%rdi),%xmm2 2157 movl $13,%esi 2158 leaq 16(%rax),%rax 2159 cmpl $268435456,%r10d 2160 je L$14rounds_alt 2161 2162 movups %xmm0,(%rdx) 2163 movups %xmm2,16(%rdx) 2164.byte 102,15,58,223,202,1 2165 call L$key_expansion_256a_cold 2166.byte 102,15,58,223,200,1 2167 call L$key_expansion_256b 2168.byte 102,15,58,223,202,2 2169 call L$key_expansion_256a 2170.byte 102,15,58,223,200,2 2171 call L$key_expansion_256b 2172.byte 102,15,58,223,202,4 2173 call L$key_expansion_256a 2174.byte 102,15,58,223,200,4 2175 call L$key_expansion_256b 2176.byte 102,15,58,223,202,8 2177 call L$key_expansion_256a 2178.byte 102,15,58,223,200,8 2179 call L$key_expansion_256b 2180.byte 102,15,58,223,202,16 2181 call L$key_expansion_256a 2182.byte 102,15,58,223,200,16 2183 call L$key_expansion_256b 2184.byte 102,15,58,223,202,32 2185 call L$key_expansion_256a 2186.byte 102,15,58,223,200,32 2187 call L$key_expansion_256b 2188.byte 102,15,58,223,202,64 2189 call L$key_expansion_256a 2190 movups %xmm0,(%rax) 2191 movl %esi,16(%rax) 2192 xorq %rax,%rax 2193 jmp L$enc_key_ret 2194 2195.p2align 4 2196L$14rounds_alt: 2197 movdqa L$key_rotate(%rip),%xmm5 2198 movdqa L$key_rcon1(%rip),%xmm4 2199 movl $7,%r10d 2200 movdqu %xmm0,0(%rdx) 2201 movdqa %xmm2,%xmm1 2202 movdqu %xmm2,16(%rdx) 2203 jmp L$oop_key256 2204 2205.p2align 4 2206L$oop_key256: 2207.byte 102,15,56,0,213 2208.byte 102,15,56,221,212 2209 2210 movdqa %xmm0,%xmm3 2211 pslldq $4,%xmm0 2212 pxor %xmm0,%xmm3 2213 pslldq $4,%xmm0 2214 pxor %xmm0,%xmm3 2215 pslldq $4,%xmm0 2216 pxor %xmm3,%xmm0 2217 pslld $1,%xmm4 2218 2219 pxor %xmm2,%xmm0 2220 
movdqu %xmm0,(%rax) 2221 2222 decl %r10d 2223 jz L$done_key256 2224 2225 pshufd $0xff,%xmm0,%xmm2 2226 pxor %xmm3,%xmm3 2227.byte 102,15,56,221,211 2228 2229 movdqa %xmm1,%xmm3 2230 pslldq $4,%xmm1 2231 pxor %xmm1,%xmm3 2232 pslldq $4,%xmm1 2233 pxor %xmm1,%xmm3 2234 pslldq $4,%xmm1 2235 pxor %xmm3,%xmm1 2236 2237 pxor %xmm1,%xmm2 2238 movdqu %xmm2,16(%rax) 2239 leaq 32(%rax),%rax 2240 movdqa %xmm2,%xmm1 2241 2242 jmp L$oop_key256 2243 2244L$done_key256: 2245 movl %esi,16(%rax) 2246 xorl %eax,%eax 2247 jmp L$enc_key_ret 2248 2249.p2align 4 2250L$bad_keybits: 2251 movq $-2,%rax 2252L$enc_key_ret: 2253 pxor %xmm0,%xmm0 2254 pxor %xmm1,%xmm1 2255 pxor %xmm2,%xmm2 2256 pxor %xmm3,%xmm3 2257 pxor %xmm4,%xmm4 2258 pxor %xmm5,%xmm5 2259 addq $8,%rsp 2260 2261 ret 2262 2263L$SEH_end_set_encrypt_key: 2264 2265.p2align 4 2266L$key_expansion_128: 2267 movups %xmm0,(%rax) 2268 leaq 16(%rax),%rax 2269L$key_expansion_128_cold: 2270 shufps $16,%xmm0,%xmm4 2271 xorps %xmm4,%xmm0 2272 shufps $140,%xmm0,%xmm4 2273 xorps %xmm4,%xmm0 2274 shufps $255,%xmm1,%xmm1 2275 xorps %xmm1,%xmm0 2276 ret 2277 2278.p2align 4 2279L$key_expansion_192a: 2280 movups %xmm0,(%rax) 2281 leaq 16(%rax),%rax 2282L$key_expansion_192a_cold: 2283 movaps %xmm2,%xmm5 2284L$key_expansion_192b_warm: 2285 shufps $16,%xmm0,%xmm4 2286 movdqa %xmm2,%xmm3 2287 xorps %xmm4,%xmm0 2288 shufps $140,%xmm0,%xmm4 2289 pslldq $4,%xmm3 2290 xorps %xmm4,%xmm0 2291 pshufd $85,%xmm1,%xmm1 2292 pxor %xmm3,%xmm2 2293 pxor %xmm1,%xmm0 2294 pshufd $255,%xmm0,%xmm3 2295 pxor %xmm3,%xmm2 2296 ret 2297 2298.p2align 4 2299L$key_expansion_192b: 2300 movaps %xmm0,%xmm3 2301 shufps $68,%xmm0,%xmm5 2302 movups %xmm5,(%rax) 2303 shufps $78,%xmm2,%xmm3 2304 movups %xmm3,16(%rax) 2305 leaq 32(%rax),%rax 2306 jmp L$key_expansion_192b_warm 2307 2308.p2align 4 2309L$key_expansion_256a: 2310 movups %xmm2,(%rax) 2311 leaq 16(%rax),%rax 2312L$key_expansion_256a_cold: 2313 shufps $16,%xmm0,%xmm4 2314 xorps %xmm4,%xmm0 2315 shufps $140,%xmm0,%xmm4 2316 
xorps %xmm4,%xmm0 2317 shufps $255,%xmm1,%xmm1 2318 xorps %xmm1,%xmm0 2319 ret 2320 2321.p2align 4 2322L$key_expansion_256b: 2323 movups %xmm0,(%rax) 2324 leaq 16(%rax),%rax 2325 2326 shufps $16,%xmm2,%xmm4 2327 xorps %xmm4,%xmm2 2328 shufps $140,%xmm2,%xmm4 2329 xorps %xmm4,%xmm2 2330 shufps $170,%xmm1,%xmm1 2331 xorps %xmm1,%xmm2 2332 ret 2333 2334 2335.section __DATA,__const 2336.p2align 6 2337L$bswap_mask: 2338.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 2339L$increment32: 2340.long 6,6,6,0 2341L$increment64: 2342.long 1,0,0,0 2343L$xts_magic: 2344.long 0x87,0,1,0 2345L$increment1: 2346.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 2347L$key_rotate: 2348.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 2349L$key_rotate192: 2350.long 0x04070605,0x04070605,0x04070605,0x04070605 2351L$key_rcon1: 2352.long 1,1,1,1 2353L$key_rcon1b: 2354.long 0x1b,0x1b,0x1b,0x1b 2355 2356.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 2357.p2align 6 2358.text 2359#endif 2360