1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#include <openssl/asm_base.h> 5 6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) 7.text 8.globl _aes_hw_encrypt 9.private_extern _aes_hw_encrypt 10 11.p2align 4 12_aes_hw_encrypt: 13 14_CET_ENDBR 15#ifdef BORINGSSL_DISPATCH_TEST 16 17 movb $1,_BORINGSSL_function_hit+1(%rip) 18#endif 19 movups (%rdi),%xmm2 20 movl 240(%rdx),%eax 21 movups (%rdx),%xmm0 22 movups 16(%rdx),%xmm1 23 leaq 32(%rdx),%rdx 24 xorps %xmm0,%xmm2 25L$oop_enc1_1: 26.byte 102,15,56,220,209 27 decl %eax 28 movups (%rdx),%xmm1 29 leaq 16(%rdx),%rdx 30 jnz L$oop_enc1_1 31.byte 102,15,56,221,209 32 pxor %xmm0,%xmm0 33 pxor %xmm1,%xmm1 34 movups %xmm2,(%rsi) 35 pxor %xmm2,%xmm2 36 ret 37 38 39 40.globl _aes_hw_decrypt 41.private_extern _aes_hw_decrypt 42 43.p2align 4 44_aes_hw_decrypt: 45 46_CET_ENDBR 47 movups (%rdi),%xmm2 48 movl 240(%rdx),%eax 49 movups (%rdx),%xmm0 50 movups 16(%rdx),%xmm1 51 leaq 32(%rdx),%rdx 52 xorps %xmm0,%xmm2 53L$oop_dec1_2: 54.byte 102,15,56,222,209 55 decl %eax 56 movups (%rdx),%xmm1 57 leaq 16(%rdx),%rdx 58 jnz L$oop_dec1_2 59.byte 102,15,56,223,209 60 pxor %xmm0,%xmm0 61 pxor %xmm1,%xmm1 62 movups %xmm2,(%rsi) 63 pxor %xmm2,%xmm2 64 ret 65 66 67 68.p2align 4 69_aesni_encrypt2: 70 71 movups (%rcx),%xmm0 72 shll $4,%eax 73 movups 16(%rcx),%xmm1 74 xorps %xmm0,%xmm2 75 xorps %xmm0,%xmm3 76 movups 32(%rcx),%xmm0 77 leaq 32(%rcx,%rax,1),%rcx 78 negq %rax 79 addq $16,%rax 80 81L$enc_loop2: 82.byte 102,15,56,220,209 83.byte 102,15,56,220,217 84 movups (%rcx,%rax,1),%xmm1 85 addq $32,%rax 86.byte 102,15,56,220,208 87.byte 102,15,56,220,216 88 movups -16(%rcx,%rax,1),%xmm0 89 jnz L$enc_loop2 90 91.byte 102,15,56,220,209 92.byte 102,15,56,220,217 93.byte 102,15,56,221,208 94.byte 102,15,56,221,216 95 ret 96 97 98 99.p2align 4 100_aesni_decrypt2: 101 102 movups (%rcx),%xmm0 103 shll $4,%eax 104 movups 16(%rcx),%xmm1 105 xorps %xmm0,%xmm2 106 xorps %xmm0,%xmm3 107 movups 32(%rcx),%xmm0 108 leaq 32(%rcx,%rax,1),%rcx 109 negq %rax 110 addq $16,%rax 111 112L$dec_loop2: 113.byte 102,15,56,222,209 114.byte 102,15,56,222,217 115 movups (%rcx,%rax,1),%xmm1 116 addq $32,%rax 117.byte 102,15,56,222,208 118.byte 102,15,56,222,216 119 movups -16(%rcx,%rax,1),%xmm0 120 jnz L$dec_loop2 121 122.byte 102,15,56,222,209 123.byte 102,15,56,222,217 124.byte 102,15,56,223,208 125.byte 102,15,56,223,216 126 ret 127 128 129 130.p2align 4 131_aesni_encrypt3: 132 133 movups (%rcx),%xmm0 134 shll $4,%eax 135 movups 16(%rcx),%xmm1 136 xorps %xmm0,%xmm2 137 xorps %xmm0,%xmm3 138 xorps %xmm0,%xmm4 139 movups 32(%rcx),%xmm0 140 leaq 32(%rcx,%rax,1),%rcx 141 negq %rax 142 addq $16,%rax 143 144L$enc_loop3: 145.byte 102,15,56,220,209 146.byte 102,15,56,220,217 147.byte 102,15,56,220,225 148 movups (%rcx,%rax,1),%xmm1 149 addq $32,%rax 150.byte 102,15,56,220,208 151.byte 102,15,56,220,216 152.byte 102,15,56,220,224 153 movups -16(%rcx,%rax,1),%xmm0 154 jnz L$enc_loop3 155 156.byte 102,15,56,220,209 157.byte 102,15,56,220,217 158.byte 102,15,56,220,225 159.byte 102,15,56,221,208 160.byte 102,15,56,221,216 161.byte 102,15,56,221,224 162 ret 163 164 165 166.p2align 4 167_aesni_decrypt3: 168 169 movups (%rcx),%xmm0 170 shll $4,%eax 171 movups 16(%rcx),%xmm1 172 xorps %xmm0,%xmm2 173 xorps %xmm0,%xmm3 174 xorps %xmm0,%xmm4 175 movups 32(%rcx),%xmm0 176 leaq 32(%rcx,%rax,1),%rcx 177 negq %rax 178 addq $16,%rax 179 180L$dec_loop3: 181.byte 102,15,56,222,209 182.byte 102,15,56,222,217 183.byte 102,15,56,222,225 184 movups (%rcx,%rax,1),%xmm1 185 addq $32,%rax 186.byte 102,15,56,222,208 187.byte 102,15,56,222,216 188.byte 102,15,56,222,224 189 movups -16(%rcx,%rax,1),%xmm0 190 jnz L$dec_loop3 191 192.byte 102,15,56,222,209 193.byte 102,15,56,222,217 194.byte 102,15,56,222,225 195.byte 102,15,56,223,208 196.byte 102,15,56,223,216 197.byte 102,15,56,223,224 198 ret 199 200 201 202.p2align 4 203_aesni_encrypt4: 204 205 movups (%rcx),%xmm0 206 shll $4,%eax 207 movups 16(%rcx),%xmm1 208 xorps %xmm0,%xmm2 209 xorps %xmm0,%xmm3 210 xorps %xmm0,%xmm4 211 xorps %xmm0,%xmm5 212 movups 32(%rcx),%xmm0 213 leaq 32(%rcx,%rax,1),%rcx 214 negq %rax 215.byte 0x0f,0x1f,0x00 216 addq $16,%rax 217 218L$enc_loop4: 219.byte 102,15,56,220,209 220.byte 102,15,56,220,217 221.byte 102,15,56,220,225 222.byte 102,15,56,220,233 223 movups (%rcx,%rax,1),%xmm1 224 addq $32,%rax 225.byte 102,15,56,220,208 226.byte 102,15,56,220,216 227.byte 102,15,56,220,224 228.byte 102,15,56,220,232 229 movups -16(%rcx,%rax,1),%xmm0 230 jnz L$enc_loop4 231 232.byte 102,15,56,220,209 233.byte 102,15,56,220,217 234.byte 102,15,56,220,225 235.byte 102,15,56,220,233 236.byte 102,15,56,221,208 237.byte 102,15,56,221,216 238.byte 102,15,56,221,224 239.byte 102,15,56,221,232 240 ret 241 242 243 244.p2align 4 245_aesni_decrypt4: 246 247 movups (%rcx),%xmm0 248 shll $4,%eax 249 movups 16(%rcx),%xmm1 250 xorps %xmm0,%xmm2 251 xorps %xmm0,%xmm3 252 xorps %xmm0,%xmm4 253 xorps %xmm0,%xmm5 254 movups 32(%rcx),%xmm0 255 leaq 32(%rcx,%rax,1),%rcx 256 negq %rax 257.byte 0x0f,0x1f,0x00 258 addq $16,%rax 259 260L$dec_loop4: 261.byte 102,15,56,222,209 262.byte 102,15,56,222,217 263.byte 102,15,56,222,225 264.byte 102,15,56,222,233 265 movups (%rcx,%rax,1),%xmm1 266 addq $32,%rax 267.byte 102,15,56,222,208 268.byte 102,15,56,222,216 269.byte 102,15,56,222,224 270.byte 102,15,56,222,232 271 movups -16(%rcx,%rax,1),%xmm0 272 jnz L$dec_loop4 273 274.byte 102,15,56,222,209 275.byte 102,15,56,222,217 276.byte 102,15,56,222,225 277.byte 102,15,56,222,233 278.byte 102,15,56,223,208 279.byte 102,15,56,223,216 280.byte 102,15,56,223,224 281.byte 102,15,56,223,232 282 ret 283 284 285 286.p2align 4 287_aesni_encrypt6: 288 289 movups (%rcx),%xmm0 290 shll $4,%eax 291 movups 16(%rcx),%xmm1 292 xorps %xmm0,%xmm2 293 pxor %xmm0,%xmm3 294 pxor %xmm0,%xmm4 295.byte 102,15,56,220,209 296 leaq 32(%rcx,%rax,1),%rcx 297 negq %rax 298.byte 102,15,56,220,217 299 pxor %xmm0,%xmm5 300 pxor %xmm0,%xmm6 301.byte 102,15,56,220,225 302 pxor %xmm0,%xmm7 303 movups (%rcx,%rax,1),%xmm0 304 addq $16,%rax 305 jmp L$enc_loop6_enter 306.p2align 4 307L$enc_loop6: 308.byte 102,15,56,220,209 309.byte 102,15,56,220,217 310.byte 102,15,56,220,225 311L$enc_loop6_enter: 312.byte 102,15,56,220,233 313.byte 102,15,56,220,241 314.byte 102,15,56,220,249 315 movups (%rcx,%rax,1),%xmm1 316 addq $32,%rax 317.byte 102,15,56,220,208 318.byte 102,15,56,220,216 319.byte 102,15,56,220,224 320.byte 102,15,56,220,232 321.byte 102,15,56,220,240 322.byte 102,15,56,220,248 323 movups -16(%rcx,%rax,1),%xmm0 324 jnz L$enc_loop6 325 326.byte 102,15,56,220,209 327.byte 102,15,56,220,217 328.byte 102,15,56,220,225 329.byte 102,15,56,220,233 330.byte 102,15,56,220,241 331.byte 102,15,56,220,249 332.byte 102,15,56,221,208 333.byte 102,15,56,221,216 334.byte 102,15,56,221,224 335.byte 102,15,56,221,232 336.byte 102,15,56,221,240 337.byte 102,15,56,221,248 338 ret 339 340 341 342.p2align 4 343_aesni_decrypt6: 344 345 movups (%rcx),%xmm0 346 shll $4,%eax 347 movups 16(%rcx),%xmm1 348 xorps %xmm0,%xmm2 349 pxor %xmm0,%xmm3 350 pxor %xmm0,%xmm4 351.byte 102,15,56,222,209 352 leaq 32(%rcx,%rax,1),%rcx 353 negq %rax 354.byte 102,15,56,222,217 355 pxor %xmm0,%xmm5 356 pxor %xmm0,%xmm6 357.byte 102,15,56,222,225 358 pxor %xmm0,%xmm7 359 movups (%rcx,%rax,1),%xmm0 360 addq $16,%rax 361 jmp L$dec_loop6_enter 362.p2align 4 363L$dec_loop6: 364.byte 102,15,56,222,209 365.byte 102,15,56,222,217 366.byte 102,15,56,222,225 367L$dec_loop6_enter: 368.byte 102,15,56,222,233 369.byte 102,15,56,222,241 370.byte 102,15,56,222,249 371 movups (%rcx,%rax,1),%xmm1 372 addq $32,%rax 373.byte 102,15,56,222,208 374.byte 102,15,56,222,216 375.byte 102,15,56,222,224 376.byte 102,15,56,222,232 377.byte 102,15,56,222,240 378.byte 102,15,56,222,248 379 movups -16(%rcx,%rax,1),%xmm0 380 jnz L$dec_loop6 381 382.byte 102,15,56,222,209 383.byte 102,15,56,222,217 384.byte 102,15,56,222,225 385.byte 102,15,56,222,233 386.byte 102,15,56,222,241 387.byte 102,15,56,222,249 388.byte 102,15,56,223,208 389.byte 102,15,56,223,216 390.byte 102,15,56,223,224 391.byte 102,15,56,223,232 392.byte 102,15,56,223,240 393.byte 102,15,56,223,248 394 ret 395 396 397 398.p2align 4 399_aesni_encrypt8: 400 401 movups (%rcx),%xmm0 402 shll $4,%eax 403 movups 16(%rcx),%xmm1 404 xorps %xmm0,%xmm2 405 xorps %xmm0,%xmm3 406 pxor %xmm0,%xmm4 407 pxor %xmm0,%xmm5 408 pxor %xmm0,%xmm6 409 leaq 32(%rcx,%rax,1),%rcx 410 negq %rax 411.byte 102,15,56,220,209 412 pxor %xmm0,%xmm7 413 pxor %xmm0,%xmm8 414.byte 102,15,56,220,217 415 pxor %xmm0,%xmm9 416 movups (%rcx,%rax,1),%xmm0 417 addq $16,%rax 418 jmp L$enc_loop8_inner 419.p2align 4 420L$enc_loop8: 421.byte 102,15,56,220,209 422.byte 102,15,56,220,217 423L$enc_loop8_inner: 424.byte 102,15,56,220,225 425.byte 102,15,56,220,233 426.byte 102,15,56,220,241 427.byte 102,15,56,220,249 428.byte 102,68,15,56,220,193 429.byte 102,68,15,56,220,201 430L$enc_loop8_enter: 431 movups (%rcx,%rax,1),%xmm1 432 addq $32,%rax 433.byte 102,15,56,220,208 434.byte 102,15,56,220,216 435.byte 102,15,56,220,224 436.byte 102,15,56,220,232 437.byte 102,15,56,220,240 438.byte 102,15,56,220,248 439.byte 102,68,15,56,220,192 440.byte 102,68,15,56,220,200 441 movups -16(%rcx,%rax,1),%xmm0 442 jnz L$enc_loop8 443 444.byte 102,15,56,220,209 445.byte 102,15,56,220,217 446.byte 102,15,56,220,225 447.byte 102,15,56,220,233 448.byte 102,15,56,220,241 449.byte 102,15,56,220,249 450.byte 102,68,15,56,220,193 451.byte 102,68,15,56,220,201 452.byte 102,15,56,221,208 453.byte 102,15,56,221,216 454.byte 102,15,56,221,224 455.byte 102,15,56,221,232 456.byte 102,15,56,221,240 457.byte 102,15,56,221,248 458.byte 102,68,15,56,221,192 459.byte 102,68,15,56,221,200 460 ret 461 462 463 464.p2align 4 465_aesni_decrypt8: 466 467 movups (%rcx),%xmm0 468 shll $4,%eax 469 movups 16(%rcx),%xmm1 470 xorps %xmm0,%xmm2 471 xorps %xmm0,%xmm3 472 pxor %xmm0,%xmm4 473 pxor %xmm0,%xmm5 474 pxor %xmm0,%xmm6 475 leaq 32(%rcx,%rax,1),%rcx 476 negq %rax 477.byte 102,15,56,222,209 478 pxor %xmm0,%xmm7 479 pxor %xmm0,%xmm8 480.byte 102,15,56,222,217 481 pxor %xmm0,%xmm9 482 movups (%rcx,%rax,1),%xmm0 483 addq $16,%rax 484 jmp L$dec_loop8_inner 485.p2align 4 486L$dec_loop8: 487.byte 102,15,56,222,209 488.byte 102,15,56,222,217 489L$dec_loop8_inner: 490.byte 102,15,56,222,225 491.byte 102,15,56,222,233 492.byte 102,15,56,222,241 493.byte 102,15,56,222,249 494.byte 102,68,15,56,222,193 495.byte 102,68,15,56,222,201 496L$dec_loop8_enter: 497 movups (%rcx,%rax,1),%xmm1 498 addq $32,%rax 499.byte 102,15,56,222,208 500.byte 102,15,56,222,216 501.byte 102,15,56,222,224 502.byte 102,15,56,222,232 503.byte 102,15,56,222,240 504.byte 102,15,56,222,248 505.byte 102,68,15,56,222,192 506.byte 102,68,15,56,222,200 507 movups -16(%rcx,%rax,1),%xmm0 508 jnz L$dec_loop8 509 510.byte 102,15,56,222,209 511.byte 102,15,56,222,217 512.byte 102,15,56,222,225 513.byte 102,15,56,222,233 514.byte 102,15,56,222,241 515.byte 102,15,56,222,249 516.byte 102,68,15,56,222,193 517.byte 102,68,15,56,222,201 518.byte 102,15,56,223,208 519.byte 102,15,56,223,216 520.byte 102,15,56,223,224 521.byte 102,15,56,223,232 522.byte 102,15,56,223,240 523.byte 102,15,56,223,248 524.byte 102,68,15,56,223,192 525.byte 102,68,15,56,223,200 526 ret 527 528 529.globl _aes_hw_ecb_encrypt 530.private_extern _aes_hw_ecb_encrypt 531 532.p2align 4 533_aes_hw_ecb_encrypt: 534 535_CET_ENDBR 536 andq $-16,%rdx 537 jz L$ecb_ret 538 539 movl 240(%rcx),%eax 540 movups (%rcx),%xmm0 541 movq %rcx,%r11 542 movl %eax,%r10d 543 testl %r8d,%r8d 544 jz L$ecb_decrypt 545 546 cmpq $0x80,%rdx 547 jb L$ecb_enc_tail 548 549 movdqu (%rdi),%xmm2 550 movdqu 16(%rdi),%xmm3 551 movdqu 32(%rdi),%xmm4 552 movdqu 48(%rdi),%xmm5 553 movdqu 64(%rdi),%xmm6 554 movdqu 80(%rdi),%xmm7 555 movdqu 96(%rdi),%xmm8 556 movdqu 112(%rdi),%xmm9 557 leaq 128(%rdi),%rdi 558 subq $0x80,%rdx 559 jmp L$ecb_enc_loop8_enter 560.p2align 4 561L$ecb_enc_loop8: 562 movups %xmm2,(%rsi) 563 movq %r11,%rcx 564 movdqu (%rdi),%xmm2 565 movl %r10d,%eax 566 movups %xmm3,16(%rsi) 567 movdqu 16(%rdi),%xmm3 568 movups %xmm4,32(%rsi) 569 movdqu 32(%rdi),%xmm4 570 movups %xmm5,48(%rsi) 571 movdqu 48(%rdi),%xmm5 572 movups %xmm6,64(%rsi) 573 movdqu 64(%rdi),%xmm6 574 movups %xmm7,80(%rsi) 575 movdqu 80(%rdi),%xmm7 576 movups %xmm8,96(%rsi) 577 movdqu 96(%rdi),%xmm8 578 movups %xmm9,112(%rsi) 579 leaq 128(%rsi),%rsi 580 movdqu 112(%rdi),%xmm9 581 leaq 128(%rdi),%rdi 582L$ecb_enc_loop8_enter: 583 584 call _aesni_encrypt8 585 586 subq $0x80,%rdx 587 jnc L$ecb_enc_loop8 588 589 movups %xmm2,(%rsi) 590 movq %r11,%rcx 591 movups %xmm3,16(%rsi) 592 movl %r10d,%eax 593 movups %xmm4,32(%rsi) 594 movups %xmm5,48(%rsi) 595 movups %xmm6,64(%rsi) 596 movups %xmm7,80(%rsi) 597 movups %xmm8,96(%rsi) 598 movups %xmm9,112(%rsi) 599 leaq 128(%rsi),%rsi 600 addq $0x80,%rdx 601 jz L$ecb_ret 602 603L$ecb_enc_tail: 604 movups (%rdi),%xmm2 605 cmpq $0x20,%rdx 606 jb L$ecb_enc_one 607 movups 16(%rdi),%xmm3 608 je L$ecb_enc_two 609 movups 32(%rdi),%xmm4 610 cmpq $0x40,%rdx 611 jb L$ecb_enc_three 612 movups 48(%rdi),%xmm5 613 je L$ecb_enc_four 614 movups 64(%rdi),%xmm6 615 cmpq $0x60,%rdx 616 jb L$ecb_enc_five 617 movups 80(%rdi),%xmm7 618 je L$ecb_enc_six 619 movdqu 96(%rdi),%xmm8 620 xorps %xmm9,%xmm9 621 call _aesni_encrypt8 622 movups %xmm2,(%rsi) 623 movups %xmm3,16(%rsi) 624 movups %xmm4,32(%rsi) 625 movups %xmm5,48(%rsi) 626 movups %xmm6,64(%rsi) 627 movups %xmm7,80(%rsi) 628 movups %xmm8,96(%rsi) 629 jmp L$ecb_ret 630.p2align 4 631L$ecb_enc_one: 632 movups (%rcx),%xmm0 633 movups 16(%rcx),%xmm1 634 leaq 32(%rcx),%rcx 635 xorps %xmm0,%xmm2 636L$oop_enc1_3: 637.byte 102,15,56,220,209 638 decl %eax 639 movups (%rcx),%xmm1 640 leaq 16(%rcx),%rcx 641 jnz L$oop_enc1_3 642.byte 102,15,56,221,209 643 movups %xmm2,(%rsi) 644 jmp L$ecb_ret 645.p2align 4 646L$ecb_enc_two: 647 call _aesni_encrypt2 648 movups %xmm2,(%rsi) 649 movups %xmm3,16(%rsi) 650 jmp L$ecb_ret 651.p2align 4 652L$ecb_enc_three: 653 call _aesni_encrypt3 654 movups %xmm2,(%rsi) 655 movups %xmm3,16(%rsi) 656 movups %xmm4,32(%rsi) 657 jmp L$ecb_ret 658.p2align 4 659L$ecb_enc_four: 660 call _aesni_encrypt4 661 movups %xmm2,(%rsi) 662 movups %xmm3,16(%rsi) 663 movups %xmm4,32(%rsi) 664 movups %xmm5,48(%rsi) 665 jmp L$ecb_ret 666.p2align 4 667L$ecb_enc_five: 668 xorps %xmm7,%xmm7 669 call _aesni_encrypt6 670 movups %xmm2,(%rsi) 671 movups %xmm3,16(%rsi) 672 movups %xmm4,32(%rsi) 673 movups %xmm5,48(%rsi) 674 movups %xmm6,64(%rsi) 675 jmp L$ecb_ret 676.p2align 4 677L$ecb_enc_six: 678 call _aesni_encrypt6 679 movups %xmm2,(%rsi) 680 movups %xmm3,16(%rsi) 681 movups %xmm4,32(%rsi) 682 movups %xmm5,48(%rsi) 683 movups %xmm6,64(%rsi) 684 movups %xmm7,80(%rsi) 685 jmp L$ecb_ret 686 687.p2align 4 688L$ecb_decrypt: 689 cmpq $0x80,%rdx 690 jb L$ecb_dec_tail 691 692 movdqu (%rdi),%xmm2 693 movdqu 16(%rdi),%xmm3 694 movdqu 32(%rdi),%xmm4 695 movdqu 48(%rdi),%xmm5 696 movdqu 64(%rdi),%xmm6 697 movdqu 80(%rdi),%xmm7 698 movdqu 96(%rdi),%xmm8 699 movdqu 112(%rdi),%xmm9 700 leaq 128(%rdi),%rdi 701 subq $0x80,%rdx 702 jmp L$ecb_dec_loop8_enter 703.p2align 4 704L$ecb_dec_loop8: 705 movups %xmm2,(%rsi) 706 movq %r11,%rcx 707 movdqu (%rdi),%xmm2 708 movl %r10d,%eax 709 movups %xmm3,16(%rsi) 710 movdqu 16(%rdi),%xmm3 711 movups %xmm4,32(%rsi) 712 movdqu 32(%rdi),%xmm4 713 movups %xmm5,48(%rsi) 714 movdqu 48(%rdi),%xmm5 715 movups %xmm6,64(%rsi) 716 movdqu 64(%rdi),%xmm6 717 movups %xmm7,80(%rsi) 718 movdqu 80(%rdi),%xmm7 719 movups %xmm8,96(%rsi) 720 movdqu 96(%rdi),%xmm8 721 movups %xmm9,112(%rsi) 722 leaq 128(%rsi),%rsi 723 movdqu 112(%rdi),%xmm9 724 leaq 128(%rdi),%rdi 725L$ecb_dec_loop8_enter: 726 727 call _aesni_decrypt8 728 729 movups (%r11),%xmm0 730 subq $0x80,%rdx 731 jnc L$ecb_dec_loop8 732 733 movups %xmm2,(%rsi) 734 pxor %xmm2,%xmm2 735 movq %r11,%rcx 736 movups %xmm3,16(%rsi) 737 pxor %xmm3,%xmm3 738 movl %r10d,%eax 739 movups %xmm4,32(%rsi) 740 pxor %xmm4,%xmm4 741 movups %xmm5,48(%rsi) 742 pxor %xmm5,%xmm5 743 movups %xmm6,64(%rsi) 744 pxor %xmm6,%xmm6 745 movups %xmm7,80(%rsi) 746 pxor %xmm7,%xmm7 747 movups %xmm8,96(%rsi) 748 pxor %xmm8,%xmm8 749 movups %xmm9,112(%rsi) 750 pxor %xmm9,%xmm9 751 leaq 128(%rsi),%rsi 752 addq $0x80,%rdx 753 jz L$ecb_ret 754 755L$ecb_dec_tail: 756 movups (%rdi),%xmm2 757 cmpq $0x20,%rdx 758 jb L$ecb_dec_one 759 movups 16(%rdi),%xmm3 760 je L$ecb_dec_two 761 movups 32(%rdi),%xmm4 762 cmpq $0x40,%rdx 763 jb L$ecb_dec_three 764 movups 48(%rdi),%xmm5 765 je L$ecb_dec_four 766 movups 64(%rdi),%xmm6 767 cmpq $0x60,%rdx 768 jb L$ecb_dec_five 769 movups 80(%rdi),%xmm7 770 je L$ecb_dec_six 771 movups 96(%rdi),%xmm8 772 movups (%rcx),%xmm0 773 xorps %xmm9,%xmm9 774 call _aesni_decrypt8 775 movups %xmm2,(%rsi) 776 pxor %xmm2,%xmm2 777 movups %xmm3,16(%rsi) 778 pxor %xmm3,%xmm3 779 movups %xmm4,32(%rsi) 780 pxor %xmm4,%xmm4 781 movups %xmm5,48(%rsi) 782 pxor %xmm5,%xmm5 783 movups %xmm6,64(%rsi) 784 pxor %xmm6,%xmm6 785 movups %xmm7,80(%rsi) 786 pxor %xmm7,%xmm7 787 movups %xmm8,96(%rsi) 788 pxor %xmm8,%xmm8 789 pxor %xmm9,%xmm9 790 jmp L$ecb_ret 791.p2align 4 792L$ecb_dec_one: 793 movups (%rcx),%xmm0 794 movups 16(%rcx),%xmm1 795 leaq 32(%rcx),%rcx 796 xorps %xmm0,%xmm2 797L$oop_dec1_4: 798.byte 102,15,56,222,209 799 decl %eax 800 movups (%rcx),%xmm1 801 leaq 16(%rcx),%rcx 802 jnz L$oop_dec1_4 803.byte 102,15,56,223,209 804 movups %xmm2,(%rsi) 805 pxor %xmm2,%xmm2 806 jmp L$ecb_ret 807.p2align 4 808L$ecb_dec_two: 809 call _aesni_decrypt2 810 movups %xmm2,(%rsi) 811 pxor %xmm2,%xmm2 812 movups %xmm3,16(%rsi) 813 pxor %xmm3,%xmm3 814 jmp L$ecb_ret 815.p2align 4 816L$ecb_dec_three: 817 call _aesni_decrypt3 818 movups %xmm2,(%rsi) 819 pxor %xmm2,%xmm2 820 movups %xmm3,16(%rsi) 821 pxor %xmm3,%xmm3 822 movups %xmm4,32(%rsi) 823 pxor %xmm4,%xmm4 824 jmp L$ecb_ret 825.p2align 4 826L$ecb_dec_four: 827 call _aesni_decrypt4 828 movups %xmm2,(%rsi) 829 pxor %xmm2,%xmm2 830 movups %xmm3,16(%rsi) 831 pxor %xmm3,%xmm3 832 movups %xmm4,32(%rsi) 833 pxor %xmm4,%xmm4 834 movups %xmm5,48(%rsi) 835 pxor %xmm5,%xmm5 836 jmp L$ecb_ret 837.p2align 4 838L$ecb_dec_five: 839 xorps %xmm7,%xmm7 840 call _aesni_decrypt6 841 movups %xmm2,(%rsi) 842 pxor %xmm2,%xmm2 843 movups %xmm3,16(%rsi) 844 pxor %xmm3,%xmm3 845 movups %xmm4,32(%rsi) 846 pxor %xmm4,%xmm4 847 movups %xmm5,48(%rsi) 848 pxor %xmm5,%xmm5 849 movups %xmm6,64(%rsi) 850 pxor %xmm6,%xmm6 851 pxor %xmm7,%xmm7 852 jmp L$ecb_ret 853.p2align 4 854L$ecb_dec_six: 855 call _aesni_decrypt6 856 movups %xmm2,(%rsi) 857 pxor %xmm2,%xmm2 858 movups %xmm3,16(%rsi) 859 pxor %xmm3,%xmm3 860 movups %xmm4,32(%rsi) 861 pxor %xmm4,%xmm4 862 movups %xmm5,48(%rsi) 863 pxor %xmm5,%xmm5 864 movups %xmm6,64(%rsi) 865 pxor %xmm6,%xmm6 866 movups %xmm7,80(%rsi) 867 pxor %xmm7,%xmm7 868 869L$ecb_ret: 870 xorps %xmm0,%xmm0 871 pxor %xmm1,%xmm1 872 ret 873 874 875.globl _aes_hw_ctr32_encrypt_blocks 876.private_extern _aes_hw_ctr32_encrypt_blocks 877 878.p2align 4 879_aes_hw_ctr32_encrypt_blocks: 880 881_CET_ENDBR 882#ifdef BORINGSSL_DISPATCH_TEST 883 movb $1,_BORINGSSL_function_hit(%rip) 884#endif 885 cmpq $1,%rdx 886 jne L$ctr32_bulk 887 888 889 890 movups (%r8),%xmm2 891 movups (%rdi),%xmm3 892 movl 240(%rcx),%edx 893 movups (%rcx),%xmm0 894 movups 16(%rcx),%xmm1 895 leaq 32(%rcx),%rcx 896 xorps %xmm0,%xmm2 897L$oop_enc1_5: 898.byte 102,15,56,220,209 899 decl %edx 900 movups (%rcx),%xmm1 901 leaq 16(%rcx),%rcx 902 jnz L$oop_enc1_5 903.byte 102,15,56,221,209 904 pxor %xmm0,%xmm0 905 pxor %xmm1,%xmm1 906 xorps %xmm3,%xmm2 907 pxor %xmm3,%xmm3 908 movups %xmm2,(%rsi) 909 xorps %xmm2,%xmm2 910 jmp L$ctr32_epilogue 911 912.p2align 4 913L$ctr32_bulk: 914 leaq (%rsp),%r11 915 916 pushq %rbp 917 918 subq $128,%rsp 919 andq $-16,%rsp 920 921 922 923 924 movdqu (%r8),%xmm2 925 movdqu (%rcx),%xmm0 926 movl 12(%r8),%r8d 927 pxor %xmm0,%xmm2 928 movl 12(%rcx),%ebp 929 movdqa %xmm2,0(%rsp) 930 bswapl %r8d 931 movdqa %xmm2,%xmm3 932 movdqa %xmm2,%xmm4 933 movdqa %xmm2,%xmm5 934 movdqa %xmm2,64(%rsp) 935 movdqa %xmm2,80(%rsp) 936 movdqa %xmm2,96(%rsp) 937 movq %rdx,%r10 938 movdqa %xmm2,112(%rsp) 939 940 leaq 1(%r8),%rax 941 leaq 2(%r8),%rdx 942 bswapl %eax 943 bswapl %edx 944 xorl %ebp,%eax 945 xorl %ebp,%edx 946.byte 102,15,58,34,216,3 947 leaq 3(%r8),%rax 948 movdqa %xmm3,16(%rsp) 949.byte 102,15,58,34,226,3 950 bswapl %eax 951 movq %r10,%rdx 952 leaq 4(%r8),%r10 953 movdqa %xmm4,32(%rsp) 954 xorl %ebp,%eax 955 bswapl %r10d 956.byte 102,15,58,34,232,3 957 xorl %ebp,%r10d 958 movdqa %xmm5,48(%rsp) 959 leaq 5(%r8),%r9 960 movl %r10d,64+12(%rsp) 961 bswapl %r9d 962 leaq 6(%r8),%r10 963 movl 240(%rcx),%eax 964 xorl %ebp,%r9d 965 bswapl %r10d 966 movl %r9d,80+12(%rsp) 967 xorl %ebp,%r10d 968 leaq 7(%r8),%r9 969 movl %r10d,96+12(%rsp) 970 bswapl %r9d 971 xorl %ebp,%r9d 972 movl %r9d,112+12(%rsp) 973 974 movups 16(%rcx),%xmm1 975 976 movdqa 64(%rsp),%xmm6 977 movdqa 80(%rsp),%xmm7 978 979 cmpq $8,%rdx 980 jb L$ctr32_tail 981 982 leaq 128(%rcx),%rcx 983 subq $8,%rdx 984 jmp L$ctr32_loop8 985 986.p2align 5 987L$ctr32_loop8: 988 addl $8,%r8d 989 movdqa 96(%rsp),%xmm8 990.byte 102,15,56,220,209 991 movl %r8d,%r9d 992 movdqa 112(%rsp),%xmm9 993.byte 102,15,56,220,217 994 bswapl %r9d 995 movups 32-128(%rcx),%xmm0 996.byte 102,15,56,220,225 997 xorl %ebp,%r9d 998 nop 999.byte 102,15,56,220,233 1000 movl %r9d,0+12(%rsp) 1001 leaq 1(%r8),%r9 1002.byte 102,15,56,220,241 1003.byte 102,15,56,220,249 1004.byte 102,68,15,56,220,193 1005.byte 102,68,15,56,220,201 1006 movups 48-128(%rcx),%xmm1 1007 bswapl %r9d 1008.byte 102,15,56,220,208 1009.byte 102,15,56,220,216 1010 xorl %ebp,%r9d 1011.byte 0x66,0x90 1012.byte 102,15,56,220,224 1013.byte 102,15,56,220,232 1014 movl %r9d,16+12(%rsp) 1015 leaq 2(%r8),%r9 1016.byte 102,15,56,220,240 1017.byte 102,15,56,220,248 1018.byte 102,68,15,56,220,192 1019.byte 102,68,15,56,220,200 1020 movups 64-128(%rcx),%xmm0 1021 bswapl %r9d 1022.byte 102,15,56,220,209 1023.byte 102,15,56,220,217 1024 xorl %ebp,%r9d 1025.byte 0x66,0x90 1026.byte 102,15,56,220,225 1027.byte 102,15,56,220,233 1028 movl %r9d,32+12(%rsp) 1029 leaq 3(%r8),%r9 1030.byte 102,15,56,220,241 1031.byte 102,15,56,220,249 1032.byte 102,68,15,56,220,193 1033.byte 102,68,15,56,220,201 1034 movups 80-128(%rcx),%xmm1 1035 bswapl %r9d 1036.byte 102,15,56,220,208 1037.byte 102,15,56,220,216 1038 xorl %ebp,%r9d 1039.byte 0x66,0x90 1040.byte 102,15,56,220,224 1041.byte 102,15,56,220,232 1042 movl %r9d,48+12(%rsp) 1043 leaq 4(%r8),%r9 1044.byte 102,15,56,220,240 1045.byte 102,15,56,220,248 1046.byte 102,68,15,56,220,192 1047.byte 102,68,15,56,220,200 1048 movups 96-128(%rcx),%xmm0 1049 bswapl %r9d 1050.byte 102,15,56,220,209 1051.byte 102,15,56,220,217 1052 xorl %ebp,%r9d 1053.byte 0x66,0x90 1054.byte 102,15,56,220,225 1055.byte 102,15,56,220,233 1056 movl %r9d,64+12(%rsp) 1057 leaq 5(%r8),%r9 1058.byte 102,15,56,220,241 1059.byte 102,15,56,220,249 1060.byte 102,68,15,56,220,193 1061.byte 102,68,15,56,220,201 1062 movups 112-128(%rcx),%xmm1 1063 bswapl %r9d 1064.byte 102,15,56,220,208 1065.byte 102,15,56,220,216 1066 xorl %ebp,%r9d 1067.byte 0x66,0x90 1068.byte 102,15,56,220,224 1069.byte 102,15,56,220,232 1070 movl %r9d,80+12(%rsp) 1071 leaq 6(%r8),%r9 1072.byte 102,15,56,220,240 1073.byte 102,15,56,220,248 1074.byte 102,68,15,56,220,192 1075.byte 102,68,15,56,220,200 1076 movups 128-128(%rcx),%xmm0 1077 bswapl %r9d 1078.byte 102,15,56,220,209 1079.byte 102,15,56,220,217 1080 xorl %ebp,%r9d 1081.byte 0x66,0x90 1082.byte 102,15,56,220,225 1083.byte 102,15,56,220,233 1084 movl %r9d,96+12(%rsp) 1085 leaq 7(%r8),%r9 1086.byte 102,15,56,220,241 1087.byte 102,15,56,220,249 1088.byte 102,68,15,56,220,193 1089.byte 102,68,15,56,220,201 1090 movups 144-128(%rcx),%xmm1 1091 bswapl %r9d 1092.byte 102,15,56,220,208 1093.byte 102,15,56,220,216 1094.byte 102,15,56,220,224 1095 xorl %ebp,%r9d 1096 movdqu 0(%rdi),%xmm10 1097.byte 102,15,56,220,232 1098 movl %r9d,112+12(%rsp) 1099 cmpl $11,%eax 1100.byte 102,15,56,220,240 1101.byte 102,15,56,220,248 1102.byte 102,68,15,56,220,192 1103.byte 102,68,15,56,220,200 1104 movups 160-128(%rcx),%xmm0 1105 1106 jb L$ctr32_enc_done 1107 1108.byte 102,15,56,220,209 1109.byte 102,15,56,220,217 1110.byte 102,15,56,220,225 1111.byte 102,15,56,220,233 1112.byte 102,15,56,220,241 1113.byte 102,15,56,220,249 1114.byte 102,68,15,56,220,193 1115.byte 102,68,15,56,220,201 1116 movups 176-128(%rcx),%xmm1 1117 1118.byte 102,15,56,220,208 1119.byte 102,15,56,220,216 1120.byte 102,15,56,220,224 1121.byte 102,15,56,220,232 1122.byte 102,15,56,220,240 1123.byte 102,15,56,220,248 1124.byte 102,68,15,56,220,192 1125.byte 102,68,15,56,220,200 1126 movups 192-128(%rcx),%xmm0 1127 je L$ctr32_enc_done 1128 1129.byte 102,15,56,220,209 1130.byte 102,15,56,220,217 1131.byte 102,15,56,220,225 1132.byte 102,15,56,220,233 1133.byte 102,15,56,220,241 1134.byte 102,15,56,220,249 1135.byte 102,68,15,56,220,193 1136.byte 102,68,15,56,220,201 1137 movups 208-128(%rcx),%xmm1 1138 1139.byte 102,15,56,220,208 1140.byte 102,15,56,220,216 1141.byte 102,15,56,220,224 1142.byte 102,15,56,220,232 1143.byte 102,15,56,220,240 1144.byte 102,15,56,220,248 1145.byte 102,68,15,56,220,192 1146.byte 102,68,15,56,220,200 1147 movups 224-128(%rcx),%xmm0 1148 jmp L$ctr32_enc_done 1149 1150.p2align 4 1151L$ctr32_enc_done: 1152 movdqu 16(%rdi),%xmm11 1153 pxor %xmm0,%xmm10 1154 movdqu 32(%rdi),%xmm12 1155 pxor %xmm0,%xmm11 1156 movdqu 48(%rdi),%xmm13 1157 pxor %xmm0,%xmm12 1158 movdqu 64(%rdi),%xmm14 1159 pxor %xmm0,%xmm13 1160 movdqu 80(%rdi),%xmm15 1161 pxor %xmm0,%xmm14 1162 prefetcht0 448(%rdi) 1163 prefetcht0 512(%rdi) 1164 pxor %xmm0,%xmm15 1165.byte 102,15,56,220,209 1166.byte 102,15,56,220,217 1167.byte 102,15,56,220,225 1168.byte 102,15,56,220,233 1169.byte 102,15,56,220,241 1170.byte 102,15,56,220,249 1171.byte 102,68,15,56,220,193 1172.byte 102,68,15,56,220,201 1173 movdqu 96(%rdi),%xmm1 1174 leaq 128(%rdi),%rdi 1175 1176.byte 102,65,15,56,221,210 1177 pxor %xmm0,%xmm1 1178 movdqu 112-128(%rdi),%xmm10 1179.byte 102,65,15,56,221,219 1180 pxor %xmm0,%xmm10 1181 movdqa 0(%rsp),%xmm11 1182.byte 102,65,15,56,221,228 1183.byte 102,65,15,56,221,237 1184 movdqa 16(%rsp),%xmm12 1185 movdqa 32(%rsp),%xmm13 1186.byte 102,65,15,56,221,246 1187.byte 102,65,15,56,221,255 1188 movdqa 48(%rsp),%xmm14 1189 movdqa 64(%rsp),%xmm15 1190.byte 102,68,15,56,221,193 1191 movdqa 80(%rsp),%xmm0 1192 movups 16-128(%rcx),%xmm1 1193.byte 102,69,15,56,221,202 1194 1195 movups %xmm2,(%rsi) 1196 movdqa %xmm11,%xmm2 1197 movups %xmm3,16(%rsi) 1198 movdqa %xmm12,%xmm3 1199 movups %xmm4,32(%rsi) 1200 movdqa %xmm13,%xmm4 1201 movups %xmm5,48(%rsi) 1202 movdqa %xmm14,%xmm5 1203 movups %xmm6,64(%rsi) 1204 movdqa %xmm15,%xmm6 1205 movups %xmm7,80(%rsi) 1206 movdqa %xmm0,%xmm7 1207 movups %xmm8,96(%rsi) 1208 movups %xmm9,112(%rsi) 1209 leaq 128(%rsi),%rsi 1210 1211 subq $8,%rdx 1212 jnc L$ctr32_loop8 1213 1214 addq $8,%rdx 1215 jz L$ctr32_done 1216 leaq -128(%rcx),%rcx 1217 1218L$ctr32_tail: 1219 1220 1221 leaq 16(%rcx),%rcx 1222 cmpq $4,%rdx 1223 jb L$ctr32_loop3 1224 je L$ctr32_loop4 1225 1226 1227 shll $4,%eax 1228 movdqa 96(%rsp),%xmm8 1229 pxor %xmm9,%xmm9 1230 1231 movups 16(%rcx),%xmm0 1232.byte 102,15,56,220,209 1233.byte 102,15,56,220,217 1234 leaq 32-16(%rcx,%rax,1),%rcx 1235 negq %rax 1236.byte 102,15,56,220,225 1237 addq $16,%rax 1238 movups (%rdi),%xmm10 1239.byte 102,15,56,220,233 1240.byte 102,15,56,220,241 1241 movups 16(%rdi),%xmm11 1242 movups 32(%rdi),%xmm12 1243.byte 102,15,56,220,249 1244.byte 102,68,15,56,220,193 1245 1246 call L$enc_loop8_enter 1247 1248 movdqu 48(%rdi),%xmm13 1249 pxor %xmm10,%xmm2 1250 movdqu 64(%rdi),%xmm10 1251 pxor %xmm11,%xmm3 1252 movdqu %xmm2,(%rsi) 1253 pxor %xmm12,%xmm4 1254 movdqu %xmm3,16(%rsi) 1255 pxor %xmm13,%xmm5 1256 movdqu %xmm4,32(%rsi) 1257 pxor %xmm10,%xmm6 1258 movdqu %xmm5,48(%rsi) 1259 movdqu %xmm6,64(%rsi) 1260 cmpq $6,%rdx 1261 jb L$ctr32_done 1262 1263 movups 80(%rdi),%xmm11 1264 xorps %xmm11,%xmm7 1265 movups %xmm7,80(%rsi) 1266 je L$ctr32_done 1267 1268 movups 96(%rdi),%xmm12 1269 xorps %xmm12,%xmm8 1270 movups %xmm8,96(%rsi) 1271 jmp L$ctr32_done 1272 1273.p2align 5 1274L$ctr32_loop4: 1275.byte 102,15,56,220,209 1276 leaq 16(%rcx),%rcx 1277 decl %eax 1278.byte 102,15,56,220,217 1279.byte 102,15,56,220,225 1280.byte 102,15,56,220,233 1281 movups (%rcx),%xmm1 1282 jnz L$ctr32_loop4 1283.byte 102,15,56,221,209 1284.byte 102,15,56,221,217 1285 movups (%rdi),%xmm10 1286 movups 16(%rdi),%xmm11 1287.byte 102,15,56,221,225 1288.byte 102,15,56,221,233 1289 movups 32(%rdi),%xmm12 1290 movups 48(%rdi),%xmm13 1291 1292 xorps %xmm10,%xmm2 1293 movups %xmm2,(%rsi) 1294 xorps %xmm11,%xmm3 1295 movups %xmm3,16(%rsi) 1296 pxor %xmm12,%xmm4 1297 movdqu %xmm4,32(%rsi) 1298 pxor %xmm13,%xmm5 1299 movdqu %xmm5,48(%rsi) 1300 jmp L$ctr32_done 1301 1302.p2align 5 1303L$ctr32_loop3: 1304.byte 102,15,56,220,209 1305 leaq 16(%rcx),%rcx 1306 decl %eax 1307.byte 102,15,56,220,217 1308.byte 102,15,56,220,225 1309 movups (%rcx),%xmm1 1310 jnz L$ctr32_loop3 1311.byte 102,15,56,221,209 1312.byte 102,15,56,221,217 1313.byte 102,15,56,221,225 1314 1315 movups (%rdi),%xmm10 1316 xorps %xmm10,%xmm2 1317 movups %xmm2,(%rsi) 1318 cmpq $2,%rdx 1319 jb L$ctr32_done 1320 1321 movups 16(%rdi),%xmm11 1322 xorps %xmm11,%xmm3 1323 movups %xmm3,16(%rsi) 1324 je L$ctr32_done 1325 1326 movups 32(%rdi),%xmm12 1327 xorps %xmm12,%xmm4 1328 movups %xmm4,32(%rsi) 1329 1330L$ctr32_done: 1331 xorps %xmm0,%xmm0 1332 xorl %ebp,%ebp 1333 pxor %xmm1,%xmm1 1334 pxor %xmm2,%xmm2 1335 pxor %xmm3,%xmm3 1336 pxor %xmm4,%xmm4 1337 pxor %xmm5,%xmm5 1338 pxor %xmm6,%xmm6 1339 pxor %xmm7,%xmm7 1340 movaps %xmm0,0(%rsp) 1341 pxor %xmm8,%xmm8 1342 movaps %xmm0,16(%rsp) 1343 pxor %xmm9,%xmm9 1344 movaps %xmm0,32(%rsp) 1345 pxor %xmm10,%xmm10 1346 movaps %xmm0,48(%rsp) 1347 pxor %xmm11,%xmm11 1348 movaps %xmm0,64(%rsp) 1349 pxor %xmm12,%xmm12 1350 movaps %xmm0,80(%rsp) 1351 pxor %xmm13,%xmm13 1352 movaps %xmm0,96(%rsp) 1353 pxor %xmm14,%xmm14 1354 movaps %xmm0,112(%rsp) 1355 pxor %xmm15,%xmm15 1356 movq -8(%r11),%rbp 1357 1358 leaq (%r11),%rsp 1359 1360L$ctr32_epilogue: 1361 ret 1362 1363 1364.globl _aes_hw_cbc_encrypt 1365.private_extern _aes_hw_cbc_encrypt 1366 1367.p2align 4 1368_aes_hw_cbc_encrypt: 1369 1370_CET_ENDBR 1371 testq %rdx,%rdx 1372 jz L$cbc_ret 1373 1374 movl 240(%rcx),%r10d 1375 movq %rcx,%r11 1376 testl %r9d,%r9d 1377 jz L$cbc_decrypt 1378 1379 movups (%r8),%xmm2 1380 movl %r10d,%eax 1381 cmpq $16,%rdx 1382 jb L$cbc_enc_tail 1383 subq $16,%rdx 1384 jmp L$cbc_enc_loop 1385.p2align 4 1386L$cbc_enc_loop: 1387 movups (%rdi),%xmm3 1388 leaq 16(%rdi),%rdi 1389 1390 movups (%rcx),%xmm0 1391 movups 16(%rcx),%xmm1 1392 xorps %xmm0,%xmm3 1393 leaq 32(%rcx),%rcx 1394 xorps %xmm3,%xmm2 1395L$oop_enc1_6: 1396.byte 102,15,56,220,209 1397 decl %eax 1398 movups (%rcx),%xmm1 1399 leaq 16(%rcx),%rcx 1400 jnz L$oop_enc1_6 1401.byte 102,15,56,221,209 1402 movl %r10d,%eax 1403 movq %r11,%rcx 1404 movups %xmm2,0(%rsi) 1405 leaq 16(%rsi),%rsi 1406 subq $16,%rdx 1407 jnc L$cbc_enc_loop 1408 addq $16,%rdx 1409 jnz L$cbc_enc_tail 1410 pxor %xmm0,%xmm0 1411 pxor %xmm1,%xmm1 1412 movups %xmm2,(%r8) 1413 pxor %xmm2,%xmm2 1414 pxor %xmm3,%xmm3 1415 jmp L$cbc_ret 1416 1417L$cbc_enc_tail: 1418 movq %rdx,%rcx 1419 xchgq %rdi,%rsi 1420.long 0x9066A4F3 1421 movl $16,%ecx 1422 subq %rdx,%rcx 1423 xorl %eax,%eax 1424.long 0x9066AAF3 1425 leaq -16(%rdi),%rdi 1426 movl %r10d,%eax 1427 movq %rdi,%rsi 1428 movq %r11,%rcx 1429 xorq %rdx,%rdx 1430 jmp L$cbc_enc_loop 1431 1432.p2align 4 1433L$cbc_decrypt: 1434 cmpq $16,%rdx 1435 jne L$cbc_decrypt_bulk 1436 1437 1438 1439 movdqu (%rdi),%xmm2 1440 movdqu (%r8),%xmm3 1441 movdqa %xmm2,%xmm4 1442 movups (%rcx),%xmm0 1443 movups 16(%rcx),%xmm1 1444 leaq 32(%rcx),%rcx 1445 xorps %xmm0,%xmm2 1446L$oop_dec1_7: 1447.byte 102,15,56,222,209 1448 decl %r10d 1449 movups (%rcx),%xmm1 1450 leaq 16(%rcx),%rcx 1451 jnz L$oop_dec1_7 1452.byte 102,15,56,223,209 1453 pxor %xmm0,%xmm0 1454 pxor %xmm1,%xmm1 1455 movdqu %xmm4,(%r8) 1456 xorps %xmm3,%xmm2 1457 pxor %xmm3,%xmm3 1458 movups %xmm2,(%rsi) 1459 pxor %xmm2,%xmm2 1460 jmp L$cbc_ret 1461.p2align 4 1462L$cbc_decrypt_bulk: 1463 leaq (%rsp),%r11 1464 1465 pushq %rbp 1466 1467 subq $16,%rsp 1468 andq $-16,%rsp 1469 movq %rcx,%rbp 1470 movups (%r8),%xmm10 1471 movl %r10d,%eax 1472 cmpq $0x50,%rdx 1473 jbe L$cbc_dec_tail 1474 1475 movups (%rcx),%xmm0 1476 movdqu 0(%rdi),%xmm2 1477 movdqu 16(%rdi),%xmm3 1478 movdqa %xmm2,%xmm11 1479 movdqu 32(%rdi),%xmm4 1480 movdqa %xmm3,%xmm12 1481 movdqu 48(%rdi),%xmm5 1482 movdqa %xmm4,%xmm13 1483 movdqu 64(%rdi),%xmm6 1484 movdqa %xmm5,%xmm14 1485 movdqu 80(%rdi),%xmm7 1486 movdqa %xmm6,%xmm15 1487 cmpq $0x70,%rdx 1488 jbe L$cbc_dec_six_or_seven 1489 1490 subq $0x70,%rdx 1491 leaq 112(%rcx),%rcx 1492 jmp L$cbc_dec_loop8_enter 1493.p2align 4 1494L$cbc_dec_loop8: 1495 movups %xmm9,(%rsi) 1496 leaq 16(%rsi),%rsi 1497L$cbc_dec_loop8_enter: 1498 movdqu 96(%rdi),%xmm8 1499 pxor %xmm0,%xmm2 1500 movdqu 112(%rdi),%xmm9 1501 pxor %xmm0,%xmm3 1502 movups 16-112(%rcx),%xmm1 1503 pxor %xmm0,%xmm4 1504 movq $-1,%rbp 1505 cmpq $0x70,%rdx 1506 pxor %xmm0,%xmm5 1507 pxor %xmm0,%xmm6 1508 pxor %xmm0,%xmm7 1509 pxor %xmm0,%xmm8 1510 1511.byte 102,15,56,222,209 1512 pxor %xmm0,%xmm9 1513 movups 32-112(%rcx),%xmm0 1514.byte 102,15,56,222,217 1515.byte 102,15,56,222,225 1516.byte 102,15,56,222,233 1517.byte 102,15,56,222,241 1518.byte 102,15,56,222,249 1519.byte 102,68,15,56,222,193 1520 adcq $0,%rbp 1521 andq $128,%rbp 1522.byte 102,68,15,56,222,201 1523 addq %rdi,%rbp 1524 movups 48-112(%rcx),%xmm1 1525.byte 102,15,56,222,208 1526.byte 102,15,56,222,216 1527.byte 102,15,56,222,224 1528.byte 102,15,56,222,232 1529.byte 102,15,56,222,240 1530.byte 102,15,56,222,248 1531.byte 102,68,15,56,222,192 1532.byte 102,68,15,56,222,200 1533 movups 64-112(%rcx),%xmm0 1534 nop 1535.byte 102,15,56,222,209 1536.byte 102,15,56,222,217 1537.byte 102,15,56,222,225 1538.byte 102,15,56,222,233 1539.byte 102,15,56,222,241 1540.byte 102,15,56,222,249 1541.byte 102,68,15,56,222,193 1542.byte 102,68,15,56,222,201 1543 movups 80-112(%rcx),%xmm1 1544 nop 1545.byte 102,15,56,222,208 1546.byte 102,15,56,222,216 1547.byte 102,15,56,222,224 1548.byte 102,15,56,222,232 1549.byte 102,15,56,222,240 1550.byte 102,15,56,222,248 1551.byte 102,68,15,56,222,192 1552.byte 102,68,15,56,222,200 1553 movups 96-112(%rcx),%xmm0 1554 nop 1555.byte 102,15,56,222,209 1556.byte 102,15,56,222,217 1557.byte 102,15,56,222,225 1558.byte 102,15,56,222,233 1559.byte 102,15,56,222,241 1560.byte 102,15,56,222,249 1561.byte 102,68,15,56,222,193 1562.byte 102,68,15,56,222,201 1563 movups 112-112(%rcx),%xmm1 1564 nop 1565.byte 102,15,56,222,208 1566.byte 102,15,56,222,216 1567.byte 102,15,56,222,224 1568.byte 102,15,56,222,232 1569.byte 102,15,56,222,240 1570.byte 102,15,56,222,248 1571.byte 102,68,15,56,222,192 1572.byte 102,68,15,56,222,200 1573 movups 128-112(%rcx),%xmm0 1574 nop 1575.byte 102,15,56,222,209 1576.byte 102,15,56,222,217 1577.byte 102,15,56,222,225 1578.byte 102,15,56,222,233 1579.byte 102,15,56,222,241 1580.byte 102,15,56,222,249 1581.byte 102,68,15,56,222,193 1582.byte 102,68,15,56,222,201 1583 movups 144-112(%rcx),%xmm1 1584 cmpl $11,%eax 1585.byte 102,15,56,222,208 1586.byte 102,15,56,222,216 1587.byte 102,15,56,222,224 1588.byte 102,15,56,222,232 1589.byte 102,15,56,222,240 1590.byte 102,15,56,222,248 1591.byte 102,68,15,56,222,192 1592.byte 102,68,15,56,222,200 1593 movups 160-112(%rcx),%xmm0 1594 jb L$cbc_dec_done 1595.byte 102,15,56,222,209 1596.byte 102,15,56,222,217 1597.byte 102,15,56,222,225 1598.byte 102,15,56,222,233 1599.byte 102,15,56,222,241 1600.byte 102,15,56,222,249 1601.byte 102,68,15,56,222,193 1602.byte 102,68,15,56,222,201 1603 movups 176-112(%rcx),%xmm1 1604 nop 1605.byte 102,15,56,222,208 1606.byte 102,15,56,222,216 1607.byte 102,15,56,222,224 1608.byte 102,15,56,222,232 1609.byte 102,15,56,222,240 1610.byte 102,15,56,222,248 1611.byte 102,68,15,56,222,192 1612.byte 102,68,15,56,222,200 1613 movups 192-112(%rcx),%xmm0 1614 je L$cbc_dec_done 1615.byte 102,15,56,222,209 1616.byte 102,15,56,222,217 1617.byte 102,15,56,222,225 1618.byte 102,15,56,222,233 1619.byte 102,15,56,222,241 1620.byte 102,15,56,222,249 1621.byte 102,68,15,56,222,193 1622.byte 102,68,15,56,222,201 1623 movups 208-112(%rcx),%xmm1 1624 nop 1625.byte 102,15,56,222,208 1626.byte 102,15,56,222,216 1627.byte 102,15,56,222,224 1628.byte 102,15,56,222,232 1629.byte 102,15,56,222,240 1630.byte 102,15,56,222,248 1631.byte 102,68,15,56,222,192 1632.byte 102,68,15,56,222,200 1633 movups 224-112(%rcx),%xmm0 1634 jmp L$cbc_dec_done 1635.p2align 4 1636L$cbc_dec_done: 1637.byte 102,15,56,222,209 1638.byte 102,15,56,222,217 1639 pxor %xmm0,%xmm10 1640 pxor %xmm0,%xmm11 1641.byte 102,15,56,222,225 1642.byte 102,15,56,222,233 1643 pxor %xmm0,%xmm12 1644 pxor %xmm0,%xmm13 1645.byte 102,15,56,222,241 1646.byte 102,15,56,222,249 1647 pxor %xmm0,%xmm14 1648 pxor %xmm0,%xmm15 1649.byte 102,68,15,56,222,193 1650.byte 102,68,15,56,222,201 1651 movdqu 80(%rdi),%xmm1 1652 1653.byte 102,65,15,56,223,210 1654 movdqu 96(%rdi),%xmm10 1655 pxor %xmm0,%xmm1 1656.byte 102,65,15,56,223,219 1657 pxor %xmm0,%xmm10 1658 movdqu 112(%rdi),%xmm0 1659.byte 102,65,15,56,223,228 1660 leaq 128(%rdi),%rdi 1661 movdqu 0(%rbp),%xmm11 1662.byte 102,65,15,56,223,237 1663.byte 102,65,15,56,223,246 1664 movdqu 16(%rbp),%xmm12 1665 movdqu 32(%rbp),%xmm13 1666.byte 102,65,15,56,223,255 1667.byte 102,68,15,56,223,193 1668 movdqu 48(%rbp),%xmm14 1669 movdqu 64(%rbp),%xmm15 1670.byte 102,69,15,56,223,202 1671 movdqa %xmm0,%xmm10 1672 movdqu 80(%rbp),%xmm1 1673 movups -112(%rcx),%xmm0 1674 1675 movups %xmm2,(%rsi) 1676 movdqa %xmm11,%xmm2 1677 movups %xmm3,16(%rsi) 1678 movdqa %xmm12,%xmm3 1679 movups %xmm4,32(%rsi) 1680 movdqa %xmm13,%xmm4 1681 movups %xmm5,48(%rsi) 1682 movdqa %xmm14,%xmm5 1683 movups %xmm6,64(%rsi) 1684 movdqa %xmm15,%xmm6 1685 movups %xmm7,80(%rsi) 1686 movdqa %xmm1,%xmm7 1687 movups %xmm8,96(%rsi) 1688 leaq 112(%rsi),%rsi 1689 1690 subq $0x80,%rdx 1691 ja L$cbc_dec_loop8 1692 1693 movaps %xmm9,%xmm2 1694 leaq -112(%rcx),%rcx 1695 addq $0x70,%rdx 1696 jle L$cbc_dec_clear_tail_collected 1697 movups %xmm9,(%rsi) 1698 leaq 16(%rsi),%rsi 1699 cmpq $0x50,%rdx 1700 jbe L$cbc_dec_tail 1701 1702 movaps %xmm11,%xmm2 1703L$cbc_dec_six_or_seven: 1704 cmpq $0x60,%rdx 1705 ja L$cbc_dec_seven 1706 1707 movaps %xmm7,%xmm8 1708 call _aesni_decrypt6 1709 pxor %xmm10,%xmm2 1710 movaps %xmm8,%xmm10 1711 pxor %xmm11,%xmm3 1712 movdqu %xmm2,(%rsi) 1713 pxor %xmm12,%xmm4 1714 movdqu %xmm3,16(%rsi) 1715 pxor %xmm3,%xmm3 1716 pxor %xmm13,%xmm5 1717 movdqu %xmm4,32(%rsi) 1718 pxor %xmm4,%xmm4 1719 pxor %xmm14,%xmm6 1720 movdqu %xmm5,48(%rsi) 1721 pxor %xmm5,%xmm5 1722 pxor %xmm15,%xmm7 1723 movdqu %xmm6,64(%rsi) 1724 pxor %xmm6,%xmm6 1725 leaq 80(%rsi),%rsi 1726 movdqa %xmm7,%xmm2 1727 pxor %xmm7,%xmm7 1728 jmp L$cbc_dec_tail_collected 1729 1730.p2align 4 1731L$cbc_dec_seven: 1732 movups 96(%rdi),%xmm8 1733 xorps %xmm9,%xmm9 1734 call _aesni_decrypt8 1735 movups 80(%rdi),%xmm9 1736 pxor %xmm10,%xmm2 1737 movups 96(%rdi),%xmm10 1738 pxor %xmm11,%xmm3 1739 movdqu %xmm2,(%rsi) 1740 pxor %xmm12,%xmm4 1741 movdqu %xmm3,16(%rsi) 1742 pxor %xmm3,%xmm3 1743 pxor %xmm13,%xmm5 1744 movdqu %xmm4,32(%rsi) 1745 pxor %xmm4,%xmm4 1746 pxor %xmm14,%xmm6 1747 movdqu %xmm5,48(%rsi) 1748 pxor %xmm5,%xmm5 1749 pxor %xmm15,%xmm7 1750 movdqu %xmm6,64(%rsi) 1751 pxor %xmm6,%xmm6 1752 pxor %xmm9,%xmm8 1753 movdqu %xmm7,80(%rsi) 1754 pxor %xmm7,%xmm7 1755 leaq 96(%rsi),%rsi 1756 movdqa %xmm8,%xmm2 1757 pxor %xmm8,%xmm8 1758 pxor %xmm9,%xmm9 1759 jmp L$cbc_dec_tail_collected 1760 1761L$cbc_dec_tail: 1762 movups (%rdi),%xmm2 1763 subq $0x10,%rdx 1764 jbe L$cbc_dec_one 1765 1766 movups 16(%rdi),%xmm3 1767 movaps %xmm2,%xmm11 1768 subq $0x10,%rdx 1769 jbe L$cbc_dec_two 1770 1771 movups 32(%rdi),%xmm4 1772 movaps %xmm3,%xmm12 1773 subq $0x10,%rdx 1774 jbe L$cbc_dec_three 1775 1776 movups 48(%rdi),%xmm5 1777 movaps %xmm4,%xmm13 1778 subq $0x10,%rdx 1779 jbe L$cbc_dec_four 1780 1781 movups 64(%rdi),%xmm6 1782 movaps %xmm5,%xmm14 1783 movaps %xmm6,%xmm15 1784 xorps %xmm7,%xmm7 1785 call _aesni_decrypt6 1786 pxor %xmm10,%xmm2 1787 movaps %xmm15,%xmm10 1788 pxor %xmm11,%xmm3 1789 movdqu %xmm2,(%rsi) 1790 pxor %xmm12,%xmm4 1791 movdqu %xmm3,16(%rsi) 1792 pxor %xmm3,%xmm3 1793 pxor %xmm13,%xmm5 1794 movdqu %xmm4,32(%rsi) 1795 pxor %xmm4,%xmm4 1796 pxor %xmm14,%xmm6 1797 movdqu %xmm5,48(%rsi) 1798 pxor %xmm5,%xmm5 1799 leaq 64(%rsi),%rsi 1800 movdqa %xmm6,%xmm2 1801 pxor %xmm6,%xmm6 1802 pxor %xmm7,%xmm7 1803 subq $0x10,%rdx 1804 jmp L$cbc_dec_tail_collected 1805 1806.p2align 4 1807L$cbc_dec_one: 1808 movaps %xmm2,%xmm11 1809 movups (%rcx),%xmm0 1810 movups 16(%rcx),%xmm1 1811 leaq 32(%rcx),%rcx 1812 xorps %xmm0,%xmm2 1813L$oop_dec1_8: 1814.byte 102,15,56,222,209 1815 decl %eax 1816 movups (%rcx),%xmm1 1817 leaq 16(%rcx),%rcx 1818 jnz L$oop_dec1_8 1819.byte 102,15,56,223,209 1820 xorps %xmm10,%xmm2 1821 movaps %xmm11,%xmm10 1822 jmp L$cbc_dec_tail_collected 1823.p2align 4 1824L$cbc_dec_two: 1825 movaps %xmm3,%xmm12 1826 call _aesni_decrypt2 1827 pxor %xmm10,%xmm2 1828 movaps %xmm12,%xmm10 1829 pxor %xmm11,%xmm3 1830 movdqu %xmm2,(%rsi) 1831 movdqa %xmm3,%xmm2 1832 pxor %xmm3,%xmm3 1833 leaq 16(%rsi),%rsi 1834 jmp L$cbc_dec_tail_collected 1835.p2align 4 1836L$cbc_dec_three: 1837 movaps %xmm4,%xmm13 1838 call _aesni_decrypt3 1839 pxor %xmm10,%xmm2 1840 movaps %xmm13,%xmm10 1841 pxor %xmm11,%xmm3 1842 movdqu %xmm2,(%rsi) 1843 pxor %xmm12,%xmm4 1844 movdqu %xmm3,16(%rsi) 1845 pxor %xmm3,%xmm3 1846 movdqa %xmm4,%xmm2 1847 pxor %xmm4,%xmm4 1848 leaq 32(%rsi),%rsi 1849 jmp L$cbc_dec_tail_collected 1850.p2align 4 1851L$cbc_dec_four: 1852 movaps %xmm5,%xmm14 1853 call _aesni_decrypt4 1854 pxor %xmm10,%xmm2 1855 movaps %xmm14,%xmm10 1856 pxor %xmm11,%xmm3 1857 movdqu %xmm2,(%rsi) 1858 pxor %xmm12,%xmm4 1859 movdqu %xmm3,16(%rsi) 1860 pxor %xmm3,%xmm3 1861 pxor %xmm13,%xmm5 1862 movdqu %xmm4,32(%rsi) 1863 pxor %xmm4,%xmm4 1864 movdqa %xmm5,%xmm2 1865 pxor %xmm5,%xmm5 1866 leaq 48(%rsi),%rsi 1867 jmp L$cbc_dec_tail_collected 1868 1869.p2align 4 1870L$cbc_dec_clear_tail_collected: 1871 pxor %xmm3,%xmm3 1872 pxor %xmm4,%xmm4 1873 pxor %xmm5,%xmm5 1874 pxor %xmm6,%xmm6 1875 pxor %xmm7,%xmm7 1876 pxor %xmm8,%xmm8 1877 pxor %xmm9,%xmm9 1878L$cbc_dec_tail_collected: 1879 movups %xmm10,(%r8) 1880 andq $15,%rdx 1881 jnz L$cbc_dec_tail_partial 1882 movups %xmm2,(%rsi) 1883 pxor %xmm2,%xmm2 1884 jmp L$cbc_dec_ret 1885.p2align 4 1886L$cbc_dec_tail_partial: 1887 movaps %xmm2,(%rsp) 1888 pxor %xmm2,%xmm2 1889 movq $16,%rcx 1890 movq %rsi,%rdi 1891 subq %rdx,%rcx 1892 leaq (%rsp),%rsi 1893.long 0x9066A4F3 1894 movdqa %xmm2,(%rsp) 1895 1896L$cbc_dec_ret: 1897 xorps %xmm0,%xmm0 1898 pxor %xmm1,%xmm1 1899 movq -8(%r11),%rbp 1900 1901 leaq (%r11),%rsp 1902 1903L$cbc_ret: 1904 ret 1905 1906 1907.globl _aes_hw_encrypt_key_to_decrypt_key 1908.private_extern _aes_hw_encrypt_key_to_decrypt_key 1909 1910.p2align 4 1911_aes_hw_encrypt_key_to_decrypt_key: 1912 1913_CET_ENDBR 1914 1915 movl 240(%rdi),%esi 1916 shll $4,%esi 1917 1918 leaq 16(%rdi,%rsi,1),%rdx 1919 1920 movups (%rdi),%xmm0 1921 movups (%rdx),%xmm1 1922 movups %xmm0,(%rdx) 1923 movups %xmm1,(%rdi) 1924 leaq 16(%rdi),%rdi 1925 leaq -16(%rdx),%rdx 1926 1927L$dec_key_inverse: 1928 movups (%rdi),%xmm0 1929 movups (%rdx),%xmm1 1930.byte 102,15,56,219,192 1931.byte 102,15,56,219,201 1932 leaq 16(%rdi),%rdi 1933 leaq -16(%rdx),%rdx 1934 movups %xmm0,16(%rdx) 1935 movups %xmm1,-16(%rdi) 1936 cmpq %rdi,%rdx 1937 ja L$dec_key_inverse 1938 1939 movups (%rdi),%xmm0 1940.byte 102,15,56,219,192 1941 pxor %xmm1,%xmm1 1942 movups %xmm0,(%rdx) 1943 pxor %xmm0,%xmm0 1944 ret 1945 1946 1947.globl _aes_hw_set_encrypt_key_base 1948.private_extern _aes_hw_set_encrypt_key_base 1949 1950.p2align 4 1951_aes_hw_set_encrypt_key_base: 1952 1953 1954_CET_ENDBR 1955#ifdef BORINGSSL_DISPATCH_TEST 1956 movb $1,_BORINGSSL_function_hit+3(%rip) 1957#endif 1958 subq $8,%rsp 1959 1960 1961 1962 movups (%rdi),%xmm0 1963 xorps %xmm4,%xmm4 1964 leaq 16(%rdx),%rax 1965 cmpl $256,%esi 1966 je L$14rounds 1967 cmpl $192,%esi 1968 je L$12rounds 1969 cmpl $128,%esi 1970 jne L$bad_keybits 1971 1972L$10rounds: 1973 movl $9,%esi 1974 1975 movups %xmm0,(%rdx) 1976.byte 102,15,58,223,200,1 1977 call L$key_expansion_128_cold 1978.byte 102,15,58,223,200,2 1979 call L$key_expansion_128 1980.byte 102,15,58,223,200,4 1981 call L$key_expansion_128 1982.byte 102,15,58,223,200,8 1983 call L$key_expansion_128 1984.byte 102,15,58,223,200,16 1985 call L$key_expansion_128 1986.byte 102,15,58,223,200,32 1987 call L$key_expansion_128 1988.byte 102,15,58,223,200,64 1989 call L$key_expansion_128 1990.byte 102,15,58,223,200,128 1991 call L$key_expansion_128 1992.byte 102,15,58,223,200,27 1993 call L$key_expansion_128 1994.byte 102,15,58,223,200,54 1995 call L$key_expansion_128 1996 movups %xmm0,(%rax) 1997 movl %esi,80(%rax) 1998 xorl %eax,%eax 1999 jmp L$enc_key_ret 2000 2001.p2align 4 2002L$12rounds: 2003 movq 16(%rdi),%xmm2 2004 movl $11,%esi 2005 2006 movups %xmm0,(%rdx) 2007.byte 102,15,58,223,202,1 2008 call L$key_expansion_192a_cold 2009.byte 102,15,58,223,202,2 2010 call L$key_expansion_192b 2011.byte 102,15,58,223,202,4 2012 call L$key_expansion_192a 2013.byte 102,15,58,223,202,8 2014 call L$key_expansion_192b 2015.byte 102,15,58,223,202,16 2016 call L$key_expansion_192a 2017.byte 102,15,58,223,202,32 2018 call L$key_expansion_192b 2019.byte 102,15,58,223,202,64 2020 call L$key_expansion_192a 2021.byte 102,15,58,223,202,128 2022 call L$key_expansion_192b 2023 movups %xmm0,(%rax) 2024 movl %esi,48(%rax) 2025 xorq %rax,%rax 2026 jmp L$enc_key_ret 2027 2028.p2align 4 2029L$14rounds: 2030 movups 16(%rdi),%xmm2 2031 movl $13,%esi 2032 leaq 16(%rax),%rax 2033 2034 movups %xmm0,(%rdx) 2035 movups %xmm2,16(%rdx) 2036.byte 102,15,58,223,202,1 2037 call L$key_expansion_256a_cold 2038.byte 102,15,58,223,200,1 2039 call L$key_expansion_256b 2040.byte 102,15,58,223,202,2 2041 call L$key_expansion_256a 2042.byte 102,15,58,223,200,2 2043 call L$key_expansion_256b 2044.byte 102,15,58,223,202,4 2045 call L$key_expansion_256a 2046.byte 102,15,58,223,200,4 2047 call L$key_expansion_256b 2048.byte 102,15,58,223,202,8 2049 call L$key_expansion_256a 2050.byte 102,15,58,223,200,8 2051 call L$key_expansion_256b 2052.byte 102,15,58,223,202,16 2053 call L$key_expansion_256a 2054.byte 102,15,58,223,200,16 2055 call L$key_expansion_256b 2056.byte 102,15,58,223,202,32 2057 call L$key_expansion_256a 2058.byte 102,15,58,223,200,32 2059 call L$key_expansion_256b 2060.byte 102,15,58,223,202,64 2061 call L$key_expansion_256a 2062 movups %xmm0,(%rax) 2063 movl %esi,16(%rax) 2064 xorq %rax,%rax 2065 jmp L$enc_key_ret 2066 2067.p2align 4 2068L$bad_keybits: 2069 movq $-2,%rax 2070L$enc_key_ret: 2071 pxor %xmm0,%xmm0 2072 pxor %xmm1,%xmm1 2073 pxor %xmm2,%xmm2 2074 pxor %xmm3,%xmm3 2075 pxor %xmm4,%xmm4 2076 pxor %xmm5,%xmm5 2077 addq $8,%rsp 2078 2079 ret 2080 2081 2082 2083.p2align 4 2084L$key_expansion_128: 2085 2086 movups %xmm0,(%rax) 2087 leaq 16(%rax),%rax 2088L$key_expansion_128_cold: 2089 shufps $16,%xmm0,%xmm4 2090 xorps %xmm4,%xmm0 2091 shufps $140,%xmm0,%xmm4 2092 xorps %xmm4,%xmm0 2093 shufps $255,%xmm1,%xmm1 2094 xorps %xmm1,%xmm0 2095 ret 2096 2097 2098.p2align 4 2099L$key_expansion_192a: 2100 2101 movups %xmm0,(%rax) 2102 leaq 16(%rax),%rax 2103L$key_expansion_192a_cold: 2104 movaps %xmm2,%xmm5 2105L$key_expansion_192b_warm: 2106 shufps $16,%xmm0,%xmm4 2107 movdqa %xmm2,%xmm3 2108 xorps %xmm4,%xmm0 2109 shufps $140,%xmm0,%xmm4 2110 pslldq $4,%xmm3 2111 xorps %xmm4,%xmm0 2112 pshufd $85,%xmm1,%xmm1 2113 pxor %xmm3,%xmm2 2114 pxor %xmm1,%xmm0 2115 pshufd $255,%xmm0,%xmm3 2116 pxor %xmm3,%xmm2 2117 ret 2118 2119 2120.p2align 4 2121L$key_expansion_192b: 2122 2123 movaps %xmm0,%xmm3 2124 shufps $68,%xmm0,%xmm5 2125 movups %xmm5,(%rax) 2126 shufps $78,%xmm2,%xmm3 2127 movups %xmm3,16(%rax) 2128 leaq 32(%rax),%rax 2129 jmp L$key_expansion_192b_warm 2130 2131 2132.p2align 4 2133L$key_expansion_256a: 2134 2135 movups %xmm2,(%rax) 2136 leaq 16(%rax),%rax 2137L$key_expansion_256a_cold: 2138 shufps $16,%xmm0,%xmm4 2139 xorps %xmm4,%xmm0 2140 shufps $140,%xmm0,%xmm4 2141 xorps %xmm4,%xmm0 2142 shufps $255,%xmm1,%xmm1 2143 xorps %xmm1,%xmm0 2144 ret 2145 2146 2147.p2align 4 2148L$key_expansion_256b: 2149 2150 movups %xmm0,(%rax) 2151 leaq 16(%rax),%rax 2152 2153 shufps $16,%xmm2,%xmm4 2154 xorps %xmm4,%xmm2 2155 shufps $140,%xmm2,%xmm4 2156 xorps %xmm4,%xmm2 2157 shufps $170,%xmm1,%xmm1 2158 xorps %xmm1,%xmm2 2159 ret 2160 2161 2162 2163.globl _aes_hw_set_encrypt_key_alt 2164.private_extern _aes_hw_set_encrypt_key_alt 2165 2166.p2align 4 2167_aes_hw_set_encrypt_key_alt: 2168 2169 2170_CET_ENDBR 2171#ifdef BORINGSSL_DISPATCH_TEST 2172 movb $1,_BORINGSSL_function_hit+3(%rip) 2173#endif 2174 subq $8,%rsp 2175 2176 2177 2178 movups (%rdi),%xmm0 2179 xorps %xmm4,%xmm4 2180 leaq 16(%rdx),%rax 2181 cmpl $256,%esi 2182 je L$14rounds_alt 2183 cmpl $192,%esi 2184 je L$12rounds_alt 2185 cmpl $128,%esi 2186 jne L$bad_keybits_alt 2187 2188 movl $9,%esi 2189 movdqa L$key_rotate(%rip),%xmm5 2190 movl $8,%r10d 2191 movdqa L$key_rcon1(%rip),%xmm4 2192 movdqa %xmm0,%xmm2 2193 movdqu %xmm0,(%rdx) 2194 jmp L$oop_key128 2195 2196.p2align 4 2197L$oop_key128: 2198.byte 102,15,56,0,197 2199.byte 102,15,56,221,196 2200 pslld $1,%xmm4 2201 leaq 16(%rax),%rax 2202 2203 movdqa %xmm2,%xmm3 2204 pslldq $4,%xmm2 2205 pxor %xmm2,%xmm3 2206 pslldq $4,%xmm2 2207 pxor %xmm2,%xmm3 2208 pslldq $4,%xmm2 2209 pxor %xmm3,%xmm2 2210 2211 pxor %xmm2,%xmm0 2212 movdqu %xmm0,-16(%rax) 2213 movdqa %xmm0,%xmm2 2214 2215 decl %r10d 2216 jnz L$oop_key128 2217 2218 movdqa L$key_rcon1b(%rip),%xmm4 2219 2220.byte 102,15,56,0,197 2221.byte 102,15,56,221,196 2222 pslld $1,%xmm4 2223 2224 movdqa %xmm2,%xmm3 2225 pslldq $4,%xmm2 2226 pxor %xmm2,%xmm3 2227 pslldq $4,%xmm2 2228 pxor %xmm2,%xmm3 2229 pslldq $4,%xmm2 2230 pxor %xmm3,%xmm2 2231 2232 pxor %xmm2,%xmm0 2233 movdqu %xmm0,(%rax) 2234 2235 movdqa %xmm0,%xmm2 2236.byte 102,15,56,0,197 2237.byte 102,15,56,221,196 2238 2239 movdqa %xmm2,%xmm3 2240 pslldq $4,%xmm2 2241 pxor %xmm2,%xmm3 2242 pslldq $4,%xmm2 2243 pxor %xmm2,%xmm3 2244 pslldq $4,%xmm2 2245 pxor %xmm3,%xmm2 2246 2247 pxor %xmm2,%xmm0 2248 movdqu %xmm0,16(%rax) 2249 2250 movl %esi,96(%rax) 2251 xorl %eax,%eax 2252 jmp L$enc_key_ret_alt 2253 2254.p2align 4 2255L$12rounds_alt: 2256 movq 16(%rdi),%xmm2 2257 movl $11,%esi 2258 movdqa L$key_rotate192(%rip),%xmm5 2259 movdqa L$key_rcon1(%rip),%xmm4 2260 movl $8,%r10d 2261 movdqu %xmm0,(%rdx) 2262 jmp L$oop_key192 2263 2264.p2align 4 2265L$oop_key192: 2266 movq %xmm2,0(%rax) 2267 movdqa %xmm2,%xmm1 2268.byte 102,15,56,0,213 2269.byte 102,15,56,221,212 2270 pslld $1,%xmm4 2271 leaq 24(%rax),%rax 2272 2273 movdqa %xmm0,%xmm3 2274 pslldq $4,%xmm0 2275 pxor %xmm0,%xmm3 2276 pslldq $4,%xmm0 2277 pxor %xmm0,%xmm3 2278 pslldq $4,%xmm0 2279 pxor %xmm3,%xmm0 2280 2281 pshufd $0xff,%xmm0,%xmm3 2282 pxor %xmm1,%xmm3 2283 pslldq $4,%xmm1 2284 pxor %xmm1,%xmm3 2285 2286 pxor %xmm2,%xmm0 2287 pxor %xmm3,%xmm2 2288 movdqu %xmm0,-16(%rax) 2289 2290 decl %r10d 2291 jnz L$oop_key192 2292 2293 movl %esi,32(%rax) 2294 xorl %eax,%eax 2295 jmp L$enc_key_ret_alt 2296 2297.p2align 4 2298L$14rounds_alt: 2299 movups 16(%rdi),%xmm2 2300 movl $13,%esi 2301 leaq 16(%rax),%rax 2302 movdqa L$key_rotate(%rip),%xmm5 2303 movdqa L$key_rcon1(%rip),%xmm4 2304 movl $7,%r10d 2305 movdqu %xmm0,0(%rdx) 2306 movdqa %xmm2,%xmm1 2307 movdqu %xmm2,16(%rdx) 2308 jmp L$oop_key256 2309 2310.p2align 4 2311L$oop_key256: 2312.byte 102,15,56,0,213 2313.byte 102,15,56,221,212 2314 2315 movdqa %xmm0,%xmm3 2316 pslldq $4,%xmm0 2317 pxor %xmm0,%xmm3 2318 pslldq $4,%xmm0 2319 pxor %xmm0,%xmm3 2320 pslldq $4,%xmm0 2321 pxor %xmm3,%xmm0 2322 pslld $1,%xmm4 2323 2324 pxor %xmm2,%xmm0 2325 movdqu %xmm0,(%rax) 2326 2327 decl %r10d 2328 jz L$done_key256 2329 2330 pshufd $0xff,%xmm0,%xmm2 2331 pxor %xmm3,%xmm3 2332.byte 102,15,56,221,211 2333 2334 movdqa %xmm1,%xmm3 2335 pslldq $4,%xmm1 2336 pxor %xmm1,%xmm3 2337 pslldq $4,%xmm1 2338 pxor %xmm1,%xmm3 2339 pslldq $4,%xmm1 2340 pxor %xmm3,%xmm1 2341 2342 pxor %xmm1,%xmm2 2343 movdqu %xmm2,16(%rax) 2344 leaq 32(%rax),%rax 2345 movdqa %xmm2,%xmm1 2346 2347 jmp L$oop_key256 2348 2349L$done_key256: 2350 movl %esi,16(%rax) 2351 xorl %eax,%eax 2352 jmp L$enc_key_ret_alt 2353 2354.p2align 4 2355L$bad_keybits_alt: 2356 movq $-2,%rax 2357L$enc_key_ret_alt: 2358 pxor %xmm0,%xmm0 2359 pxor %xmm1,%xmm1 2360 pxor %xmm2,%xmm2 2361 pxor %xmm3,%xmm3 2362 pxor %xmm4,%xmm4 2363 pxor %xmm5,%xmm5 2364 addq $8,%rsp 2365 2366 ret 2367 2368 2369 2370.section __DATA,__const 2371.p2align 6 2372L$bswap_mask: 2373.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 2374L$increment32: 2375.long 6,6,6,0 2376L$increment64: 2377.long 1,0,0,0 2378L$xts_magic: 2379.long 0x87,0,1,0 2380L$increment1: 2381.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 2382L$key_rotate: 2383.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 2384L$key_rotate192: 2385.long 0x04070605,0x04070605,0x04070605,0x04070605 2386L$key_rcon1: 2387.long 1,1,1,1 2388L$key_rcon1b: 2389.long 0x1b,0x1b,0x1b,0x1b 2390 2391.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 2392.p2align 6 2393.text 2394#endif 2395