// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// Review notes (comments only; no instruction was changed).
//
// Syntax: GNU as, AT&T operand order, preprocessed by the C preprocessor.
// Target: x86-64 ELF (see the guard below). The exported entry points take
// their arguments in %rdi, %rsi, %rdx, %rcx, %r8.
//
// Several instructions are emitted as raw bytes. Decoding key:
//   .byte 102,15,56,220,M       aesenc           (66 0F 38 DC /r)
//   .byte 102,15,56,221,M       aesenclast       (66 0F 38 DD /r)
//   .byte 102,15,56,0,M         pshufb           (66 0F 38 00 /r)
//   .byte 102,15,58,34,M,i      pinsrd $i        (66 0F 3A 22 /r ib)
//   .byte 102,15,58,223,M,i     aeskeygenassist $i (66 0F 3A DF /r ib)
//   .byte 0x0f,0x38,0xf1,...    movbe %r32,mem   (0F 38 F1 /r)
// A byte 65, 68 or 69 directly after 102 is a REX prefix that extends the
// source (65), the destination (68) or both (69) into %xmm8-%xmm15.
// M is a register-direct ModRM byte: 0xC0 | destination<<3 | source. Examples:
//   209 = aesenc %xmm1,%xmm2      208 = aesenc %xmm0,%xmm2
//   217 = aesenc %xmm1,%xmm3      216 = aesenc %xmm0,%xmm3

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

// aes_hw_encrypt: encrypts one 16-byte block.
//   %rdi  address of the 16-byte input block (read with movups)
//   %rsi  address the 16-byte result is stored to
//   %rdx  address of the expanded key: round keys are read 16 bytes apart
//         starting at offset 0, and a 32-bit count is read from offset 240
// The count is the number of aesenc rounds; a single aesenclast follows.
// Clobbers %eax, %rdx and flags. %xmm0-%xmm2 are zeroed before returning.
.globl	aes_hw_encrypt
.hidden aes_hw_encrypt
.type	aes_hw_encrypt,@function
.align	16
aes_hw_encrypt:
.cfi_startproc
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern	BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
	movb	$1,BORINGSSL_function_hit+1(%rip)
#endif
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	// Initial whitening with round key 0.
	xorps	%xmm0,%xmm2
.Loop_enc1_1:
	// aesenc %xmm1,%xmm2. The zero flag set by decl is still intact at the
	// jnz because movups and leaq do not modify flags.
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
	// aesenclast %xmm1,%xmm2
.byte	102,15,56,221,209
	// Clear the key material and the result from the vector registers.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	ret
.cfi_endproc
.size	aes_hw_encrypt,.-aes_hw_encrypt

// _aesni_encrypt2: file-local helper that encrypts %xmm2 and %xmm3 in place.
//   %rcx  expanded key
//   %eax  round count in the same form as the value at offset 240
// %eax is scaled to a byte offset (count*16), %rcx is advanced to
// 32+count*16 past the schedule start, and %rax then counts upward from
// 16-count*16 in steps of 32. The loop ends when the addq result is zero, so
// the count has to be odd for the loop to terminate.
// Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
// NOTE(review): no caller of this helper is visible in this file.
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

	// Two rounds per iteration, alternating round keys in %xmm1 and %xmm0.
.Lenc_loop2:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop2

	// One more aesenc with %xmm1, then aesenclast with %xmm0.
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	ret
.cfi_endproc
.size	_aesni_encrypt2,.-_aesni_encrypt2

// _aesni_encrypt3: same scheme as _aesni_encrypt2 for three blocks held in
// %xmm2-%xmm4. Inputs %rcx and %eax, and the clobber set, are the same.
// NOTE(review): no caller of this helper is visible in this file.
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop3:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	ret
.cfi_endproc
.size	_aesni_encrypt3,.-_aesni_encrypt3

// _aesni_encrypt4: same scheme for four blocks held in %xmm2-%xmm5.
// NOTE(review): no caller of this helper is visible in this file.
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	// 0F 1F 00 is a three-byte nop (nopl (%rax)); it only pads the code.
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

.Lenc_loop4:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	ret
.cfi_endproc
.size	_aesni_encrypt4,.-_aesni_encrypt4

// _aesni_encrypt6: six blocks held in %xmm2-%xmm7. The whitening xors are
// interleaved with the first aesenc round on %xmm2-%xmm4, and the loop is
// entered at .Lenc_loop6_enter to finish that round for %xmm5-%xmm7.
// aes_hw_ctr32_encrypt_blocks also enters this body with `call .Lenc_loop6`
// after setting up %rax, %rcx, %xmm0 and %xmm1 itself; the ret below then
// returns to that call site.
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	// %rcx+%rax is now the schedule start + 32, i.e. the third round key.
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.Lenc_loop6_enter:
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	ret
.cfi_endproc
.size	_aesni_encrypt6,.-_aesni_encrypt6

// _aesni_encrypt8: eight blocks held in %xmm2-%xmm9, structured like
// _aesni_encrypt6. The %xmm8/%xmm9 rounds use the REX-prefixed encodings
// (102,68,...). aes_hw_ctr32_encrypt_blocks enters this body with
// `call .Lenc_loop8_enter` from its tail path.
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.Lenc_loop8_inner:
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	ret
.cfi_endproc
.size	_aesni_encrypt8,.-_aesni_encrypt8

// aes_hw_ctr32_encrypt_blocks: counter-mode encryption of whole blocks.
//   %rdi  input, read 16 bytes per block
//   %rsi  output, written 16 bytes per block
//   %rdx  number of 16-byte blocks
//   %rcx  expanded key (round count read from offset 240)
//   %r8   16-byte counter block; the 32-bit word at byte offset 12 is
//         byte-swapped and incremented once per block
// Each counter block is encrypted and xored with the matching input block.
// The bulk path builds a 128-byte, 16-byte-aligned scratch area on the stack
// that holds eight counter blocks already xored with round key 0, and wipes
// it together with %xmm0-%xmm15 before returning. The counter block at (%r8)
// is only read in the code below.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,@function
.align	16
aes_hw_ctr32_encrypt_blocks:
.cfi_startproc
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
	movb	$1,BORINGSSL_function_hit(%rip)
#endif
	cmpq	$1,%rdx
	jne	.Lctr32_bulk

	// Exactly one block: encrypt the counter block with a simple round
	// loop, xor with the input block and store. No stack frame is needed.

	movups	(%r8),%xmm2
	movups	(%rdi),%xmm3
	movl	240(%rcx),%edx
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_2:
.byte	102,15,56,220,209
	decl	%edx
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_2
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	xorps	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	xorps	%xmm2,%xmm2
	jmp	.Lctr32_epilogue

.align	16
.Lctr32_bulk:
	// Keep the entry %rsp in %r11 so the epilogue can restore it after the
	// alignment below; the CFA is tracked through %r11 from here on.
	leaq	(%rsp),%r11
.cfi_def_cfa_register	%r11
	pushq	%rbp
.cfi_offset	%rbp,-16
	subq	$128,%rsp
	andq	$-16,%rsp

	// %xmm2 = counter block xor round key 0. %r8d = counter word in host
	// byte order. %ebp = last dword of round key 0, xored into every
	// counter word that is patched into a block below.

	movdqu	(%r8),%xmm2
	movdqu	(%rcx),%xmm0
	movl	12(%r8),%r8d
	pxor	%xmm0,%xmm2
	movl	12(%rcx),%ebp
	movdqa	%xmm2,0(%rsp)
	bswapl	%r8d
	movdqa	%xmm2,%xmm3
	movdqa	%xmm2,%xmm4
	movdqa	%xmm2,%xmm5
	movdqa	%xmm2,64(%rsp)
	movdqa	%xmm2,80(%rsp)
	movdqa	%xmm2,96(%rsp)
	// The block count is parked in %r10 while %rdx is used as scratch.
	movq	%rdx,%r10
	movdqa	%xmm2,112(%rsp)

	leaq	1(%r8),%rax
	leaq	2(%r8),%rdx
	bswapl	%eax
	bswapl	%edx
	xorl	%ebp,%eax
	xorl	%ebp,%edx
	// pinsrd $3,%eax,%xmm3
.byte	102,15,58,34,216,3
	leaq	3(%r8),%rax
	movdqa	%xmm3,16(%rsp)
	// pinsrd $3,%edx,%xmm4
.byte	102,15,58,34,226,3
	bswapl	%eax
	movq	%r10,%rdx
	leaq	4(%r8),%r10
	movdqa	%xmm4,32(%rsp)
	xorl	%ebp,%eax
	bswapl	%r10d
	// pinsrd $3,%eax,%xmm5
.byte	102,15,58,34,232,3
	xorl	%ebp,%r10d
	movdqa	%xmm5,48(%rsp)
	leaq	5(%r8),%r9
	// Blocks 4-7 are patched directly in the stack copies (offset 16*i+12).
	movl	%r10d,64+12(%rsp)
	bswapl	%r9d
	leaq	6(%r8),%r10
	movl	240(%rcx),%eax
	xorl	%ebp,%r9d
	bswapl	%r10d
	movl	%r9d,80+12(%rsp)
	xorl	%ebp,%r10d
	leaq	7(%r8),%r9
	movl	%r10d,96+12(%rsp)
	bswapl	%r9d
	// Capability word 1 masked with 0x04400000 (bits 22 and 26). Only the
	// value 0x00400000 selects the 6-block loop, which uses movbe.
	// NOTE(review): presumably bit 22 is MOVBE and bit 26 is XSAVE from
	// CPUID.1:ECX; confirm against the OPENSSL_ia32cap_P definition.
	leaq	OPENSSL_ia32cap_P(%rip),%r10
	movl	4(%r10),%r10d
	xorl	%ebp,%r9d
	andl	$71303168,%r10d
	movl	%r9d,112+12(%rsp)

	movups	16(%rcx),%xmm1

	movdqa	64(%rsp),%xmm6
	movdqa	80(%rsp),%xmm7

	cmpq	$8,%rdx
	jb	.Lctr32_tail

	subq	$6,%rdx
	cmpl	$4194304,%r10d
	je	.Lctr32_6x

	// 8-block loop: %rcx is biased by +128 so the round-key loads below can
	// be written as N-128(%rcx).
	leaq	128(%rcx),%rcx
	subq	$2,%rdx
	jmp	.Lctr32_loop8

.align	16
.Lctr32_6x:
	// 6-block loop setup: %rcx is moved to 32+16*count past the schedule
	// start and %r10 = 48-16*count is the negative index used with it.
	// %ebp is byte-swapped because movbe swaps again when storing.
	shll	$4,%eax
	movl	$48,%r10d
	bswapl	%ebp
	leaq	32(%rcx,%rax,1),%rcx
	subq	%rax,%r10
	jmp	.Lctr32_loop6

.align	16
.Lctr32_loop6:
	// First two rounds on the six blocks in %xmm2-%xmm7, interleaved with
	// writing the next six counter words to stack offsets 12, 28, 44, 60,
	// 76 and 92 via movbe %eax,disp(%rsp).
	addl	$6,%r8d
	movups	-48(%rcx,%r10,1),%xmm0
.byte	102,15,56,220,209
	movl	%r8d,%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,217
.byte	0x0f,0x38,0xf1,0x44,0x24,12
	leal	1(%r8),%eax
.byte	102,15,56,220,225
	xorl	%ebp,%eax
.byte	0x0f,0x38,0xf1,0x44,0x24,28
.byte	102,15,56,220,233
	leal	2(%r8),%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,241
.byte	0x0f,0x38,0xf1,0x44,0x24,44
	leal	3(%r8),%eax
.byte	102,15,56,220,249
	movups	-32(%rcx,%r10,1),%xmm1
	xorl	%ebp,%eax

.byte	102,15,56,220,208
.byte	0x0f,0x38,0xf1,0x44,0x24,60
	leal	4(%r8),%eax
.byte	102,15,56,220,216
	xorl	%ebp,%eax
.byte	0x0f,0x38,0xf1,0x44,0x24,76
.byte	102,15,56,220,224
	leal	5(%r8),%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,232
.byte	0x0f,0x38,0xf1,0x44,0x24,92
	movq	%r10,%rax
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%r10,1),%xmm0

	// Run the remaining rounds in the shared 6-block loop; its ret comes
	// back here.
	call	.Lenc_loop6

	// Xor the keystream with six input blocks, reload the next counter
	// blocks from the stack, and store the output.
	movdqu	(%rdi),%xmm8
	movdqu	16(%rdi),%xmm9
	movdqu	32(%rdi),%xmm10
	movdqu	48(%rdi),%xmm11
	movdqu	64(%rdi),%xmm12
	movdqu	80(%rdi),%xmm13
	leaq	96(%rdi),%rdi
	movups	-64(%rcx,%r10,1),%xmm1
	pxor	%xmm2,%xmm8
	movaps	0(%rsp),%xmm2
	pxor	%xmm3,%xmm9
	movaps	16(%rsp),%xmm3
	pxor	%xmm4,%xmm10
	movaps	32(%rsp),%xmm4
	pxor	%xmm5,%xmm11
	movaps	48(%rsp),%xmm5
	pxor	%xmm6,%xmm12
	movaps	64(%rsp),%xmm6
	pxor	%xmm7,%xmm13
	movaps	80(%rsp),%xmm7
	movdqu	%xmm8,(%rsi)
	movdqu	%xmm9,16(%rsi)
	movdqu	%xmm10,32(%rsi)
	movdqu	%xmm11,48(%rsi)
	movdqu	%xmm12,64(%rsi)
	movdqu	%xmm13,80(%rsi)
	leaq	96(%rsi),%rsi

	subq	$6,%rdx
	jnc	.Lctr32_loop6

	addq	$6,%rdx
	jz	.Lctr32_done

	// Undo the 6x setup: recover the round count in %eax and point %rcx
	// back at the schedule start before joining the tail.
	leal	-48(%r10),%eax
	leaq	-80(%rcx,%r10,1),%rcx
	negl	%eax
	shrl	$4,%eax
	jmp	.Lctr32_tail

.align	32
.Lctr32_loop8:
	// Eight blocks per iteration in %xmm2-%xmm9. Rounds are fully unrolled
	// and interleaved with computing the next eight counter words
	// (bswap(counter+i) xor %ebp) into the stack copies at 16*i+12.
	// The .byte 0x66,0x90 pairs are two-byte nops.
	addl	$8,%r8d
	movdqa	96(%rsp),%xmm8
.byte	102,15,56,220,209
	movl	%r8d,%r9d
	movdqa	112(%rsp),%xmm9
.byte	102,15,56,220,217
	bswapl	%r9d
	movups	32-128(%rcx),%xmm0
.byte	102,15,56,220,225
	xorl	%ebp,%r9d
	nop
.byte	102,15,56,220,233
	movl	%r9d,0+12(%rsp)
	leaq	1(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	48-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,16+12(%rsp)
	leaq	2(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	64-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,32+12(%rsp)
	leaq	3(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	80-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,48+12(%rsp)
	leaq	4(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	96-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,64+12(%rsp)
	leaq	5(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	112-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,80+12(%rsp)
	leaq	6(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	128-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,96+12(%rsp)
	leaq	7(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	144-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	xorl	%ebp,%r9d
	movdqu	0(%rdi),%xmm10
.byte	102,15,56,220,232
	movl	%r9d,112+12(%rsp)
	// %eax still holds the count from offset 240. Below 11 the four extra
	// rounds that follow are skipped. The flags set here survive until the
	// jb because aesenc and movups do not modify them.
	cmpl	$11,%eax
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	160-128(%rcx),%xmm0

	jb	.Lctr32_enc_done

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	176-128(%rcx),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	192-128(%rcx),%xmm0



.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	208-128(%rcx),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	224-128(%rcx),%xmm0
	jmp	.Lctr32_enc_done

.align	16
.Lctr32_enc_done:
	// %xmm0 holds the last round key. It is xored into the input blocks
	// first, so each aesenclast below (source %xmm10-%xmm15, %xmm1,
	// %xmm10) applies the final round key and the plaintext xor at once.
	movdqu	16(%rdi),%xmm11
	pxor	%xmm0,%xmm10
	movdqu	32(%rdi),%xmm12
	pxor	%xmm0,%xmm11
	movdqu	48(%rdi),%xmm13
	pxor	%xmm0,%xmm12
	movdqu	64(%rdi),%xmm14
	pxor	%xmm0,%xmm13
	movdqu	80(%rdi),%xmm15
	pxor	%xmm0,%xmm14
	prefetcht0	448(%rdi)
	prefetcht0	512(%rdi)
	pxor	%xmm0,%xmm15
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movdqu	96(%rdi),%xmm1
	leaq	128(%rdi),%rdi

	// aesenclast %xmm10,%xmm2
.byte	102,65,15,56,221,210
	pxor	%xmm0,%xmm1
	movdqu	112-128(%rdi),%xmm10
	// aesenclast %xmm11,%xmm3
.byte	102,65,15,56,221,219
	pxor	%xmm0,%xmm10
	// Reload the next counter blocks from the stack while finishing.
	movdqa	0(%rsp),%xmm11
	// aesenclast %xmm12,%xmm4 and aesenclast %xmm13,%xmm5
.byte	102,65,15,56,221,228
.byte	102,65,15,56,221,237
	movdqa	16(%rsp),%xmm12
	movdqa	32(%rsp),%xmm13
	// aesenclast %xmm14,%xmm6 and aesenclast %xmm15,%xmm7
.byte	102,65,15,56,221,246
.byte	102,65,15,56,221,255
	movdqa	48(%rsp),%xmm14
	movdqa	64(%rsp),%xmm15
	// aesenclast %xmm1,%xmm8
.byte	102,68,15,56,221,193
	movdqa	80(%rsp),%xmm0
	movups	16-128(%rcx),%xmm1
	// aesenclast %xmm10,%xmm9
.byte	102,69,15,56,221,202

	movups	%xmm2,(%rsi)
	movdqa	%xmm11,%xmm2
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6
	movups	%xmm7,80(%rsi)
	movdqa	%xmm0,%xmm7
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi

	subq	$8,%rdx
	jnc	.Lctr32_loop8

	addq	$8,%rdx
	jz	.Lctr32_done
	// Remove the +128 bias before the tail code.
	leaq	-128(%rcx),%rcx

.Lctr32_tail:
	// 1 to 7 blocks remain in %rdx. Fewer than 4 go to .Lctr32_loop3,
	// exactly 4 to .Lctr32_loop4, and 5 to 7 use the 8-block round loop.

	leaq	16(%rcx),%rcx
	cmpq	$4,%rdx
	jb	.Lctr32_loop3
	je	.Lctr32_loop4


	shll	$4,%eax
	movdqa	96(%rsp),%xmm8
	pxor	%xmm9,%xmm9

	movups	16(%rcx),%xmm0
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	leaq	32-16(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,225
	addq	$16,%rax
	movups	(%rdi),%xmm10
.byte	102,15,56,220,233
.byte	102,15,56,220,241
	movups	16(%rdi),%xmm11
	movups	32(%rdi),%xmm12
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193

	call	.Lenc_loop8_enter

	// Blocks 0-4 are always stored here; blocks 5 and 6 only when %rdx
	// says they exist. The flags from cmpq $6 are reused by the later je
	// because movups and xorps do not modify them.
	movdqu	48(%rdi),%xmm13
	pxor	%xmm10,%xmm2
	movdqu	64(%rdi),%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm10,%xmm6
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	cmpq	$6,%rdx
	jb	.Lctr32_done

	movups	80(%rdi),%xmm11
	xorps	%xmm11,%xmm7
	movups	%xmm7,80(%rsi)
	je	.Lctr32_done

	movups	96(%rdi),%xmm12
	xorps	%xmm12,%xmm8
	movups	%xmm8,96(%rsi)
	jmp	.Lctr32_done

.align	32
.Lctr32_loop4:
	// Exactly four blocks: one round per iteration on %xmm2-%xmm5, with
	// %eax as the round counter and %rcx walking the schedule.
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx),%xmm1
	jnz	.Lctr32_loop4
.byte	102,15,56,221,209
.byte	102,15,56,221,217
	movups	(%rdi),%xmm10
	movups	16(%rdi),%xmm11
.byte	102,15,56,221,225
.byte	102,15,56,221,233
	movups	32(%rdi),%xmm12
	movups	48(%rdi),%xmm13

	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm5,48(%rsi)
	jmp	.Lctr32_done

.align	32
.Lctr32_loop3:
	// One to three blocks: three blocks are always encrypted, but only
	// as many as %rdx indicates are read from the input and stored.
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx),%xmm1
	jnz	.Lctr32_loop3
.byte	102,15,56,221,209
.byte	102,15,56,221,217
.byte	102,15,56,221,225

	movups	(%rdi),%xmm10
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	cmpq	$2,%rdx
	jb	.Lctr32_done

	movups	16(%rdi),%xmm11
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	je	.Lctr32_done

	movups	32(%rdi),%xmm12
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

.Lctr32_done:
	// Zero %xmm0-%xmm15, %ebp and the 128-byte stack scratch area, then
	// restore %rbp and %rsp from the frame anchored at %r11.
	xorps	%xmm0,%xmm0
	xorl	%ebp,%ebp
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	movq	-8(%r11),%rbp
.cfi_restore	%rbp
	leaq	(%r11),%rsp
.cfi_def_cfa_register	%rsp
.Lctr32_epilogue:
	ret
.cfi_endproc
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks

// aes_hw_set_encrypt_key (alias __aesni_set_encrypt_key): expands a key.
//   %rdi  key bytes (16 bytes are read for 128 bits, 32 bytes for 256)
//   %esi  key size in bits; only 128 and 256 are accepted
//   %rdx  output schedule: round keys from offset 0, count at offset 240
// Returns in %rax: 0 on success, -1 if %rdi or %rdx is zero, -2 for any
// other key size. The count written at offset 240 is 9 for 128-bit keys and
// 13 for 256-bit keys. %xmm0-%xmm5 are zeroed on every return path.
// Each size has two code paths: one built on aeskeygenassist and an
// "_alt" one built on pshufb + aesenclast, chosen from the capability word.
.globl	aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,@function
.align	16
aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
	movb	$1,BORINGSSL_function_hit+3(%rip)
#endif
	// 48 83 EC 08 encodes subq $8,%rsp.
.byte	0x48,0x83,0xEC,0x08
.cfi_adjust_cfa_offset	8
	movq	$-1,%rax
	testq	%rdi,%rdi
	jz	.Lenc_key_ret
	testq	%rdx,%rdx
	jz	.Lenc_key_ret

	movups	(%rdi),%xmm0
	// %xmm4 starts as zero; the .Lkey_expansion_* helpers shuffle through it.
	xorps	%xmm4,%xmm4
	// Capability word 1 masked with 0x10000800 (bits 28 and 11). Only the
	// value 0x10000000 selects the "_alt" paths.
	// NOTE(review): presumably bit 28 is AVX and bit 11 is XOP; confirm
	// against the OPENSSL_ia32cap_P definition.
	leaq	OPENSSL_ia32cap_P(%rip),%r10
	movl	4(%r10),%r10d
	andl	$268437504,%r10d
	// %rax is the output cursor from here on, starting at round key 1.
	leaq	16(%rdx),%rax
	cmpl	$256,%esi
	je	.L14rounds

	cmpl	$128,%esi
	jne	.Lbad_keybits

.L10rounds:
	movl	$9,%esi
	cmpl	$268435456,%r10d
	je	.L10rounds_alt

	// Each step is aeskeygenassist $rcon,%xmm0,%xmm1 followed by a helper
	// call. The first call uses the _cold entry, which skips the store.
	movups	%xmm0,(%rdx)
.byte	102,15,58,223,200,1
	call	.Lkey_expansion_128_cold
.byte	102,15,58,223,200,2
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,64
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,128
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,27
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,54
	call	.Lkey_expansion_128
	// %rax is now schedule+160: store the last round key there and the
	// count at 80(%rax), which is schedule+240.
	movups	%xmm0,(%rax)
	movl	%esi,80(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L10rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movl	$8,%r10d
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key128

.align	16
.Loop_key128:
	// pshufb %xmm5,%xmm0 then aesenclast %xmm4,%xmm0, with %xmm4 holding
	// the round constant, which is doubled by the pslld for the next pass.
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4
	leaq	16(%rax),%rax

	// %xmm2 = previous round key xored with itself shifted left by 4, 8
	// and 12 bytes.
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	.Loop_key128

	// Last two round keys use the constants from .Lkey_rcon1b (0x1b, then
	// doubled by the pslld).
	movdqa	.Lkey_rcon1b(%rip),%xmm4

.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197
.byte	102,15,56,221,196

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	// %rax is schedule+144 here, so 96(%rax) is schedule+240.
	movl	%esi,96(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret



.align	16
.L14rounds:
	// 256-bit key: second half in %xmm2, output cursor moved to round
	// key 2 because round keys 0 and 1 are the key itself.
	movups	16(%rdi),%xmm2
	movl	$13,%esi
	leaq	16(%rax),%rax
	cmpl	$268435456,%r10d
	je	.L14rounds_alt

	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
	// Alternates aeskeygenassist on %xmm2 (helper 256a updates %xmm0) and
	// on %xmm0 (helper 256b updates %xmm2).
.byte	102,15,58,223,202,1
	call	.Lkey_expansion_256a_cold
.byte	102,15,58,223,200,1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,2
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,2
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,4
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,8
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,16
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,32
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,64
	call	.Lkey_expansion_256a
	// %rax is schedule+224 here, so 16(%rax) is schedule+240.
	movups	%xmm0,(%rax)
	movl	%esi,16(%rax)
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L14rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$7,%r10d
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	.Loop_key256

.align	16
.Loop_key256:
	// Seven passes; each writes an even-numbered round key at (%rax) and,
	// except on the last pass, an odd-numbered one at 16(%rax).
	// pshufb %xmm5,%xmm2 then aesenclast %xmm4,%xmm2.
.byte	102,15,56,0,213
.byte	102,15,56,221,212

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	.Ldone_key256

	// Broadcast the top dword of the new key, then aesenclast %xmm3,%xmm2
	// with %xmm3 zeroed (no round constant on this half-step).
	pshufd	$0xff,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	.Loop_key256

.Ldone_key256:
	// %rax is schedule+224 here, so 16(%rax) is schedule+240.
	movl	%esi,16(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.Lbad_keybits:
	movq	$-2,%rax
.Lenc_key_ret:
	// Common exit: wipe the vector registers that held key material and
	// release the 8 bytes reserved at entry.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp
.cfi_adjust_cfa_offset	-8
	ret
.cfi_endproc
.LSEH_end_set_encrypt_key:

// Key-expansion helpers. They are reached only by `call` from the function
// above and sit after its .cfi_endproc but before its .size directives.
// Each one stores the current round key at (%rax) and advances %rax by 16
// (the *_cold entry points skip that), then derives the next round key from
// %xmm1, the result of the preceding aeskeygenassist.

.align	16
.Lkey_expansion_128:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	// Broadcast dword 3 of %xmm1 and fold it in.
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret

// NOTE(review): no caller of the three .Lkey_expansion_192* labels is
// visible in this file; key sizes other than 128 and 256 are rejected above.
.align	16
.Lkey_expansion_192a:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps	%xmm2,%xmm5
.Lkey_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	ret

.align	16
.Lkey_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	.Lkey_expansion_192b_warm

.align	16
.Lkey_expansion_256a:
	// Stores %xmm2 (the odd-numbered round key) and updates %xmm0.
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret

.align	16
.Lkey_expansion_256b:
	// Stores %xmm0 (the even-numbered round key) and updates %xmm2. This
	// variant broadcasts dword 2 of %xmm1 (shufps $170) instead of dword 3.
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1
	xorps	%xmm1,%xmm2
	ret
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key

// Read-only constants. Only .Lkey_rotate, .Lkey_rcon1 and .Lkey_rcon1b are
// referenced by the code visible in this file; the others are kept as
// generated. The trailing .byte string is the NUL-terminated ASCII text
// "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>".
.section	.rodata
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long	6,6,6,0
.Lincrement64:
.long	1,0,0,0
.Lincrement1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1:
.long	1,1,1,1
.Lkey_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b

.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
.text
#endif