// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text

.globl	_gcm_init_clmul
.private_extern	_gcm_init_clmul

.p2align	4
_gcm_init_clmul:


_CET_ENDBR
L$_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	L$0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
	ret



.globl	_gcm_gmult_clmul
.private_extern	_gcm_gmult_clmul

.p2align	4
_gcm_gmult_clmul:

_CET_ENDBR
L$_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	L$bswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	ret


.globl	_gcm_ghash_clmul
.private_extern	_gcm_ghash_clmul

.p2align	5
_gcm_ghash_clmul:


_CET_ENDBR
L$_ghash_clmul:
	movdqa	L$bswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$0x10,%rcx
	jz	L$odd_tail

	movdqu	16(%rsi),%xmm6
	leaq	_OPENSSL_ia32cap_P(%rip),%rax
	movl	4(%rax),%eax
	cmpq	$0x30,%rcx
	jb	L$skip4x

	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	L$skip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	L$tail4x

	jmp	L$mod4_loop
.p2align	5
L$mod4_loop:
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	L$7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	L$mod4_loop

L$tail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	L$done
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	L$odd_tail
L$skip4x:





	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	L$even_tail
	nop
	jmp	L$mod_loop

.p2align	5
L$mod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	L$mod_loop

L$even_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	L$done

L$odd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
L$done:
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	ret



.globl	_gcm_init_avx
.private_extern	_gcm_init_avx

.p2align	5
_gcm_init_avx:

_CET_ENDBR
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2


	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2


	vpand	L$0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10
	jmp	L$init_start_avx
.p2align	5
L$init_loop_avx:
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
L$init_start_avx:
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	L$init_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	ret



.globl	_gcm_ghash_avx
.private_extern	_gcm_ghash_avx

.p2align	5
_gcm_ghash_avx:

_CET_ENDBR
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	L$0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	L$bswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	L$short_avx
	subq	$0x80,%rcx

	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	L$tail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	L$oop8x_avx

.p2align	5
L$oop8x_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	L$oop8x_avx

	addq	$0x80,%rcx
	jmp	L$tail_no_xor_avx

.p2align	5
L$short_avx:
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	L$tail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	L$tail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	L$tail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	L$tail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	L$tail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	L$tail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	L$tail_avx

.p2align	5
L$tail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
L$tail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	L$short_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	ret



.section	__DATA,__const
.p2align	6
L$bswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
L$7_mask:
.long	7,0,7,0
.p2align	6

.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	6
.text
#endif