// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto

// Round-constant material used by the key-expansion code below:
//   word 0..3 : current rcon value, splatted across all four lanes
//   word 4..7 : tbl-shuffle mask implementing RotWord on the top 32-bit lane
//   word 8..11: 0x1b rcon continuation (loaded when the doubled rcon wraps)
.section .rodata
.align	5
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

//----------------------------------------------------------------------
// int aes_hw_set_encrypt_key(const uint8_t *user_key,  // x0
//                            int bits,                 // w1: 128/192/256
//                            AES_KEY *key);            // x2
//
// Expands user_key into the encryption round-key schedule at [x2] and
// stores the round count (10/12/14) in the word after the schedule
// (offset 240 for a 256-bit key area; see the x2 adjustments at Ldone).
// Returns 0 in x0 on success, -2 if bits is below 128, above 256, or
// not a multiple of 64 (the tst w1,#0x3f check).
//
// Register use: v1 = rcon, v2 = rotate-n-splat mask, v3/v4 = key words
// being expanded, v0 = all-zero (used both for aese with a zero round
// key, i.e. a bare SubBytes+ShiftRows, and for shifting via ext).
//----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key

.def aes_hw_set_encrypt_key
   .type 32
.endef
.align	5
aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-2			// default return value: bad key bits
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f		// bits must be a multiple of 64
	b.ne	Lenc_key_abort

	adrp	x3,Lrcon
	add	x3,x3,:lo12:Lrcon
	cmp	w1,#192

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0
	ld1	{v3.16b},[x0],#16	// first 128 bits of user key
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = RotWord mask

	b.lt	Loop128
	b.eq	L192
	b	L256

// 128-bit key: 8 iterations here plus two unrolled rounds below = 10
// round keys beyond the user key.
.align	4
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// RotWord of the last key word
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16	// emit current round key
	aese	v6.16b,v0.16b		// SubBytes (zero round key)
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b	// fold previous words in, one
	ext	v5.16b,v0.16b,v5.16b,#12	// 32-bit shift at a time
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// add rcon
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// rcon <<= 1 for next round
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// rcon wrapped: load 0x1b entry

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last (10th) round key
	add	x2,x2,#0x50		// advance x2 so Ldone writes the
					// rounds word at key+240

	mov	w12,#10			// 10 rounds for AES-128
	b	Ldone

// 192-bit key: the key stream advances by 24 bytes per iteration, so
// round keys are written in a mix of 16- and 8-byte pieces.
.align	4
L192:
	ld1	{v4.8b},[x0],#8		// remaining 64 bits of user key
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b	// add rcon
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	Loop192

	mov	w12,#12			// 12 rounds for AES-192
	add	x2,x2,#0x20
	b	Ldone

// 256-bit key: alternate rounds apply RotWord+rcon (top half of the
// loop) and a plain SubWord splat (bottom half).
.align	4
L256:
	ld1	{v4.16b},[x0]		// second 128 bits of user key
	mov	w1,#7
	mov	w12,#14			// 14 rounds for AES-256
	st1	{v3.4s},[x2],#16

Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b		// SubWord (no rotate, no rcon)

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// store round count after schedule
	mov	x3,#0			// success

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret


//----------------------------------------------------------------------
// int aes_hw_set_decrypt_key(const uint8_t *user_key,  // x0
//                            int bits,                 // w1
//                            AES_KEY *key);            // x2
//
// Builds the encryption schedule via Lenc_key, then converts it in
// place for use by aes_hw_decrypt: the round keys are reversed
// (swapped end-for-end via the two-pointer x2/x0 walk) and every inner
// round key is run through aesimc (InvMixColumns).  Returns 0 on
// success, or the non-zero error from Lenc_key.
//----------------------------------------------------------------------
.globl	aes_hw_set_decrypt_key

.def aes_hw_set_decrypt_key
   .type 32
.endef
.align	5
aes_hw_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	Lenc_key

	cmp	x0,#0
	b.ne	Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16
	add	x0,x2,x12,lsl#4	// end of key schedule

	// Swap first and last round keys; these two are not aesimc'd.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

Loop_imc:
	// Walk inward from both ends, swapping and applying InvMixColumns.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	Loop_imc

	// Middle round key (pointers met): transform in place.
	ld1	{v0.4s},[x2]
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret

//----------------------------------------------------------------------
// void aes_hw_encrypt(const uint8_t *in,   // x0: one 16-byte block
//                     uint8_t *out,        // x1
//                     const AES_KEY *key); // x2: rounds at key+240
//
// Single-block AES encryption.  Processes round keys two at a time
// (v0/v1) with the loop counter w3 = rounds-2.
//----------------------------------------------------------------------
.globl	aes_hw_encrypt

.def aes_hw_encrypt
   .type 32
.endef
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16	// round key 0
	ld1	{v2.16b},[x0]		// plaintext block
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16	// round key 1

Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round: no MixColumns
	eor	v2.16b,v2.16b,v0.16b	// final AddRoundKey

	st1	{v2.16b},[x1]
	ret

//----------------------------------------------------------------------
// void aes_hw_decrypt(const uint8_t *in,   // x0: one 16-byte block
//                     uint8_t *out,        // x1
//                     const AES_KEY *key); // x2: inverse schedule from
//                                          //     aes_hw_set_decrypt_key
//
// Single-block AES decryption; mirror of aes_hw_encrypt using
// aesd/aesimc.
//----------------------------------------------------------------------
.globl	aes_hw_decrypt

.def aes_hw_decrypt
   .type 32
.endef
.align	5
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]		// ciphertext block
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round: no InvMixColumns
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

//----------------------------------------------------------------------
// void aes_hw_cbc_encrypt(const uint8_t *in,   // x0
//                         uint8_t *out,        // x1
//                         size_t length,       // x2: bytes, truncated
//                                              //     to a 16-byte multiple
//                         const AES_KEY *key,  // x3
//                         uint8_t *ivec,       // x4: in/out IV
//                         int enc);            // w5: nonzero = encrypt
//
// CBC mode.  Encryption is serial (one block at a time, the usual CBC
// dependency), with key-size-specialized paths for 128-bit keys
// (Lcbc_enc128) and a shared 192/256 path.  Decryption interleaves
// three blocks (Loop3x_cbc_dec) since CBC decryption parallelizes.
// The last seven round keys are kept resident in v18-v23 and v7.
// The updated IV is written back to [x4] at Lcbc_done.
//----------------------------------------------------------------------
.globl	aes_hw_cbc_encrypt

.def aes_hw_cbc_encrypt
   .type 32
.endef
.align	5
aes_hw_cbc_encrypt:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16			// input step; zeroed on last block
	b.lo	Lcbc_abort		// less than one block: nothing to do
	csel	x8,xzr,x8,eq

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16		// round length down to whole blocks
	ld1	{v6.16b},[x4]		// load IV
	ld1	{v0.16b},[x0],x8	// first input block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// very last round key

	add	x7,x3,#32
	mov	w6,w5
	b.eq	Lcbc_dec

	cmp	w5,#2			// w5==2 means a 128-bit key
	eor	v0.16b,v0.16b,v6.16b	// in ^ IV
	eor	v5.16b,v16.16b,v7.16b	// rndkey0 ^ rndkeylast, used to
					// fold the next block's XOR into
					// the first AddRoundKey
	b.eq	Lcbc_enc128

	// 192/256-bit encrypt path: stash round keys 2-3 in v2/v3 and
	// keep per-key-size pointers (x6/x12/x14/x3) to keys 4-7.
	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store previous ciphertext block
Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4			// w5==4 means a 192-bit key
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	Lcbc_enc192

	// 256-bit only: two extra rounds.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// stop advancing past last block
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// load next plaintext block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b	// next ^ (rndkey0 ^ rndkeylast)
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// ciphertext = new IV
	b.hs	Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

// 128-bit-key encrypt path: all rounds fully unrolled, keys resident.
.align	5
Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	Lcbc_done
.align	5
Lcbc_dec:
	// Decrypt: process three blocks per iteration (v0, v1, v18);
	// v6 carries the IV / previous ciphertext chain value.
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	Lcbc_dec_tail		// fewer than 3 blocks remaining

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_cbc_dec

	// Final rounds, interleaved with preparing the CBC XOR masks
	// (prev-ciphertext ^ rndkeylast) and loading the next 3 blocks.
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo		// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// carry chain value forward
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b	// XOR chain value -> plaintext
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	Loop3x_cbc_dec

	cmn	x2,#0x30		// exactly 0 blocks left?
	b.eq	Lcbc_done
	nop

Lcbc_dec_tail:
	// One or two remaining blocks, held in v1 and v18.
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// distinguishes 1 vs 2 blocks
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b	// last ciphertext becomes IV
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	Lcbc_done

Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

Lcbc_done:
	st1	{v6.16b},[x4]		// write updated IV back
Lcbc_abort:
	ldr	x29,[sp],#16
	ret

//----------------------------------------------------------------------
// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in,   // x0
//                                  uint8_t *out,        // x1
//                                  size_t blocks,       // x2: 16-byte
//                                  //                        blocks
//                                  const AES_KEY *key,  // x3
//                                  const uint8_t ivec[16]); // x4
//
// CTR mode with a 32-bit big-endian counter in the last word of ivec
// (ldr w8,[x4,#12], byte-reversed on little-endian).  Main loop
// encrypts three counter blocks at a time (Loop3x_ctr32); the tail
// handles the final one or two blocks.  The last five round keys stay
// resident in v20-v23 and v7.
//----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks

.def aes_hw_ctr32_encrypt_blocks
   .type 32
.endef
.align	5
aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]		// round count

	ldr	w8, [x4, #12]		// 32-bit counter word
	ld1	{v0.4s},[x4]		// counter block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov lines
	// could write to v1.16b and v18.16b directly, but that trips this bugs.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
	rev	w8, w8
#endif
	// Build counter+1 (v1) and counter+2 (v18) blocks via v6 per the
	// erratum workaround above.
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b
	b.ls	Lctr32_tail		// at most 2 blocks total
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b
	b	Loop3x_ctr32

.align	4
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	// Final rounds: move keystream into v4/v5/v17 so v0/v1/v18 can be
	// refilled with the next three counter values while finishing.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16	// next 3 input blocks
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// fold last round key into input
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v1.16b is written to work
	// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode. See the comment above.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b	// input ^ keystream
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo bias: 0, 1 or 2 blocks left
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// don't over-read on a single block

Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12==0 when only one block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done		// single block: skip second store
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16
	ret

#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32)