// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto

// Round-constant material for the key schedule, loaded PC-relative via
// adrp/add below:
//   word 0..3: rcon splat (0x01), doubled each round with `shl`
//   word 4..7: byte-permutation mask for `tbl` (rotate-n-splat of the
//              last key word)
//   word 8..11: rcon continuation (0x1b) loaded after the shl chain
//               would overflow
.section .rodata
.align	5
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

.globl	aes_hw_set_encrypt_key

.def aes_hw_set_encrypt_key
   .type 32
.endef
.align	5
// aes_hw_set_encrypt_key(x0, w1, x2)
//   x0: input key bytes (NULL-checked; 16/24/32 bytes read depending on w1)
//   w1: key size in bits — must be 128, 192 or 256 (checked below)
//   x2: output key schedule (NULL-checked); 4-byte round count (10/12/14)
//       is stored at its end — the 240-byte offset restored in
//       aes_hw_set_decrypt_key confirms the schedule layout.
// Returns (via x3 -> x0): 0 on success, -1 for a NULL pointer,
//   -2 for an unsupported key size.
// NOTE(review): argument roles inferred from the checks/loads visible here;
//   presumably matches AES_set_encrypt_key's (key, bits, schedule) — confirm
//   against the C prototype.
aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// staged return value: NULL argument
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// staged return value: bad key size
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64 bits
	b.ne	Lenc_key_abort

	adrp	x3,Lrcon
	add	x3,x3,:lo12:Lrcon
	cmp	w1,#192

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as zero operand below
	ld1	{v3.16b},[x0],#16	// first 128 bits of the user key
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = tbl rotate mask

	b.lt	Loop128			// dispatch on key size (flags from cmp w1,#192)
	b.eq	L192
	b	L256

// 128-bit schedule: 8 loop rounds plus two unrolled tail rounds (the tail
// switches rcon to the 0x1b continuation word first).
.align	4
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// rotate-n-splat last key word
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16	// emit current round key
	aese	v6.16b,v0.16b		// SubBytes via AESE with zero round key
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b	// cascade the previous words
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// add round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next rcon = rcon * 2
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// rcon continuation (0x1b)

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]
	add	x2,x2,#0x50		// advance x2 to the rounds field

	mov	w12,#10			// AES-128: 10 rounds
	b	Ldone

// 192-bit schedule: key is 24 bytes, so the extra 8 bytes live in v4 and
// the tbl mask is shifted down by 8 to rotate the correct word.
.align	4
L192:
	ld1	{v4.8b},[x0],#8		// remaining 64 bits of the key
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	Loop192

	mov	w12,#12			// AES-192: 12 rounds
	add	x2,x2,#0x20		// advance x2 to the rounds field
	b	Ldone

// 256-bit schedule: two 128-bit halves (v3/v4) expanded alternately; the
// second half's step splats the last word without rcon.
.align	4
L256:
	ld1	{v4.16b},[x0]		// second 128 bits of the key
	mov	w1,#7
	mov	w12,#14			// AES-256: 14 rounds
	st1	{v3.4s},[x2],#16

Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// store round count after the schedule
	mov	x3,#0			// success

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret


.globl	aes_hw_set_decrypt_key

.def aes_hw_set_decrypt_key
   .type 32
.endef
.align	5
// aes_hw_set_decrypt_key: same arguments/returns as aes_hw_set_encrypt_key.
// Builds the encryption schedule via Lenc_key, then converts it in place for
// decryption: swaps round keys end-for-end and applies AESIMC
// (InvMixColumns) to every interior round key; first and last keys are only
// swapped, not transformed.
aes_hw_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	Lenc_key

	cmp	x0,#0			// propagate Lenc_key failure unchanged
	b.ne	Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16			// backwards stride for the high pointer
	add	x0,x2,x12,lsl#4	// end of key schedule

	// Swap the outermost pair without AESIMC.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2			// pointers moving toward each other
	b.hi	Loop_imc

	// Odd round count leaves one middle key: transform it in place.
	ld1	{v0.4s},[x2]
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret

.globl	aes_hw_encrypt

.def aes_hw_encrypt
   .type 32
.endef
.align	5
// aes_hw_encrypt(x0, x1, x2): encrypt one 16-byte block.
//   x0: input block, x1: output block, x2: key schedule with round count at
//       [x2,#240].
// Rounds are processed two at a time (w3 = rounds-2 loop counter), with the
// next two round keys loaded inside the loop.
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	// Final two rounds: last round has no MixColumns, just XOR the
	// final round key.
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

.globl	aes_hw_decrypt

.def aes_hw_decrypt
   .type 32
.endef
.align	5
// aes_hw_decrypt(x0, x1, x2): decrypt one 16-byte block.  Mirror of
// aes_hw_encrypt using AESD/AESIMC; expects the inverted schedule produced
// by aes_hw_set_decrypt_key.
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aesd	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

.globl	aes_hw_cbc_encrypt

.def aes_hw_cbc_encrypt
   .type 32
.endef
.align	5
// aes_hw_cbc_encrypt(x0, x1, x2, x3, x4, w5): AES-CBC over x2 bytes.
//   x0: input, x1: output, x2: length in bytes (rounded down to a multiple
//       of 16), x3: key schedule (rounds at [x3,#240]), x4: IV (updated on
//       exit via Lcbc_done), w5: non-zero = encrypt, zero = decrypt.
// Encryption runs one block at a time (CBC chaining is serial); decryption
// is software-pipelined three blocks at a time in Loop3x_cbc_dec with a
// 1-2 block tail.  v16..v23/v7 cache round keys; v6 carries the chain value.
aes_hw_cbc_encrypt:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16			// input stride; zeroed on last block
	b.lo	Lcbc_abort
	csel	x8,xzr,x8,eq

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]		// IV
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// final round key

	add	x7,x3,#32
	mov	w6,w5
	b.eq	Lcbc_dec

	cmp	w5,#2			// w5==2 <=> 10 rounds (AES-128)
	eor	v0.16b,v0.16b,v6.16b	// in ^ IV
	eor	v5.16b,v16.16b,v7.16b	// fold rndkey[0] into last key for next block
	b.eq	Lcbc_enc128

	// Generic (192/256-bit) encrypt path: x6/x12/x14/x3 point at the
	// mid-schedule keys reloaded each iteration.
	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store previous ciphertext block
Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4			// w5==4 <=> 12 rounds (AES-192)
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	Lcbc_enc192

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// don't advance past the last block
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next plaintext block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b	// pre-XOR with rndkey[0]^rndkey[last]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// ciphertext = state ^ last key
	b.hs	Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

// AES-128 encrypt path: entire schedule fits in registers, no reloads.
.align	5
Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	Lcbc_done
.align	5
Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b	// keep ciphertext copies for chaining
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	Lcbc_dec_tail

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

// Three-block interleaved decrypt: v0/v1/v18 are states, v2/v3/v19 keep
// the corresponding ciphertexts for the CBC XOR of the following blocks.
Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// chain value ^ last key
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// new chain value
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b	// plaintext = state ^ (prev ct ^ last key)
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	Loop3x_cbc_dec

	cmn	x2,#0x30		// exactly consumed? then nothing remains
	b.eq	Lcbc_done
	nop

// Tail: one or two remaining blocks, decrypted in v1/v18.
Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// distinguish 1 vs 2 remaining blocks
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	Lcbc_done

Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

Lcbc_done:
	st1	{v6.16b},[x4]		// write back updated IV
Lcbc_abort:
	ldr	x29,[sp],#16
	ret

.globl	aes_hw_ctr32_encrypt_blocks

.def aes_hw_ctr32_encrypt_blocks
   .type 32
.endef
.align	5
// aes_hw_ctr32_encrypt_blocks(x0, x1, x2, x3, x4): AES-CTR with a 32-bit
// big-endian counter in the last word of the IV.
//   x0: input, x1: output, x2: number of 16-byte blocks, x3: key schedule
//       (rounds at [x3,#240]), x4: 16-byte IV/counter block (counter word
//       read from [x4,#12]).
// Main loop processes three counter blocks at a time; Lctr32_tail handles
// the final one or two.  NOTE(review): the counter increment is 32-bit only
// (rev/add on w8) — carry into the upper 96 bits is presumably handled by
// the caller, as in other ctr32 implementations; confirm.
aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]		// big-endian counter word
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// final round key
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: don't advance input

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov lines
	// could write to v1.16b and v18.16b directly, but that trips this bugs.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
	rev	w8, w8			// to host order for arithmetic
#endif
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b	// counter+1 block (via v6, see errata note)
	b.ls	Lctr32_tail		// <=2 blocks: skip the 3x loop
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b	// counter+2 block
	b	Loop3x_ctr32

.align	4
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	// Final rounds move the states into v4/v5/v17 so v0/v1/v18 can be
	// refilled with the next three counter blocks in parallel.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// pre-XOR input with last round key
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v1.16b is written to work
	// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode. See the comment above.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b	// keystream XOR completes the round
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo bias; 0 means nothing left
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block left: don't advance input

// Tail: one or two remaining blocks in v0/v1.
Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1			// second block only stored if it exists
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16
	ret

#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32)